In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model
import pickle

## Prediction

In [2]:
# Load the saved model
loaded_model = load_model("savedmodel/")

In [3]:
df = pd.read_csv("data/eval_dataset.csv")
df.drop(['aisle_id', 'department_id'], axis=1, inplace=True)
df.head()

Unnamed: 0,product_id,order_dow,order_hour_of_day,product_name
0,1,5,9,Organic Egg Whites
1,2,5,9,Carrots
2,3,5,9,Garlic Powder
3,4,5,17,Unsweetened Chocolate Almond Breeze Almond Milk
4,5,5,17,Unsweetened Almondmilk


In [4]:
# Load the dictionary from the pickle file
with open('dictionary.pkl', 'rb') as file:
    loaded_dictionary = pickle.load(file)

# View the contents of the loaded dictionary
for key, values in loaded_dictionary.items():
    product_id = key
    aisle_id, department_id = values

In [5]:
# User defined values for day of week and hour of day
dow = 3
hour_of_day = 13

In [6]:
dict = {
    'product_id': [],
    'order_dow': [],
    'order_hour_of_day': [],
    'aisle_id':[],
    'department_id': []
}

In [7]:
for key, (value1, value2) in loaded_dictionary.items():
    dict['product_id'].append(key)
    dict['order_dow'].append(dow)
    dict['order_hour_of_day'].append(hour_of_day)
    dict['aisle_id'].append(value1)
    dict['department_id'].append(value2)

In [8]:
for key in dict:
    dict[key] = np.array(dict[key]).astype('int64')

In [9]:
# Use the loaded model for predictions
pred_ans = loaded_model.predict(dict, batch_size=256)



In [10]:
# Selecting indices of the items with highest recommendation values
# Flatten the array to get a 1-dimensional array
flattened_arr = pred_ans.flatten()

# Get the indices that would sort the array in descending order
sorted_indices = np.argsort(flattened_arr)[::-1]

# Get the top 10 indices
top_10_indices = sorted_indices[:10]

# Print the top 10 indices
print("Top 10 indices:", top_10_indices)

Top 10 indices: [428 631 437 112 146  39 141 638  35 695]


In [11]:
product_list  = []
for index in top_10_indices:
    product_name = df.loc[df['product_id'] == index, 'product_name'].iloc[0]
    product_list.append(product_name)
    
product_list

['Turmeric Root',
 'Fresh Frozen Wild Blueberries',
 'Goo Berry Pie Probugs Kefir',
 'Ultra Soft Bathroom Tissue, Double Rolls',
 'Organic Original Hommus',
 'Original Popcorn',
 'Blood Oranges',
 'Tangerine',
 'Pinto Beans No Salt Added',
 'Red Peppers']

In [15]:
original = df.loc[(df['order_dow'] == dow) & (df['order_hour_of_day'] == hour_of_day), 'product_name'].tolist()
original

['Whole Organic Omega 3 Milk',
 'Organic Heavy Whipping Cream',
 'Salted Butter',
 'Organic Strawberry Fruit Spread',
 'Cultured Low Fat Buttermilk',
 'Organic Whole Milk',
 'Kid Z Bar Organic Honey Graham Energy Snack',
 'Organic Yokids Lemonade/Blueberry Variety Pack Yogurt Squeezers Tubes',
 'Organic Blackberries',
 'Banana',
 'Pure Baking Soda',
 'Semi-Sweet Chocolate Premium Baking Chips',
 'Large Lemon',
 'Honeycrisp Apple',
 'Peach on the Bottom Nonfat Greek Yogurt',
 'Deli Fresh Smoked Turkey Breast',
 'Banana',
 'Ground Cinnamon',
 'Genuine Chocolate Flavor Syrup',
 'Jet Puffed Miniature Marshmallows',
 'Chocolate Bar Milk Stevia Sweetened Salted Almond',
 'Natural Chicken & Sage Breakfast Sausage',
 'Organic Cucumber',
 'Organic Grade A Free Range Large Brown Eggs',
 'Gluten Free Covered Pretzels Yogurt',
 'Super Seed Crispy Crackers',
 'Organic Fat Free Milk',
 'Organic Italian Parsley Bunch',
 'Chicken Pad Thai With Rice Noodles',
 'Yellow Onions',
 'Flaky Biscuits',
 'Orga

## Evaluation

In [17]:
def evaluate_recommendations(true_items, recommended_items, K):
    # Precision@K
    precision = len(set(true_items) & set(recommended_items[:K])) / float(K)
    
    # Recall@K
    recall = len(set(true_items) & set(recommended_items[:K])) / float(len(true_items))
    
    # nDCG@K
    dcg = 0.0
    idcg = 0.0
    for i in range(K):
        item = recommended_items[i]
        if item in true_items:
            relevance = 1.0 / np.log2(i + 2)
            dcg += relevance
        idcg += 1.0 / np.log2(i + 2)
    ndcg = dcg / idcg
    
    # Item coverage@K
    item_coverage = len(set(recommended_items[:K])) / float(len(set(original)))
    
    # MAP@K
    relevant_items = set(true_items)
    avg_precision = 0.0
    num_hits = 0
    for i in range(K):
        item = recommended_items[i]
        if item in relevant_items:
            num_hits += 1
            avg_precision += num_hits / float(i + 1)
    mean_avg_precision = avg_precision / min(len(relevant_items), K)
    
    # Hit Rate@K
    hit_rate = len(set(recommended_items[:K]) & set(true_items)) / float(len(true_items))
    
    return precision, recall, ndcg, item_coverage, mean_avg_precision, hit_rate

In [19]:
print("precision@K, recall@K, ndcg@K, item_coverage@K, MAP@K, hitrate@K are: ")
evaluate_recommendations(original, product_list, 9)

precision@K, recall@K, ndcg@K, item_coverage@K, MAP@K, hitrate@K are: 


(0.2222222222222222,
 0.00062015503875969,
 0.16927654709047393,
 0.006493506493506494,
 0.05396825396825397,
 0.00062015503875969)