This notebook builds a very simple hybrid recommender system that combines both a content-based and a user-based collaborative filtering (CF) system.

In [62]:
import pandas as pd
import numpy as np
import random
from scipy.sparse import coo_matrix
import pickle
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate,train_test_split,KFold
from surprise import SVD,SVDpp, NMF,SlopeOne,CoClustering
from surprise import accuracy
from collections import defaultdict

# Seed Python's built-in `random` module so the random recipe sample drawn
# later in the notebook is repeatable. Only `random` is seeded here; numpy's
# generator is left untouched.
my_seed = 0
random.seed(my_seed)

In [2]:
# Load the pickled prediction algorithm (used as the default `algo` of
# get_users_pred_score). The `with` block closes the file handle once loading
# is done instead of leaving it open for the lifetime of the kernel.
# NOTE(review): pickle.load can execute arbitrary code -- only load files
# that come from a trusted source.
with open("output/SVD_algo.pkl",'rb') as file:
    SVD_algo = pickle.load(file)

In [3]:
# Load the pickled recipe-name lookup (indexed by recipe id in
# translate_recipe_names). The `with` block closes the file handle once
# loading is done.
# NOTE(review): pickle.load can execute arbitrary code -- only load files
# that come from a trusted source.
with open("output/recipes_names.pkl",'rb') as file:
    rep_names = pickle.load(file)

In [4]:
# Load the pickled recipe matrix (used as the default `U` of
# get_recipe_similar_score, which indexes it by row). The `with` block closes
# the file handle once loading is done.
# NOTE(review): pickle.load can execute arbitrary code -- only load files
# that come from a trusted source.
with open("output/rep_mtx.pkl",'rb') as file:
    rep_U = pickle.load(file)

Here, `iids` refers to a list of unique recipe IDs.

In [43]:
def get_recipe_similar_score(iids, U = rep_U):
    """Score every row of ``U`` that is not in ``iids`` against the ``iids`` rows.

    For each candidate row index, the score is the mean of the dot products
    between that row and each of the rows listed in ``iids``.

    Parameters
    ----------
    iids : list of int
        Row indices of ``U`` to compare against (recipe IDs, per the notebook).
        Must be re-iterable, since it is walked once per candidate.
    U : matrix-like
        Object exposing ``.shape[0]`` and row indexing ``U[i]``.
        Presumably one feature vector per recipe -- TODO confirm.

    Returns
    -------
    tuple of (list of int, list of float)
        The candidate row indices (ascending, ``iids`` excluded) and their
        scores, aligned position by position.

    Notes
    -----
    With an empty ``iids`` every score is ``nan`` (``np.mean`` of an empty
    list), and numpy emits a RuntimeWarning.
    """
    # A set makes each exclusion check O(1); scanning the list for every row
    # of U would be quadratic in the worst case.
    excluded = set(iids)
    candidates = [row for row in range(U.shape[0]) if row not in excluded]

    scores = [
        float(np.mean([np.dot(U[source], U[candidate]) for source in iids]))
        for candidate in candidates
    ]

    return candidates, scores

In [35]:
def get_users_pred_score(iids,algo = SVD_algo,uid = 226571,n_items = 231637):
    """Predict ratings of user ``uid`` for every item id not in ``iids``.

    Parameters
    ----------
    iids : iterable of int
        Item (recipe) ids to leave out of the prediction.
    algo : object with a ``test(testset)`` method
        Prediction algorithm; each returned prediction must expose ``.est``.
        Presumably a fitted ``surprise`` algorithm -- TODO confirm.
    uid : int
        User id the predictions are made for.
        NOTE(review): the meaning of the default 226571 is not visible here.
    n_items : int
        Number of item ids to consider; ids ``0 .. n_items - 1`` are scanned.
        Defaults to 231637, the value previously hard-coded in the body.

    Returns
    -------
    list of float
        Estimated ratings, in ascending item-id order with ``iids`` skipped.
    """
    # A set makes each exclusion check O(1) rather than a scan of the list
    # for each of the n_items candidates.
    excluded = set(iids)
    iid_to_test = [iid for iid in range(n_items) if iid not in excluded]

    # Each test row is [user, item, rating]; the 4. is a placeholder rating.
    # NOTE(review): presumably it does not influence `.est` -- confirm
    # against the surprise documentation.
    test_set = [[uid, iid, 4.] for iid in iid_to_test]

    predictions = algo.test(test_set)

    # Keep only the estimated rating of each prediction.
    return [pred.est for pred in predictions]

In [50]:
def hybrid_model_reco(iids,n_reco = 10):
    """Recommend ``n_reco`` recipe ids by blending two scores with equal weight.

    The similarity scores from ``get_recipe_similar_score`` and the predicted
    ratings from ``get_users_pred_score`` are paired position by position and
    averaged; candidates are then ranked by that average, highest first.

    Parameters
    ----------
    iids : list of int
        Recipe ids used as the query; passed unchanged to both scorers.
    n_reco : int
        Number of recommendations to return.

    Returns
    -------
    list of int
        The ids of the top-ranked candidates, best first.
    """
    candidate_ids, similarity = get_recipe_similar_score(iids)
    predicted = get_users_pred_score(iids)

    # zip stops at the shortest input, so any length mismatch between the two
    # score lists silently truncates the candidate list.
    blended = []
    for candidate, sim, pred in zip(candidate_ids, similarity, predicted):
        blended.append((candidate, (sim + pred) * 0.5))

    # Stable descending sort on the blended score.
    blended.sort(key=lambda pair: pair[1], reverse=True)

    return [candidate for candidate, _ in blended[:n_reco]]

In [None]:
def translate_recipe_names(results,rep_names = rep_names):
    """Map recipe ids to display names.

    Each id in ``results`` is looked up in ``rep_names`` and the stored name is
    passed through ``pretty_text``.

    Parameters
    ----------
    results : iterable
        Recipe ids, used as indices/keys into ``rep_names``.
    rep_names : indexable
        Lookup from recipe id to its raw name string.

    Returns
    -------
    list of str
        The formatted names, in the same order as ``results``.
    """
    pretty_names = []
    for recipe_id in results:
        raw_name = rep_names[recipe_id]
        pretty_names.append(pretty_text(raw_name))
    return pretty_names

def pretty_text(text):
    """Format a raw recipe name as a human-readable title.

    A stand-alone " s " is turned back into a possessive ("mom s pie" ->
    "mom's pie"), then every word is capitalized and the words are joined
    with single spaces.

    Parameters
    ----------
    text : str
        Raw name to format.

    Returns
    -------
    str
        The formatted name; an empty string if ``text`` has no words.
    """
    text = text.replace(" s ", "'s ")
    # split() with no argument splits on any run of whitespace and never
    # yields empty tokens, so tabs or newlines cannot leave empty words
    # (and therefore doubled spaces) in the result.
    return " ".join(word.capitalize() for word in text.split())

In [61]:
# Example query: four recipe ids, shown by name before they are fed to the
# recommender.
iids = [23,56,34,111]
translate_recipe_names(iids)

['Butter Madeira Cake',
 'Cheese Rolls',
 'Chicken Breasts Saltimbocca',
 'Swiss Crab Melt']

In [52]:
# Run the hybrid recommender on the example ids (default: top 10 recipe ids).
results = hybrid_model_reco(iids)

In [60]:
# Show the recommended recipe ids as formatted names.
translate_recipe_names(results)

['Mexican Stack Up Rsc',
 'Ragu Shuka Ragu',
 'Simply Irresistible Tropical Potato Salad Sp5',
 'Vegan Truffles',
 'Crunchy Valley Chicken Rsc',
 'Blue Ribbon Carrot Cake With Buttermilk Glaze',
 'Boursin Homemade',
 'Rumbledethumps Celtic Potato Cabbage Cheese Gratin',
 'Cheesy Chicken Pot Biscuit Cups Low Fat Low Cal',
 'Lemon Pie']

In [67]:
# Draw 12 random recipe ids. random.randint includes BOTH endpoints, so the
# upper bound has to be len(rep_names) - 1; using len(rep_names) could yield
# an id one past the last entry and fail on lookup.
l = [random.randint(0,len(rep_names) - 1) for _ in range(12)]

In [69]:
# Show the randomly sampled recipe ids as formatted names.
translate_recipe_names(l)

['Premium Black Bean Soup',
 'Wasabi Popcorn',
 'Three Cheese Jalapeno Crescent Pinwheels',
 'Thai Style Broccoli With Garlic',
 'Fruit Flips',
 'Hawaiian Ham And Swiss Sandwich',
 'Sugar Free Flax Seed Meal Your Choice Muffins',
 'Sweet And Sour Bean Salad',
 'Grilled Halibut Simply Delicious',
 'Chilled Cafe Latte',
 'Canadian Potato Salad',
 'Bistec A La Mexicana']

In [70]:
# Display the raw sampled recipe ids.
l

[100989,
 198693,
 110250,
 10612,
 67873,
 134027,
 127383,
 106151,
 205468,
 217540,
 79511,
 124937]