## Import Libraries

In [73]:
import pandas as pd
import pickle as pkl

## Read in Dataframes / Pickle Files

In [82]:
def _load_pickle(path):
    """Deserialize and return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pkl.load(handle)


# NOTE: unpickling can execute arbitrary code; only load files you produced or trust.
# Recipe table; later cells read its 'id', 'Unnamed: 0' and 'name' columns.
df_subset = _load_pickle('df_subset.pkl')

# Association rules; later cells read its 'Base Product' column.
apriori_rules = _load_pickle('apriori-rules.pkl')

# Contents are whatever 'cosine-similarities.pkl' holds; not used by the cells shown below.
cosine_sim3 = _load_pickle('cosine-similarities.pkl')

# One row per recipe 'id' with id_1..id_30 / score_1..score_30 neighbour columns.
cosine_sim_30 = _load_pickle('top_30_cosine_similarity.pkl')

In [146]:


# 'Unnamed: 0' value of the first df_subset row whose id is 492410
df_subset.loc[df_subset['id'] == 492410.0, 'Unnamed: 0'].values[0]


5856

In [75]:
# Distinct 'Base Product' values of the apriori rules, i.e. every recipe that has
# at least one rule relating it to another recipe (collaborative filtering).
recipe_rules = [base_product for base_product in apriori_rules['Base Product'].unique()]
print(recipe_rules)

[486261, 495124, 495275, 495577, 494631, 514965, 494435, 487073, 494972, 494609, 494784, 495271, 494671, 494303, 495152, 487492, 487593, 495967, 487551, 496573, 495134, 487039, 514423, 487669, 493958, 518145, 518069, 496591, 475041, 496730, 518068, 518151, 493963, 496552, 497295, 497130, 497382, 497236, 497261, 518143, 487568, 494953, 475780, 475819, 518229, 496767]


In [76]:
def recommendation_engine(user_rated_recipes, min_rating=3):
    """Print every rated recipe whose rating is not below `min_rating`.

    Parameters
    ----------
    user_rated_recipes : dict
        Mapping of recipe key -> rating given by the user.
    min_rating : int or float, default 3
        Ratings strictly below this value are skipped. The default keeps the
        previously hard-coded threshold.

    Returns
    -------
    None
        Each kept entry is printed as ``<key> > <rating>`` on its own line,
        in the dict's iteration order.
    """
    for recipe_key, rating in user_rated_recipes.items():
        # Written as `not <` (not `>=`) so the comparison behaves exactly as before.
        if not rating < min_rating:
            print(recipe_key, '>', rating)

In [77]:
# Dummy data: recipe key -> rating. The keys match the row indices that appear in
# the recommendation output further down (presumably df_subset rows - confirm).
user_rated_recipes = {
    5856: 2,
    5991: 4,
    2004: 5,
}

recommendation_engine(user_rated_recipes)

5991 > 4
2004 > 5


In [92]:
# Preview the first five rows of the top-30 similarity table
cosine_sim_30.head(n=5)

Unnamed: 0,id,id_1,score_1,id_2,score_2,id_3,score_3,id_4,score_4,id_5,...,id_26,score_26,id_27,score_27,id_28,score_28,id_29,score_29,id_30,score_30
0,537716.0,495705.0,0.331132,496709.0,0.301285,504083.0,0.300203,481997.0,0.293728,507069.0,...,502902.0,0.211165,496026.0,0.208722,489223.0,0.207061,481261.0,0.205652,498991.0,0.204586
1,537671.0,534266.0,0.525129,537089.0,0.446348,491099.0,0.380623,477064.0,0.378137,536506.0,...,483838.0,0.281753,488299.0,0.279957,502876.0,0.279513,488222.0,0.279091,528565.0,0.278193
2,537543.0,492410.0,0.526627,491474.0,0.425138,491511.0,0.413226,511697.0,0.400864,506726.0,...,509676.0,0.26207,518616.0,0.261922,511060.0,0.259648,482360.0,0.258343,532257.0,0.258129
3,537485.0,489383.0,0.389825,506922.0,0.343086,481124.0,0.340749,496040.0,0.336974,479412.0,...,488862.0,0.199942,483144.0,0.199496,504628.0,0.195071,500101.0,0.193232,511351.0,0.189071
4,537459.0,485835.0,0.655296,481351.0,0.360493,491868.0,0.328418,490045.0,0.312653,485911.0,...,492833.0,0.186912,517500.0,0.183236,521678.0,0.181751,476715.0,0.180648,482222.0,0.179698


In [116]:
# Select columns containing ids and scores ('id' itself does not match the 'id_' prefix)
id_columns = [col for col in cosine_sim_30.columns if col.startswith('id_')]
score_columns = [col for col in cosine_sim_30.columns if col.startswith('score_')]

# Recipe whose neighbours are inspected in this cell
QUERY_RECIPE_ID = 537716

# Filter the frame once instead of twice per loop iteration
query_row = cosine_sim_30[cosine_sim_30['id'] == QUERY_RECIPE_ID]

# Create a list of [neighbour id, similarity score] pairs, in column order
output = []

for id_col, score_col in zip(id_columns, score_columns):
    recipe_id = query_row[id_col].values[0]
    recipe_score = query_row[score_col].values[0]

    output.append([recipe_id, recipe_score])

print(output)

[[495705.0, 0.3311317648985719], [496709.0, 0.3012850075575148], [504083.0, 0.30020257912990145], [481997.0, 0.2937280205124082], [507069.0, 0.2782486245103265], [536930.0, 0.27011530637153414], [512751.0, 0.24863281385325003], [485412.0, 0.24636516515267604], [526499.0, 0.24471774606941976], [487506.0, 0.24172091938318407], [494609.0, 0.2384475997936501], [504430.0, 0.23675288264882227], [476284.0, 0.2335288978351407], [519699.0, 0.22700816088849035], [523551.0, 0.22640955300220095], [489623.0, 0.2255103113669273], [480731.0, 0.2251324822926818], [501526.0, 0.22226243205944934], [482138.0, 0.22156943222156572], [493084.0, 0.22030036922649002], [512390.0, 0.2193011177146628], [500788.0, 0.2183478784912224], [493413.0, 0.2180890737626205], [512719.0, 0.21618358682910555], [489046.0, 0.21164900100675604], [502902.0, 0.211164608879898], [496026.0, 0.20872169391617723], [489223.0, 0.2070608966407159], [481261.0, 0.20565225313265018], [498991.0, 0.20458616195807822]]


## Recommendation Engine

In [164]:
def get_recommendations(recipe_index):
    """Return the content-based (cosine similarity) recommendations for a recipe.

    Parameters
    ----------
    recipe_index
        Despite its name, this is a recipe *id*: it is matched against
        ``apriori_rules['Base Product']`` and ``cosine_sim_30['id']``.

    Returns
    -------
    pandas.DataFrame
        One row per recommended recipe, in the order of the ``id_k`` columns of
        ``cosine_sim_30``: the ``df_subset`` index (as produced by
        ``reset_index()``), the recipe ``name`` and its similarity ``score``.
        At most ``30 - <number of apriori rules for this recipe>`` rows
        (never fewer than 0).

    Raises
    ------
    IndexError
        If the recipe is absent from ``cosine_sim_30`` or a neighbour that
        would be returned is absent from ``df_subset``.

    Notes
    -----
    Reads the notebook-level frames ``apriori_rules``, ``cosine_sim_30`` and
    ``df_subset``. Only the *count* of apriori rules is used, to reserve slots;
    the rules themselves are not merged into the result here.
    """
    max_recommendations = 30

    # Fill first with apriori rules (collaborative filtering), then cosine sim rules (content filtering)
    matching_rules = apriori_rules[apriori_rules['Base Product'] == recipe_index]
    remaining = max_recommendations - min(max_recommendations, len(matching_rules))

    # Select columns containing ids and scores
    id_columns = [col for col in cosine_sim_30.columns if col.startswith('id_')]
    score_columns = [col for col in cosine_sim_30.columns if col.startswith('score_')]

    # Look the recipe's row up once; it does not change between loop iterations
    recipe_row = cosine_sim_30[cosine_sim_30['id'] == recipe_index]
    if recipe_row.empty:
        raise IndexError(
            "recipe id {!r} not found in cosine_sim_30['id']".format(recipe_index)
        )

    # list to contain pairs of df index and similarity score
    output = []

    # Only resolve the neighbours that will actually be returned
    for id_col, score_col in list(zip(id_columns, score_columns))[:remaining]:

        # id of the similar recipe
        recipe_id = recipe_row[id_col].values[0]

        # get index in dataframe from recipe id
        df_id = df_subset[df_subset['id'] == recipe_id]['Unnamed: 0'].values[0]

        # recipe similarity score
        recipe_score = recipe_row[score_col].values[0]

        output.append([df_id, recipe_score])

    # Get the recipe indices and corresponding recipe similarity score
    recipe_indices = [pair[0] for pair in output]
    recipe_similarity_scores = [pair[1] for pair in output]

    # 'Unnamed: 0' values are used as positional row numbers into df_subset
    recommendations_df = df_subset['name'].iloc[recipe_indices].to_frame().reset_index()
    recommendations_df['score'] = recipe_similarity_scores

    return recommendations_df

In [165]:
# Example: recommendations for recipe id 537543
get_recommendations(recipe_index=537543)

Unnamed: 0,index,name,score
0,5856,gluten free gingerbread cake,0.526627
1,5991,gingerbread spice blend,0.425138
2,5983,tartine s soft glazed gingerbread,0.413226
3,2004,gingerbread smoothie,0.400864
4,2670,pumpkin gingerbread w caramel sauce,0.400677
5,4646,homemade brown sugar,0.336926
6,1281,ginger snaps,0.327445
7,5891,crunchy gingerbread cookies,0.32402
8,7079,applesauce apple butter,0.320176
9,7455,cardamom ginger crunch,0.31711
