Aggregated Recommendation Engine

## Import Libraries

In [25]:
import pandas as pd
import pickle as pkl

## Read in Dataframes / Pickle Files

In [26]:
def _load_pickle(path):
    """Deserialize and return the object stored in the pickle file at ``path``."""
    # NOTE(security): unpickling can execute arbitrary code. Only load files
    # that were produced by this project's own pipeline, never untrusted ones.
    with open(path, 'rb') as f:
        return pkl.load(f)


# Recipe table (one row per recipe; looked up by its 'id' column below)
df_subset = _load_pickle('df_subset.pkl')

# Apriori rules table ('Base Product', 'Add Product', 'Support')
apriori_rules = _load_pickle('apriori_rules.pkl')

# Loaded from 'cosine-similarities.pkl'; not referenced in the visible cells
cosine_sim3 = _load_pickle('cosine-similarities.pkl')

# Per-recipe similarity table with 'id', 'id_*' and 'score_*' columns
cosine_sim_30 = _load_pickle('top_30_cosine_similarity.pkl')

In [36]:
# Show the full record of the recipe whose id is 494784
df_subset.loc[df_subset['id'] == 494784.0]

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,URL
5349,lemon granola chicken salad rsc,494784,25,2677925,2013-02-05,"['weeknight', '30-minutes-or-less', 'time-to-m...","[631.2, 55.0, 40.0, 27.0, 69.0, 36.0, 14.0]",16,"['1', 'in large skillet heat about 1 / 4 cup o...","ready, set, cook! hidden valley contest entry....","['chicken breasts', 'egg', 'hidden valley orig...",9,https://img.sndimg.com/food/image/upload/f_aut...


In [28]:
# Index label of the first row whose id is 492410
df_subset.index[df_subset['id'] == 492410.0][0]

5843

In [29]:
# Every distinct recipe id that appears as the base product of at least one
# apriori rule (the recipes that have collaborative-filtering matches)
recipe_rules = [*apriori_rules['Base Product'].unique()]
print(recipe_rules)

[486261, 495124, 495275, 495577, 494631, 487073, 494435, 514965, 494609, 494972, 494671, 494303, 495271, 494784, 487492, 495152, 496573, 487593, 495967, 487551, 495134, 487039, 514423, 496591, 493958, 487669, 518145, 518069, 496730, 518068, 518151, 475041, 493963, 497130, 497295, 497382, 496552, 497236, 497261, 518143, 494953, 487568, 475819, 475780, 518229, 496767]


In [30]:
# Display the apriori rules table (columns: Base Product, Add Product, Support)
apriori_rules

Unnamed: 0,Base Product,Add Product,Support
13,486261,486267,0.004384
258,495124,495577,0.003313
293,495275,495577,0.002923
309,495577,496573,0.002728
257,495124,495275,0.002728
...,...,...,...
237,494784,514423,0.001072
238,494784,518145,0.001072
240,494953,495577,0.001072
115,487669,496573,0.001072


In [31]:
# Keep only the rules whose base product is recipe 494784
df = apriori_rules.loc[apriori_rules['Base Product'] == 494784]

# Walk the matching rules and print the base product next to its support
for rule_label, rule in df.iterrows():
    print(rule['Base Product'], rule['Support'])

494784.0 0.0021434138737334374
494784.0 0.0020459859703819173
494784.0 0.0020459859703819173
494784.0 0.0018511301636788776
494784.0 0.0018511301636788776
494784.0 0.0018511301636788776
494784.0 0.001656274356975838
494784.0 0.001656274356975838
494784.0 0.0013639906469212783
494784.0 0.0012665627435697584
494784.0 0.0012665627435697584
494784.0 0.0012665627435697584
494784.0 0.0010717069368667187
494784.0 0.0010717069368667187


In [32]:
# Display the apriori rules table (columns: Base Product, Add Product, Support)
apriori_rules

Unnamed: 0,Base Product,Add Product,Support
13,486261,486267,0.004384
258,495124,495577,0.003313
293,495275,495577,0.002923
309,495577,496573,0.002728
257,495124,495275,0.002728
...,...,...,...
237,494784,514423,0.001072
238,494784,518145,0.001072
240,494953,495577,0.001072
115,487669,496573,0.001072


In [33]:
# 'Base Product' values of every rule whose base product is 486261
apriori_rules.loc[apriori_rules['Base Product'] == 486261, 'Base Product'].values

array([486261])

## Recommendation Engine

In [34]:
def _lookup_recipe(recipe_id):
    """Return ``(index label, name)`` of the first ``df_subset`` row whose ``id``
    equals ``recipe_id``, or ``None`` when no row matches."""
    matches = df_subset.loc[df_subset['id'] == recipe_id, 'name']
    if matches.empty:
        return None
    return matches.index[0], matches.iloc[0]


def get_recommendations(recipe_index=494784, n_recommendations=30):
    """Recommend up to ``n_recommendations`` recipes for a single recipe.

    Slots are filled first from the apriori rules (collaborative filtering)
    whose 'Base Product' equals ``recipe_index``, in the order they appear in
    ``apriori_rules``. Any slots left over are filled from the ``id_*`` /
    ``score_*`` column pairs of the matching ``cosine_sim_30`` row (content
    filtering), in column order.

    Parameters
    ----------
    recipe_index : int
        Recipe id to get recommendations for. Despite the name, it is matched
        against ``apriori_rules['Base Product']`` and ``cosine_sim_30['id']``,
        not against a positional index.
    n_recommendations : int, default 30
        Maximum number of rows to return. Values below zero are treated as 0.

    Returns
    -------
    pandas.DataFrame
        Columns ``index`` (index label of the recipe in ``df_subset``),
        ``name``, ``score`` (rule support or similarity score) and ``type``
        (``'apriori'`` or ``'cosine_sim'``), on a fresh 0..n-1 index. The frame
        is empty when the recipe has neither rules nor a similarity row.
        Referenced recipes that are missing from ``df_subset`` are skipped.
    """
    n_recommendations = max(0, n_recommendations)

    # Collect plain tuples and build the frame once at the end, rather than
    # growing a DataFrame one row at a time.
    records = []

    # --- collaborative filtering: apriori rules for this base product -------
    rules = apriori_rules[apriori_rules['Base Product'] == recipe_index]
    for add_product, support in zip(rules['Add Product'], rules['Support']):
        # Never hand back more rows than requested, even if more rules exist.
        if len(records) >= n_recommendations:
            break
        found = _lookup_recipe(add_product)
        if found is None:
            continue
        label, name = found
        records.append((label, name, support, 'apriori'))

    # --- content filtering: fill whatever is left from cosine similarity ----
    remaining = n_recommendations - len(records)
    if remaining > 0:
        # Filter the similarity table once instead of once per column.
        sim_rows = cosine_sim_30[cosine_sim_30['id'] == recipe_index]
        if not sim_rows.empty:
            id_columns = [col for col in cosine_sim_30.columns if col.startswith('id_')]
            score_columns = [col for col in cosine_sim_30.columns if col.startswith('score_')]

            # id_* and score_* columns are paired up by their column order.
            for id_col, score_col in zip(id_columns, score_columns):
                if remaining == 0:
                    break
                found = _lookup_recipe(sim_rows[id_col].values[0])
                if found is None:
                    continue
                # Use the label/name resolved by the lookup directly; feeding
                # an index label into a positional ``.iloc`` would select the
                # wrong row whenever df_subset's index is not 0..n-1.
                label, name = found
                records.append((label, name, sim_rows[score_col].values[0], 'cosine_sim'))
                remaining -= 1

    return pd.DataFrame(records, columns=['index', 'name', 'score', 'type'])




In [35]:
# Recommendations for the default recipe (id 494784)
get_recommendations(recipe_index=494784)

Unnamed: 0,index,name,score,type
0,5327,chicken ranchiladas rsc,0.002143,apriori
1,5165,shredded potato baskets with cheese and bacon ...,0.002046,apriori
2,5273,peppered buffalo ranch shrimp pizza rsc,0.002046,apriori
3,5298,warm roasted root vegetable and chicken salad ...,0.001851,apriori
4,5297,the hidden valley pizza rsc,0.001851,apriori
5,4983,cheesy potato italian sausage balls 5fix,0.001851,apriori
6,5097,divine mashed potatoes w fontina sage brussels...,0.001656,apriori
7,5006,crab prosciutto green onion potato cakes ...,0.001656,apriori
8,4969,simply dilly icous egg sandwich 5fix,0.001364,apriori
9,4870,quick and easy crabby caesar salads 5fix,0.001267,apriori
