## Import Libraries

In [2]:
import pandas as pd
import pickle as pkl

## Read in Dataframes / Pickle Files

In [9]:
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
def _read_pickle(path):
    """Deserialize and return the object stored in the pickle file at ``path``."""
    with open(path, 'rb') as handle:
        return pkl.load(handle)


df_subset = _read_pickle('df_subset.pkl')
apriori_rules = _read_pickle('apriori_rules.pkl')
cosine_sim3 = _read_pickle('cosine-similarities.pkl')
cosine_sim_30 = _read_pickle('top_30_cosine_similarity.pkl')

In [146]:


# Value of the 'Unnamed: 0' column for the first row whose recipe id is 492410
df_subset.loc[df_subset['id'] == 492410.0, 'Unnamed: 0'].values[0]


5856

In [75]:
# Distinct base recipes that appear in at least one apriori rule (collaborative filtering)
recipe_rules = [*apriori_rules['Base Product'].unique()]
print(recipe_rules)

[486261, 495124, 495275, 495577, 494631, 514965, 494435, 487073, 494972, 494609, 494784, 495271, 494671, 494303, 495152, 487492, 487593, 495967, 487551, 496573, 495134, 487039, 514423, 487669, 493958, 518145, 518069, 496591, 475041, 496730, 518068, 518151, 493963, 496552, 497295, 497130, 497382, 497236, 497261, 518143, 487568, 494953, 475780, 475819, 518229, 496767]


In [76]:
def recommendation_engine(user_rated_recipes):
    """Print every entry of ``user_rated_recipes`` whose rating is not below 3.

    ``user_rated_recipes`` is a mapping of recipe key -> rating. Each entry
    that passes the filter is printed as ``<key> > <rating>``; nothing is
    returned.
    """
    for recipe_key, rating in user_rated_recipes.items():
        if rating < 3:
            # low ratings are ignored
            continue
        print(recipe_key, '>', rating)

In [77]:
# Dummy ratings for a quick smoke run: recipe key -> rating
user_rated_recipes = {
    5856: 2,
    5991: 4,
    2004: 5,
}

recommendation_engine(user_rated_recipes)

5991 > 4
2004 > 5


In [174]:
# Display the association-rule table (pandas abbreviates the middle rows of a long frame).
apriori_rules

Unnamed: 0,Base Product,Add Product,Support
0,486261,"(486267,)",0.004377
1,495124,"(495577,)",0.003307
2,495275,"(495577,)",0.002918
3,495124,"(495275,)",0.002724
4,495577,"(496573,)",0.002724
...,...,...,...
1510,496767,"(495275,)",0.001070
1511,494303,"(496552, 494435)",0.001070
1512,497295,"(495275,)",0.001070
1513,487492,"(487669, 494303)",0.001070


In [30]:
# Keep only the rules whose base recipe is 494784
df = apriori_rules.loc[apriori_rules['Base Product'] == 494784]

# Walk the matching rules and print base recipe + support for each one
for _, rule in df.iterrows():
    print(rule['Base Product'], rule['Support'])

494784.0 0.0021400778210116
494784.0 0.0020428015564202
494784.0 0.0020428015564202
494784.0 0.0018482490272373
494784.0 0.0018482490272373
494784.0 0.0018482490272373
494784.0 0.0016536964980544
494784.0 0.0016536964980544
494784.0 0.0013618677042801
494784.0 0.0012645914396887
494784.0 0.0012645914396887
494784.0 0.0012645914396887
494784.0 0.0010700389105058
494784.0 0.0010700389105058


In [10]:
# List the column names of the rule table. The output recorded for this cell
# is the column Index, and the full frame is already displayed in another cell.
apriori_rules.columns

Index(['Unnamed: 0', 'Base Product', 'Add Product', 'Support'], dtype='object')

In [19]:
# 'Base Product' values of every rule whose base recipe is 486261
apriori_rules.loc[apriori_rules['Base Product'] == 486261, 'Base Product'].values

494784

## Recommendation Engine

In [78]:
def _rule_item_ids(added):
    """Return the recipe ids held in one 'Add Product' cell as a list.

    A cell may hold a single id or a collection of ids (the rule table shows
    values such as ``(495577,)`` and ``(496552, 494435)``).
    """
    if isinstance(added, (str, bytes)) or not hasattr(added, '__iter__'):
        return [added]
    return list(added)


def _candidate_recommendations(recipe_index, rules, similar):
    """Yield ``(recipe_id, score, type)`` candidates: apriori rules first, then cosine neighbours."""
    matched_rules = rules[rules['Base Product'] == recipe_index]
    for added, support in zip(matched_rules['Add Product'], matched_rules['Support']):
        for added_id in _rule_item_ids(added):
            yield added_id, support, 'apriori'

    # Columns holding neighbour ids and their scores; they are paired in column order.
    id_columns = [col for col in similar.columns if col.startswith('id_')]
    score_columns = [col for col in similar.columns if col.startswith('score_')]

    # Look the recipe's similarity row up once instead of once per column.
    source_rows = similar[similar['id'] == recipe_index]
    if source_rows.empty:
        return
    source = source_rows.iloc[0]
    for id_col, score_col in zip(id_columns, score_columns):
        yield source[id_col], source[score_col], 'cosine_sim'


def get_recommendations(recipe_index=494784, n_recommendations=30,
                        rules=None, recipes=None, similar=None):
    """Recommend recipes related to the recipe whose id is ``recipe_index``.

    Slots are filled first from the apriori rules whose 'Base Product' equals
    ``recipe_index`` (collaborative filtering), in stored rule order, and the
    remaining slots from the precomputed cosine-similarity neighbours
    (content filtering), in column order.

    Parameters
    ----------
    recipe_index : scalar
        Recipe id, matched against ``rules['Base Product']`` and ``similar['id']``.
    n_recommendations : int, default 30
        Maximum number of rows returned.
    rules, recipes, similar : pandas.DataFrame, optional
        Rule table, recipe table and top-N similarity table. When omitted, the
        notebook globals ``apriori_rules``, ``df_subset`` and ``cosine_sim_30``
        are used.

    Returns
    -------
    pandas.DataFrame
        Columns ``index`` (the recipe's 'Unnamed: 0' value), ``name``,
        ``score`` (rule support or similarity score) and ``type``
        ('apriori' or 'cosine_sim'), with a fresh 0..k-1 row index.

    Notes
    -----
    Candidate ids that are absent from ``recipes`` are skipped, and a recipe
    is never listed twice. An unknown ``recipe_index`` yields an empty frame.
    """
    rules = apriori_rules if rules is None else rules
    recipes = df_subset if recipes is None else recipes
    similar = cosine_sim_30 if similar is None else similar

    # id -> (row number, name); the first occurrence of an id wins.
    catalog = {}
    for known_id, row_number, name in zip(recipes['id'], recipes['Unnamed: 0'], recipes['name']):
        catalog.setdefault(known_id, (row_number, name))

    records = []
    seen_ids = set()
    for candidate_id, score, kind in _candidate_recommendations(recipe_index, rules, similar):
        if len(records) >= n_recommendations:
            break
        if candidate_id in seen_ids:
            continue
        entry = catalog.get(candidate_id)
        if entry is None:
            # No such recipe in the recipe table: nothing to show for it.
            continue
        seen_ids.add(candidate_id)
        records.append({'index': entry[0], 'name': entry[1], 'score': score, 'type': kind})

    # Build the frame once rather than growing it row by row.
    return pd.DataFrame(records, columns=['index', 'name', 'score', 'type'])




In [79]:
# Recommendations for the default example recipe
get_recommendations(recipe_index=494784)

Index(['index', 'name', 'score', 'type'], dtype='object')
Index(['index', 'name', 'score', 'type'], dtype='object')


Unnamed: 0,index,name,score,type
0,5340,chicken ranchiladas rsc,0.00214,apriori
1,5178,shredded potato baskets with cheese and bacon ...,0.002043,apriori
2,5286,peppered buffalo ranch shrimp pizza rsc,0.002043,apriori
3,5311,warm roasted root vegetable and chicken salad ...,0.001848,apriori
4,5310,the hidden valley pizza rsc,0.001848,apriori
5,4985,cheesy potato italian sausage balls 5fix,0.001848,apriori
6,5110,divine mashed potatoes w fontina sage brussels...,0.001654,apriori
7,5012,crab prosciutto green onion potato cakes ...,0.001654,apriori
8,4971,simply dilly icous egg sandwich 5fix,0.001362,apriori
9,4869,quick and easy crabby caesar salads 5fix,0.001265,apriori
