In [2]:
#data import
import numpy as np
import pandas as pd

interactions= pd.read_csv("interactions_processed.csv", header= 0)
r1= pd.read_csv("recipes_processed_1.csv", header= 0)
r2= pd.read_csv("recipes_processed_2.csv", header= 0)
r3= pd.read_csv("recipes_processed_3.csv", header= 0)
recipes= r1.merge(r2, on= "id")
recipes= recipes.merge(r3, on= "id")
ingredients= pd.read_json("recipes_processed_key.json")
ingredients= ingredients["ingredients"]



In [3]:
#concept

personalValues= np.array([5, 3.7, 0, 2.1, 0.7, 3.2]) # Personal avg ratings for each ingredient derived from personal ratings
                                                     # Vector of length n = |ingridients|, as in n is the number of ALL ingredients in the food.com system

recipeIngredients= np.array([ [1,0,0,0,0,0],  #recipe X represented as its ingredients on a n*n matrix, where 1 at index [j,j] denotes j'th ingredient
                              [0,0,0,0,0,0],  #being present in the recipe
                              [0,0,1,0,0,0], 
                              [0,0,0,1,0,0], 
                              [0,0,0,0,1,0], 
                              [0,0,0,0,0,0]])

recipeVector= np.array([1,0,1,1,1,0]) #Alternative vector representation of a recipe. Vector of length n

# compute recommendation score as matrix x vector
scores= np.dot(personalValues,recipeIngredients) # compute score vector of recipe X
scoreCollapsed= np.sum(scores) # collapse score vector to a single number

# vector x vector alternative
print(np.dot(personalValues, recipeVector))

print(scores)
print(scoreCollapsed)

7.800000000000001
[5.  0.  0.  2.1 0.7 0. ]
7.8


In [4]:
#functions

from ast import literal_eval #parses .csv string "lists" to actual python lists

def parseReviews(userID, interactions, recipes, ingredients): #parses a user's submitted into a vector of average rating for each ingredient
    #print(recipes.dtypes)
    data= interactions[interactions["user_id"]== userID]
    data= data.merge(recipes, left_on= "recipe_id", right_on= "id")
    #print(data.head())
    personalValues= np.array([0]*len(ingredients))
    valuesIncremented= np.array([0]*len(ingredients))
    
    for i in range(len(data)):
        #print(data.loc[i,"ingredients"])
        for ingredient in literal_eval(data.loc[i,"ingredients"]):
            #print(ingredient)
            valuesIncremented[ingredient]= valuesIncremented[ingredient]+1
            added= (data.loc[i,"rating"]-personalValues[ingredient])/valuesIncremented[ingredient]
            personalValues[ingredient]= personalValues[ingredient]+added
    return personalValues # outputs a vector of length n = |ingredients|
    


def vectorizeRecipes(recipes, ingredients): # parses the recipes data into binary vectors of ingredients in each recipe

    recipesVectorized= []
    for i in range(len(recipes)):
        recipeVector= np.array([0]*len(ingredients))
        for ingredient in literal_eval(recipes.loc[i,"ingredients"]):
            recipeVector[ingredient]=1
        
        recipesVectorized.append((recipes.loc[i, "id"],recipeVector))
    return recipesVectorized # outputs a length m = |recipes| list of vectors of length n 


def generateRecommendations(personalVector, recipeVectors): #generates the personal recommendations for an user
    ratings= []
    for recipe in recipeVectors:
        rating= np.dot(personalVector, recipe[1])
        ratings.append((recipe[0], rating))
    ratings.sort(key= lambda x: x[1], reverse= True)
    return ratings # outputs a length m list of 2-tuples formatted as (recipeID, score)

In [5]:
#warning - slow
#run vectorization of recipes only once
recipesV= vectorizeRecipes(recipes, ingredients)

In [6]:
#run individual analysis
personalV= parseReviews(38094,interactions, recipes, ingredients)
personalRecommendations= generateRecommendations(personalV, recipesV )
print(personalRecommendations[0:50])

[(284814, 104), (161329, 103), (80044, 102), (333555, 97), (505050, 97), (74395, 96), (131978, 94), (397591, 93), (376675, 90), (116469, 88), (222092, 87), (183279, 85), (64944, 84), (441210, 82), (53815, 81), (315588, 81), (47943, 81), (475822, 81), (207220, 81), (224374, 81), (499336, 80), (102955, 80), (194754, 80), (453970, 80), (19629, 80), (456621, 79), (157639, 79), (271526, 79), (437161, 79), (283476, 78), (155194, 78), (211502, 78), (395716, 78), (125221, 78), (252539, 78), (334992, 78), (231794, 78), (110887, 77), (69861, 77), (125495, 77), (111615, 77), (398376, 77), (420718, 77), (238146, 77), (26793, 77), (295029, 77), (383366, 76), (451774, 76), (286652, 76), (430168, 76)]
