In [2]:
#data import
import numpy as np
import pandas as pd

interactions= pd.read_csv("interactions_processed.csv", header= 0)
r1= pd.read_csv("recipes_processed_1.csv", header= 0)
r2= pd.read_csv("recipes_with_new_ingredients.csv", header= 0)
r3= pd.read_csv("recipes_improved_tags.csv", header= 0)
recipes= r1.merge(r2, on= "id")
recipes= recipes.merge(r3, on= "id")
ingredients= pd.read_json("recipes_processed_key.json")["ingredients"]
tags= pd.read_json("recipes_processed_key.json")["tags"]




In [3]:
#concept

personalValues= np.array([5, 3.7, 0, 2.1, 0.7, 3.2]) # Personal avg ratings for each ingredient derived from personal ratings
                                                     # Vector of length n = |ingridients|, as in n is the number of ALL ingredients in the food.com system

recipeIngredients= np.array([ [1,0,0,0,0,0],  #recipe X represented as its ingredients on a n*n matrix, where 1 at index [j,j] denotes j'th ingredient
                              [0,0,0,0,0,0],  #being present in the recipe
                              [0,0,1,0,0,0], 
                              [0,0,0,1,0,0], 
                              [0,0,0,0,1,0], 
                              [0,0,0,0,0,0]])

recipeVector= np.array([1,0,1,1,1,0]) #Alternative vector representation of a recipe. Vector of length n

# compute recommendation score as matrix x vector
scores= np.dot(personalValues,recipeIngredients) # compute score vector of recipe X
scoreCollapsed= np.sum(scores) # collapse score vector to a single number

# vector x vector alternative
print(np.dot(personalValues, recipeVector))

print(scores)
print(scoreCollapsed)

7.800000000000001
[5.  0.  0.  2.1 0.7 0. ]
7.8


In [17]:
#functions

from ast import literal_eval #parses .csv string "lists" to actual python lists

def parseReviews(userID, interactions, recipes, ingredients, tags): #parses a user's submitted into a vector of average rating for each ingredient
    #print(recipes.dtypes)
    data= interactions[interactions["user_id"]== userID]
    data= data.merge(recipes, left_on= "recipe_id", right_on= "id")
    #print(data.head())
    personalIngredients= np.array([0]*len(ingredients))
    ingredientsIncremented= np.array([0]*len(ingredients))
    personalTags= np.array([0]*len(tags))
    tagsIncremented= np.array([0]*len(tags))
    
    for i in range(len(data)):
        #print(data.loc[i,"ingredients"])
        for ingredient in literal_eval(data.loc[i,"ingredients"]):
            #print(ingredient)
            ingredientsIncremented[ingredient]= ingredientsIncremented[ingredient]+1
            added= (data.loc[i,"rating"]-personalIngredients[ingredient])/ingredientsIncremented[ingredient]
            personalIngredients[ingredient]= personalIngredients[ingredient]+added

    for i in range(len(data)):
        #print(data.loc[i,"ingredients"])
        for tag in literal_eval(data.loc[i,"tags"]):
            #print(ingredient)
            tagsIncremented[tag]= tagsIncremented[tag]+1
            added= (data.loc[i,"rating"]-personalTags[tag])/tagsIncremented[tag]
            personalTags[tag]= personalTags[tag]+added
    return personalIngredients, personalTags # outputs a vector of length n = |ingredients|
    


def vectorizeRecipes(recipes, ingredients, tags): # parses the recipes data into binary vectors of ingredients in each recipe

    recipesIngredientsVectorized= []
    for i in range(len(recipes)):
        recipeIngredients= np.array([0]*(len(ingredients)))
        count=0
        for ingredient in literal_eval(recipes.loc[i,"ingredients"]):
            recipeIngredients[ingredient]=1
            count=count+1
        recipesIngredientsVectorized.append((recipes.loc[i, "name"],recipeIngredients, count))

    recipesTagsVectorized= []
    for i in range(len(recipes)):
        recipeTags= np.array([0]*(len(tags)))
        count=1
        for tag in literal_eval(recipes.loc[i,"tags"]):
            recipeTags[tag]=1
            count=count+1
        recipesTagsVectorized.append((recipes.loc[i, "name"],recipeTags, count))

    return recipesIngredientsVectorized, recipesTagsVectorized # outputs a length m = |recipes| list of vectors of length n 

#here

def generateRecommendations(personalIngredients, personalTags, recipeIngredientVectors, recipeTagVectors): #generates the personal recommendations for an user
    ratings= []
    for recipe in range(len(recipeIngredientVectors)):
        iRating= np.dot(personalIngredients, recipeIngredientVectors[recipe][1])/ recipeIngredientVectors[recipe][2]
        tRating= np.dot(personalTags, recipeTagVectors[recipe][1])/ (recipeTagVectors[recipe][2])
        ratings.append((recipeIngredientVectors[recipe][0], (iRating+tRating)/2))
    
    ratings.sort(key= lambda x: x[1], reverse= True)
    return ratings # outputs a length m list of 2-tuples formatted as (recipeID, score)



In [5]:
#warning - slow
#run vectorization of recipes only once
import time
start_time = time.time()

recipesV= vectorizeRecipes(recipes, ingredients, tags)

print("--- %s seconds ---" % (time.time() - start_time))

--- 135.87946820259094 seconds ---


In [19]:
#run individual analysis
user=361457
start_time = time.time()

personalV= parseReviews(user,interactions, recipes, ingredients,tags)
personalRecommendations= generateRecommendations(personalV[0],personalV[1], recipesV[0], recipesV[1] )
print(personalRecommendations[0:50])

print("--- %s seconds ---" % (time.time() - start_time))

[('hawaiian kielbasa', 2.9347826086956523), ('italian sausag potato', 2.083333333333333), ('kielbasa appet 2 ingredi', 1.9285714285714286), ('jamon sangria sangria ham', 1.81875), ('easiest bake spam', 1.8125), ('bake bean sandwich', 1.8), ('crock pot pork tenderloin appl', 1.7922077922077921), ('brown sugar crust cheesecak', 1.75), ('special pork bean', 1.75), ('littl bacon smoki', 1.71875), ('pork loin mustard glaze', 1.71875), ('taco fri', 1.7045454545454546), ('bourbon sausag dog', 1.7035714285714285), ('easiest mouth water rib', 1.7), ('appl sausag appet', 1.6875), ('sausag bacon bite', 1.6666666666666665), ('cheerwin holiday parti punch', 1.65), ('appl orchard bean bake', 1.625), ('awesom lil smoki', 1.625), ('bacon dog', 1.625), ('chicken bacon bbq 3000 popper penzey', 1.625), ('delici bake ham', 1.625), ('toilet cleaner', 1.625), ('rosi cinnamon appl 2', 1.6071428571428572), ('north carolina bbq', 1.6022727272727273), ('fantabul easi bake bean', 1.6), ('wick easi beef rice', 1.