In [2]:
#data import
import numpy as np
import pandas as pd

# Per-user recipe ratings.
interactions = pd.read_csv("interactions_processed.csv", header=0)

# The recipe table is spread over three CSV files; read each part, then join them on "id".
r1 = pd.read_csv("recipes_processed_1.csv", header=0)
r2 = pd.read_csv("recipes_with_new_ingredients.csv", header=0)
r3 = pd.read_csv("recipes_improved_tags.csv", header=0)
recipes = r1.merge(r2, on="id").merge(r3, on="id")

# Ingredient and tag collections; the functions in this notebook only use their lengths.
ingredients = pd.read_json("new_ingredient_ids.json", orient="index")[0]
tags = pd.read_json("recipes_processed_key.json")["tags"]


#print(recipes.index)

In [3]:
#concept

# Toy example of the scoring idea.
# personalValues: the user's average rating for each ingredient, derived from that user's ratings.
# It has length n = |ingredients|, where n is the number of ALL ingredients in the food.com system.
personalValues = np.array([5, 3.7, 0, 2.1, 0.7, 3.2])

# recipeVector: vector representation of one recipe, also of length n.
recipeVector = np.array([1, 0, 1, 1, 1, 0])

# The score of the recipe is the dot product of the two vectors.
print(personalValues.dot(recipeVector))



7.800000000000001


In [4]:
#functions

from ast import literal_eval #parses .csv string "lists" to actual python lists

def _averageRatingPerId(encodedIdLists, ratings, size):
    """Average the ratings of all rows that mention each id.

    Parameters
    ----------
    encodedIdLists : iterable of str
        One string per row, each the Python-literal form of a list of integer ids.
    ratings : iterable of numbers
        The rating given in the corresponding row.
    size : int
        Length of the returned vector (ids are used as positions in it).

    Returns
    -------
    numpy.ndarray of float32, shape (size,)
        Mean rating per id; ids that never occur keep the value 0.
    """
    ratingSums = np.zeros(size, dtype=np.float64)
    # 64-bit counters on purpose: a uint8 counter wraps back to 0 after 255
    # occurrences, which turns the average into a division by zero (NaN).
    occurrences = np.zeros(size, dtype=np.int64)

    for encoded, rating in zip(encodedIdLists, ratings):
        for idx in literal_eval(encoded):
            ratingSums[idx] += rating
            occurrences[idx] += 1

    averages = np.zeros(size, dtype=np.float32)
    seen = occurrences > 0
    averages[seen] = ratingSums[seen] / occurrences[seen]
    return averages


def parseReviews(userID, interactions, recipes, ingredients, tags):
    """Turn one user's submitted ratings into average-rating vectors.

    The user's rows are selected from ``interactions`` (column ``user_id``) and joined
    to ``recipes`` (``recipe_id`` == ``id``). Every ingredient id and tag id listed by a
    rated recipe then receives that recipe's rating, and the ratings are averaged per id.

    Parameters
    ----------
    userID : value compared against ``interactions["user_id"]``.
    interactions : pandas.DataFrame with columns ``user_id``, ``recipe_id`` and ``rating``.
    recipes : pandas.DataFrame with columns ``id``, ``name``, ``ingredients`` and ``tags``;
        the last two hold string-encoded lists of integer ids.
    ingredients, tags : sized collections; only their lengths are used.

    Returns
    -------
    (personalIngredients, personalTags) : tuple of float32 arrays of length
        n = |ingredients| and t = |tags| respectively. Ids the user never rated are 0.

    Side effects
    ------------
    Prints the names of the first 15 recipes the user rated.
    """
    data = interactions[interactions["user_id"] == userID]
    data = data.merge(recipes, left_on="recipe_id", right_on="id")
    print(data["name"].head(15))

    # Iterate over column values instead of data.loc[i, ...] so the result does not
    # depend on the index labels of the merged frame.
    personalIngredients = _averageRatingPerId(data["ingredients"], data["rating"], len(ingredients))
    personalTags = _averageRatingPerId(data["tags"], data["rating"], len(tags))

    return personalIngredients, personalTags  # 2 vectors of length n = |ingredients| and t = |tags|
    


def vectorizeRecipes(recipes, ingredients, tags):
    """Encode every recipe as binary ingredient and tag vectors.

    Parameters
    ----------
    recipes : pandas.DataFrame with columns ``ingredients`` and ``tags``, each holding the
        string form of a list of integer ids. Rows are processed in their stored order;
        the index labels are not used.
    ingredients, tags : collections whose first dimension gives the number of columns of
        the ingredient and tag matrix respectively.

    Returns
    -------
    recipesIngredientsVectorized : float32 array, shape (m, n); entry [i, j] is 1 when
        recipe i lists ingredient id j, else 0.
    recipesTagsVectorized : float32 array, shape (m, t); same encoding for tags.
    nIngredients : unsigned integer array of length m = |recipes|; number of entries in
        each recipe's ingredient list.
    nTags : unsigned integer array of length m; number of entries in each recipe's tag
        list PLUS ONE.
    """
    nRecipes = len(recipes)
    recipesIngredientsVectorized = np.zeros((nRecipes, np.shape(ingredients)[0]), np.float32)
    recipesTagsVectorized = np.zeros((nRecipes, np.shape(tags)[0]), np.float32)
    # uint16 rather than uint8 so long lists cannot overflow the counter; dividing a
    # float32 array by uint16 still yields float32, so downstream scores keep their dtype.
    nIngredients = np.zeros(nRecipes, dtype=np.uint16)
    nTags = np.zeros(nRecipes, dtype=np.uint16)

    # Walk the two columns positionally: recipes.loc[i, ...] is label-based and raises
    # KeyError whenever the frame's index is not exactly 0..m-1.
    encodedRows = zip(recipes["ingredients"], recipes["tags"])
    for row, (encodedIngredients, encodedTags) in enumerate(encodedRows):
        ingredientIds = literal_eval(encodedIngredients)
        for ingredientId in ingredientIds:
            recipesIngredientsVectorized[row, ingredientId] = 1
        nIngredients[row] = len(ingredientIds)

        tagIds = literal_eval(encodedTags)
        for tagId in tagIds:
            recipesTagsVectorized[row, tagId] = 1
        # NOTE(review): the tag count has always been stored as len + 1 (the ingredient
        # count is not). Presumably this guards against dividing by zero for untagged
        # recipes -- confirm before changing, since it affects every tag score.
        nTags[row] = len(tagIds) + 1

    return recipesIngredientsVectorized, recipesTagsVectorized, nIngredients, nTags  # two matrices (m x n, m x t) and two length-m vectors


def generateRecommendations(recipes, personalIngredients, personalTags, recipeIngredientVectors, recipeTagVectors, nIngredients, nTags, nRecommendations=25):
    """Score every recipe for one user and return the best-scoring ones.

    A recipe's ingredient score is the dot product of its ingredient vector with the
    user's ingredient ratings, divided by ``nIngredients``; the tag score is computed
    the same way from the tag inputs. The final rating is the mean of the two scores.

    Parameters
    ----------
    recipes : pandas.DataFrame with a ``name`` column; row order must match the rows of
        the recipe matrices.
    personalIngredients, personalTags : 1-D rating vectors (length n and t).
    recipeIngredientVectors, recipeTagVectors : 2-D recipe matrices (m x n and m x t).
    nIngredients, nTags : length-m divisors for the ingredient and tag scores.
    nRecommendations : int, optional
        Maximum number of recommendations to return (default 25).

    Returns
    -------
    list of (name, rating) tuples, best rating first. The list is shorter than
    ``nRecommendations`` when there are fewer recipes. Recipes whose rating is NaN
    (e.g. a zero divisor) are ranked last.
    """
    # A zero divisor gives 0/0 = NaN; silence the warning here and deal with NaN below.
    with np.errstate(divide="ignore", invalid="ignore"):
        iRatings = np.dot(recipeIngredientVectors, personalIngredients) / nIngredients
        tRatings = np.dot(recipeTagVectors, personalTags) / nTags
    ratings = (iRatings + tRatings) / 2

    # argsort puts NaN at the end, so reversing the order would put unscorable recipes
    # FIRST. Rank on a copy in which NaN is replaced by -inf so they end up last.
    sortKey = np.array(ratings, copy=True)
    sortKey[np.isnan(sortKey)] = -np.inf
    recommend = np.argsort(sortKey)[::-1][:nRecommendations]

    # The indices are row positions in the matrices, hence positional (.iloc) lookup.
    names = recipes["name"]
    return [(names.iloc[idx], ratings[idx]) for idx in recommend]


In [5]:
#warning - slow
#run vectorization of recipes only once
#import time
#start_time = time.time()

# Vectorize all recipes, then keep column-major (Fortran-order) copies of the two 2-D
# matrices; the two 1-D count vectors are stored exactly as returned.
# NOTE(review): the F-order copy is presumably meant to speed up the later dot products -- confirm.
recipesV = tuple(
    part.copy(order="F") if part.ndim == 2 else part
    for part in vectorizeRecipes(recipes, ingredients, tags)
)

#print("--- %s seconds ---" % (time.time() - start_time))

In [6]:
#run individual analysis
# Id of the user to generate recommendations for.
user = 54216
#start_time = time.time()

# personalV = (ingredient rating vector, tag rating vector) for this user.
personalV = parseReviews(user, interactions, recipes, ingredients, tags)

# Unpack the user vectors followed by the four precomputed recipe arrays, in that order.
personalRecommendations = generateRecommendations(recipes, *personalV, *recipesV)
print(personalRecommendations[:50])

#print("--- %s seconds ---" % (time.time() - start_time))
#print(recipesV[0].flags)

0         tuscan bread salad
1    roast crock pot chicken
Name: name, dtype: object
[('tuscan bread salad', 4.607639), ('blast chicken', 4.1208334), ('roast crock pot chicken', 4.017544), ('bake chicken onion garlic rosemari', 3.8552632), ('rosemari infus oil', 3.75), ('open peski pistachio nut', 3.732143), ('tuscan style pepper chicken', 3.717647), ('basic pesto recip', 3.6904764), ('brais balsam chicken garlic onion', 3.609375), ('easiest chicken recip', 3.6086955), ('boneless leg lamb', 3.594737), ('mediterranean herb oil', 3.578125), ('roast red potato rosemari', 3.575), ('roast chicken italian season', 3.5661764), ('roman roast veget', 3.53125), ('chicken vinegar mark bittman', 3.525), ('process pimiento pepper', 3.5192308), ('oven grill steak mark bittman', 3.5166667), ('wine reduct frozen cube', 3.5131578), ('south beach parmesan cracker', 3.5131578), ('simpl balsam vinaigrett', 3.5125), ('portabella mushroom', 3.4887218), ('herb marin ostrich', 3.478125), ('basil mint pesto', 3