# Recipe Recommender

Creating a Count Vectorizer

Calculating Cosine Similarity

Making a recommendation

#### Import Libraries

In [1]:
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import matplotlib.pyplot as plt
import seaborn as sns

from ast import literal_eval

from src.modules.feature_engineering import clean_data, create_soup, create_string_feature
from src.modules.recommender import get_recommendations, search_recipes

import pickle

#### Load Data

In [2]:
raw_recipes = pd.read_csv('./data/rrecipe_recommender/RAW_recipes.csv')
raw_interactions = pd.read_csv('./data/recipe_recommender/RAW_interactions.csv')

In [3]:
# Convert strings into python types. CSV has lists saved as strings, with literal_eval we convert that string in lists

features = ['tags', 'steps', 'ingredients']
for feature in features:
    raw_recipes[feature] = raw_recipes[feature].apply(literal_eval)

#### Subset of recipes - Get a list of recipes based on rules below
- Recipes that have more than 20 ratings
- Recipes that have a rating average of 4 or more

In [4]:
# Get count of all ratings by recipe id
recipe_ratings_cnt = raw_interactions[["recipe_id", "rating"]].groupby("recipe_id").count().sort_values('rating', ascending=False).reset_index()
recipe_ratings_cnt.columns = ["id", "rating_count"]

# Select only recipes with more than 20 ratings
filter = recipe_ratings_cnt['rating_count'] > 20
recipe_ratings_cnt = recipe_ratings_cnt[filter]

In [5]:
# Get recipes that have ratings of 4 or more
recipe_ratings_mean = raw_interactions[raw_interactions["recipe_id"].isin(recipe_ratings_cnt["id"])] \
                [["recipe_id", "rating"]].groupby("recipe_id").mean().sort_values('rating', ascending=False).reset_index()
recipe_ratings_mean.columns = ["id", "rating_mean"]

# get recipes with more than 20 ratings
filter = recipe_ratings_mean['rating_mean'] >= 4
recipe_ratings_mean = recipe_ratings_mean[filter]

In [6]:
# subset of recipes based on filters
recipes = raw_recipes[raw_recipes["id"].isin(recipe_ratings_mean["id"])]
recipes = recipes.reset_index(drop=True)

In [7]:
# subset of interactions based on filters
interactions = raw_interactions[raw_interactions["recipe_id"].isin(recipe_ratings_mean["id"])]
interactions = interactions.reset_index(drop=True)

#### Group all reviews by recipe id and join with recipes dataframe

In [8]:
# group reviews by
interactions_group_review = []
for (recipe_id, group) in interactions[["recipe_id", "review"]].groupby("recipe_id"):
    #reviews_tmp = []
    reviews_tmp = [review for review in group['review']]
    #for review in group["review"]:
    #    reviews_tmp.append(review)
    interactions_group_review.append([recipe_id, reviews_tmp])
    
new_interactions = pd.DataFrame(interactions_group_review, columns=["id", "review"])

In [9]:
# join reviews to recipes
recipes = pd.merge(recipes, new_interactions, how="left", on="id")

#### Clean data and feature engineering

In [10]:
# Copy recipes dataframe to clean and add features
recommend_recipes = recipes.copy()

In [11]:
# Apply clean_data function to your features.
features = ['tags', 'description', 'ingredients']

for feature in features:
    recommend_recipes[feature] = recommend_recipes[feature].apply(clean_data)

In [12]:
# create new column ingredients_string
#test = recommend_recipes['soup'] = recommend_recipes.apply(create_string_feature, axis=1)
recommend_recipes['ingredients_string'] = recommend_recipes.ingredients.apply(create_string_feature)

# create new column tags_string
recommend_recipes['tags_string'] = recommend_recipes.tags.apply(create_string_feature)

In [13]:
# Create a new soup feature
recommend_recipes['soup'] = recommend_recipes.apply(create_soup, axis=1)

In [14]:
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(recommend_recipes['soup'])



# from sklearn.feature_extraction import text 

# stop_words = text.ENGLISH_STOP_WORDS.union(my_additional_stop_words)
# stopwords.update(["made", "make", "added", "used", "thing", "makes", "use", "ingredient", "always",
#                   "thank", "think", "thanks", "making", "really", "using", "lot", "little", "posted",
#                   "use", "recipe", 'great', "good", "came", "way", "come", "served", "know", "even",
#                  "will", "next", "one", "time", "taste", "tried", "well", "want", "work", "yet", "cookbook",
#                  "much", "add", "delicious", "dish", "flavor", "recipes", "tried", "day", "cooking", "need",
#                   "another", "include", "take", "find", "prep", "cut", "may", "keep", "usually", "sure", "better",
#                   "wonderful", "eat", "comes", "nice", "best", "years ago", "adapted", "perfect",
#                   "without", "amount", "thought", "give", "tasty", "ingredients", "ingredient", "main", "make'",
#                  "loved", "love", "try", "enjoy", "found", "got", "serve", "cook"])



In [15]:
# Reset index of your main DataFrame and construct reverse mapping as before
recommend_recipes =recommend_recipes.reset_index()
indices = pd.Series(recommend_recipes.index, index=recommend_recipes['name'])

#### Calculate cosine similarity

In [16]:
cosine_sim = cosine_similarity(count_matrix, count_matrix)

#### Save files

In [17]:
# save the model to disk
filename = 'models/content-based_filtering/cosine_sim_recipes.pickle'
pickle.dump(cosine_sim, open(filename, 'wb'))

In [52]:
recommend_recipes.to_csv('./data/recipe_recommender/recommend_recipes.csv',index=False)
indices.to_csv('./data/recipe_recommender/indices.csv')

##### Search for ingredients in recipes

In [29]:
pd.set_option('max_colwidth', 1000)
features_list = ["", ""]
new_recipes, only_ingredients = search_recipes(recommend_recipes, "cauliflower", features_list)

Ingredients:  37
Tags:  37


In [30]:
print(len(only_ingredients))
print(len(new_recipes))

37
37


In [31]:
#def recipes():
if len(new_recipes) >= 1:
    print("With all characteristics")
    recipe = new_recipes.sample(n=1, random_state=1)
    # recommend 3 
elif len(only_ingredients) >= 1:
    print("entro a only ingredients")
    recipe = only_ingredients.sample(n=1, random_state=1) #Select random recipe
    #recommend 3 more and specify that it is only based on ingredients
else:
    print("return that no results were returned")

print(type(recipe))
recipe

With all characteristics
<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,index,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,review,ingredients_string,tags_string,soup
147,147,aloo gobi,84324,60,124813,2004-02-18,"[curries, 60-minutes-or-less, time-to-make, course, main-ingredient, cuisine, preparation, main-dish, vegetables, asian, indian, vegetarian, dietary, cauliflower]","[215.9, 11.0, 22.0, 13.0, 11.0, 5.0, 11.0]",13,"[heat vegetable oil in a large saucepan, add the chopped onion and one teaspoon of cumin seeds to the oil, stir together and cook until onions become creamy , golden , and translucent, add chopped coriander stalks , two teaspoons of turmeric , and one teaspoon of salt, add chopped chillis stir tomatoes into onion mixture, add ginger and garlic, mix thoroughly, add potatoes and cauliflower to the sauce plus a few tablespoons of water, ensure that the potatoes and cauliflower are coated with the curry sauce, cover and allow to simmer for twenty minutes, add two teaspoons of garam masala and stir, sprinkle chopped coriander leaves on top of the curry, turn off the heat , cover , and leave for as long as possible before serving]",great aloo gobi recipe from the film bend it like beckham,"[vegetable oil, onion, fresh coriander, green chili, cauliflower, potatoes, diced tomatoes, fresh ginger, fresh garlic, cumin seed, turmeric, salt, garam masala]",13,"[This is AWESOME!! Dad and I got together and made this for dinner for ourselves tonight. My kitchen smelt so good while this was stirring in the pot on the stovetop. Since the quantity of ginger and garlic was not mentioned, I used a 1 inch piece of fresh ginger and 4 cloves of fresh garlic(peeled, washed and finely chopped). I like to use all my veggies fresh as a personal preference and I try to avoid canned/tinned food, so I used 6 fresh tomatoes(4 Roma tomatoes and 2 Indian tomatoes). In addition to the green chilli, I also used 1/2 tsp. of red chilli powder. I used 3 1/2 cups of water to cook the tomatoes and cauliflower. As for the potatoes, I boiled the cubes of potatoes in water until they were tender and just had to be stirred into the rest of the curry. I used 2 tsps. of salt in total for this delicious curry. This was a very tasty curry and we enjoyed it immensely with Tilda Basmati white rice. Very good with plain yogurt on the side:-) Next time, I will use just 2 po...",vegetable oil onion fresh coriander green chili cauliflower potatoes diced tomatoes fresh ginger fresh garlic cumin seed turmeric salt garam masala,curries 60-minutes-or-less time-to-make course main-ingredient cuisine preparation main-dish vegetables asian indian vegetarian dietary cauliflower,vegetable oil onion fresh coriander green chili cauliflower potatoes diced tomatoes fresh ginger fresh garlic cumin seed turmeric salt garam masala curries 60-minutes-or-less time-to-make course main-ingredient cuisine preparation main-dish vegetables asian indian vegetarian dietary cauliflower great aloo gobi recipe from the film bend it like beckham


In [32]:
a = get_recommendations(recommend_recipes, recipe["name"].iloc[0], cosine_sim, indices)

aloo gobi


In [33]:
recipe["name"].iloc[0]

'aloo gobi'

In [34]:
a

150              aloo palak  indian potatoes   spinach
6313                                     turkish salad
3994    mattar paneer   indian peas with paneer cheese
Name: name, dtype: object

In [38]:
recommend_recipes.iloc[150]

index                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   

#### Search for tags in recipes

#### Test

##### Test-1

In [18]:
get_recommendations('stroganoff', cosine_sim, indices)

NameError: name 'indices' is not defined

In [91]:
recommend_recipes[recommend_recipes['name']== 'stroganoff']

Unnamed: 0,level_0,index,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,review,ingredients_string,tags_string,soup
5840,5840,5840,stroganoff,13906,25,23073,2001-11-07,"[30-minutes-or-less, time-to-make, course, mai...","[430.5, 50.0, 15.0, 30.0, 47.0, 71.0, 3.0]",7,"[brown beef in large skillet, add onions and c...",this recipe was used by both my mother and my ...,"[ground beef, onion, garlic powder, salt, pepp...",8,[Really tasty and filling. My hubby is looking...,ground beef onion garlic powder salt pepper co...,30-minutes-or-less time-to-make course main-in...,ground beef onion garlic powder salt pepper co...


##### Test-2

In [87]:
search_name = recommend_recipes[recommend_recipes['index']==7]["name"].iloc[0]

In [88]:
search_name

'the best  banana bread  or muffins'

In [192]:
recommend_recipes[recommend_recipes['name']== search_name]

Unnamed: 0,index,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,review,ingredients_string,tags_string,soup
7,7,the best banana bread or muffins,39363,70,51579,2002-09-03,"[weeknight, time-to-make, course, main-ingredi...","[175.7, 9.0, 65.0, 9.0, 5.0, 18.0, 9.0]",10,"[cream together butter and sugar, blend in egg...","okay, another banana bread recipe...but maybe ...","[butter, sugar, eggs, bananas, water, baking s...",11,[This is an excellent recipe!! Everyone loved...,butter sugar eggs bananas water baking soda sa...,weeknight time-to-make course main-ingredient ...,butter sugar eggs bananas water baking soda sa...


In [89]:
get_recommendations(search_name, cosine_sim, indices)

543                                    banana streusel muffins
2175                           double chocolate banana muffins
6068                                     the best banana bread
1468          chocolate chocolate chip sour cream banana bread
6564                          whole wheat honey banana muffins
1890    crispy cinnamon streusel banana nut cream cheese bread
1754                              cranberry banana oat muffins
530                                      banana crunch muffins
2236                                       easy banana muffins
493                                            baklava muffins
Name: name, dtype: object

##### Test-3

In [194]:
search_name = recommend_recipes[recommend_recipes['index']==34]["name"].iloc[0]

In [195]:
search_name

'get up   go  bars'

In [196]:
search_ingredients = recommend_recipes[recommend_recipes['name']== search_name]['ingredients'].iloc[0]

In [197]:
search_ingredients

['sugar',
 'light corn syrup',
 'peanut butter',
 'grape-nuts cereal',
 'rolled oats',
 'sliced almonds',
 'dried cranberries']

In [198]:
get_recommendations(search_name, cosine_sim, indices)

4690                    peanut butter marshmallow squares
4330                                 no bake granola bars
1228                                   chewy granola bars
2668                                 funky frito fruckies
4747                        perfect peanut butter cookies
2846                 grape nuts oatmeal cranberry cookies
6424    vegan peanut butter chocolate chip oatmeal coo...
4206        mud cookies   aka   chocolate no bake cookies
4329                                no bake fudge cookies
3971             mars bars rice krispies squares  no bake
Name: name, dtype: object

##### Test-4

In [199]:
search_name = recommend_recipes[recommend_recipes['index']==3644]["name"].iloc[0]

In [200]:
search_name

'kiwi quick bread'

In [201]:
search_ingredients = recommend_recipes[recommend_recipes['name']== search_name]['ingredients'].iloc[0]

In [202]:
search_ingredients

['all-purpose flour',
 'baking powder',
 'baking soda',
 'salt',
 'butter',
 'sugar',
 'eggs',
 'kiwi fruit']

In [203]:
get_recommendations(search_name, cosine_sim2, indices)

1890    crispy cinnamon streusel banana nut cream chee...
3867                                 low fat banana bread
2175                      double chocolate banana muffins
543                               banana streusel muffins
2614                  fresh plum or peach cobbler muffins
4786                     pineapple and sour cream muffins
7                      the best  banana bread  or muffins
675                                     best banana bread
4131                            moist banana walnut bread
837                                blueberry pudding loaf
Name: name, dtype: object

##### Test-5

In [212]:
search_name = recommend_recipes[recommend_recipes['index']==6389]["name"].iloc[0]

In [213]:
search_name

'v s grilled jerk pork tenderloin and pineapple mango kiwi salsa'

In [214]:
search_ingredients = recommend_recipes[recommend_recipes['name']== search_name]['ingredients'].iloc[0]

In [215]:
search_ingredients

['pork tenderloin',
 'onion',
 'scallion',
 'fresh thyme leave',
 'garlic cloves',
 'fresh orange juice',
 'fresh lime juice',
 'scotch bonnet peppers',
 'fresh ginger',
 'ground coriander',
 'fresh ground black pepper',
 'ground allspice',
 'cumin',
 'salt',
 'ground nutmeg',
 'ground cinnamon',
 'fresh pineapple',
 'fresh mango',
 'kiwi',
 'purple onion',
 'fresh cilantro',
 'lime juice',
 'rum',
 'jalapenos']

In [11]:
get_recommendations(search_name, cosine_sim, indices)

NameError: name 'get_recommendations' is not defined