In [278]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ast
from sklearn.metrics.pairwise import cosine_similarity
import re

In [2]:
# Load the data
recipes = pd.read_csv('data/RAW_recipes.csv')
recipes.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8


In [3]:
# Nutrition information (calories (#), total fat (PDV), sugar (PDV) , sodium (PDV) , protein (PDV) , saturated fat (PDV), total carbohydrates (PDV))

In [4]:
recipes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 231637 entries, 0 to 231636
Data columns (total 12 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   name            231636 non-null  object
 1   id              231637 non-null  int64 
 2   minutes         231637 non-null  int64 
 3   contributor_id  231637 non-null  int64 
 4   submitted       231637 non-null  object
 5   tags            231637 non-null  object
 6   nutrition       231637 non-null  object
 7   n_steps         231637 non-null  int64 
 8   steps           231637 non-null  object
 9   description     226658 non-null  object
 10  ingredients     231637 non-null  object
 11  n_ingredients   231637 non-null  int64 
dtypes: int64(5), object(7)
memory usage: 21.2+ MB


In [5]:
recipes.describe()

Unnamed: 0,id,minutes,contributor_id,n_steps,n_ingredients
count,231637.0,231637.0,231637.0,231637.0,231637.0
mean,222014.708984,9398.546,5534885.0,9.765499,9.051153
std,141206.635626,4461963.0,99791410.0,5.995128,3.734796
min,38.0,0.0,27.0,0.0,1.0
25%,99944.0,20.0,56905.0,6.0,6.0
50%,207249.0,40.0,173614.0,9.0,9.0
75%,333816.0,65.0,398275.0,12.0,11.0
max,537716.0,2147484000.0,2002290000.0,145.0,43.0


In [6]:
recipes.isna().sum()

name                 1
id                   0
minutes              0
contributor_id       0
submitted            0
tags                 0
nutrition            0
n_steps              0
steps                0
description       4979
ingredients          0
n_ingredients        0
dtype: int64

In [7]:
# Going to drop nulls as I want description to be able to display to the user
recipes = recipes.dropna()
recipes.isna().sum()

name              0
id                0
minutes           0
contributor_id    0
submitted         0
tags              0
nutrition         0
n_steps           0
steps             0
description       0
ingredients       0
n_ingredients     0
dtype: int64

In [8]:
recipes.info()

<class 'pandas.core.frame.DataFrame'>
Index: 226657 entries, 0 to 231636
Data columns (total 12 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   name            226657 non-null  object
 1   id              226657 non-null  int64 
 2   minutes         226657 non-null  int64 
 3   contributor_id  226657 non-null  int64 
 4   submitted       226657 non-null  object
 5   tags            226657 non-null  object
 6   nutrition       226657 non-null  object
 7   n_steps         226657 non-null  int64 
 8   steps           226657 non-null  object
 9   description     226657 non-null  object
 10  ingredients     226657 non-null  object
 11  n_ingredients   226657 non-null  int64 
dtypes: int64(5), object(7)
memory usage: 22.5+ MB


In [9]:
# Need to look at all the tags, to be used as content features
recipes['tags'] = recipes['tags'].apply(ast.literal_eval)
tags = recipes['tags'].explode()
tags

0         60-minutes-or-less
0               time-to-make
0                     course
0            main-ingredient
0                    cuisine
                 ...        
231636               dietary
231636          comfort-food
231636            taste-mood
231636                 sweet
231636    number-of-servings
Name: tags, Length: 4045919, dtype: object

In [10]:
len(tags)

4045919

In [491]:
tag_counts = tags.value_counts()
tags_filtered = tag_counts[tag_counts >  10000]
tags_filtered

tags
preparation        225568
time-to-make       220353
course             213602
main-ingredient    166456
dietary            160444
                    ...  
beverages           10913
sweet               10503
savory              10428
potluck             10313
potatoes            10052
Name: count, Length: 80, dtype: int64

In [12]:
len(tags.unique())

552

In [13]:
# Nutrition information (calories (#), total fat (PDV), sugar (PDV) , sodium (PDV) , protein (PDV) , saturated fat (PDV), total carbohydrates (PDV))
recipes['nutrition'] = recipes['nutrition'].apply(ast.literal_eval)
recipes[['calories', 'fat', 'sugar', 'sodium', 'protein', 'saturated_fat', 'carbohydrates']] = recipes['nutrition'].apply(pd.Series)
recipes = recipes.drop(columns=['nutrition'])
recipes.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,n_steps,steps,description,ingredients,n_ingredients,calories,fat,sugar,sodium,protein,saturated_fat,carbohydrates
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"[60-minutes-or-less, time-to-make, course, mai...",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7,51.5,0.0,13.0,0.0,2.0,0.0,4.0
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"[30-minutes-or-less, time-to-make, course, mai...",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6,173.4,18.0,0.0,17.0,22.0,35.0,1.0
2,all in the kitchen chili,112140,130,196586,2005-02-25,"[time-to-make, course, preparation, main-dish,...",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13,269.8,22.0,32.0,48.0,39.0,27.0,5.0
3,alouette potatoes,59389,45,68585,2003-04-14,"[60-minutes-or-less, time-to-make, course, mai...",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11,368.1,17.0,10.0,2.0,14.0,8.0,20.0
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"[weeknight, time-to-make, course, main-ingredi...",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8,352.9,1.0,337.0,23.0,3.0,0.0,28.0


In [14]:
recipes['steps'] = recipes['steps'].apply(ast.literal_eval)
recipes['steps'][0]

['make a choice and proceed with recipe',
 'depending on size of squash , cut into half or fourths',
 'remove seeds',
 'for spicy squash , drizzle olive oil or melted butter over each cut squash piece',
 'season with mexican seasoning mix ii',
 'for sweet squash , drizzle melted honey , butter , grated piloncillo over each cut squash piece',
 'season with sweet mexican spice mix',
 'bake at 350 degrees , again depending on size , for 40 minutes up to an hour , until a fork can easily pierce the skin',
 'be careful not to burn the squash especially if you opt to use sugar or butter',
 'if you feel more comfortable , cover the squash with aluminum foil the first half hour , give or take , of baking',
 'if desired , season with salt']

In [15]:
recipes['ingredients'] = recipes['ingredients'].apply(ast.literal_eval)
recipes['ingredients'][0]

['winter squash',
 'mexican seasoning',
 'mixed spice',
 'honey',
 'butter',
 'olive oil',
 'salt']

In [16]:
ingredients = recipes['ingredients'].explode()
ing_counts = ingredients.value_counts()
ing_counts

ingredients
salt                         83781
butter                       53788
sugar                        43419
onion                        38168
water                        34060
                             ...  
low-sodium wheat crackers        1
capicola-mozzarella roll         1
citrus ponzu soy sauce           1
asiago cheese rolls              1
nepitella                        1
Name: count, Length: 14758, dtype: int64

In [498]:
ing_filetered = ing_counts[ing_counts > 2000]
ing_filetered

ingredients
salt                83781
butter              53788
sugar               43419
onion               38168
water               34060
                    ...  
beef broth           2045
shallot              2029
lemon, juice of      2019
ground coriander     2012
fresh mushrooms      2009
Name: count, Length: 173, dtype: int64

In [499]:
for ing in list(ing_filetered.index):
    if 'chicken' in ing:
        print(ing)

chicken broth
chicken stock
boneless skinless chicken breasts
chicken breasts
chicken


In [19]:
def check_values(row, values):
    return pd.Series({value: value in row for value in values})

In [500]:
new_cols_ing = recipes['ingredients'].apply(check_values, values=list(ing_filetered.index))
new_cols_ing

Unnamed: 0,salt,butter,sugar,onion,water,eggs,olive oil,flour,garlic cloves,milk,...,cooking spray,skim milk,cream,chicken,warm water,beef broth,shallot,"lemon, juice of",ground coriander,fresh mushrooms
0,True,True,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,True,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
2,True,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,True,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,True,False,True,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
231632,False,False,True,True,False,False,True,False,True,False,...,False,False,False,False,False,False,False,False,False,False
231633,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
231634,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
231635,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [501]:
new_cols_tag = recipes['tags'].apply(check_values, values=list(tags_filtered.index))
new_cols_tag

Unnamed: 0,preparation,time-to-make,course,main-ingredient,dietary,easy,occasion,cuisine,low-in-something,main-dish,...,free-of-something,condiments-etc,high-in-something,soups-stews,technique,beverages,sweet,savory,potluck,potatoes
0,True,True,True,True,True,True,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False
1,True,True,True,True,True,True,True,True,False,True,...,False,False,False,False,False,False,False,False,False,False
2,True,True,True,False,True,False,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
3,True,True,True,True,True,True,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True
4,True,True,True,True,True,False,True,True,False,False,...,False,True,False,False,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
231632,True,True,True,True,False,True,False,True,False,False,...,False,False,False,True,False,False,False,False,False,False
231633,True,True,True,False,True,True,False,False,False,False,...,False,True,False,False,False,False,False,False,False,False
231634,True,True,True,True,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
231635,True,True,True,False,True,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False


In [502]:
tags_ings = pd.concat([new_cols_ing, new_cols_tag], axis=1)
tags_ings.head()

Unnamed: 0,salt,butter,sugar,onion,water,eggs,olive oil,flour,garlic cloves,milk,...,free-of-something,condiments-etc,high-in-something,soups-stews,technique,beverages,sweet,savory,potluck,potatoes
0,True,True,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,True,False,False,False,True,...,False,False,False,False,False,False,False,False,False,False
2,True,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,True,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True
4,True,False,True,False,False,False,False,False,False,False,...,False,True,False,False,True,False,False,False,False,False


In [23]:
recipes.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,n_steps,steps,description,ingredients,n_ingredients,calories,fat,sugar,sodium,protein,saturated_fat,carbohydrates
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"[60-minutes-or-less, time-to-make, course, mai...",11,"[make a choice and proceed with recipe, depend...",autumn is my favorite time of year to cook! th...,"[winter squash, mexican seasoning, mixed spice...",7,51.5,0.0,13.0,0.0,2.0,0.0,4.0
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"[30-minutes-or-less, time-to-make, course, mai...",9,"[preheat oven to 425 degrees f, press dough in...",this recipe calls for the crust to be prebaked...,"[prepared pizza crust, sausage patty, eggs, mi...",6,173.4,18.0,0.0,17.0,22.0,35.0,1.0
2,all in the kitchen chili,112140,130,196586,2005-02-25,"[time-to-make, course, preparation, main-dish,...",6,"[brown ground beef in large pot, add chopped o...",this modified version of 'mom's' chili was a h...,"[ground beef, yellow onions, diced tomatoes, t...",13,269.8,22.0,32.0,48.0,39.0,27.0,5.0
3,alouette potatoes,59389,45,68585,2003-04-14,"[60-minutes-or-less, time-to-make, course, mai...",11,[place potatoes in a large pot of lightly salt...,"this is a super easy, great tasting, make ahea...","[spreadable cheese with garlic and herbs, new ...",11,368.1,17.0,10.0,2.0,14.0,8.0,20.0
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"[weeknight, time-to-make, course, main-ingredi...",5,"[mix all ingredients& boil for 2 1 / 2 hours ,...",my dh's amish mother raised him on this recipe...,"[tomato juice, apple cider vinegar, sugar, sal...",8,352.9,1.0,337.0,23.0,3.0,0.0,28.0


In [503]:
recipes_feat = recipes.drop(columns=['contributor_id', 'submitted', 'tags', 'steps', 'description',
                                     'ingredients', 'name', 'n_steps', 'n_ingredients', 'minutes'])
recipes_feat = pd.concat([recipes_feat, tags_ings], axis=1)
recipes_feat.head()

Unnamed: 0,id,calories,fat,sugar,sodium,protein,saturated_fat,carbohydrates,salt,butter,...,free-of-something,condiments-etc,high-in-something,soups-stews,technique,beverages,sweet,savory,potluck,potatoes
0,137739,51.5,0.0,13.0,0.0,2.0,0.0,4.0,True,True,...,False,False,False,False,False,False,False,False,False,False
1,31490,173.4,18.0,0.0,17.0,22.0,35.0,1.0,False,False,...,False,False,False,False,False,False,False,False,False,False
2,112140,269.8,22.0,32.0,48.0,39.0,27.0,5.0,True,False,...,False,False,False,False,False,False,False,False,False,False
3,59389,368.1,17.0,10.0,2.0,14.0,8.0,20.0,True,False,...,False,False,False,False,False,False,False,False,False,True
4,44061,352.9,1.0,337.0,23.0,3.0,0.0,28.0,True,False,...,False,True,False,False,True,False,False,False,False,False


In [25]:
recipes_feat['name']

0           arriba   baked winter squash mexican style
1                     a bit different  breakfast pizza
2                            all in the kitchen  chili
3                                   alouette  potatoes
4                   amish  tomato ketchup  for canning
                              ...                     
231632                                     zydeco soup
231633                                zydeco spice mix
231634                       zydeco ya ya deviled eggs
231635          cookies by design   cookies on a stick
231636    cookies by design   sugar shortbread cookies
Name: name, Length: 226657, dtype: object

In [435]:
duplicate_columns = recipes_feat.columns[recipes_feat.columns.duplicated()].tolist()
duplicate_columns

['sugar',
 'chicken',
 'cheese',
 'pasta',
 'potatoes',
 'eggs',
 'nuts',
 'rice',
 'tomatoes',
 'onions',
 'apples',
 'shrimp',
 'mushrooms',
 'yeast',
 'carrots',
 'corn',
 'strawberries',
 'broccoli',
 'bananas',
 'spinach',
 'bacon',
 'lemon',
 'coconut',
 'ham',
 'spaghetti',
 'asparagus',
 'blueberries',
 'cauliflower']

In [436]:
len(duplicate_columns)

28

In [437]:
column_counts = recipes_feat.columns.value_counts()
print("\nColumn name counts:")
print(column_counts)


Column name counts:
onions              2
corn                2
spinach             2
apples              2
spaghetti           2
                   ..
swiss cheese        1
almond extract      1
fresh mushrooms     1
ground coriander    1
wedding             1
Name: count, Length: 549, dtype: int64


In [504]:
recipes_feat_clean = recipes_feat.loc[:, ~recipes_feat.columns.duplicated()]

In [505]:
recipes_feat_clean.head()

Unnamed: 0,id,calories,fat,sugar,sodium,protein,saturated_fat,carbohydrates,salt,butter,...,served-hot,free-of-something,condiments-etc,high-in-something,soups-stews,technique,beverages,sweet,savory,potluck
0,137739,51.5,0.0,13.0,0.0,2.0,0.0,4.0,True,True,...,False,False,False,False,False,False,False,False,False,False
1,31490,173.4,18.0,0.0,17.0,22.0,35.0,1.0,False,False,...,False,False,False,False,False,False,False,False,False,False
2,112140,269.8,22.0,32.0,48.0,39.0,27.0,5.0,True,False,...,False,False,False,False,False,False,False,False,False,False
3,59389,368.1,17.0,10.0,2.0,14.0,8.0,20.0,True,False,...,True,False,False,False,False,False,False,False,False,False
4,44061,352.9,1.0,337.0,23.0,3.0,0.0,28.0,True,False,...,False,False,True,False,False,True,False,False,False,False


In [440]:
recipes_feat_clean.duplicated().sum()

np.int64(0)

In [441]:
column_counts = recipes_feat_clean.columns.value_counts()
print("\nColumn name counts:")
print(column_counts)


Column name counts:
id              1
for-1-or-2      1
healthy-2       1
comfort-food    1
european        1
               ..
warm water      1
chicken         1
cream           1
skim milk       1
wedding         1
Name: count, Length: 549, dtype: int64


In [442]:
# Save feature matrix as csv
recipes_feat_clean.to_parquet('data/recipes_feat.parquet')

In [452]:
recipes_new = recipes.drop(columns=['name', 'steps', 'description', 'contributor_id', 'submitted', 'n_ingredients', 
                                    'n_steps', 'minutes', 'calories', 'fat', 'sugar', 'sodium', 'protein',
                                    'saturated_fat', 'carbohydrates'])
recipes_new.head()

Unnamed: 0,id,tags,ingredients
0,137739,"[60-minutes-or-less, time-to-make, course, mai...","[winter squash, mexican seasoning, mixed spice..."
1,31490,"[30-minutes-or-less, time-to-make, course, mai...","[prepared pizza crust, sausage patty, eggs, mi..."
2,112140,"[time-to-make, course, preparation, main-dish,...","[ground beef, yellow onions, diced tomatoes, t..."
3,59389,"[60-minutes-or-less, time-to-make, course, mai...","[spreadable cheese with garlic and herbs, new ..."
4,44061,"[weeknight, time-to-make, course, main-ingredi...","[tomato juice, apple cider vinegar, sugar, sal..."


In [35]:
recipes_new.to_parquet('data/recipes.parquet')

In [418]:
recipes_steps = recipes.drop(columns=['contributor_id', 'submitted', 'n_ingredients', 'n_steps', 'minutes', 'calories', 'fat',
                                      'sugar', 'sodium', 'protein', 'saturated_fat', 'carbohydrates', 'tags'])
recipes_steps.head()

Unnamed: 0,name,id,steps,description,ingredients
0,arriba baked winter squash mexican style,137739,"[make a choice and proceed with recipe, depend...",autumn is my favorite time of year to cook! th...,"[winter squash, mexican seasoning, mixed spice..."
1,a bit different breakfast pizza,31490,"[preheat oven to 425 degrees f, press dough in...",this recipe calls for the crust to be prebaked...,"[prepared pizza crust, sausage patty, eggs, mi..."
2,all in the kitchen chili,112140,"[brown ground beef in large pot, add chopped o...",this modified version of 'mom's' chili was a h...,"[ground beef, yellow onions, diced tomatoes, t..."
3,alouette potatoes,59389,[place potatoes in a large pot of lightly salt...,"this is a super easy, great tasting, make ahea...","[spreadable cheese with garlic and herbs, new ..."
4,amish tomato ketchup for canning,44061,"[mix all ingredients& boil for 2 1 / 2 hours ,...",my dh's amish mother raised him on this recipe...,"[tomato juice, apple cider vinegar, sugar, sal..."


In [419]:
recipes_steps.to_parquet('data/recipes_steps.parquet')

In [371]:
ing_selected = ['chicken', 'onions', 'carrots']

In [372]:
def search_ing(row_ingredients, ing_selected):
    row_set = set(row_ingredients)
    search_set = set()
    for ing in ing_selected:
        for row_ing in row_ingredients:
            if row_ing in ing or ing in row_ing:
                search_set.add(row_ing)
    found = row_set.intersection(search_set)
    return list(found)
    

In [373]:
recipes_new['all_ings'] = recipes_new['ingredients'].apply(search_ing, ing_selected=ing_selected)
recipes_new.head()

Unnamed: 0,id,tags,ingredients,all_ings,match
0,137739,"[60-minutes-or-less, time-to-make, course, mai...","[winter squash, mexican seasoning, mixed spice...",[],False
1,31490,"[30-minutes-or-less, time-to-make, course, mai...","[prepared pizza crust, sausage patty, eggs, mi...",[],False
2,112140,"[time-to-make, course, preparation, main-dish,...","[ground beef, yellow onions, diced tomatoes, t...",[yellow onions],False
3,59389,"[60-minutes-or-less, time-to-make, course, mai...","[spreadable cheese with garlic and herbs, new ...",[],False
4,44061,"[weeknight, time-to-make, course, main-ingredi...","[tomato juice, apple cider vinegar, sugar, sal...",[],False


In [374]:
recipes_new[recipes_new['all_ings'].apply(len) >= len(ing_selected)]

Unnamed: 0,id,tags,ingredients,all_ings,match
29,44123,"[weeknight, time-to-make, course, main-ingredi...","[unsalted butter, carrot, onion, celery, brocc...","[smoked chicken, onion, carrot, chicken stock]",True
85,103948,"[time-to-make, course, main-ingredient, prepar...","[boneless skinless chicken thighs, all-purpose...","[boneless skinless chicken thighs, onion, chic...",True
134,69190,"[time-to-make, course, main-ingredient, cuisin...","[cream of chicken soup, cream of celery soup, ...","[cream of chicken soup, onions, carrots]",True
159,445026,"[weeknight, time-to-make, course, main-ingredi...","[vegetable oil, all-purpose flour, onion, gree...","[onion, low sodium chicken broth, cooked chicken]",True
165,269984,"[time-to-make, course, preparation, occasion, ...","[chicken breasts, baby carrots, celery, white ...","[chicken breasts, baby carrots, chicken stock]",True
...,...,...,...,...,...
231496,250050,"[60-minutes-or-less, time-to-make, course, mai...","[ziti pasta, chicken sausage, olive oil, zucch...","[onions, fat-free low-sodium chicken broth, ch...",True
231558,415406,"[weeknight, 60-minutes-or-less, time-to-make, ...","[potatoes, onions, chicken broth, chicken stoc...","[onions, chicken stock cube, chicken broth]",True
231579,464576,"[time-to-make, course, main-ingredient, cuisin...","[sweet italian sausage, onion, garlic cloves, ...","[onion, chicken broth, carrots]",True
231590,326419,"[bacon, 15-minutes-or-less, time-to-make, cour...","[bacon, onions, garlic cloves, celery, carrot,...","[onions, low-sodium low-fat chicken broth, car...",True


In [375]:
def check_ingredients_df(row):
    ingredients = ing_selected
    all_ings = row
    
    # If all_ings is empty, return False
    if not all_ings:
        return False
    
    # Join all ingredients into a single lowercase string
    ingredients_str = ' '.join(str(ing).lower() for ing in all_ings)
    
    # Check each item in all_ings
    for item in ingredients:
        item = item.strip('s')
        if item not in ingredients_str:
            return False
    
    # If we've made it through all items without returning False, return True
    return True


In [376]:
recipes_new['match'] = recipes_new['all_ings'].apply(check_ingredients_df)

In [377]:
recipes_new

Unnamed: 0,id,tags,ingredients,all_ings,match
0,137739,"[60-minutes-or-less, time-to-make, course, mai...","[winter squash, mexican seasoning, mixed spice...",[],False
1,31490,"[30-minutes-or-less, time-to-make, course, mai...","[prepared pizza crust, sausage patty, eggs, mi...",[],False
2,112140,"[time-to-make, course, preparation, main-dish,...","[ground beef, yellow onions, diced tomatoes, t...",[yellow onions],False
3,59389,"[60-minutes-or-less, time-to-make, course, mai...","[spreadable cheese with garlic and herbs, new ...",[],False
4,44061,"[weeknight, time-to-make, course, main-ingredi...","[tomato juice, apple cider vinegar, sugar, sal...",[],False
...,...,...,...,...,...
231632,486161,"[ham, 60-minutes-or-less, time-to-make, course...","[celery, onion, green sweet pepper, garlic clo...","[onion, low sodium chicken broth]",False
231633,493372,"[15-minutes-or-less, time-to-make, course, pre...","[paprika, salt, garlic powder, onion powder, d...",[],False
231634,308080,"[60-minutes-or-less, time-to-make, course, mai...","[hard-cooked eggs, mayonnaise, dijon mustard, ...",[],False
231635,298512,"[30-minutes-or-less, time-to-make, course, pre...","[butter, eagle brand condensed milk, light bro...",[],False


In [392]:
recipes_test = recipes_new[recipes_new['match'] == True]

In [397]:
recipes_test

Unnamed: 0,id,tags,ingredients,all_ings,match,tag_match
1361,111257,"[30-minutes-or-less, time-to-make, course, mai...","[chicken breast tenders, olive oil, butter, ru...","[onion, chicken breast tenders, chicken broth,...",True,True
1363,28670,"[lactose, 30-minutes-or-less, time-to-make, co...","[extra virgin olive oil, carrots, parsnip, oni...","[onion, chicken breast tenders, chicken stock,...",True,True
4762,85648,"[30-minutes-or-less, time-to-make, course, mai...","[butter, carrots, green onions, chicken broth,...","[chicken broth, green onions, cooked chicken, ...",True,True
9170,242393,"[30-minutes-or-less, time-to-make, course, mai...","[rice noodles, chicken, carrot, green bell pep...","[chicken, green onions, carrot]",True,True
9417,75654,"[30-minutes-or-less, time-to-make, course, mai...","[ramen noodles, sunflower seeds, slivered almo...","[green onions, carrot, boneless skinless chick...",True,True
...,...,...,...,...,...,...
223733,373053,"[30-minutes-or-less, time-to-make, course, mai...","[chicken breasts, pineapple juice, coconut mil...","[onion, chicken breasts, baby carrots, chicken...",True,True
227747,86461,"[30-minutes-or-less, time-to-make, course, mai...","[bok choy, red cabbage, carrot, cilantro, appl...","[green onions, carrot, cooked chicken]",True,True
228381,185775,"[30-minutes-or-less, time-to-make, course, mai...","[chicken breasts, thin spaghetti, fat free chi...","[fat free chicken broth, green onions, carrot,...",True,True
229569,353950,"[30-minutes-or-less, time-to-make, course, mai...","[rice, chicken thighs, onion, frozen broccoli ...","[onion, chicken thighs, frozen broccoli carrot...",True,True


In [393]:
def all_tags_present(item_tags, selected):
    return all(string in item_tags for string in selected)

In [394]:
recipes_test['tag_match'] = recipes_test['tags'].apply(all_tags_present, selected=['poultry', '30-minutes-or-less'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recipes_test['tag_match'] = recipes_test['tags'].apply(all_tags_present, selected=['poultry', '30-minutes-or-less'])


In [395]:
recipes_test = recipes_test[recipes_test['tag_match'] == True]

In [401]:
recipes_id = recipes_test['id'].values
recipes_id

array([111257,  28670,  85648, 242393,  75654, 512041, 154765, 350695,
       126137, 495366,  38838,  25344, 122536, 272825, 284101, 502492,
       205325, 130650, 192865, 298171, 395452, 197276, 230786, 263211,
        82503,  13334, 136175,  27800, 223273, 459462, 380859, 364039,
       275001, 195625, 477720, 320081, 138557, 146269, 151810,  99429,
       234223, 168166, 185427, 203518, 486282, 389536, 207439, 367935,
       403121, 148678, 143397, 439096, 167731,  23282, 350097, 272306,
       222005, 486780, 304772, 417193, 284265, 185367, 272427,  29979,
       243330, 213006, 125190, 298063, 194892, 372815,  58905,  89289,
       135142, 144104, 384863,  38221,  19230, 447472, 412883, 297691,
       243324,  62263, 280562, 406539,  17156, 237544,  30433,  36944,
       282484, 535072, 101096,  36172, 133711, 223812, 190712, 212198,
       422001, 488678, 381540, 228028, 487487, 154398, 210264, 273121,
       120558, 297336, 384435, 233158, 128399,  64584, 151845, 132682,
      

In [402]:
len(recipes_id)

134

In [403]:
recipes_steps

Unnamed: 0,name,id,steps,description
0,arriba baked winter squash mexican style,137739,"[make a choice and proceed with recipe, depend...",autumn is my favorite time of year to cook! th...
1,a bit different breakfast pizza,31490,"[preheat oven to 425 degrees f, press dough in...",this recipe calls for the crust to be prebaked...
2,all in the kitchen chili,112140,"[brown ground beef in large pot, add chopped o...",this modified version of 'mom's' chili was a h...
3,alouette potatoes,59389,[place potatoes in a large pot of lightly salt...,"this is a super easy, great tasting, make ahea..."
4,amish tomato ketchup for canning,44061,"[mix all ingredients& boil for 2 1 / 2 hours ,...",my dh's amish mother raised him on this recipe...
...,...,...,...,...
231632,zydeco soup,486161,"[heat oil in a 4-quart dutch oven, add celery ...",this is a delicious soup that i originally fou...
231633,zydeco spice mix,493372,[mix all ingredients together thoroughly],this spice mix will make your taste buds dance!
231634,zydeco ya ya deviled eggs,308080,"[in a bowl , combine the mashed yolks and mayo...","deviled eggs, cajun-style"
231635,cookies by design cookies on a stick,298512,[place melted butter in a large mixing bowl an...,"i've heard of the 'cookies by design' company,..."


In [406]:
recipes_steps_rec = recipes_steps[recipes_steps['id'].isin(recipes_id)]
recipes_steps_rec

Unnamed: 0,name,id,steps,description
1361,30 minute chicken and dumplings,111257,[dice tenders into bite size pieces and set as...,this is a rachel ray recipe i had in my file f...
1363,30 minute chicken noodle soup from foodtv ra...,28670,[place a large pot over moderate heat and add ...,from show of 5/16/02.
4762,amazingly simple chicken noodle soup,85648,"[cook carrots in melted butter 4 min, stir in ...","so simple, so tasty, so soothing!!! a classic ..."
9170,asian chicken noodle salad,242393,[prepare noodles according to package directio...,this is the first time i ever used rice noodle...
9417,asian ramen salad with chicken,75654,"[place crushed ramen noodles , sunflower seeds...",a nice crunchy salad with a great tangy dressi...
...,...,...,...,...
223733,viv s pina colada chicken with salsa,373053,[mark chicken breast on hot grill & cook until...,another recipe from our stay in puerto rico. ...
227747,winter chicken salad with chipotle cream dressing,86461,"[in a bowl , combine all ingredients but parsl...",an adopted recipe originally posted by mean ch...
228381,ww 2 point chicken noodle soup,185775,"[cut chicken into bite size pieces, saut in a ...",this recipe i got at a weight watchers meeting...
229569,yoshinoya style teriyaki chicken and vegetable...,353950,"[cook rice according to directions, slice onio...",i had food at a yoshinoya restaurant in califo...


In [409]:
rec = recipes_steps_rec[recipes_steps_rec['id'] == 85648]

In [415]:
rec['steps'].values[0]

['cook carrots in melted butter 4 min',
 'stir in onion , broth and bay leaf',
 'season with salt and pepper to taste',
 'bring to a boil , then simmer 5 min until carrots are almost tender',
 'return to a boil',
 'add noodles , chicken , peas and simmer until noodles are tender',
 'enjoy']

In [414]:
rec['name'].values[0]

'amazingly simple chicken noodle soup'

In [424]:
name = rec['name'].values[0].replace(' ', '-')
name

'amazingly-simple-chicken-noodle-soup'

In [425]:
link = f"https://www.food.com/recipe/{name}-{rec['id'].values[0]}"
link

'https://www.food.com/recipe/amazingly-simple-chicken-noodle-soup-85648'

In [511]:
rec_feat = recipes_feat_clean[recipes_feat_clean['id'] == 144230]
rec_feat = rec_feat.drop(columns=['id']).values.reshape(1, -1)

cosine_sim = cosine_similarity(rec_feat, recipes_feat_clean.drop(columns=['id']))
sim_scores = list(zip(recipes_feat_clean['id'].values, cosine_sim[0]))
sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
sim_scores = sim_scores[0:6]
rec_indices = [i[0] for i in sim_scores]


In [512]:
sim_scores

[(np.int64(144230), np.float64(0.9999999999999997)),
 (np.int64(188904), np.float64(0.9999193030770934)),
 (np.int64(335165), np.float64(0.9999124010158112)),
 (np.int64(380464), np.float64(0.9998212309433744)),
 (np.int64(35264), np.float64(0.9997718227179002)),
 (np.int64(78321), np.float64(0.9997654888856027))]

In [508]:
recipes_new[recipes_new['id'].isin(rec_indices)]

Unnamed: 0,id,tags,ingredients
24627,188904,"[30-minutes-or-less, time-to-make, course, pre...","[black pepper, salt, butter, golden brown suga..."
29585,335165,"[bacon, 15-minutes-or-less, time-to-make, cour...","[broccoli florets, red onion, sharp cheddar ch..."
73280,78321,"[30-minutes-or-less, time-to-make, course, mai...","[vegetable oil, onion, white wine vinegar, hon..."
74889,35264,"[15-minutes-or-less, time-to-make, course, pre...","[coleslaw mix, red pepper, green onions, fresh..."
99149,380464,"[weeknight, 30-minutes-or-less, time-to-make, ...","[eggplants, chinese five spice powder, onion p..."


In [474]:
rec_indices

[np.int64(24841),
 np.int64(314126),
 np.int64(417682),
 np.int64(456939),
 np.int64(27121)]

In [466]:
recipes_new.shape

(226657, 3)

In [449]:
sim_scores

[(160502, np.float64(0.9988458013016529)),
 (61857, np.float64(0.9983187001400087)),
 (95262, np.float64(0.99830700479747)),
 (120834, np.float64(0.9982398338863303)),
 (11523, np.float64(0.9982158183362595))]

In [454]:
recipes_new.shape

(226657, 3)

In [509]:
recipes_steps[recipes_steps['id'].isin(rec_indices)]

Unnamed: 0,name,id,steps,description,ingredients
24627,black pepper almonds,188904,"[preheat oven to 350 degrees f, line large bak...",these are good with cocktails. recipe from bon...,"[black pepper, salt, butter, golden brown suga..."
29585,broccoli with cranberries salad,335165,"[for the salad ~ in a large salad bowl , mix a...",another recipe that comes from the 2004 cookbo...,"[broccoli florets, red onion, sharp cheddar ch..."
73280,dove s nest turkey red grape bleu cheese sa...,78321,"[to make dressing , combine the oil , onion , ...",this is another favorite from my favorite waxa...,"[vegetable oil, onion, white wine vinegar, hon..."
74889,easy asian coleslaw,35264,"[mix salad ingredients in large bowl, mix dres...",great change from the mayo type and very easy ...,"[coleslaw mix, red pepper, green onions, fresh..."
99149,grilled japanese eggplant with an asian vinaig...,380464,[wash eggplants and pat dry with paper towels ...,"high in fiber and low in calories, grilled egg...","[eggplants, chinese five spice powder, onion p..."


In [510]:
recipes_feat_clean[recipes_feat_clean['id'].isin(rec_indices)]

Unnamed: 0,id,calories,fat,sugar,sodium,protein,saturated_fat,carbohydrates,salt,butter,...,served-hot,free-of-something,condiments-etc,high-in-something,soups-stews,technique,beverages,sweet,savory,potluck
24627,188904,1136.4,135.0,180.0,79.0,61.0,81.0,24.0,True,True,...,False,False,False,True,False,False,False,False,False,False
29585,335165,546.2,57.0,91.0,38.0,33.0,44.0,13.0,True,False,...,False,False,False,False,False,False,False,False,False,False
73280,78321,603.4,74.0,103.0,35.0,34.0,46.0,10.0,False,False,...,False,False,False,False,False,False,False,False,False,False
74889,35264,236.8,27.0,37.0,15.0,12.0,15.0,5.0,False,False,...,False,False,False,False,False,False,False,False,False,False
99149,380464,450.9,49.0,74.0,31.0,19.0,33.0,12.0,False,False,...,False,False,False,False,False,False,False,False,True,False
