# Importing The Dependencies

In [1]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import string 
import re
import nltk
from nltk.tokenize import word_tokenize 
from nltk.probability import FreqDist
from nltk.corpus import stopwords
import pickle
import swifter

# Load Dataset

In [2]:
df = pd.read_csv("recipes_82k.csv")
df.head()

Unnamed: 0,category,cooking_method,cuisine,image,ingredients,prep_time,recipe_name,serves,tags
0,,['Set the racks in the middle and upper thirds...,['American'],https://www.skinnytaste.com/wp-content/uploads...,"['1 tablespoons extra virgin olive oil', '1 cu...",20 minutes,Mediterranean Sea Bass,4 servings,"Dairy Free, Gluten Free, Keto Recipes, Kid Fri..."
1,,['Place the eggs in the air fryer basket and c...,['American'],https://www.skinnytaste.com/wp-content/uploads...,"['4 large eggs', 'Salt (black pepper, everythi...",15 minutes,Air Fryer Hard Boiled Eggs,4 eggs,"Air Fryer Recipes, Dairy Free, Gluten Free, Ke..."
2,,"['Air Fryer directions:', 'Preheat air fryer t...",['American'],https://www.skinnytaste.com/wp-content/uploads...,"['olive oil spray', '4 about 5 ounce each salm...",5 minutes,Air Fryer Basil-Parmesan Salmon,4 servings,"Air Fryer Recipes, Gluten Free, Keto Recipes, ..."
3,,"['Preheat the oven to 400F.', 'Pour 2 tablespo...",['American'],https://www.skinnytaste.com/wp-content/uploads...,['1/2 cup freshly grated Parmesan (not pre-gra...,15 minutes,Everything Parmesan Crisps,4 servings,"Gluten Free, Keto Recipes, Kid Friendly, Low C..."
4,,['Cook potatoes in a large pot of salted water...,['American'],https://www.skinnytaste.com/wp-content/uploads...,['3 1/2 pounds new potatoes (about 10 peeled a...,10 minutes,Potato and Green Bean Salad,12 servings,"Dairy Free, Gluten Free, Kid Friendly, Vegetar..."


In [3]:
# number of rows and columns in the data frame
df.shape

(82245, 9)

# Pre-Processing

## Dimension reduction dataset

In [4]:
df['cuisine'].unique()

array(["['American']", nan, "['Italian']", "['Mexican']",
       "['Chinese', 'Japanese']", "['Indian']", "['American', 'Mexican']",
       "['American', 'Italian']", "['Asian', 'Hawaiian']",
       "['Mediterranean']", "['American', 'Latin']",
       "['American', 'Czech', 'Hungarian']",
       "['American', 'Cuban', 'Latin']", "['Latin']",
       "['Latin', 'Mexican']", "['Asian']", "['American', 'Argentinian']",
       "['American', 'Greek']", "['American', 'Tex Mex']", "['Tex Mex']",
       "['Cuban', 'Latin']", "['Asian', 'Chinese']", "['Brazilian']",
       '[]'], dtype=object)

In [5]:
df2 = df.loc[(df["cuisine"].isin(["['American']", "['Italian']", "['Mexican']",
       "['Chinese', 'Japanese']", "['Indian']", "['American', 'Mexican']",
       "['American', 'Italian']", "['Asian', 'Hawaiian']",
       "['Mediterranean']", "['American', 'Latin']",
       "['American', 'Czech', 'Hungarian']",
       "['American', 'Cuban', 'Latin']", "['Latin']",
       "['Latin', 'Mexican']", "['Asian']", "['American', 'Argentinian']",
       "['American', 'Greek']", "['American', 'Tex Mex']", "['Tex Mex']",
       "['Cuban', 'Latin']", "['Asian', 'Chinese']", "['Brazilian']"]))]
df2

Unnamed: 0,category,cooking_method,cuisine,image,ingredients,prep_time,recipe_name,serves,tags
0,,['Set the racks in the middle and upper thirds...,['American'],https://www.skinnytaste.com/wp-content/uploads...,"['1 tablespoons extra virgin olive oil', '1 cu...",20 minutes,Mediterranean Sea Bass,4 servings,"Dairy Free, Gluten Free, Keto Recipes, Kid Fri..."
1,,['Place the eggs in the air fryer basket and c...,['American'],https://www.skinnytaste.com/wp-content/uploads...,"['4 large eggs', 'Salt (black pepper, everythi...",15 minutes,Air Fryer Hard Boiled Eggs,4 eggs,"Air Fryer Recipes, Dairy Free, Gluten Free, Ke..."
2,,"['Air Fryer directions:', 'Preheat air fryer t...",['American'],https://www.skinnytaste.com/wp-content/uploads...,"['olive oil spray', '4 about 5 ounce each salm...",5 minutes,Air Fryer Basil-Parmesan Salmon,4 servings,"Air Fryer Recipes, Gluten Free, Keto Recipes, ..."
3,,"['Preheat the oven to 400F.', 'Pour 2 tablespo...",['American'],https://www.skinnytaste.com/wp-content/uploads...,['1/2 cup freshly grated Parmesan (not pre-gra...,15 minutes,Everything Parmesan Crisps,4 servings,"Gluten Free, Keto Recipes, Kid Friendly, Low C..."
4,,['Cook potatoes in a large pot of salted water...,['American'],https://www.skinnytaste.com/wp-content/uploads...,['3 1/2 pounds new potatoes (about 10 peeled a...,10 minutes,Potato and Green Bean Salad,12 servings,"Dairy Free, Gluten Free, Kid Friendly, Vegetar..."
...,...,...,...,...,...,...,...,...,...
1393,,['Brown meat on high heat in large saute pan a...,['American'],https://www.skinnytaste.com/wp-content/uploads...,"['1/2 large chopped onion', '2 cloves garlic (...",30 minutes,Cuban Picadillo,6 Servings,"Dairy Free, Freezer Meals, Gluten Free, Keto R..."
1394,,"['In a 5 quart saucepan, add all ingredients.'...",['American'],https://www.skinnytaste.com/wp-content/uploads...,"['1 large head cauliflower - chopped', '1/2 cu...",5 minutes,Dad's Cauliflower Soup,6 servings,"Dairy Free, Freezer Meals, Gluten Free, Kid Fr..."
1395,,"['Soak bean over night. Discard water.', 'In a...",['Brazilian'],https://www.skinnytaste.com/wp-content/uploads...,"['Ingredients:', '1 smoked ham hock (or smoked...",15 minutes,Brazilian Black Beans,8 servings,"Freezer Meals, Gluten Free, Pressure Cooker Re..."
1396,,"['Preheat broiler.', 'Line broiling pan with a...",['American'],https://www.skinnytaste.com/wp-content/uploads...,['1 tbsp finely chopped fresh oregano or 1 tsp...,,Broiled Tilapia Oreganata,4 servings,"Kid Friendly, Low Carb"


In [6]:
df2 = df2.drop(columns=['category', 'cuisine', 'prep_time', 'serves', 'tags'])
df2.head()

Unnamed: 0,cooking_method,image,ingredients,recipe_name
0,['Set the racks in the middle and upper thirds...,https://www.skinnytaste.com/wp-content/uploads...,"['1 tablespoons extra virgin olive oil', '1 cu...",Mediterranean Sea Bass
1,['Place the eggs in the air fryer basket and c...,https://www.skinnytaste.com/wp-content/uploads...,"['4 large eggs', 'Salt (black pepper, everythi...",Air Fryer Hard Boiled Eggs
2,"['Air Fryer directions:', 'Preheat air fryer t...",https://www.skinnytaste.com/wp-content/uploads...,"['olive oil spray', '4 about 5 ounce each salm...",Air Fryer Basil-Parmesan Salmon
3,"['Preheat the oven to 400F.', 'Pour 2 tablespo...",https://www.skinnytaste.com/wp-content/uploads...,['1/2 cup freshly grated Parmesan (not pre-gra...,Everything Parmesan Crisps
4,['Cook potatoes in a large pot of salted water...,https://www.skinnytaste.com/wp-content/uploads...,['3 1/2 pounds new potatoes (about 10 peeled a...,Potato and Green Bean Salad


## Case Folding

In [15]:
# lower ingredients
ing = df2.ingredients.str.lower()

print('Case Folding Result : \n')
print(ing.head())
print('\n\n\n')

Case Folding Result : 

0    ['1 tablespoons extra virgin olive oil', '1 cu...
1    ['4 large eggs', 'salt (black pepper, everythi...
2    ['olive oil spray', '4 about 5 ounce each salm...
3    ['1/2 cup freshly grated parmesan (not pre-gra...
4    ['3 1/2 pounds new potatoes (about 10 peeled a...
Name: ingredients, dtype: object






## Remove Character

In [11]:
#Remove Character Ingredients
ing2 = df2.ingredients.apply(str)

def remove_character(data):
    return re.sub(r"'", "", data)

ing2 = ing2.apply(remove_character)

def listToStringWithoutBrackets(data):
    return str(data).replace('[','').replace(']','')

ing2 = ing2.apply(listToStringWithoutBrackets)

print(ing2)

0       1 tablespoons extra virgin olive oil, 1 cup ch...
1       4 large eggs, Salt (black pepper, everything b...
2       olive oil spray, 4 about 5 ounce each salmon f...
3       1/2 cup freshly grated Parmesan (not pre-grate...
4       3 1/2 pounds new potatoes (about 10 peeled and...
                              ...                        
1393    1/2 large chopped onion, 2 cloves garlic (minc...
1394    1 large head cauliflower - chopped, 1/2 cup ch...
1395    Ingredients:, 1 smoked ham hock (or smoked por...
1396    1 tbsp finely chopped fresh oregano or 1 tsp d...
1397    16 ounce bag of dried split peas, 2 teaspoons ...
Name: ingredients, Length: 1383, dtype: object


In [12]:
#Remove Character Ingredients
cookmet = df2.cooking_method.apply(str)

def remove_character2(data2):
    return re.sub(r"'", "", data2)

cookmet = cookmet.apply(remove_character2)

def listToStringWithoutBrackets2(data2):
    return str(data2).replace('[','').replace(']','')

cookmet = cookmet.apply(listToStringWithoutBrackets2)

print(cookmet)

0       Set the racks in the middle and upper thirds o...
1       Place the eggs in the air fryer basket and coo...
2       Air Fryer directions:, Preheat air fryer to 40...
3       Preheat the oven to 400F., Pour 2 tablespoon o...
4       Cook potatoes in a large pot of salted water u...
                              ...                        
1393    Brown meat on high heat in large saute pan and...
1394    In a 5 quart saucepan, add all ingredients., C...
1395    Soak bean over night. Discard water., In a pre...
1396    Preheat broiler., Line broiling pan with alumi...
1397    Wash the peas and drain., In a large pot or Du...
Name: cooking_method, Length: 1383, dtype: object


## Tokenizing

In [13]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [16]:
def remove_tweet_special(text):
    # remove tab, new line, ans back slice
    text = text.replace('\\t'," ").replace('\\n'," ").replace('\\u'," ").replace('\\',"")
    # remove non ASCII (emoticon, chinese word, .etc)
    text = text.encode('ascii', 'replace').decode('ascii')
    # remove mention, link, hashtag
    text = ' '.join(re.sub("([@#][A-Za-z0-9]+)|(\w+:\/\/\S+)"," ", text).split())
    # remove incomplete URL
    return text.replace("http://", " ").replace("https://", " ")
                
ing = ing.apply(remove_tweet_special)

#remove number
def remove_number(text):
    return  re.sub(r"\d+", "", text)

ing = ing.apply(remove_number)

#remove punctuation
def remove_punctuation(text):
    return text.translate(str.maketrans("","",string.punctuation))
ing = ing.apply(remove_punctuation)

#remove whitespace leading & trailing
def remove_whitespace_LT(text):
    return text.strip()

ing = ing.apply(remove_whitespace_LT)

#remove multiple whitespace into single whitespace
def remove_whitespace_multiple(text):
    return re.sub('\s+',' ',text)

ing = ing.apply(remove_whitespace_multiple)

# remove single char
def remove_singl_char(text):
    return re.sub(r"\b[a-zA-Z]\b", "", text)

ing = ing.apply(remove_singl_char)

# NLTK word rokenize 
def word_tokenize_wrapper(text):
    return word_tokenize(text)

ing_tokens = ing.apply(word_tokenize_wrapper)

print('Tokenizing Result : \n') 
print(ing_tokens.head())
print('\n\n\n')

Tokenizing Result : 

0    [tablespoons, extra, virgin, olive, oil, cup, ...
1    [large, eggs, salt, black, pepper, everything,...
2    [olive, oil, spray, about, ounce, each, salmon...
3    [cup, freshly, grated, parmesan, not, pregrate...
4    [pounds, new, potatoes, about, peeled, and, cu...
Name: ingredients, dtype: object






In [17]:
# NLTK calc frequency distribution
def freqDist_wrapper(text):
    return FreqDist(text)

ing_tokens_fdist = ing_tokens.apply(freqDist_wrapper)

print('Frequency Tokens : \n') 
print(ing_tokens_fdist.head().apply(lambda x : x.most_common()))

Frequency Tokens : 

0    [(cup, 5), (olive, 3), (oil, 3), (pepper, 3), ...
1    [(large, 1), (eggs, 1), (salt, 1), (black, 1),...
2    [(tablespoons, 2), (olive, 1), (oil, 1), (spra...
3    [(teaspoon, 4), (seeds, 2), (minced, 2), (drie...
4    [(and, 2), (beans, 2), (black, 2), (cup, 2), (...
Name: ingredients, dtype: object


## Filtering

In [18]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [19]:
list_stopwords = stopwords.words('english')

In [20]:
print(stopwords.words('english'))

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

In [21]:
list_stopwords.extend(["cubes","center","separated","deveined","country","style","weight","short","ingredients","bag","split","small","thinly","ounces","tsp","lbs","lean","aka","could","use","divided","check","labels","used","optional","optionals","option","options","chunks","bitesize","ml","reduce","reduced","pound","neverany","abf","top","round","teaspoons","tablespoons","extra","cup","chopped","teaspoon","crushed","taste","one","ounce","tablespoon","large","halved","freshly","four","everything","optional","about","each","ground","fresh","plus","grated","pre","using","grater","pounds","new","peeled","cut","inch","pieces","lb","ends","trimmed","oz","sliced","drained","high","heat","medium","roughly","reserved","fine","frain","cups","not","hulled","loosely","packed","serve","cooking","spray","diced","leftover","frozen","steamed","half","amp","pinch","washed","tbsp","all","purpose","whole","regular","if","any","store","bought","homemade","see","recipe","above","into","thin","strips","suggest","finely","dried","shredded","removed","pregrated","holes","box"])

In [22]:
def stopwords_removal(words):
    return [word for word in words if word not in list_stopwords]

ing_tokens_WSW = ing_tokens_fdist.apply(stopwords_removal) 
print(ing_tokens_WSW.head())

0    [olive, oil, pepper, kosher, salt, bass, fille...
1        [eggs, salt, black, pepper, bagel, seasoning]
2    [olive, oil, salmon, fillets, skin, lemon, kos...
3    [seeds, minced, flakes, parmesan, sesame, onio...
4    [beans, black, potatoes, green, olives, balsam...
Name: ingredients, dtype: object


## Stemming

In [23]:
st = nltk.PorterStemmer()
def stemming_on_text(data):
    text = [st.stem(word) for word in data]
    return data
ing_stm= ing_tokens_WSW.swifter.apply(lambda x: stemming_on_text(x))
ing_stm.head()

Pandas Apply:   0%|          | 0/1383 [00:00<?, ?it/s]

0    [olive, oil, pepper, kosher, salt, bass, fille...
1        [eggs, salt, black, pepper, bagel, seasoning]
2    [olive, oil, salmon, fillets, skin, lemon, kos...
3    [seeds, minced, flakes, parmesan, sesame, onio...
4    [beans, black, potatoes, green, olives, balsam...
Name: ingredients, dtype: object

## Lemmatization

In [24]:
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [25]:
lm = nltk.WordNetLemmatizer()
def lemmatizer_on_text(data):
    text = [lm.lemmatize(word) for word in data]
    return data
ing_lm = ing_stm.apply(lambda x: lemmatizer_on_text(x))
ing_lm.head()

0    [olive, oil, pepper, kosher, salt, bass, fille...
1        [eggs, salt, black, pepper, bagel, seasoning]
2    [olive, oil, salmon, fillets, skin, lemon, kos...
3    [seeds, minced, flakes, parmesan, sesame, onio...
4    [beans, black, potatoes, green, olives, balsam...
Name: ingredients, dtype: object

### Save Data

In [26]:
df2['Ingredients'] = ing_lm
df2['ingredient'] = ing2
df2['Cooking_Method'] = cookmet
df2

Unnamed: 0,cooking_method,image,ingredients,recipe_name,Ingredients,ingredient,Cooking_Method
0,['Set the racks in the middle and upper thirds...,https://www.skinnytaste.com/wp-content/uploads...,"['1 tablespoons extra virgin olive oil', '1 cu...",Mediterranean Sea Bass,"[olive, oil, pepper, kosher, salt, bass, fille...","1 tablespoons extra virgin olive oil, 1 cup ch...",Set the racks in the middle and upper thirds o...
1,['Place the eggs in the air fryer basket and c...,https://www.skinnytaste.com/wp-content/uploads...,"['4 large eggs', 'Salt (black pepper, everythi...",Air Fryer Hard Boiled Eggs,"[eggs, salt, black, pepper, bagel, seasoning]","4 large eggs, Salt (black pepper, everything b...",Place the eggs in the air fryer basket and coo...
2,"['Air Fryer directions:', 'Preheat air fryer t...",https://www.skinnytaste.com/wp-content/uploads...,"['olive oil spray', '4 about 5 ounce each salm...",Air Fryer Basil-Parmesan Salmon,"[olive, oil, salmon, fillets, skin, lemon, kos...","olive oil spray, 4 about 5 ounce each salmon f...","Air Fryer directions:, Preheat air fryer to 40..."
3,"['Preheat the oven to 400F.', 'Pour 2 tablespo...",https://www.skinnytaste.com/wp-content/uploads...,['1/2 cup freshly grated Parmesan (not pre-gra...,Everything Parmesan Crisps,"[seeds, minced, flakes, parmesan, sesame, onio...",1/2 cup freshly grated Parmesan (not pre-grate...,"Preheat the oven to 400F., Pour 2 tablespoon o..."
4,['Cook potatoes in a large pot of salted water...,https://www.skinnytaste.com/wp-content/uploads...,['3 1/2 pounds new potatoes (about 10 peeled a...,Potato and Green Bean Salad,"[beans, black, potatoes, green, olives, balsam...",3 1/2 pounds new potatoes (about 10 peeled and...,Cook potatoes in a large pot of salted water u...
...,...,...,...,...,...,...,...
1393,['Brown meat on high heat in large saute pan a...,https://www.skinnytaste.com/wp-content/uploads...,"['1/2 large chopped onion', '2 cloves garlic (...",Cuban Picadillo,"[pepper, onion, cloves, garlic, minced, mato, ...","1/2 large chopped onion, 2 cloves garlic (minc...",Brown meat on high heat in large saute pan and...
1394,"['In a 5 quart saucepan, add all ingredients.'...",https://www.skinnytaste.com/wp-content/uploads...,"['1 large head cauliflower - chopped', '1/2 cu...",Dad's Cauliflower Soup,"[head, cauliflower, onions, water, chicken, bo...","1 large head cauliflower - chopped, 1/2 cup ch...","In a 5 quart saucepan, add all ingredients., C..."
1395,"['Soak bean over night. Discard water.', 'In a...",https://www.skinnytaste.com/wp-content/uploads...,"['Ingredients:', '1 smoked ham hock (or smoked...",Brazilian Black Beans,"[smoked, ham, hock, pork, chop, bay, leaves, p...","Ingredients:, 1 smoked ham hock (or smoked por...","Soak bean over night. Discard water., In a pre..."
1396,"['Preheat broiler.', 'Line broiling pan with a...",https://www.skinnytaste.com/wp-content/uploads...,['1 tbsp finely chopped fresh oregano or 1 tsp...,Broiled Tilapia Oreganata,"[oregano, tilapia, fillets, salt, pepper, oliv...",1 tbsp finely chopped fresh oregano or 1 tsp d...,"Preheat broiler., Line broiling pan with alumi..."


In [27]:
df2.to_csv("recipe.csv")

# Recommendation

In [28]:
# loading the data from the csv file to apandas dataframe
recipes_data = pd.read_csv("recipe.csv").dropna()
recipes_data.head()

Unnamed: 0.1,Unnamed: 0,cooking_method,image,ingredients,recipe_name,Ingredients,ingredient,Cooking_Method
0,0,['Set the racks in the middle and upper thirds...,https://www.skinnytaste.com/wp-content/uploads...,"['1 tablespoons extra virgin olive oil', '1 cu...",Mediterranean Sea Bass,"['olive', 'oil', 'pepper', 'kosher', 'salt', '...","1 tablespoons extra virgin olive oil, 1 cup ch...",Set the racks in the middle and upper thirds o...
1,1,['Place the eggs in the air fryer basket and c...,https://www.skinnytaste.com/wp-content/uploads...,"['4 large eggs', 'Salt (black pepper, everythi...",Air Fryer Hard Boiled Eggs,"['eggs', 'salt', 'black', 'pepper', 'bagel', '...","4 large eggs, Salt (black pepper, everything b...",Place the eggs in the air fryer basket and coo...
2,2,"['Air Fryer directions:', 'Preheat air fryer t...",https://www.skinnytaste.com/wp-content/uploads...,"['olive oil spray', '4 about 5 ounce each salm...",Air Fryer Basil-Parmesan Salmon,"['olive', 'oil', 'salmon', 'fillets', 'skin', ...","olive oil spray, 4 about 5 ounce each salmon f...","Air Fryer directions:, Preheat air fryer to 40..."
3,3,"['Preheat the oven to 400F.', 'Pour 2 tablespo...",https://www.skinnytaste.com/wp-content/uploads...,['1/2 cup freshly grated Parmesan (not pre-gra...,Everything Parmesan Crisps,"['seeds', 'minced', 'flakes', 'parmesan', 'ses...",1/2 cup freshly grated Parmesan (not pre-grate...,"Preheat the oven to 400F., Pour 2 tablespoon o..."
4,4,['Cook potatoes in a large pot of salted water...,https://www.skinnytaste.com/wp-content/uploads...,['3 1/2 pounds new potatoes (about 10 peeled a...,Potato and Green Bean Salad,"['beans', 'black', 'potatoes', 'green', 'olive...",3 1/2 pounds new potatoes (about 10 peeled and...,Cook potatoes in a large pot of salted water u...


In [29]:
recipes_data = recipes_data.drop(columns=['ingredients', 'Unnamed: 0'])

recipes_data.head()

Unnamed: 0,cooking_method,image,recipe_name,Ingredients,ingredient,Cooking_Method
0,['Set the racks in the middle and upper thirds...,https://www.skinnytaste.com/wp-content/uploads...,Mediterranean Sea Bass,"['olive', 'oil', 'pepper', 'kosher', 'salt', '...","1 tablespoons extra virgin olive oil, 1 cup ch...",Set the racks in the middle and upper thirds o...
1,['Place the eggs in the air fryer basket and c...,https://www.skinnytaste.com/wp-content/uploads...,Air Fryer Hard Boiled Eggs,"['eggs', 'salt', 'black', 'pepper', 'bagel', '...","4 large eggs, Salt (black pepper, everything b...",Place the eggs in the air fryer basket and coo...
2,"['Air Fryer directions:', 'Preheat air fryer t...",https://www.skinnytaste.com/wp-content/uploads...,Air Fryer Basil-Parmesan Salmon,"['olive', 'oil', 'salmon', 'fillets', 'skin', ...","olive oil spray, 4 about 5 ounce each salmon f...","Air Fryer directions:, Preheat air fryer to 40..."
3,"['Preheat the oven to 400F.', 'Pour 2 tablespo...",https://www.skinnytaste.com/wp-content/uploads...,Everything Parmesan Crisps,"['seeds', 'minced', 'flakes', 'parmesan', 'ses...",1/2 cup freshly grated Parmesan (not pre-grate...,"Preheat the oven to 400F., Pour 2 tablespoon o..."
4,['Cook potatoes in a large pot of salted water...,https://www.skinnytaste.com/wp-content/uploads...,Potato and Green Bean Salad,"['beans', 'black', 'potatoes', 'green', 'olive...",3 1/2 pounds new potatoes (about 10 peeled and...,Cook potatoes in a large pot of salted water u...


In [30]:
recipes_data = recipes_data.reset_index(drop=True)

In [32]:
recipes_data['index'] = range(0, 1380, 1)
recipes_data

Unnamed: 0,cooking_method,image,recipe_name,Ingredients,ingredient,Cooking_Method,index
0,['Set the racks in the middle and upper thirds...,https://www.skinnytaste.com/wp-content/uploads...,Mediterranean Sea Bass,"['olive', 'oil', 'pepper', 'kosher', 'salt', '...","1 tablespoons extra virgin olive oil, 1 cup ch...",Set the racks in the middle and upper thirds o...,0
1,['Place the eggs in the air fryer basket and c...,https://www.skinnytaste.com/wp-content/uploads...,Air Fryer Hard Boiled Eggs,"['eggs', 'salt', 'black', 'pepper', 'bagel', '...","4 large eggs, Salt (black pepper, everything b...",Place the eggs in the air fryer basket and coo...,1
2,"['Air Fryer directions:', 'Preheat air fryer t...",https://www.skinnytaste.com/wp-content/uploads...,Air Fryer Basil-Parmesan Salmon,"['olive', 'oil', 'salmon', 'fillets', 'skin', ...","olive oil spray, 4 about 5 ounce each salmon f...","Air Fryer directions:, Preheat air fryer to 40...",2
3,"['Preheat the oven to 400F.', 'Pour 2 tablespo...",https://www.skinnytaste.com/wp-content/uploads...,Everything Parmesan Crisps,"['seeds', 'minced', 'flakes', 'parmesan', 'ses...",1/2 cup freshly grated Parmesan (not pre-grate...,"Preheat the oven to 400F., Pour 2 tablespoon o...",3
4,['Cook potatoes in a large pot of salted water...,https://www.skinnytaste.com/wp-content/uploads...,Potato and Green Bean Salad,"['beans', 'black', 'potatoes', 'green', 'olive...",3 1/2 pounds new potatoes (about 10 peeled and...,Cook potatoes in a large pot of salted water u...,4
...,...,...,...,...,...,...,...
1375,['Brown meat on high heat in large saute pan a...,https://www.skinnytaste.com/wp-content/uploads...,Cuban Picadillo,"['pepper', 'onion', 'cloves', 'garlic', 'mince...","1/2 large chopped onion, 2 cloves garlic (minc...",Brown meat on high heat in large saute pan and...,1375
1376,"['In a 5 quart saucepan, add all ingredients.'...",https://www.skinnytaste.com/wp-content/uploads...,Dad's Cauliflower Soup,"['head', 'cauliflower', 'onions', 'water', 'ch...","1 large head cauliflower - chopped, 1/2 cup ch...","In a 5 quart saucepan, add all ingredients., C...",1376
1377,"['Soak bean over night. Discard water.', 'In a...",https://www.skinnytaste.com/wp-content/uploads...,Brazilian Black Beans,"['smoked', 'ham', 'hock', 'pork', 'chop', 'bay...","Ingredients:, 1 smoked ham hock (or smoked por...","Soak bean over night. Discard water., In a pre...",1377
1378,"['Preheat broiler.', 'Line broiling pan with a...",https://www.skinnytaste.com/wp-content/uploads...,Broiled Tilapia Oreganata,"['oregano', 'tilapia', 'fillets', 'salt', 'pep...",1 tbsp finely chopped fresh oregano or 1 tsp d...,"Preheat broiler., Line broiling pan with alumi...",1378


In [33]:
# selecting the relevant features for recommendation
selected_features = ['Ingredients']
print(selected_features)

['Ingredients']


In [34]:
# replacing the null valuess with null string
for feature in selected_features:
  recipes_data[feature] = recipes_data[feature].fillna('')

In [35]:
# selected features
features = recipes_data['Ingredients']
print(features)

0       ['olive', 'oil', 'pepper', 'kosher', 'salt', '...
1       ['eggs', 'salt', 'black', 'pepper', 'bagel', '...
2       ['olive', 'oil', 'salmon', 'fillets', 'skin', ...
3       ['seeds', 'minced', 'flakes', 'parmesan', 'ses...
4       ['beans', 'black', 'potatoes', 'green', 'olive...
                              ...                        
1375    ['pepper', 'onion', 'cloves', 'garlic', 'mince...
1376    ['head', 'cauliflower', 'onions', 'water', 'ch...
1377    ['smoked', 'ham', 'hock', 'pork', 'chop', 'bay...
1378    ['oregano', 'tilapia', 'fillets', 'salt', 'pep...
1379    ['peas', 'olive', 'oil', 'onion', 'cloves', 'g...
Name: Ingredients, Length: 1380, dtype: object


In [36]:
# converting the text data to feature vectors
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(features)
print(feature_vectors)

  (0, 561)	0.16725140067710972
  (0, 151)	0.07560951148963109
  (0, 1124)	0.18428914281430112
  (0, 1619)	0.2435160139480384
  (0, 362)	0.23369328901675684
  (0, 725)	0.21984893579642742
  (0, 598)	0.1787400353326364
  (0, 1822)	0.2573603671683678
  (0, 595)	0.21984893579642742
  (0, 1637)	0.2573603671683678
  (0, 1448)	0.17112093795173797
  (0, 308)	0.2573603671683678
  (0, 1512)	0.1122981469929893
  (0, 1113)	0.149566556867257
  (0, 834)	0.18635913271353485
  (0, 1244)	0.16605322944925363
  (0, 1832)	0.1167184119534221
  (0, 1821)	0.09625016622227675
  (0, 519)	0.13548089963001692
  (0, 885)	0.10627673878160988
  (0, 117)	0.11644193776040784
  (0, 827)	0.2573603671683678
  (0, 1716)	0.0960980752078572
  (0, 577)	0.2024071134842474
  (0, 604)	0.12815157238670283
  :	:
  (1378, 1200)	0.12291502847010528
  (1378, 1108)	0.1339168910964221
  (1378, 1112)	0.1501696751916381
  (1379, 174)	0.2587914525747976
  (1379, 1367)	0.3711815617357355
  (1379, 997)	0.269906892038473
  (1379, 81)	0.371

## Cosine Similarity

In [37]:
# getting the similarity scores using cosine similarity
similarity = cosine_similarity(feature_vectors)
print(similarity)

[[1.         0.03303096 0.14780993 ... 0.10983587 0.13857582 0.04540229]
 [0.03303096 1.         0.055518   ... 0.04801837 0.03557838 0.02230521]
 [0.14780993 0.055518   1.         ... 0.09486906 0.21836844 0.10377542]
 ...
 [0.10983587 0.04801837 0.09486906 ... 1.         0.07578148 0.30738592]
 [0.13857582 0.03557838 0.21836844 ... 0.07578148 1.         0.05542579]
 [0.04540229 0.02230521 0.10377542 ... 0.30738592 0.05542579 1.        ]]


In [38]:
print(similarity.shape)

(1380, 1380)


## Getting the ingredients from the user

In [39]:
# getting the ingredients from the user
ingred = input(' Enter your ingredients : ')

 Enter your ingredients : Chicken


In [40]:
# creating a list with all the recipe names given in the dataset
list_of_all_recipe = recipes_data['recipe_name'].tolist()
print(list_of_all_recipe)

['Mediterranean Sea Bass', 'Air Fryer Hard Boiled Eggs', 'Air Fryer Basil-Parmesan Salmon', 'Everything Parmesan Crisps', 'Potato and Green Bean Salad', 'Green Detox Soup with Toasted Hemp Gremolata', 'Ham and Swiss Crustless Quiche', 'Strawberries Romanoff', 'Smoked Salmon Breakfast Flatbread', 'Ranch Chicken Salad', 'Avocado Egg Rolls with Sweet and Spicy Dipping Sauce', 'Quickest Cast-Iron Thin Crust Pizza', 'Tex-Mex Migas', 'Key Lime Yogurt Pie', 'Chicken Parmesan Lasagna', 'Chicken and Asparagus Lemon Stir Fry', 'Turkey Meatloaf', 'Steamed Mussels with Piri Piri Sauce', 'Chicken Club Lettuce Wrap Sandwich', 'Houston’s Grilled Chicken Salad', 'Zucchini Banana Cake with Cream Cheese Frosting', 'Grilled Pesto Chicken Couscous Bowls', 'How To Make Nut Butter', 'Grilled Lobster Tails with Herb Garlic Butter', 'Low Carb Potato Salad', 'Grilled Corn Salad with Feta', 'Soy Marinated Flank Steak', 'Taco Empanadas', 'Shrimp Scampi Foil Packets', 'Potato and Broccoli Cakes', 'BBQ Chicken Que

In [41]:
# finding the close match for the recipe given by the user
find_close_match = difflib.get_close_matches(ingred, list_of_all_recipe, 3, 0.1)
print(find_close_match)

['Chicken Piccata', 'Chicken Rollatini', 'BBQ Chicken Salad']


In [42]:
close_match = find_close_match[0]
print(close_match)

Chicken Piccata


In [43]:
# finding the index of the recipe with ingredient
index_of_the_recipe = recipes_data[recipes_data.recipe_name == close_match]['index'].values[0]
print(index_of_the_recipe)

1097


In [44]:
# getting a list of similar recipe
similarity_score = list(enumerate(similarity[index_of_the_recipe]))
print(similarity_score)

[(0, 0.12006401968911395), (1, 0.03982101260874494), (2, 0.09606428269788428), (3, 0.0), (4, 0.058989802171716954), (5, 0.14006635638272685), (6, 0.02749360597606991), (7, 0.03409843531188253), (8, 0.11340563287484236), (9, 0.08387425737863055), (10, 0.0599022307453435), (11, 0.010672199123573346), (12, 0.0805397951843783), (13, 0.13276756532306763), (14, 0.14826140594863746), (15, 0.11638522330806893), (16, 0.12089720879131842), (17, 0.1712264095279842), (18, 0.03357346554173134), (19, 0.14793842708633803), (20, 0.06198764108630862), (21, 0.07673312625736653), (22, 0.0), (23, 0.11642348148701229), (24, 0.04873101747339345), (25, 0.08645888325828009), (26, 0.05393899271202851), (27, 0.03540581170596117), (28, 0.2507219305834903), (29, 0.025548922662097934), (30, 0.0651576649280668), (31, 0.2170836221586472), (32, 0.11099881434761276), (33, 0.005845615371015288), (34, 0.0), (35, 0.10833003789875016), (36, 0.03887477952822859), (37, 0.08840076229111989), (38, 0.03546630373145182), (39, 0

In [45]:
len(similarity_score)

1380

In [46]:
# sorting the recipe based on their similarity score
sorted_similar_recipe = sorted(similarity_score, key = lambda x:x[1], reverse = True) 
print(sorted_similar_recipe)

[(1097, 1.0), (965, 0.7817918145191036), (892, 0.4792470243479494), (775, 0.4355963511937373), (1093, 0.3652719555549098), (265, 0.31868864244195516), (1074, 0.3003425761925309), (653, 0.29203362458479526), (305, 0.2916807290082687), (1255, 0.2896453277339307), (60, 0.2810414956302987), (65, 0.2802216180241514), (754, 0.27724253369163293), (1198, 0.27521005132579485), (472, 0.27431992670219957), (491, 0.2707464209881089), (689, 0.2659742522756448), (1256, 0.26491902829185066), (445, 0.2624982465709615), (173, 0.26165114025273306), (393, 0.2582648415333079), (28, 0.2507219305834903), (1302, 0.24857013807043776), (607, 0.24787865791781938), (488, 0.2478413368150518), (140, 0.24749973055573393), (660, 0.24288272496287422), (532, 0.2416640178588316), (740, 0.24142854182701945), (188, 0.23684295735875582), (287, 0.23509341904086264), (622, 0.23375666948264964), (699, 0.23375666948264964), (765, 0.23318469724050717), (396, 0.23295733957836784), (548, 0.23262118049558317), (430, 0.23243781521

In [47]:
# print the name of similar recipe based on the index
print('Recipe suggested for you : \n')
i = 1
for recipe in sorted_similar_recipe:
  index = recipe[0]
  title_from_index = recipes_data[recipes_data.index==index]['recipe_name'].values[0]
  if (i<11):
    print(i, '.',title_from_index)
    i+=1

Recipe suggested for you : 

1 . Chicken Piccata
2 . Flounder Piccata
3 . Chicken Francese - Lightened Up
4 . Flounder Milanese with Arugula and Tomatoes
5 . Lighter Chicken Saltimbocca 
6 . Chicken Marsala Meatballs
7 . Greek Turkey Meatballs
8 . Chicken Ropa Vieja
9 . Skillet Lemon Chicken with Olives and Herbs
10 . Chicken Divan, Lightened Up


# Recipe Recommendation System

In [48]:
# getting the ingredients from the user
ingred = input('Enter your ingredients : ')

# creating a list with all the recipe names given in the dataset
list_of_all_recipe = recipes_data['recipe_name'].tolist()

# finding the close match for the recipe given by the user
find_close_match = difflib.get_close_matches(ingred, list_of_all_recipe, 3, 0.1)
close_match = find_close_match[0]

# finding the index of the recipe with ingredient
index_of_the_recipe = recipes_data[recipes_data.recipe_name == close_match]['index'].values[0]

# getting a list of similar recipe
similarity_score = list(enumerate(similarity[index_of_the_recipe]))
len(similarity_score)

# sorting the recipe based on their similarity score
sorted_similar_recipe = sorted(similarity_score, key = lambda x:x[1], reverse = True)

# print the name of similar recipe based on the index
print('Recipe suggested for you : \n')
i = 1
for recipe in sorted_similar_recipe:
  index = recipe[0]
  title_from_index = recipes_data[recipes_data.index==index]['recipe_name'].values[0]
  if (i<11):
    print(i, ".", title_from_index)
    i+=1

Enter your ingredients : Banana
Recipe suggested for you : 

1 . Baked Bananas
2 . Bali Banana Date Smoothie
3 . Baked Pears with Walnuts and Honey
4 . Banana Nut Pancakes
5 . Strawberry Rhubarb Baked Oatmeal
6 . Chewy Low Fat Banana Nut Oatmeal Cookies
7 . 4-Ingredient Flourless Banana-Nut Pancakes
8 . Skinny Green Tropical Smoothie
9 . Piña Banana Colada Smoothie
10 . Blueberry Banana Oatmeal Smoothie


# Save

In [49]:
# Save dataframe
pickle.dump(recipes_data.to_dict(), open('recipes_dict.pkl', 'wb'))

In [50]:
# Save similarity
pickle.dump(similarity, open('similarity.pkl', 'wb'))