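# Recipe Recommendation

This notebook recommends recipes similar to a query (calories, protein, minutes, number of steps and an ingredient string), using TF-IDF ingredient features combined with scaled numeric features, first with k-nearest neighbours and then with cosine similarity.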

# Step 1: Load the Dataset


In [1]:
import pandas as pd

# Load the dataset
# The path is relative, so the CSV is looked up in the current working directory.
file_path = 'recipe_cleaned.csv'
recipe_df = pd.read_csv(file_path)

# Preview the first rows to sanity-check the columns and values.
recipe_df.head()

Unnamed: 0.1,Unnamed: 0,recipe_id,recipe_name,ingredients_list,minutes,n_steps,precedure_steps,tags,description,calories(in kcal),protein(in PDV)
0,0,137739,arriba baked winter squash mexican style,"['winter squash', 'mexican seasoning', 'mixed ...",55,11,"['make a choice and proceed with recipe', 'dep...","['60-minutes-or-less', 'time-to-make', 'course...",autumn is my favorite time of year to cook! th...,51.5,2.0
1,1,31490,a bit different breakfast pizza,"['prepared pizza crust', 'sausage patty', 'egg...",30,9,"['preheat oven to 425 degrees f', 'press dough...","['30-minutes-or-less', 'time-to-make', 'course...",this recipe calls for the crust to be prebaked...,173.4,22.0
2,2,112140,all in the kitchen chili,"['ground beef', 'yellow onions', 'diced tomato...",130,6,"['brown ground beef in large pot', 'add choppe...","['time-to-make', 'course', 'preparation', 'mai...",this modified version of 'mom's' chili was a h...,269.8,39.0
3,3,59389,alouette potatoes,"['spreadable cheese with garlic and herbs', 'n...",45,11,['place potatoes in a large pot of lightly sal...,"['60-minutes-or-less', 'time-to-make', 'course...","this is a super easy, great tasting, make ahea...",368.1,14.0
4,5,25274,aww marinated olives,"['fennel seeds', 'green olives', 'ripe olives'...",15,4,['toast the fennel seeds and lightly crush the...,"['15-minutes-or-less', 'time-to-make', 'course...",my italian mil was thoroughly impressed by my ...,380.7,6.0


In [2]:
# Number of rows in the loaded recipe table.
len(recipe_df)

178023

In [3]:
# Remove the 'Unnamed: 0' column (presumably a row index saved into the CSV).
# errors='ignore' makes the cell idempotent: re-running it once the column is
# already gone is a no-op instead of raising KeyError.
recipe_df = recipe_df.drop(columns='Unnamed: 0', errors='ignore')

In [4]:
# Column names that remain after dropping 'Unnamed: 0'.
recipe_df.columns

Index(['recipe_id', 'recipe_name', 'ingredients_list', 'minutes', 'n_steps',
       'precedure_steps', 'tags', 'description', 'calories(in kcal)',
       'protein(in PDV)'],
      dtype='object')

In [5]:
# Per-column dtypes and non-null counts, to check for missing values.
recipe_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178023 entries, 0 to 178022
Data columns (total 10 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   recipe_id          178023 non-null  int64  
 1   recipe_name        178023 non-null  object 
 2   ingredients_list   178023 non-null  object 
 3   minutes            178023 non-null  int64  
 4   n_steps            178023 non-null  int64  
 5   precedure_steps    178023 non-null  object 
 6   tags               178023 non-null  object 
 7   description        178023 non-null  object 
 8   calories(in kcal)  178023 non-null  float64
 9   protein(in PDV)    178023 non-null  float64
dtypes: float64(2), int64(3), object(5)
memory usage: 13.6+ MB


In [6]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
recipe_df.describe()

Unnamed: 0,recipe_id,minutes,n_steps,calories(in kcal),protein(in PDV)
count,178023.0,178023.0,178023.0,178023.0,178023.0
mean,227797.684726,38.08645,8.787219,314.115567,24.274554
std,139085.391976,26.071078,4.389725,203.118282,23.94734
min,40.0,0.0,0.0,0.0,0.0
25%,108469.5,20.0,5.0,157.2,6.0
50%,215661.0,31.0,8.0,275.3,14.0
75%,338013.0,50.0,12.0,431.65,39.0
max,537716.0,132.0,21.0,988.9,96.0


## Necessary Imports

In [7]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline

### Vectorizer

In [9]:
# Preprocess Ingredients
# Fit TF-IDF on the ingredient strings of the first 50,000 rows only; the same
# row subset is used for the numeric features and the neighbour models.
vectorizer = TfidfVectorizer()
X_ingredients = vectorizer.fit_transform(
    recipe_df.iloc[:50000]['ingredients_list']
)

In [10]:
# (rows, vocabulary size) of the TF-IDF ingredient matrix.
X_ingredients.shape

(50000, 2969)

### Normalization

In [11]:
# Normalize Numerical Features
# Standardise the four numeric columns of the first 50,000 rows. The column
# order used here is the order query values must follow later on.
scaler = StandardScaler()
X_numerical = scaler.fit_transform(
    recipe_df.iloc[:50000][
        ['calories(in kcal)', 'protein(in PDV)', 'minutes', 'n_steps']
    ]
)

## KNN For the Recommendation

In [12]:
# Combine Features
# The sparse TF-IDF matrix is converted to a dense array and appended to the
# right of the scaled numeric columns, giving one dense row per recipe.
X_combined = np.concatenate((X_numerical, X_ingredients.toarray()), axis=1)

# Train KNN Model
# Euclidean nearest neighbours; three neighbours are returned by default.
knn = NearestNeighbors(metric='euclidean', n_neighbors=3)
knn.fit(X_combined)

## Main Recommendation Function

In [16]:
# Function to Recommend Recipes
def recommend_recipes(input_features, n_recommendations=None):
    """Recommend the recipes nearest to a query under the fitted KNN model.

    Parameters
    ----------
    input_features : sequence
        At least five values. The first four are the numeric features in the
        order the scaler was fitted with (calories, protein, minutes,
        n_steps); the fifth is a single string of ingredient words. Any
        further values are ignored.
    n_recommendations : int, optional
        Number of recipes to return. ``None`` (the default) falls back to the
        ``n_neighbors`` that ``knn`` was constructed with.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``recipe_df``, nearest first, restricted to the
        name, ingredients, steps and description columns.

    Raises
    ------
    ValueError
        If fewer than five values are supplied.
    """
    if len(input_features) < 5:
        raise ValueError(
            "input_features must hold four numeric values followed by an "
            "ingredient string"
        )

    # The scaler was fitted on a DataFrame; hand it the same column names so
    # scikit-learn does not warn about missing feature names.
    numeric_query = [list(input_features[:4])]
    feature_names = getattr(scaler, 'feature_names_in_', None)
    if feature_names is not None:
        numeric_query = pd.DataFrame(numeric_query, columns=feature_names)

    input_features_scaled = scaler.transform(numeric_query)
    input_ingredients_transformed = vectorizer.transform([input_features[4]])
    input_combined = np.hstack(
        [input_features_scaled, input_ingredients_transformed.toarray()]
    )

    # Only the neighbour positions are needed, so distances are not requested.
    indices = knn.kneighbors(
        input_combined, n_neighbors=n_recommendations, return_distance=False
    )
    # Positions refer to rows of the training slice, i.e. the leading rows of recipe_df.
    recommendations = recipe_df.iloc[indices[0]]
    return recommendations[['recipe_name', 'ingredients_list', 'precedure_steps', 'description']]

# Example Input
# Value order: calories (kcal), protein (PDV), minutes, n_steps, ingredient text.
# NOTE(review): 'floor' looks like a typo for 'flour'; a word the fitted
# vectorizer has never seen would add nothing to the query — confirm.
input_features = [15, 36, 20, 15,'floor, pulse, chicken, tomatoes']
recommendations = recommend_recipes(input_features)
recommendations



Unnamed: 0,recipe_name,ingredients_list,precedure_steps,description
32769,chicken corn egg flower soup,"['fat free chicken broth', 'gingerroot', 'corn...","['place broth and ginger in medium saucepan', ...","original recipe from kikkoman soy sauce, i cha..."
29474,ceviche tacos,"['limes', 'tomatoes', 'avocado', 'cilantro', '...",['finely grate rind from limes to measure 1 ta...,"cerviche is a mexican appetizer, kind of like ..."
21898,breakfast omelette ww,"['eggs', 'egg whites', 'salt', 'pepper', 'onio...","['in a bowl whisk together eggs , egg whites ,...",if following the ww flex plan this is 4 points...


## Using Cosine Similarity for the Recommendation

In [26]:
from sklearn.metrics.pairwise import cosine_similarity

# Function to Recommend Recipes Using Cosine Similarity
def recommend_recipes_cosine(input_features, n_recommendations=3):
    """Recommend the recipes most cosine-similar to a query.

    Parameters
    ----------
    input_features : sequence
        At least five values. The first four are the numeric features in the
        order the scaler was fitted with (calories, protein, minutes,
        n_steps); the fifth is a single string of ingredient words. Any
        further values are ignored.
    n_recommendations : int, optional
        Number of recipes to return (default 3).

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``recipe_df``, most similar first, restricted to
        the name, ingredients, steps and description columns.

    Raises
    ------
    ValueError
        If fewer than five values are supplied or ``n_recommendations`` is
        negative.
    """
    if len(input_features) < 5:
        raise ValueError(
            "input_features must hold four numeric values followed by an "
            "ingredient string"
        )
    if n_recommendations < 0:
        raise ValueError("n_recommendations must be non-negative")

    # The scaler was fitted on a DataFrame; hand it the same column names so
    # scikit-learn does not warn about missing feature names.
    numeric_query = [list(input_features[:4])]
    feature_names = getattr(scaler, 'feature_names_in_', None)
    if feature_names is not None:
        numeric_query = pd.DataFrame(numeric_query, columns=feature_names)

    # Scale and transform the input features
    input_features_scaled = scaler.transform(numeric_query)
    input_ingredients_transformed = vectorizer.transform([input_features[4]])
    input_combined = np.hstack(
        [input_features_scaled, input_ingredients_transformed.toarray()]
    )

    # Compute cosine similarity between the query and every training row
    similarities = cosine_similarity(input_combined, X_combined)

    # Sort ascending, reverse, then truncate. For any positive count this
    # yields the same rows as slicing the tail and reversing, and it returns
    # an empty result (not every row) when the count is 0.
    top_indices = np.argsort(similarities[0])[::-1][:n_recommendations]
    recommendations = recipe_df.iloc[top_indices]

    return recommendations[['recipe_name', 'ingredients_list', 'precedure_steps', 'description']]

# Example Input
# Value order: calories (kcal), protein (PDV), minutes, n_steps, ingredient text.
# NOTE(review): 'floor' and 'vegitable' look like typos for 'flour' and
# 'vegetable'; words the fitted vectorizer has never seen would add nothing
# to the query — confirm.
# NOTE(review): the third and fourth values are 15, 20 here but 20, 15 in the
# KNN example — confirm the difference is intentional.
input_features = [15, 36, 15, 20, 'floor, vegitable, milk, pulse, tomatoes']
recommendations = recommend_recipes_cosine(input_features)
recommendations



Unnamed: 0,recipe_name,ingredients_list,precedure_steps,description
17677,betsy s really good tuna for sandwiches,"['tuna in vegetable oil', 'dill pickle', 'cele...",['empty drained can of tuna into medium sized ...,tuna was something we ate a lot when i was a k...
241,cheesecake factory crab cakes,"['lump crabmeat', 'plain breadcrumbs', 'mayonn...",['measure all the ingredients for the crab cak...,be careful not to stir the crab too much into ...
24123,build your own canadian cranberry and herb tur...,"['fresh breadcrumb', 'extra virgin olive oil',...","['if grilling the burgers , preheat grill to m...",build your own burger - the recipe here is for...
