## Import and Sanity Checks

In [1]:
# Checking if I'm in the right environment:
import sys
sys.executable

'C:\\Users\\mathe\\anaconda3\\envs\\deepchef\\python.exe'

In [2]:
# General libraries:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler

In [3]:
# This allows scrolling through all the columns. Useful for dataframes with too many columns.
pd.set_option('display.max_columns', None)

In [4]:
recipes = pd.read_parquet('BasicCleanData.parquet')

In [5]:
recipes = recipes.reset_index(drop=True)

In [6]:
recipes.isna().sum()

RecipeId                           0
AuthorId                           0
Description                        0
RecipeCategory                     0
Keywords                           0
RecipeIngredientQuantities         0
RecipeIngredientParts              0
ReviewCount                   247487
Calories                           0
FatContent                         0
SaturatedFatContent                0
CholesterolContent                 0
SodiumContent                      0
CarbohydrateContent                0
FiberContent                       0
SugarContent                       0
ProteinContent                     0
RecipeServings                182910
RecipeInstructions                 0
url                                0
YearPublished                      0
MonthPublished                     0
DayPublished                       0
HourPublished                      0
TotalMinutes                       0
CorrectAggregatedRating       256533
dtype: int64

## Feature Engineering
### Turning `RecipeCategory` Into One-Hot Vectors

In [7]:
from sklearn.preprocessing import OneHotEncoder

In [8]:
ohe = OneHotEncoder()

In [9]:
recipe_category = pd.DataFrame(recipes['RecipeCategory'])
recipe_category 

Unnamed: 0,RecipeCategory
0,Desserts
1,Chicken
2,Beverages
3,Vegetarian/Vegan
4,Vegetables
...,...
522507,Desserts
522508,Healthy
522509,Desserts
522510,International


In [10]:
encoded = ohe.fit_transform(recipe_category)
encoded

<522512x31 sparse matrix of type '<class 'numpy.float64'>'
	with 522512 stored elements in Compressed Sparse Row format>

In [11]:
dense_array = encoded.toarray()
dense_array

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [12]:
ohe.categories_

[array(['Baked Goods', 'Beans/Legumes', 'Beverages', 'Breakfast/Brunch',
        'Chicken', 'Cooking Methods', 'Dairy', 'Desserts',
        'Family-Friendly', 'Flavor Profiles', 'Fruit', 'Healthy',
        'International', 'Lunch/Snacks', 'Main Dish', 'Meat',
        'Nuts/Seeds/Grains', 'Occasions', 'Pasta', 'Poultry',
        'Quick and Easy', 'Regional', 'Sauces/Condiments', 'Seafood',
        'Seasonal', 'Side Dishes', 'Soups', 'Special Dietary Needs',
        'Uncategorized', 'Vegetables', 'Vegetarian/Vegan'], dtype=object)]

In [13]:
encoded_recipe_cat = pd.DataFrame(dense_array, columns=ohe.categories_, dtype='int')
encoded_recipe_cat.isna().sum()

Baked Goods              0
Beans/Legumes            0
Beverages                0
Breakfast/Brunch         0
Chicken                  0
Cooking Methods          0
Dairy                    0
Desserts                 0
Family-Friendly          0
Flavor Profiles          0
Fruit                    0
Healthy                  0
International            0
Lunch/Snacks             0
Main Dish                0
Meat                     0
Nuts/Seeds/Grains        0
Occasions                0
Pasta                    0
Poultry                  0
Quick and Easy           0
Regional                 0
Sauces/Condiments        0
Seafood                  0
Seasonal                 0
Side Dishes              0
Soups                    0
Special Dietary Needs    0
Uncategorized            0
Vegetables               0
Vegetarian/Vegan         0
dtype: int64

In [14]:
#encoded_recipe_cat['RecipeCategory'] = recipes['RecipeCategory']

In [15]:
#encoded_recipe_cat.head()

In [16]:
recipes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 522512 entries, 0 to 522511
Data columns (total 26 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   RecipeId                    522512 non-null  float64
 1   AuthorId                    522512 non-null  int32  
 2   Description                 522512 non-null  object 
 3   RecipeCategory              522512 non-null  object 
 4   Keywords                    522512 non-null  object 
 5   RecipeIngredientQuantities  522512 non-null  object 
 6   RecipeIngredientParts       522512 non-null  object 
 7   ReviewCount                 275025 non-null  float64
 8   Calories                    522512 non-null  float64
 9   FatContent                  522512 non-null  float64
 10  SaturatedFatContent         522512 non-null  float64
 11  CholesterolContent          522512 non-null  float64
 12  SodiumContent               522512 non-null  float64
 13  CarbohydrateCo

In [17]:
recipes = pd.concat([recipes,encoded_recipe_cat],axis=1)

In [18]:
recipes.dtypes[recipes.dtypes == 'object']

Description                   object
RecipeCategory                object
Keywords                      object
RecipeIngredientQuantities    object
RecipeIngredientParts         object
RecipeInstructions            object
url                           object
dtype: object

In [19]:
recipes_no_null = recipes.dropna(subset=['CorrectAggregatedRating','RecipeServings'])
recipes_no_null.head()

Unnamed: 0,RecipeId,AuthorId,Description,RecipeCategory,Keywords,RecipeIngredientQuantities,RecipeIngredientParts,ReviewCount,Calories,FatContent,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeServings,RecipeInstructions,url,YearPublished,MonthPublished,DayPublished,HourPublished,TotalMinutes,CorrectAggregatedRating,"(Baked Goods,)","(Beans/Legumes,)","(Beverages,)","(Breakfast/Brunch,)","(Chicken,)","(Cooking Methods,)","(Dairy,)","(Desserts,)","(Family-Friendly,)","(Flavor Profiles,)","(Fruit,)","(Healthy,)","(International,)","(Lunch/Snacks,)","(Main Dish,)","(Meat,)","(Nuts/Seeds/Grains,)","(Occasions,)","(Pasta,)","(Poultry,)","(Quick and Easy,)","(Regional,)","(Sauces/Condiments,)","(Seafood,)","(Seasonal,)","(Side Dishes,)","(Soups,)","(Special Dietary Needs,)","(Uncategorized,)","(Vegetables,)","(Vegetarian/Vegan,)"
0,38.0,1533,Make and share this Low-Fat Berry Blue Frozen ...,Desserts,"[Dessert, Low Protein, Low Cholesterol, Health...","[4, 1⁄4, 1, 1]","[blueberries, granulated sugar, vanilla yogurt...",4.0,170.9,2.5,1.3,8.0,29.8,37.1,3.6,30.2,3.2,4.0,"[Toss 2 cups berries with sugar., Let stand fo...",https://www.food.com/recipe/Low-Fat-Berry-Blue...,1999,8,9,21,285,4.25,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,39.0,1567,Make and share this Biryani recipe from Food.com.,Chicken,"[Chicken Thigh & Leg, Chicken, Poultry, Meat, ...","[1, 4, 2, 2, 8, 1⁄4, 8, 1⁄2, 1, 1, 1⁄4, 1⁄4, 1...","[saffron, milk, hot green chili peppers, onion...",1.0,1110.7,58.8,16.6,372.8,368.4,84.4,9.0,20.4,63.4,6.0,[Soak saffron in warm milk for 5 minutes and p...,https://www.food.com/recipe/Biryani-39,1999,8,29,13,265,3.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,40.0,1566,This is from one of my first Good House Keepi...,Beverages,"[Low Protein, Low Cholesterol, Healthy, Summer...","[1 1⁄2, 1, None, 1 1⁄2, None, 3⁄4]","[sugar, lemons, rind of, lemon, zest of, fresh...",10.0,311.1,0.2,0.0,0.0,1.8,81.5,0.4,77.2,0.3,4.0,"[Into a 1 quart Jar with tight fitting lid, pu...",https://www.food.com/recipe/Best-Lemonade-40,1999,9,5,19,35,4.33,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,41.0,1586,This dish is best prepared a day in advance to...,Vegetarian/Vegan,"[Beans, Vegetable, Low Cholesterol, Weeknight,...","[12, 1, 2, 1, 10, 1, 3, 2, 2, 2, 1, 2, 1⁄2, 1⁄...","[extra firm tofu, eggplant, zucchini, mushroom...",2.0,536.1,24.0,3.8,0.0,1558.6,64.2,17.3,32.1,29.3,2.0,"[Drain the tofu, carefully squeezing out exces...",https://www.food.com/recipe/Carina's-Tofu-Vege...,1999,9,3,14,260,4.5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,42.0,1538,Make and share this Cabbage Soup recipe from F...,Vegetables,"[Low Protein, Vegan, Low Cholesterol, Healthy,...","[46, 4, 1, 2, 1]","[plain tomato juice, cabbage, onion, carrots, ...",11.0,103.6,0.4,0.1,0.0,959.3,25.1,4.8,17.7,4.3,4.0,"[Mix everything together and bring to a boil.,...",https://www.food.com/recipe/Cabbage-Soup-42,1999,9,19,6,50,2.67,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0


In [20]:
recipes_no_null.isna().sum()

RecipeId                      0
AuthorId                      0
Description                   0
RecipeCategory                0
Keywords                      0
RecipeIngredientQuantities    0
RecipeIngredientParts         0
ReviewCount                   0
Calories                      0
FatContent                    0
SaturatedFatContent           0
CholesterolContent            0
SodiumContent                 0
CarbohydrateContent           0
FiberContent                  0
SugarContent                  0
ProteinContent                0
RecipeServings                0
RecipeInstructions            0
url                           0
YearPublished                 0
MonthPublished                0
DayPublished                  0
HourPublished                 0
TotalMinutes                  0
CorrectAggregatedRating       0
(Baked Goods,)                0
(Beans/Legumes,)              0
(Beverages,)                  0
(Breakfast/Brunch,)           0
(Chicken,)                    0
(Cooking

In [21]:
recipes_no_null.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 170214 entries, 0 to 522426
Data columns (total 57 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   RecipeId                    170214 non-null  float64
 1   AuthorId                    170214 non-null  int32  
 2   Description                 170214 non-null  object 
 3   RecipeCategory              170214 non-null  object 
 4   Keywords                    170214 non-null  object 
 5   RecipeIngredientQuantities  170214 non-null  object 
 6   RecipeIngredientParts       170214 non-null  object 
 7   ReviewCount                 170214 non-null  float64
 8   Calories                    170214 non-null  float64
 9   FatContent                  170214 non-null  float64
 10  SaturatedFatContent         170214 non-null  float64
 11  CholesterolContent          170214 non-null  float64
 12  SodiumContent               170214 non-null  float64
 13  CarbohydrateCo

In [22]:
recipes_no_null = recipes_no_null.reset_index(drop=True)

Now we have dropped all null values from the dataset.

In [23]:
recipes_no_null.columns

Index([                  'RecipeId',                   'AuthorId',
                      'Description',             'RecipeCategory',
                         'Keywords', 'RecipeIngredientQuantities',
            'RecipeIngredientParts',                'ReviewCount',
                         'Calories',                 'FatContent',
              'SaturatedFatContent',         'CholesterolContent',
                    'SodiumContent',        'CarbohydrateContent',
                     'FiberContent',               'SugarContent',
                   'ProteinContent',             'RecipeServings',
               'RecipeInstructions',                        'url',
                    'YearPublished',             'MonthPublished',
                     'DayPublished',              'HourPublished',
                     'TotalMinutes',    'CorrectAggregatedRating',
                   ('Baked Goods',),           ('Beans/Legumes',),
                     ('Beverages',),        ('Breakfast/Brunch

In [24]:
nutritional_cols = ['Calories','FatContent','SaturatedFatContent','CholesterolContent','SodiumContent','CarbohydrateContent','FiberContent','SugarContent','ProteinContent','RecipeServings']
nutritional_facts = recipes_no_null[nutritional_cols]

Extract Nutritional Facts for Individuals:

In [25]:
nutritional_facts = nutritional_facts.apply(lambda column: round(column/nutritional_facts['RecipeServings'],2))
nutritional_facts

Unnamed: 0,Calories,FatContent,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeServings
0,42.72,0.62,0.32,2.00,7.45,9.28,0.90,7.55,0.80,1.0
1,185.12,9.80,2.77,62.13,61.40,14.07,1.50,3.40,10.57,1.0
2,77.78,0.05,0.00,0.00,0.45,20.38,0.10,19.30,0.08,1.0
3,268.05,12.00,1.90,0.00,779.30,32.10,8.65,16.05,14.65,1.0
4,25.90,0.10,0.02,0.00,239.82,6.28,1.20,4.42,1.08,1.0
...,...,...,...,...,...,...,...,...,...,...
170209,82.55,0.30,0.10,0.00,3514.20,20.65,1.50,19.10,1.40,1.0
170210,285.40,0.10,0.00,0.00,6994.90,76.70,0.30,75.50,0.60,1.0
170211,44.92,2.72,0.88,71.95,127.85,1.55,0.12,0.25,3.32,1.0
170212,19.36,1.02,0.14,1.43,21.08,2.41,0.13,1.32,0.27,1.0


In [26]:
recipes_no_null.drop(nutritional_cols,axis=1,inplace=True)

In [27]:
recipes_no_null.head()

Unnamed: 0,RecipeId,AuthorId,Description,RecipeCategory,Keywords,RecipeIngredientQuantities,RecipeIngredientParts,ReviewCount,RecipeInstructions,url,YearPublished,MonthPublished,DayPublished,HourPublished,TotalMinutes,CorrectAggregatedRating,"(Baked Goods,)","(Beans/Legumes,)","(Beverages,)","(Breakfast/Brunch,)","(Chicken,)","(Cooking Methods,)","(Dairy,)","(Desserts,)","(Family-Friendly,)","(Flavor Profiles,)","(Fruit,)","(Healthy,)","(International,)","(Lunch/Snacks,)","(Main Dish,)","(Meat,)","(Nuts/Seeds/Grains,)","(Occasions,)","(Pasta,)","(Poultry,)","(Quick and Easy,)","(Regional,)","(Sauces/Condiments,)","(Seafood,)","(Seasonal,)","(Side Dishes,)","(Soups,)","(Special Dietary Needs,)","(Uncategorized,)","(Vegetables,)","(Vegetarian/Vegan,)"
0,38.0,1533,Make and share this Low-Fat Berry Blue Frozen ...,Desserts,"[Dessert, Low Protein, Low Cholesterol, Health...","[4, 1⁄4, 1, 1]","[blueberries, granulated sugar, vanilla yogurt...",4.0,"[Toss 2 cups berries with sugar., Let stand fo...",https://www.food.com/recipe/Low-Fat-Berry-Blue...,1999,8,9,21,285,4.25,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,39.0,1567,Make and share this Biryani recipe from Food.com.,Chicken,"[Chicken Thigh & Leg, Chicken, Poultry, Meat, ...","[1, 4, 2, 2, 8, 1⁄4, 8, 1⁄2, 1, 1, 1⁄4, 1⁄4, 1...","[saffron, milk, hot green chili peppers, onion...",1.0,[Soak saffron in warm milk for 5 minutes and p...,https://www.food.com/recipe/Biryani-39,1999,8,29,13,265,3.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,40.0,1566,This is from one of my first Good House Keepi...,Beverages,"[Low Protein, Low Cholesterol, Healthy, Summer...","[1 1⁄2, 1, None, 1 1⁄2, None, 3⁄4]","[sugar, lemons, rind of, lemon, zest of, fresh...",10.0,"[Into a 1 quart Jar with tight fitting lid, pu...",https://www.food.com/recipe/Best-Lemonade-40,1999,9,5,19,35,4.33,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,41.0,1586,This dish is best prepared a day in advance to...,Vegetarian/Vegan,"[Beans, Vegetable, Low Cholesterol, Weeknight,...","[12, 1, 2, 1, 10, 1, 3, 2, 2, 2, 1, 2, 1⁄2, 1⁄...","[extra firm tofu, eggplant, zucchini, mushroom...",2.0,"[Drain the tofu, carefully squeezing out exces...",https://www.food.com/recipe/Carina's-Tofu-Vege...,1999,9,3,14,260,4.5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,42.0,1538,Make and share this Cabbage Soup recipe from F...,Vegetables,"[Low Protein, Vegan, Low Cholesterol, Healthy,...","[46, 4, 1, 2, 1]","[plain tomato juice, cabbage, onion, carrots, ...",11.0,"[Mix everything together and bring to a boil.,...",https://www.food.com/recipe/Cabbage-Soup-42,1999,9,19,6,50,2.67,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0


In [28]:
recipes_no_null = pd.concat([recipes_no_null, nutritional_facts],axis=1)
recipes_no_null.head()

Unnamed: 0,RecipeId,AuthorId,Description,RecipeCategory,Keywords,RecipeIngredientQuantities,RecipeIngredientParts,ReviewCount,RecipeInstructions,url,YearPublished,MonthPublished,DayPublished,HourPublished,TotalMinutes,CorrectAggregatedRating,"(Baked Goods,)","(Beans/Legumes,)","(Beverages,)","(Breakfast/Brunch,)","(Chicken,)","(Cooking Methods,)","(Dairy,)","(Desserts,)","(Family-Friendly,)","(Flavor Profiles,)","(Fruit,)","(Healthy,)","(International,)","(Lunch/Snacks,)","(Main Dish,)","(Meat,)","(Nuts/Seeds/Grains,)","(Occasions,)","(Pasta,)","(Poultry,)","(Quick and Easy,)","(Regional,)","(Sauces/Condiments,)","(Seafood,)","(Seasonal,)","(Side Dishes,)","(Soups,)","(Special Dietary Needs,)","(Uncategorized,)","(Vegetables,)","(Vegetarian/Vegan,)",Calories,FatContent,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeServings
0,38.0,1533,Make and share this Low-Fat Berry Blue Frozen ...,Desserts,"[Dessert, Low Protein, Low Cholesterol, Health...","[4, 1⁄4, 1, 1]","[blueberries, granulated sugar, vanilla yogurt...",4.0,"[Toss 2 cups berries with sugar., Let stand fo...",https://www.food.com/recipe/Low-Fat-Berry-Blue...,1999,8,9,21,285,4.25,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,42.72,0.62,0.32,2.0,7.45,9.28,0.9,7.55,0.8,1.0
1,39.0,1567,Make and share this Biryani recipe from Food.com.,Chicken,"[Chicken Thigh & Leg, Chicken, Poultry, Meat, ...","[1, 4, 2, 2, 8, 1⁄4, 8, 1⁄2, 1, 1, 1⁄4, 1⁄4, 1...","[saffron, milk, hot green chili peppers, onion...",1.0,[Soak saffron in warm milk for 5 minutes and p...,https://www.food.com/recipe/Biryani-39,1999,8,29,13,265,3.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,185.12,9.8,2.77,62.13,61.4,14.07,1.5,3.4,10.57,1.0
2,40.0,1566,This is from one of my first Good House Keepi...,Beverages,"[Low Protein, Low Cholesterol, Healthy, Summer...","[1 1⁄2, 1, None, 1 1⁄2, None, 3⁄4]","[sugar, lemons, rind of, lemon, zest of, fresh...",10.0,"[Into a 1 quart Jar with tight fitting lid, pu...",https://www.food.com/recipe/Best-Lemonade-40,1999,9,5,19,35,4.33,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,77.78,0.05,0.0,0.0,0.45,20.38,0.1,19.3,0.08,1.0
3,41.0,1586,This dish is best prepared a day in advance to...,Vegetarian/Vegan,"[Beans, Vegetable, Low Cholesterol, Weeknight,...","[12, 1, 2, 1, 10, 1, 3, 2, 2, 2, 1, 2, 1⁄2, 1⁄...","[extra firm tofu, eggplant, zucchini, mushroom...",2.0,"[Drain the tofu, carefully squeezing out exces...",https://www.food.com/recipe/Carina's-Tofu-Vege...,1999,9,3,14,260,4.5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,268.05,12.0,1.9,0.0,779.3,32.1,8.65,16.05,14.65,1.0
4,42.0,1538,Make and share this Cabbage Soup recipe from F...,Vegetables,"[Low Protein, Vegan, Low Cholesterol, Healthy,...","[46, 4, 1, 2, 1]","[plain tomato juice, cabbage, onion, carrots, ...",11.0,"[Mix everything together and bring to a boil.,...",https://www.food.com/recipe/Cabbage-Soup-42,1999,9,19,6,50,2.67,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,25.9,0.1,0.02,0.0,239.82,6.28,1.2,4.42,1.08,1.0


In [30]:
recipes_no_null = recipes_no_null.reset_index(drop=True)

## Recommender System

We can implement the recommender system using a pipeline. Here's an example of how you can use scikit-learn's pipeline to build the recommender system:

In [44]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

df = recipes_no_null.copy()

# Select the columns used for similarity calculation
selected_columns = ['Calories', 'FatContent', 'SaturatedFatContent', 'CholesterolContent',
                    'SodiumContent', 'CarbohydrateContent', 'FiberContent', 'SugarContent',
                    'ProteinContent', 'CorrectAggregatedRating']

# Define the pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Step 1: Scale the selected features
    ('knn', NearestNeighbors(metric='euclidean'))  # Step 2: Apply the KNN model with cosine similarity metric
])

# Fit the pipeline
scaled_features = pipeline['scaler'].fit_transform(df[selected_columns].values)
pipeline['knn'].fit(scaled_features)

# Function to get similar recipes
def get_similar_recipes(recipe_id, top_n=20):
    recipe_index = df[df['RecipeId'] == recipe_id].index[0]
    recipe_features = df.loc[recipe_index, selected_columns].values.reshape(1, -1)
    recipe_features_scaled = pipeline['scaler'].transform(recipe_features)
    _, top_indices = pipeline['knn'].kneighbors(recipe_features_scaled, n_neighbors=top_n+1)
    top_indices = top_indices.flatten()[1:]
    top_recipe_ids = df.loc[top_indices, 'RecipeId'].values
    top_recipes = df[df['RecipeId'].isin(top_recipe_ids)]
    return top_recipes

# Example usage:
recipe_id = 4662  # Replace with the actual RecipeId you want recommendations for
top_similar_recipes = get_similar_recipes(recipe_id, top_n=20)
pd.concat([df[df['RecipeId'] == recipe_id],get_similar_recipes(recipe_id, top_n=20)])

Unnamed: 0,RecipeId,AuthorId,Description,RecipeCategory,Keywords,RecipeIngredientQuantities,RecipeIngredientParts,ReviewCount,RecipeInstructions,url,YearPublished,MonthPublished,DayPublished,HourPublished,TotalMinutes,CorrectAggregatedRating,"(Baked Goods,)","(Beans/Legumes,)","(Beverages,)","(Breakfast/Brunch,)","(Chicken,)","(Cooking Methods,)","(Dairy,)","(Desserts,)","(Family-Friendly,)","(Flavor Profiles,)","(Fruit,)","(Healthy,)","(International,)","(Lunch/Snacks,)","(Main Dish,)","(Meat,)","(Nuts/Seeds/Grains,)","(Occasions,)","(Pasta,)","(Poultry,)","(Quick and Easy,)","(Regional,)","(Sauces/Condiments,)","(Seafood,)","(Seasonal,)","(Side Dishes,)","(Soups,)","(Special Dietary Needs,)","(Uncategorized,)","(Vegetables,)","(Vegetarian/Vegan,)",Calories,FatContent,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeServings
1422,4662.0,1925,Make and share this Baked Tomatoes with Orzo a...,Vegetables,"[European, Low Cholesterol, Healthy, < 15 Mins...","[4, 3, 1⁄4, 1, 3, 3, 1, 1⁄4]","[tomatoes, orzo pasta, garlic, basil, parsley,...",2.0,"[Preheat oven to 350 degrees or grill, Halve a...",https://www.food.com/recipe/Baked-Tomatoes-wit...,1999,11,17,17,0,4.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,36.4,0.61,0.14,0.35,11.54,6.41,0.41,0.45,1.3,1.0
5459,14083.0,20754,Make and share this Toasted Swiss & Ham Sandwi...,Lunch/Snacks,"[Pork, Meat, Broil/Grill, < 30 Mins, Oven]","[1, 1, 8, 1⁄4, 1⁄2, 4]","[Land o' Lakes Butter, fresh mushrooms, Alpine...",2.0,[Melt butter until sizzling in 10-inch skillet...,https://www.food.com/recipe/Toasted-Swiss-&-Ha...,2001,11,11,10,30,4.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,38.89,0.34,0.08,0.0,72.75,7.45,0.35,0.56,1.6,1.0
14656,33817.0,41087,Make and share this Linguine salad recipe from...,Quick and Easy,[Easy],"[1, 1 -16, 2, 2, 2, 1, None]","[linguine, tomatoes, cucumbers, onion]",1.0,"[Cook noodles as directed on package, drain co...",https://www.food.com/recipe/Linguine-salad-33817,2002,7,11,19,40,4.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,30.81,0.25,0.05,0.0,8.14,6.1,0.35,0.52,1.05,1.0
21221,50487.0,57828,"My brother, the carnivore, gave this his seal ...",Main Dish,"[Vegetable, Low Cholesterol, Healthy, Kosher, ...","[2, 12, 2, 1⁄4, 1, 1, 1, None, 1]","[olive oil, mushrooms, flour, scallion, milk, ...",2.0,[Wash and chop mushrooms (I buy the presliced ...,https://www.food.com/recipe/Pasta-With-Mushroo...,2003,1,8,20,30,4.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,35.86,0.7,0.18,0.54,11.44,6.15,0.36,0.38,1.29,1.0
32344,78810.0,68460,Make and share this Sweet and Sour Broccoli Pa...,Lunch/Snacks,"[Apple, Fruit, Vegetable, Potluck, < 30 Mins, ...","[8, 2, 2⁄3, 1, 1⁄3, 1⁄3, 3, 1, 1, 1, 1⁄2, None]","[pasta, broccoli florets, carrot, Red Deliciou...",1.0,"[Cook pasta per package directions, omitting s...",https://www.food.com/recipe/Sweet-and-Sour-Bro...,2003,12,13,20,30,4.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,34.22,0.52,0.08,0.05,9.38,6.32,0.37,1.2,1.12,1.0
33534,82296.0,121950,Make and share this Orzo with Spinach and Asia...,Vegetables,"[Cheese, Greens, Vegetable, < 30 Mins, Easy]","[3⁄4, 2, 1, 1, 1, 1, 1⁄4, 1⁄2, 1⁄4, None]","[orzo pasta, olive oil, onion, celery rib, gar...",2.0,[Cook the orzo according to package directions...,https://www.food.com/recipe/Orzo-with-Spinach-...,2004,1,28,19,25,4.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,37.48,0.72,0.1,0.0,4.03,6.52,0.38,0.35,1.23,1.0
37316,90719.0,118163,Versions of this dish - a relative to spanish ...,Nuts/Seeds/Grains,"[Rice, Vegetable, Mexican, Low Cholesterol, He...","[1, 1, 1⁄2, 2, 2, 2, 1⁄2, 3, 1, None]","[long-grain rice, chopped tomatoes, onion, gar...",2.0,[Put the rice in a large heatproof bowl and po...,https://www.food.com/recipe/Mexican-Rice-90719,2004,5,5,20,50,4.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36.7,0.98,0.15,0.4,56.72,5.92,0.38,0.83,1.05,1.0
51338,121760.0,120566,Make and share this 4 Point Stove Top Macaroni...,Dairy,"[Cauliflower, Vegetable, < 30 Mins, Easy]","[4 1⁄2, 2, 1, 1⁄2, 1]","[elbow macaroni, alfredo sauce, Ww Vegetable W...",1.0,[Prepare macaroni according to package directi...,https://www.food.com/recipe/4-Point-Stove-Top-...,2005,5,12,14,20,4.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,33.7,0.18,0.02,0.0,12.8,6.75,0.45,0.32,1.27,1.0
52168,123944.0,119466,I find both cucumbers and fresh dill to be esp...,Vegetables,"[Low Protein, Low Cholesterol, Healthy, < 15 M...","[1 1⁄4, 1, 1⁄2, 1⁄4, 2, 2, 1, 1⁄2, 1⁄4, 1, 1⁄4...","[water, couscous, low-fat buttermilk, plain lo...",8.0,"[In a medium saucepan, bring water to a boil a...",https://www.food.com/recipe/Couscous-and-Cucum...,2005,5,30,8,15,4.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,27.85,0.5,0.1,0.23,38.32,4.93,0.43,0.75,0.98,1.0
59590,143081.0,214855,Make and share this Grated Apple Pikelets reci...,Breakfast/Brunch,"[Apple, Fruit, Australian, Kid Friendly, < 15 ...","[2⁄3, 1⁄4, 1, 1, 2⁄3, 1]","[self raising flour, bicarbonate of soda, Sple...",1.0,"[Combine flour, bicarb, 3 tsp splenda and 1/2 ...",https://www.food.com/recipe/Grated-Apple-Pikel...,2005,10,28,16,15,4.0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31.02,0.45,0.25,1.42,28.25,5.65,0.35,0.92,1.12,1.0
