<a href="https://colab.research.google.com/github/kavyajeetbora/recipe_recommender/blob/master/analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from glob import glob
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import streamlit as st

In [4]:
files = glob(r"data\*.parquet")
df = pd.read_parquet(files)
df['steps'] = df['steps'].str.strip("[]")
df.iloc[0]['steps']

nan

In [5]:
df['name']

0         arriba   baked winter squash mexican style
1                   a bit different  breakfast pizza
2                          all in the kitchen  chili
3                                 alouette  potatoes
4                 amish  tomato ketchup  for canning
                             ...                    
231632        egyptian slow cooked eggs  beid hamine
231633                  egyptian spiced carrot puree
231634                        egyptian spiced prawns
231635     egyptian spicy meat pie in a phyllo crust
231636                       egyptian spinach omelet
Name: name, Length: 231637, dtype: object

In [6]:
files = glob(r"data\*.parquet")
df = pd.read_parquet(files)
df['ingredients'] = df['ingredients'].str.strip("[]").str.replace("'","").str.replace('"',"").str.split("',").apply(lambda x: [y.strip() for y in x])
print("Shape of the dataframe",df.shape)
df.head(3)

Shape of the dataframe (231637, 15)


Unnamed: 0,name,minutes,n_steps,steps,description,ingredients,n_ingredients,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV),embedding
0,arriba baked winter squash mexican style,55,11,"[make a choice and proceed with recipe, depend...",autumn is my favorite time of year to cook! th...,"[winter squash, mexican seasoning, mixed spice...",7,51.5,0.0,13.0,0.0,2.0,0.0,4.0,"[-0.008845049, 0.009866926, 0.028063796, 0.101..."
1,a bit different breakfast pizza,30,9,"[preheat oven to 425 degrees f, press dough in...",this recipe calls for the crust to be prebaked...,"[prepared pizza crust, sausage patty, eggs, mi...",6,173.4,18.0,0.0,17.0,22.0,35.0,1.0,"[-0.054575536, 0.027983457, 0.065263726, 0.032..."
2,all in the kitchen chili,130,6,"[brown ground beef in large pot, add chopped o...",this modified version of 'mom's' chili was a h...,"[ground beef, yellow onions, diced tomatoes, t...",13,269.8,22.0,32.0,48.0,39.0,27.0,5.0,"[-0.10401253, -0.024388006, 0.06653514, 0.0346..."


In [7]:
def cosine_similarity(vec1,vec2):
    '''
    Returns the cosine similarity between two vectors of n dimension
    '''
    denom = np.sqrt(np.sum(np.square(vec1))) * np.sqrt(np.sum(np.square(vec2)))
    return np.round(np.dot(vec1,vec2) / denom * 100, 2)

In [8]:
index = 999
data = df.iloc[index]
recipe, vector = data['name'], data['embedding']

print("Name of the dish:", recipe)

Name of the dish: 1890 cream cake


In [9]:
%%time

df_result = df.copy()
df_result['similarity'] = df_result['embedding'].apply(lambda x : cosine_similarity(vector, x))
df_result.drop('embedding', axis=1, inplace=True)
df_result.sort_values(by="similarity", ascending=False).iloc[1:4]

CPU times: total: 3.98 s
Wall time: 3.97 s


Unnamed: 0,name,minutes,n_steps,steps,description,ingredients,n_ingredients,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV),similarity
170691,white sponge cake,70,21,"[preheat oven to 400 degrees f, grease 3- 12 i...",nice light white cake. delicious served filled...,"[egg yolk, eggs, sugar, salt, baking powder, c...",10,584.2,30.0,168.0,26.0,24.0,13.0,29.0,93.06
3952,almond torta,90,17,"[preheat oven to 325 f, butter and flour a 10 ...",posted for zaar world tour 2005. recipe sourc...,"[almonds, flour, salt, egg yolks, amaretto, va...",11,205.3,17.0,48.0,5.0,15.0,7.0,6.0,92.76
136066,south african beesting cake with custard filling,105,24,"[preheat oven to 350 deg f / 180 deg celsius, ...","i do not know where the ""bee-sting"" comes from...","[flour, baking powder, salt, butter, superfine...",13,500.9,49.0,83.0,15.0,16.0,83.0,15.0,92.58


In [10]:
df_result.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 231637 entries, 0 to 231636
Data columns (total 15 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   name                 231636 non-null  object 
 1   minutes              231637 non-null  int64  
 2   n_steps              231637 non-null  int64  
 3   steps                231637 non-null  object 
 4   description          226658 non-null  object 
 5   ingredients          231637 non-null  object 
 6   n_ingredients        231637 non-null  int64  
 7   calories             231637 non-null  float64
 8   total fat (PDV)      231637 non-null  float64
 9   sugar (PDV)          231637 non-null  float64
 10  sodium (PDV)         231637 non-null  float64
 11  protein (PDV)        231637 non-null  float64
 12  saturated fat (PDV)  231637 non-null  float64
 13  carbohydrates (PDV)  231637 non-null  float64
 14  similarity           231637 non-null  float64
dtypes: float64(8), in

## Plotting the nutrition values

In [11]:
def setColor(pdv):
    if pdv<5:
        return '#8ADAB2'
    
    elif pdv>=5 and pdv<20:
        return '#D0F288'
    
    elif pdv>20:
        return "#DF826C"
    
def plot_nutrition(data):
    
    x = data.index[8:13]
    y = data.values[8:13]

    fig = go.Figure(
        go.Bar(
            name="",
            x = x,
            y = y,
            width = 0.2,
            uirevision = True,
            marker=dict(color = list(map(setColor,y))),
            hovertemplate =
                '<br><b>%{x}</b>: %{y:.2f}'
            ),
            
    )
    fig.update_layout(
        template="plotly_dark",
        margin=dict(l=20, r=20, t=20, b=20)
    )
    fig.update_xaxes(
        showgrid=False,
    )
    # fig.update_yaxes(
    #     showgrid=False,
    #     showticklabels=False
    # )
    fig.layout.xaxis.fixedrange = True
    fig.layout.yaxis.fixedrange = True

    return fig

In [12]:
data.index

Index(['name', 'minutes', 'n_steps', 'steps', 'description', 'ingredients',
       'n_ingredients', 'calories', 'total fat (PDV)', 'sugar (PDV)',
       'sodium (PDV)', 'protein (PDV)', 'saturated fat (PDV)',
       'carbohydrates (PDV)', 'embedding'],
      dtype='object')

In [13]:
fig = plot_nutrition(df_result.iloc[3])
fig.show()

In [15]:
pd.read_parquet(r'https://github.com/kavyajeetbora/recipe_recommender/raw/master/data/recipes_0.parquet')

Unnamed: 0,name,minutes,n_steps,steps,description,ingredients,n_ingredients,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV),embedding
0,arriba baked winter squash mexican style,55,11,"[make a choice and proceed with recipe, depend...",autumn is my favorite time of year to cook! th...,"winter squash, mexican seasoning, mixed spice,...",7,51.5,0.0,13.0,0.0,2.0,0.0,4.0,"[-0.008845049, 0.009866926, 0.028063796, 0.101..."
1,a bit different breakfast pizza,30,9,"[preheat oven to 425 degrees f, press dough in...",this recipe calls for the crust to be prebaked...,"prepared pizza crust, sausage patty, eggs, mil...",6,173.4,18.0,0.0,17.0,22.0,35.0,1.0,"[-0.054575536, 0.027983457, 0.065263726, 0.032..."
2,all in the kitchen chili,130,6,"[brown ground beef in large pot, add chopped o...",this modified version of 'mom's' chili was a h...,"ground beef, yellow onions, diced tomatoes, to...",13,269.8,22.0,32.0,48.0,39.0,27.0,5.0,"[-0.10401253, -0.024388006, 0.06653514, 0.0346..."
3,alouette potatoes,45,11,[place potatoes in a large pot of lightly salt...,"this is a super easy, great tasting, make ahea...","spreadable cheese with garlic and herbs, new p...",11,368.1,17.0,10.0,2.0,14.0,8.0,20.0,"[-0.048682474, -0.04654153, -0.013404117, 0.01..."
4,amish tomato ketchup for canning,190,5,"[mix all ingredients& boil for 2 1 / 2 hours ,...",my dh's amish mother raised him on this recipe...,"tomato juice, apple cider vinegar, sugar, salt...",8,352.9,1.0,337.0,23.0,3.0,0.0,28.0,"[-0.06817169, 0.030255292, 0.026339032, 0.0327..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7982,apricot and raisin fruit bars,80,16,"[preheat oven to 350f, cut butter in pieces an...",delicious and easy fruit bars that make good u...,"butter, flour, powdered sugar, salt, dried apr...",11,2693.9,208.0,846.0,78.0,82.0,339.0,116.0,"[-0.037797265, -0.0024575847, 0.050846573, 0.0..."
7983,apricot and sweet chilli chicken hot pot,35,6,[heat oil in a large saucepan and cook chicken...,"i found this recipe from campbells, a great qu...","oil, chicken breast fillets, chicken stock, dr...",7,457.3,17.0,93.0,27.0,97.0,11.0,13.0,"[-0.061615206, -0.0468792, 0.05577014, 0.08877..."
7984,apricot and tarragon baby carrots,17,4,"[steam or boil carrots until fork-tender, drai...",i serve these carrots often.,"baby carrots, unsalted butter, salt, apricot p...",6,105.3,4.0,53.0,9.0,1.0,9.0,6.0,"[-0.045175068, 0.03886988, 0.01198832, 0.04431..."
7985,apricot angel brownies,35,8,"[preheat oven to 350 degrees f , then grease a...",here's something a bit different for the brown...,"white chocolate baking squares, unsalted butte...",11,108.1,8.0,42.0,1.0,3.0,14.0,4.0,"[0.024234097, -0.00023074009, 0.051806446, 0.0..."
