# iFood Ideas

## Data Sources

https://www.kaggle.com/datasets/sarthak71/food-recipes


## Initial Setup

In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.stats.mstats import winsorize
from math import ceil
from datetime import timedelta
import os

import warnings
warnings.filterwarnings('ignore')

from matplotlib.colors import LinearSegmentedColormap

import json
from ast import literal_eval

from sklearn import tree
from sklearn.naive_bayes import CategoricalNB
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn import preprocessing

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
## Some Constants

RANDOM_STATE = 0

## Data

In [3]:
fulldata = pd.read_csv("food_recipes.csv", )
fulldata.shape

(8009, 16)

In [4]:
data = pd.read_csv("food_recipes.csv", 
                   nrows=4000
                  )
#data.head(3)

In [5]:
data.drop(columns=['url', 'record_health', 'vote_count', 'author'], inplace=True)
data.dropna(inplace=True)
data.drop_duplicates(keep='first', inplace=True)
data.reset_index(inplace=True, drop=True)

data['ingredients_xpl'] = data['ingredients']
data['ingredients'] = data['ingredients'].apply(lambda x: x.replace('|', ' '))

data['tags'] = data['tags'].apply(lambda x: x.replace('|', ' '))
data['prep_time'] = data['prep_time'].str.split(' ').str[0].astype(int)
data['cook_time'] = data['cook_time'].str.split(' ').str[0].astype(int)


In [6]:
#data.head(3)


In [7]:
## Create ingredients index with ingredients exploded vs recipe id

ingredients_index = data.loc[:,['ingredients_xpl']]
ingredients_index['ingredients'] = ingredients_index['ingredients_xpl'].apply(lambda x: x.split('|')).apply(lambda x:[(str.lower(i)) for i in x])
ingredients_index.drop(columns=['ingredients_xpl'], inplace=True)

ingredients_index['recipe_id'] = data.index
#ingredients_index['recipe_name'] = data['recipe_title']
ingredients_index = ingredients_index.explode('ingredients').reset_index(drop=True)

ingredients_index.head(2)


Unnamed: 0,ingredients,recipe_id
0,tortillas,0
1,extra virgin olive oil,0


In [8]:
def combined_features(row):
    combi = row['course']+" "+row['cuisine']+" "+row['diet']+" "+row['ingredients']+" "+row['tags']+" "+row['category']
    combi.replace(" Recipes", "")
    return combi

data["combined_features"] = data.apply(combined_features, axis =1)

In [9]:
## Get cosine similarity matrix

cv = CountVectorizer()
count_matrix = cv.fit_transform(data["combined_features"])

cosine_sim_df = pd.DataFrame(cosine_similarity(count_matrix))
#cosine_sim_df

In [10]:
def make_corr_heatmap(df, title="Cosine Similarity Heatmap"):
    l = len(df.columns.tolist())
    fig, ax = plt.subplots(figsize=(.75*l, .4*l))
    mask = np.triu(np.ones_like(df, dtype=bool))
    heatmap = sns.heatmap(df, mask=mask, vmin=0, vmax=1, annot=False)# ,cmap=DIV_CMAP)

    plt.show()

In [11]:
## This takes long to generate for large df
#make_corr_heatmap(cosine_sim_df)


In [12]:
def get_recipe_id_from_ingredients(df, ing):
    """
    df = ingredient_index
    ing = list of ingredients to look up
    """
    rid = df.loc[df['ingredients'].isin(ing),:]
    
    return rid
get_recipe_id_from_ingredients(ingredients_index, ['onion', 'salt'])

Unnamed: 0,ingredients,recipe_id
16,salt,1
25,salt,2
30,salt,3
48,salt,4
63,salt,5
...,...,...
44595,onion,3867
44611,onion,3868
44617,salt,3868
44632,onion,3870


In [13]:
def get_recipe_from_index(df, i):
    return df.iloc[i,:]

get_recipe_from_index(data,2493)

recipe_title         Whole Wheat Ragi Chocolate Chip Cookie Pizza R...
rating                                                        4.955381
description          Whole Wheat Ragi Chocolate Chip Cookie Pizza R...
cuisine                                                       American
course                                                         Dessert
diet                                                        Vegetarian
prep_time                                                           10
cook_time                                                           30
ingredients          Whole Wheat Flour Ragi Flour (Finger Millet/ N...
instructions         To begin making Whole Wheat Ragi Chocolate Chi...
tags                 Whole Wheat Recipes Tea Party Recipes Chocolat...
category                                      Cookie & Biscuit Recipes
ingredients_xpl      Whole Wheat Flour|Ragi Flour (Finger Millet/ N...
combined_features    Dessert American Vegetarian Whole Wheat Flour ...
Name: 

In [14]:
def get_top_similar(simx, k, i, df):
    """
    simx = similarity matrix
    k = number of results to return
    i = index of recipe to compare
    """
    similar_recipes = list(enumerate(simx[i]))
    sorted_similar = sorted(similar_recipes, key=lambda x:x[1], reverse=True)
    top_k = sorted_similar[1:k+1]
    
    
    top_k_df = pd.DataFrame(columns=df.columns)
    top_k_scores = []
    top_k_index = []
    for i in top_k:
        top_k_df = top_k_df.append(get_recipe_from_index(df,i[0]), ignore_index=True)
        top_k_scores.append(i[1])
        top_k_index.append(i[0])

    top_k_df['Score'] = top_k_scores
    top_k_df['id'] = top_k_index

    return top_k_df
    
    

In [15]:

get_top_similar(cosine_sim_df, 5, 2493, data)



Unnamed: 0,recipe_title,rating,description,cuisine,course,diet,prep_time,cook_time,ingredients,instructions,tags,category,ingredients_xpl,combined_features,Score,id
0,Quick and Easy Eggless Whole Wheat Chocolate C...,4.871935,"The is the most delicious feather like cake,...",Continental,Dessert,Vegetarian,0,40,Whole Wheat Flour Cocoa Powder Baking soda Oil...,To begin making the Whole Wheat Chocolate Cupc...,Party Food Recipes Kids Lunch Box Recipes Choc...,Cake Recipes,Whole Wheat Flour|Cocoa Powder|Baking soda|Oil...,Dessert Continental Vegetarian Whole Wheat Flo...,0.80353,689
1,Banana Date Chocolate Chip Cake Recipe,4.924005,Banana Date Chocolate Chip Cake Recipe is the ...,Continental,Dessert,Eggetarian,10,45,Nutralite Classic Spread Brown Sugar (Demerara...,To begin making the Banana Date Chocolate Chip...,Tea Party Recipes Chocolate Recipes Millet Rec...,Cake Recipes,Nutralite Classic Spread|Brown Sugar (Demerara...,Dessert Continental Eggetarian Nutralite Class...,0.797749,2618
2,Eggless Chocolate Chip And Honey Cookies Recipe,4.866322,Eggless Chocolate Chip And Honey Cookies is a ...,Continental,Snack,Vegetarian,20,25,All Purpose Flour (Maida) Caster Sugar Honey V...,To begin making the Eggless Chocolate Chip And...,Tea Party Recipes Chocolate Recipes Eggless Ca...,Cookie & Biscuit Recipes,All Purpose Flour (Maida)|Caster Sugar|Honey|V...,Snack Continental Vegetarian All Purpose Flour...,0.780401,1820
3,Eggless Cranberry Pistachio Biscotti Recipe,4.790036,Cranberry Pistachio Biscotti Recipe is one of ...,French,Dessert,Vegetarian,60,35,All Purpose Flour (Maida) Baking powder Oil Su...,To begin making the Cranberry Pistachio Biscot...,Tea Party Recipes Healthy Kids Snack Recipes E...,Cookie & Biscuit Recipes,All Purpose Flour (Maida)|Baking powder|Oil|Su...,Dessert French Vegetarian All Purpose Flour (M...,0.776096,242
4,Ragi & Whole Wheat Halwa Recipe - Finger Mille...,4.894584,Ragi Halwa or Finger Millet Pudding is a delic...,Indian,Dessert,Vegetarian,10,30,Whole Wheat Flour Ragi Flour (Finger Millet/ N...,"To make the Ragi Halwa Recipe, melt the ghee i...",High Protein Vegetarian Diet Recipes Whole Whe...,Sweet Recipes (Indian Mithai / Indian Dessert),Whole Wheat Flour|Ragi Flour (Finger Millet/ N...,Dessert Indian Vegetarian Whole Wheat Flour Ra...,0.772011,1155


In [16]:
#data.loc[data['cuisine']=='American',:]