In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import statistics # for variance
from scipy import stats # calculate trimmed mean

pd.set_option('display.max_rows', 426)

na_vals = ['-1', 1]
indian_food = pd.read_csv("indian_food.csv", na_values=na_vals)
indian_food['ingredients'] = indian_food['ingredients'].apply(lambda x: x.lower())

In [2]:
indian_food['num_ingr'] = indian_food['ingredients'].apply(lambda x: len(x.split(',')))

In [16]:
indian_food.shape

(255, 10)

In [3]:
indian_food.head(2)

Unnamed: 0,name,ingredients,diet,prep_time,cook_time,flavor_profile,course,state,region,num_ingr
0,Balu shahi,"maida flour, yogurt, oil, sugar",vegetarian,45.0,25.0,sweet,dessert,West Bengal,East,4
1,Boondi,"gram flour, ghee, sugar",vegetarian,80.0,30.0,sweet,dessert,Rajasthan,West,3


In [20]:
fig = px.scatter(indian_food, x="prep_time", y='cook_time', color="course", symbol="course")
fig.update_traces(marker_size=10)
fig.show()

In [23]:
indian_food.sort_values('prep_time')

Unnamed: 0,name,ingredients,diet,prep_time,cook_time,flavor_profile,course,state,region,num_ingr
214,Khichu,"rice flour, sesame seeds, baking soda, peanut oil",vegetarian,5.0,10.0,spicy,snack,Gujarat,West,4
38,Bandar laddu,"besan, jaggery, cardamom powder, ghee, cashews...",vegetarian,5.0,35.0,sweet,dessert,Andhra Pradesh,South,7
111,Papad,"urad dal, sev, lemon juice, chopped tomatoes",vegetarian,5.0,5.0,spicy,snack,,,4
70,Aloo matar,"potato, peas, chillies, ginger, garam masala, ...",vegetarian,5.0,40.0,spicy,main course,Punjab,North,6
45,Mysore pak,"besan flour, semolina, mung bean, jaggery, coc...",vegetarian,5.0,20.0,sweet,dessert,Karnataka,South,8
180,Dalithoy,"arhar dal, coconut oil, curry leaves, mustard ...",vegetarian,5.0,20.0,,main course,Maharashtra,West,5
175,Chorafali,"urad dal, bengal gram flour, dried mango, baki...",vegetarian,5.0,15.0,spicy,snack,Gujarat,West,5
49,Pongal,"rice, jaggery, cashews, ghee",vegetarian,5.0,20.0,sweet,dessert,Tamil Nadu,South,4
197,Mohanthal,"rose water, pistachio, badam, bengal gram flou...",vegetarian,5.0,45.0,sweet,dessert,Gujarat,West,5
143,Kuzhambu,"pearl onions, urad dal, drumsticks, tomato, cu...",vegetarian,5.0,30.0,spicy,main course,Tamil Nadu,South,5


#### Correcting preparation times

The correction is based on the webpages indicated by the author of the database. If the dish is not founded in any of them, the preparation time will be search on internet.

https://www.kaggle.com/nehaprabhavalkar/indian-food-101

https://hebbarskitchen.com/  
https://www.archanaskitchen.com/

In [4]:
outlier_prep = indian_food[(indian_food['prep_time'] > 45)]

In [5]:
outlier_prep.shape

(22, 10)

In [6]:
outlier_prep.sort_values('prep_time', ascending=False)

Unnamed: 0,name,ingredients,diet,prep_time,cook_time,flavor_profile,course,state,region,num_ingr
114,Pindi chana,"fennel, tea bags, tomato, kasuri methi, cinnamon",vegetarian,500.0,120.0,spicy,main course,Punjab,North,5
155,Puttu,"brown rice flour, sugar, grated coconut",vegetarian,495.0,40.0,,main course,Kerala,South,3
29,Misti doi,"milk, jaggery",vegetarian,480.0,30.0,sweet,dessert,West Bengal,East,2
144,Masala Dosa,"chana dal, urad dal, potatoes, idli rice, thic...",vegetarian,360.0,90.0,spicy,snack,,South,5
130,Idli,"split urad dal, urad dal, idli rice, thick poh...",vegetarian,360.0,90.0,spicy,snack,,South,5
128,Dosa,"chana dal, urad dal, whole urad dal, blend ric...",vegetarian,360.0,90.0,spicy,snack,,South,5
123,Tandoori Fish Tikka,"chickpea flour, biryani masala powder, yogurt,...",non vegetarian,240.0,30.0,spicy,starter,Punjab,North,5
122,Tandoori Chicken,"greek yogurt, garam masala, kasuri methi, mari...",non vegetarian,240.0,30.0,spicy,main course,Punjab,North,5
66,Chak Hao Kheer,"rice, milk, sugar, cardamom",vegetarian,240.0,45.0,sweet,dessert,Manipur,North East,4
48,Poornalu,"chana dal, jaggery",vegetarian,240.0,60.0,sweet,dessert,Andhra Pradesh,South,2


In [7]:
#def add_col(df):
   # if outlier_prep['name'] == 'Misti doi':
        

SyntaxError: unexpected EOF while parsing (<ipython-input-7-3927f9df98b3>, line 3)

In [8]:
outlier_prep['prep_time_reason'] = 'soaking time'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [9]:
outlier_prep

Unnamed: 0,name,ingredients,diet,prep_time,cook_time,flavor_profile,course,state,region,num_ingr,prep_time_reason
1,Boondi,"gram flour, ghee, sugar",vegetarian,80.0,30.0,sweet,dessert,Rajasthan,West,3,soaking time
29,Misti doi,"milk, jaggery",vegetarian,480.0,30.0,sweet,dessert,West Bengal,East,2,soaking time
33,Ras malai,"chhena, reduced milk, pistachio",vegetarian,180.0,60.0,sweet,dessert,West Bengal,East,3,soaking time
46,Obbattu holige,"maida flour, turmeric, coconut, chickpeas, jag...",vegetarian,180.0,60.0,sweet,main course,Karnataka,South,7,soaking time
48,Poornalu,"chana dal, jaggery",vegetarian,240.0,60.0,sweet,dessert,Andhra Pradesh,South,2,soaking time
66,Chak Hao Kheer,"rice, milk, sugar, cardamom",vegetarian,240.0,45.0,sweet,dessert,Manipur,North East,4,soaking time
81,Chicken Tikka,"chicken, whole wheat bread, rice flour, garam ...",non vegetarian,120.0,45.0,spicy,starter,Punjab,North,5,soaking time
104,Naan,"whole wheat flour, honey, butter, garlic",vegetarian,60.0,30.0,,main course,Punjab,North,4,soaking time
114,Pindi chana,"fennel, tea bags, tomato, kasuri methi, cinnamon",vegetarian,500.0,120.0,spicy,main course,Punjab,North,5,soaking time
122,Tandoori Chicken,"greek yogurt, garam masala, kasuri methi, mari...",non vegetarian,240.0,30.0,spicy,main course,Punjab,North,5,soaking time


In [10]:
outlier_prep.at[29,'prep_time_reason']='other reason'
outlier_prep.at[33,'prep_time_reason']='other reason'
outlier_prep.at[129,'prep_time_reason']='other reason'

outlier_prep.at[123,'prep_time_reason']='marinating'
outlier_prep.at[122,'prep_time_reason']='marinating'
outlier_prep.at[81,'prep_time_reason']='marinating'
outlier_prep.at[46,'prep_time_reason']='dough preparation'
outlier_prep.at[157,'prep_time_reason']='dough preparation'
outlier_prep.at[1,'prep_time_reason']='dough preparation'
outlier_prep.at[104,'prep_time_reason']='dough preparation'

In [11]:
outlier_prep

Unnamed: 0,name,ingredients,diet,prep_time,cook_time,flavor_profile,course,state,region,num_ingr,prep_time_reason
1,Boondi,"gram flour, ghee, sugar",vegetarian,80.0,30.0,sweet,dessert,Rajasthan,West,3,dough preparation
29,Misti doi,"milk, jaggery",vegetarian,480.0,30.0,sweet,dessert,West Bengal,East,2,other reason
33,Ras malai,"chhena, reduced milk, pistachio",vegetarian,180.0,60.0,sweet,dessert,West Bengal,East,3,other reason
46,Obbattu holige,"maida flour, turmeric, coconut, chickpeas, jag...",vegetarian,180.0,60.0,sweet,main course,Karnataka,South,7,dough preparation
48,Poornalu,"chana dal, jaggery",vegetarian,240.0,60.0,sweet,dessert,Andhra Pradesh,South,2,soaking time
66,Chak Hao Kheer,"rice, milk, sugar, cardamom",vegetarian,240.0,45.0,sweet,dessert,Manipur,North East,4,soaking time
81,Chicken Tikka,"chicken, whole wheat bread, rice flour, garam ...",non vegetarian,120.0,45.0,spicy,starter,Punjab,North,5,marinating
104,Naan,"whole wheat flour, honey, butter, garlic",vegetarian,60.0,30.0,,main course,Punjab,North,4,dough preparation
114,Pindi chana,"fennel, tea bags, tomato, kasuri methi, cinnamon",vegetarian,500.0,120.0,spicy,main course,Punjab,North,5,soaking time
122,Tandoori Chicken,"greek yogurt, garam masala, kasuri methi, mari...",non vegetarian,240.0,30.0,spicy,main course,Punjab,North,5,marinating


In [15]:
import plotly.express as px

fig = px.scatter(outlier_prep, x="prep_time", y='name', color="prep_time_reason", symbol="prep_time_reason")
fig.update_traces(marker_size=10)
fig.show()

29 Misti doi 10 min instead 480

In [None]:
indian_food[(indian_food['prep_time'] >= 240)] #overnight soaking, fermentation, marinating