In [1]:
import numpy as np, pandas as pd

In [2]:
df = pd.read_csv('indian_food.csv')

In [3]:
df.head()

Unnamed: 0,id,Dish,ingredients,diet,prep_time,cook_time,flavor_profile,course,state,region
0,1,Balu shahi,"Maida flour, yogurt, oil, sugar",vegetarian,45,25,sweet,dessert,West Bengal,East
1,2,Boondi,"Gram flour, ghee, sugar",vegetarian,80,30,sweet,dessert,Rajasthan,West
2,3,Gajar ka halwa,"Carrots, milk, sugar, ghee, cashews, raisins",vegetarian,15,60,sweet,dessert,Punjab,North
3,4,Ghevar,"Flour, ghee, kewra, milk, clarified butter, su...",vegetarian,15,30,sweet,dessert,Rajasthan,West
4,5,Gulab jamun,"Milk powder, plain flour, baking powder, ghee,...",vegetarian,15,40,sweet,dessert,West Bengal,East


In [4]:
df = df[['id','Dish','ingredients']]

In [5]:
df.head()

Unnamed: 0,id,Dish,ingredients
0,1,Balu shahi,"Maida flour, yogurt, oil, sugar"
1,2,Boondi,"Gram flour, ghee, sugar"
2,3,Gajar ka halwa,"Carrots, milk, sugar, ghee, cashews, raisins"
3,4,Ghevar,"Flour, ghee, kewra, milk, clarified butter, su..."
4,5,Gulab jamun,"Milk powder, plain flour, baking powder, ghee,..."


In [6]:
df.to_csv('recipe.csv')

In [7]:
len(df)

255

In [8]:
df.isnull().sum()

id             0
Dish           0
ingredients    0
dtype: int64

In [9]:
df.duplicated().sum()

0

In [10]:
df['Dish'] = df['Dish'].apply(lambda x:x.lower())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [11]:
df.head(50)

Unnamed: 0,id,Dish,ingredients
0,1,balu shahi,"Maida flour, yogurt, oil, sugar"
1,2,boondi,"Gram flour, ghee, sugar"
2,3,gajar ka halwa,"Carrots, milk, sugar, ghee, cashews, raisins"
3,4,ghevar,"Flour, ghee, kewra, milk, clarified butter, su..."
4,5,gulab jamun,"Milk powder, plain flour, baking powder, ghee,..."
5,6,imarti,"Sugar syrup, lentil flour"
6,7,jalebi,"Maida, corn flour, baking soda, vinegar, curd,..."
7,8,kaju katli,"Cashews, ghee, cardamom, sugar"
8,9,kalakand,"Milk, cottage cheese, sugar"
9,10,kheer,"Milk, rice, sugar, dried fruits"


In [12]:
import nltk
from nltk.stem.porter import PorterStemmer
p = PorterStemmer()

In [13]:
# define a function for stemming
def stem(text):
    y=[]
    for i in text.split():
        y.append(p.stem(i))
    return ' '.join(y)

In [14]:
df['Dish']=df['Dish'].apply(stem)

In [15]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features = 3000,stop_words = 'english')

In [16]:
vectors = cv.fit_transform(df['Dish']).toarray()

In [17]:
vectors

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [18]:
cv.get_feature_names()



['adhirasam',
 'aloo',
 'alu',
 'amti',
 'anarsa',
 'appam',
 'ariselu',
 'attu',
 'avial',
 'baati',
 'bajri',
 'balu',
 'bandar',
 'basundi',
 'bath',
 'bati',
 'bebinca',
 'beef',
 'bele',
 'bengena',
 'bhaji',
 'bhakri',
 'bhape',
 'bharta',
 'bhatur',
 'bhatura',
 'bhindi',
 'bilahi',
 'biryani',
 'bisi',
 'black',
 'bombil',
 'boondi',
 'bora',
 'brown',
 'butter',
 'chak',
 'chakali',
 'cham',
 'chana',
 'chapati',
 'chaval',
 'cheera',
 'chevdo',
 'chhena',
 'chholar',
 'chicken',
 'chikki',
 'chingri',
 'chokha',
 'chole',
 'chorafali',
 'churma',
 'coconut',
 'copra',
 'curri',
 'currivepillai',
 'da',
 'daal',
 'dabe',
 'dahi',
 'dal',
 'dalithoy',
 'dharwad',
 'dhokla',
 'dhokli',
 'dhonda',
 'di',
 'doi',
 'dom',
 'doodhpak',
 'dosa',
 'doubl',
 'duck',
 'dudhi',
 'dum',
 'falooda',
 'fara',
 'farsi',
 'feni',
 'fish',
 'fri',
 'gajar',
 'galho',
 'gatta',
 'gavvalu',
 'gheela',
 'ghevar',
 'ghooghra',
 'gobi',
 'goja',
 'gota',
 'gud',
 'gulab',
 'guri',
 'halvasan',
 'ha

In [19]:
from sklearn.metrics.pairwise import cosine_similarity
similarity=cosine_similarity(vectors)

In [26]:
def recommend(recipe):
    recipe_index = df[df['Dish']== recipe].index[0]
    distances = similarity[recipe_index]
    recipe_list = sorted(list(enumerate(distances)),reverse=True,key=lambda x:x[1])[1:4]
    
    
    for i in recipe_list:
        print(df.iloc[i[0]].Dish),
        print(df.iloc[i[0]].ingredients)

In [27]:
recommend('kalakand')

balu shahi
Maida flour, yogurt, oil, sugar
boondi
Gram flour, ghee, sugar
gajar ka halwa
Carrots, milk, sugar, ghee, cashews, raisins
