In [402]:
import pandas as pd
import string

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [403]:
df = pd.read_csv('food-dataset.csv')
df.head()

Unnamed: 0,Food_ID,Name,C_Type,Veg_Non,Describe
0,1,summer squash salad,Healthy Food,veg,"white balsamic vinegar, lemon juice, lemon rin..."
1,2,chicken minced salad,Healthy Food,non-veg,"olive oil, chicken mince, garlic (minced), oni..."
2,3,sweet chilli almonds,Snack,veg,"almonds whole, egg white, curry leaves, salt, ..."
3,4,tricolour salad,Healthy Food,veg,"vinegar, honey/sugar, soy sauce, salt, garlic ..."
4,5,christmas cake,Dessert,veg,"christmas dry fruits (pre-soaked), orange zest..."


In [404]:
len(list(df['Name'].unique()))

400

In [405]:
df['C_Type'].unique()

array(['Healthy Food', 'Snack', 'Dessert', 'Japanese', 'Indian', 'French',
       'Mexican', 'Italian', 'Chinese', 'Beverage', 'Thai', 'Korean',
       ' Korean', 'Vietnames', 'Nepalese', 'Spanish'], dtype=object)

In [406]:
def text_cleaning(text):
    text  = "".join([char for char in text if char not in string.punctuation])    
    return text

In [407]:
df['Describe'] = df['Describe'].apply(text_cleaning)

In [408]:
df.head()

Unnamed: 0,Food_ID,Name,C_Type,Veg_Non,Describe
0,1,summer squash salad,Healthy Food,veg,white balsamic vinegar lemon juice lemon rind ...
1,2,chicken minced salad,Healthy Food,non-veg,olive oil chicken mince garlic minced onion sa...
2,3,sweet chilli almonds,Snack,veg,almonds whole egg white curry leaves salt suga...
3,4,tricolour salad,Healthy Food,veg,vinegar honeysugar soy sauce salt garlic clove...
4,5,christmas cake,Dessert,veg,christmas dry fruits presoaked orange zest lem...


In [409]:
rating = pd.read_csv('ratings.csv')
rating.head()

Unnamed: 0,User_ID,Food_ID,Rating
0,1.0,88.0,4.0
1,1.0,46.0,3.0
2,1.0,24.0,5.0
3,1.0,25.0,4.0
4,2.0,49.0,1.0


In [410]:
rating = rating[:511]
rating.tail()

Unnamed: 0,User_ID,Food_ID,Rating
506,99.0,65.0,7.0
507,99.0,22.0,1.0
508,100.0,24.0,10.0
509,100.0,233.0,10.0
510,100.0,29.0,7.0


In [411]:
average_ratings = rating.groupby('Food_ID')['Rating'].mean().reset_index()
average_ratings.columns = ['Food_ID', 'Average_Rating']

In [412]:
updated_food_data = pd.merge(df, average_ratings, on='Food_ID', how='left')
updated_food_data.head()

Unnamed: 0,Food_ID,Name,C_Type,Veg_Non,Describe,Average_Rating
0,1,summer squash salad,Healthy Food,veg,white balsamic vinegar lemon juice lemon rind ...,7.5
1,2,chicken minced salad,Healthy Food,non-veg,olive oil chicken mince garlic minced onion sa...,6.0
2,3,sweet chilli almonds,Snack,veg,almonds whole egg white curry leaves salt suga...,4.0
3,4,tricolour salad,Healthy Food,veg,vinegar honeysugar soy sauce salt garlic clove...,6.0
4,5,christmas cake,Dessert,veg,christmas dry fruits presoaked orange zest lem...,6.5


In [413]:
updated_food_data['Average_Rating'] = updated_food_data['Average_Rating'].fillna(updated_food_data['Average_Rating'].mean())

In [414]:
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['Describe'])
tfidf_matrix.shape

(400, 1261)

In [415]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
cosine_sim

array([[1.        , 0.16228366, 0.13001124, ..., 0.1286286 , 0.04277223,
        0.09993639],
       [0.16228366, 1.        , 0.06799336, ..., 0.14878001, 0.05688681,
        0.16917639],
       [0.13001124, 0.06799336, 1.        , ..., 0.03291577, 0.11795401,
        0.01834168],
       ...,
       [0.1286286 , 0.14878001, 0.03291577, ..., 1.        , 0.        ,
        0.10087579],
       [0.04277223, 0.05688681, 0.11795401, ..., 0.        , 1.        ,
        0.        ],
       [0.09993639, 0.16917639, 0.01834168, ..., 0.10087579, 0.        ,
        1.        ]])

In [416]:
indices = pd.Series(df.index, index=df['Name']).drop_duplicates()
indices

Name
summer squash salad                                          0
chicken minced salad                                         1
sweet chilli almonds                                         2
tricolour salad                                              3
christmas cake                                               4
                                                          ... 
Kimchi Toast                                               395
Tacos de Gobernador (Shrimp, Poblano, and Cheese Tacos)    396
Melted Broccoli Pasta With Capers and Anchovies            397
Lemon-Ginger Cake with Pistachios                          398
Rosemary Roasted Vegetables                                399
Length: 400, dtype: int64

In [417]:
def get_recommendations(title, cosine_sim=cosine_sim):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    sim_scores = sim_scores[1:6]
    
    food_indices = [i[0] for i in sim_scores]
    return df['Name'].iloc[food_indices]

In [418]:
features = ['C_Type','Veg_Non', 'Describe']

In [419]:
def create_soup(x):
    return x['C_Type'] + " " + x['Veg_Non'] + " " + x['Describe']

In [420]:
df['soup'] = df.apply(create_soup, axis=1)

In [421]:
df.head()

Unnamed: 0,Food_ID,Name,C_Type,Veg_Non,Describe,soup
0,1,summer squash salad,Healthy Food,veg,white balsamic vinegar lemon juice lemon rind ...,Healthy Food veg white balsamic vinegar lemon ...
1,2,chicken minced salad,Healthy Food,non-veg,olive oil chicken mince garlic minced onion sa...,Healthy Food non-veg olive oil chicken mince g...
2,3,sweet chilli almonds,Snack,veg,almonds whole egg white curry leaves salt suga...,Snack veg almonds whole egg white curry leaves...
3,4,tricolour salad,Healthy Food,veg,vinegar honeysugar soy sauce salt garlic clove...,Healthy Food veg vinegar honeysugar soy sauce ...
4,5,christmas cake,Dessert,veg,christmas dry fruits presoaked orange zest lem...,Dessert veg christmas dry fruits presoaked ora...


In [422]:
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(df['soup'])

In [423]:
cosine_sim2 = cosine_similarity(count_matrix, count_matrix)

In [424]:
df = df.reset_index()
indices = pd.Series(df.index, index=df['Name'])

In [425]:
display(indices)

Name
summer squash salad                                          0
chicken minced salad                                         1
sweet chilli almonds                                         2
tricolour salad                                              3
christmas cake                                               4
                                                          ... 
Kimchi Toast                                               395
Tacos de Gobernador (Shrimp, Poblano, and Cheese Tacos)    396
Melted Broccoli Pasta With Capers and Anchovies            397
Lemon-Ginger Cake with Pistachios                          398
Rosemary Roasted Vegetables                                399
Length: 400, dtype: int64

In [426]:
get_recommendations('Kimchi Toast', cosine_sim2)

372         Spicy Kimchi Tofu Stew
365                Kimchi Bokumbab
366           Korean fried Chicken
391                 Rajas Poblanas
311    Kimchi and Miso Noodle Soup
Name: Name, dtype: object

In [427]:
def highest_food_choices(veg_non, c_type):
    filtered_updated_food_data = updated_food_data[(updated_food_data['Veg_Non'] == veg_non) & (updated_food_data['C_Type'] == c_type)]
    sorted_filtered_data = filtered_updated_food_data.sort_values(by='Average_Rating', ascending=False)
    return sorted_filtered_data[['Name', 'Describe', 'Average_Rating']].head(5)

In [428]:
choices = highest_food_choices("non-veg", "Healthy Food")
choices

Unnamed: 0,Name,Describe,Average_Rating
108,chicken quinoa biryani,onions tomato green chilliesslit open ginger g...,10.0
86,roasted spring chicken with root veggies,whole chicken thyme garlic lemon orange salt b...,7.5
7,lamb and chargrilled bell pepper soup,lamb bones preferably shank and shoulder onion...,7.0
1,chicken minced salad,olive oil chicken mince garlic minced onion sa...,6.0
330,Lemony Crab Salad with Baby Greens,Lemon juice olive oil crab lecttus papersalt v...,5.508376


In [429]:
def random_food_choices(veg_non, c_type, num_choices=5):
    filtered_updated_food_data = updated_food_data[(updated_food_data['Veg_Non'] == veg_non) & (updated_food_data['C_Type'] == c_type)]
    sorted_filtered_data = filtered_updated_food_data.sort_values(by='Average_Rating', ascending=False)
    
    if len(sorted_filtered_data) <= num_choices:
        return sorted_filtered_data[['Name', 'Describe', 'Average_Rating']]
    else:
        return sorted_filtered_data[['Name', 'Describe', 'Average_Rating']].sample(n=num_choices, random_state=1)

In [430]:
random = random_food_choices("veg", "Healthy Food")
random

Unnamed: 0,Name,Describe,Average_Rating
37,mixed salad with lotus root,Iceberg Lattoos Lolo Rosso Endive Lettuce Red ...,4.0
300,brown rice,riety of rice,1.0
232,gluten free almond cake,Almond Powder Egg Honey Baking Soda Vanilla Es...,10.0
344,Cucumber and Radish Salad,cucumberraddishvinegar corianderolive salt papper,5.508376
39,corn and raw mango salad,Corn kernels onions green onions paprika raw m...,10.0
