In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import jaccard_score
from scipy.spatial.distance import pdist, squareform

In [None]:
# Read the restaurant listing CSV into a DataFrame and preview its first five rows.
csv_path = '/content/dataset.csv'
dataset = pd.read_csv(csv_path)
dataset.head(n=5)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [None]:
# Narrow the raw frame down to the identifier, name, cuisine, price,
# rating and vote columns; everything else is left behind in `dataset`.
keep_cols = [
    'Restaurant ID',
    'Restaurant Name',
    'Cuisines',
    'Price range',
    'Aggregate rating',
    'Votes',
]
refined_dataset = dataset[keep_cols]
refined_dataset

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Price range,Aggregate rating,Votes
0,6317637,Le Petit Souffle,"French, Japanese, Desserts",3,4.8,314
1,6304287,Izakaya Kikufuji,Japanese,3,4.5,591
2,6300002,Heat - Edsa Shangri-La,"Seafood, Asian, Filipino, Indian",4,4.4,270
3,6318506,Ooma,"Japanese, Sushi",4,4.9,365
4,6314302,Sambo Kojin,"Japanese, Korean",4,4.8,229
...,...,...,...,...,...,...
9546,5915730,Naml۱ Gurme,Turkish,3,4.1,788
9547,5908749,Ceviz A��ac۱,"World Cuisine, Patisserie, Cafe",3,4.2,1034
9548,5915807,Huqqa,"Italian, World Cuisine",4,3.7,661
9549,5916112,A���k Kahve,Restaurant Cafe,4,4.0,901


In [None]:
# Handle missing values and report duplicate statistics.
#
# Only the last expression of a cell is rendered, so the intermediate
# diagnostics are printed explicitly; as bare expressions their results
# would be computed and silently discarded.
print("Missing values per column (before dropna):")
print(refined_dataset.isna().sum())

# Drop every row that has a missing value in any of the selected columns.
refined_dataset = refined_dataset.dropna()

print("Missing values per column (after dropna):")
print(refined_dataset.isna().sum())
print("Fully duplicated rows:", refined_dataset.duplicated().sum())
print("Rows repeating an earlier restaurant name:",
      refined_dataset['Restaurant Name'].duplicated().sum())

# Number of rows per restaurant name, most frequent first (rendered as the cell output).
refined_dataset['Restaurant Name'].value_counts()

Unnamed: 0_level_0,count
Restaurant Name,Unnamed: 1_level_1
Cafe Coffee Day,83
Domino's Pizza,79
Subway,63
Green Chick Chop,51
McDonald's,48
...,...
The Town House Cafe,1
The G.T. Road,1
The Darzi Bar & Kitchen,1
Smoke On Water,1


In [None]:
# Sort by name and then by rating, both descending, so that within each
# restaurant name the highest-rated row comes first.
sort_keys = ['Restaurant Name', 'Aggregate rating']
refined_dataset = refined_dataset.sort_values(by=sort_keys, ascending=[False, False])
refined_dataset.head(n=5)

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Price range,Aggregate rating,Votes
9523,6000871,�ukura��a Sofras۱,"Kebab, Izgara",3,4.4,296
3120,18222559,{Niche} - Cafe & Bar,"North Indian, Chinese, Italian, Continental",3,4.1,492
9334,7100938,wagamama,"Japanese, Asian",4,3.7,131
9454,6401789,tashas,"Cafe, Mediterranean",4,4.1,374
4659,18361747,t Lounge by Dilmah,"Cafe, Tea, Desserts",2,3.6,34


In [None]:
# Inspect the first rows belonging to one frequently repeated restaurant name.
is_cafe_coffee_day = refined_dataset["Restaurant Name"].eq("Cafe Coffee Day")
refined_dataset[is_cafe_coffee_day].head()

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Price range,Aggregate rating,Votes
6430,5595,Cafe Coffee Day,Cafe,1,3.6,58
8432,594,Cafe Coffee Day,Cafe,1,3.6,125
3946,305736,Cafe Coffee Day,Cafe,1,3.5,35
5877,8828,Cafe Coffee Day,Cafe,1,3.5,50
3001,596,Cafe Coffee Day,Cafe,1,3.4,277


In [None]:
# Collapse repeated restaurant names to a single row each, keeping whichever
# row comes first in the frame's current row order.
refined_dataset = refined_dataset.drop_duplicates(subset=['Restaurant Name'], keep='first')
refined_dataset

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Price range,Aggregate rating,Votes
9523,6000871,�ukura��a Sofras۱,"Kebab, Izgara",3,4.4,296
3120,18222559,{Niche} - Cafe & Bar,"North Indian, Chinese, Italian, Continental",3,4.1,492
9334,7100938,wagamama,"Japanese, Asian",4,3.7,131
9454,6401789,tashas,"Cafe, Mediterranean",4,4.1,374
4659,18361747,t Lounge by Dilmah,"Cafe, Tea, Desserts",2,3.6,34
...,...,...,...,...,...,...
8692,18317511,#Urban Caf��,"North Indian, Chinese, Italian",2,3.3,49
6998,18336489,#OFF Campus,"Cafe, Continental, Italian, Fast Food",2,3.7,216
2613,18311951,#InstaFreeze,Ice Cream,1,0.0,2
9148,18378803,#Dilliwaala6,North Indian,3,3.7,124


In [None]:
# Report how often the most frequent restaurant name occurs (expected to be 1
# once names have been de-duplicated). Printed explicitly because only a
# cell's last expression is rendered; a bare expression here is discarded.
print("Max rows per restaurant name:",
      refined_dataset['Restaurant Name'].value_counts().max())

# Keep only restaurants rated strictly above MIN_RATING.
# The explicit .copy() makes the result an independent frame, so later column
# assignments on it do not trigger pandas' SettingWithCopyWarning.
MIN_RATING = 3.9
refined_dataset = refined_dataset[refined_dataset['Aggregate rating'] > MIN_RATING].copy()
refined_dataset

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Price range,Aggregate rating,Votes
9523,6000871,�ukura��a Sofras۱,"Kebab, Izgara",3,4.4,296
3120,18222559,{Niche} - Cafe & Bar,"North Indian, Chinese, Italian, Continental",3,4.1,492
9454,6401789,tashas,"Cafe, Mediterranean",4,4.1,374
9385,6113857,sketch Gallery,"British, Contemporary",4,4.5,148
1837,18418247,feel ALIVE,"North Indian, American, Asian, Biryani",3,4.7,69
...,...,...,...,...,...,...
1468,18408054,19 Flavours Biryani,"Mughlai, Hyderabadi",2,4.1,84
2484,18233317,145 Kala Ghoda,"Fast Food, Beverages, Desserts",3,4.2,1606
2292,2100784,11th Avenue Cafe Bistro,"Cafe, American, Italian, Continental",2,4.1,377
751,2600031,10 Downing Street,"North Indian, Chinese",3,4.0,257


In [None]:
# Split the comma-separated cuisine string of each row into a list of cuisine names.
#
# * assign() builds a new frame instead of writing through .loc into a frame
#   that was produced by boolean filtering, which avoids SettingWithCopyWarning.
# * Only str values are split. Re-running the cell therefore leaves values that
#   are already lists (or NaN) untouched instead of turning lists into NaN,
#   which is what `.str.split` does when applied to non-string entries.
refined_dataset = refined_dataset.assign(
    Cuisines=refined_dataset['Cuisines'].map(
        lambda cuisines: cuisines.split(', ') if isinstance(cuisines, str) else cuisines
    )
)
refined_dataset

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Price range,Aggregate rating,Votes
9523,6000871,�ukura��a Sofras۱,"[Kebab, Izgara]",3,4.4,296
3120,18222559,{Niche} - Cafe & Bar,"[North Indian, Chinese, Italian, Continental]",3,4.1,492
9454,6401789,tashas,"[Cafe, Mediterranean]",4,4.1,374
9385,6113857,sketch Gallery,"[British, Contemporary]",4,4.5,148
1837,18418247,feel ALIVE,"[North Indian, American, Asian, Biryani]",3,4.7,69
...,...,...,...,...,...,...
1468,18408054,19 Flavours Biryani,"[Mughlai, Hyderabadi]",2,4.1,84
2484,18233317,145 Kala Ghoda,"[Fast Food, Beverages, Desserts]",3,4.2,1606
2292,2100784,11th Avenue Cafe Bistro,"[Cafe, American, Italian, Continental]",2,4.1,377
751,2600031,10 Downing Street,"[North Indian, Chinese]",3,4.0,257


In [None]:
# Expand each cuisine list into one row per (restaurant, cuisine) pair; the
# remaining columns and the index label are repeated on every expanded row.
refined_dataset = refined_dataset.explode(column='Cuisines')
refined_dataset

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Price range,Aggregate rating,Votes
9523,6000871,�ukura��a Sofras۱,Kebab,3,4.4,296
9523,6000871,�ukura��a Sofras۱,Izgara,3,4.4,296
3120,18222559,{Niche} - Cafe & Bar,North Indian,3,4.1,492
3120,18222559,{Niche} - Cafe & Bar,Chinese,3,4.1,492
3120,18222559,{Niche} - Cafe & Bar,Italian,3,4.1,492
...,...,...,...,...,...,...
2292,2100784,11th Avenue Cafe Bistro,Italian,2,4.1,377
2292,2100784,11th Avenue Cafe Bistro,Continental,2,4.1,377
751,2600031,10 Downing Street,North Indian,3,4.0,257
751,2600031,10 Downing Street,Chinese,3,4.0,257


In [None]:
# Number of rows per cuisine, most frequent first.
cuisine_column = refined_dataset['Cuisines']
cuisine_column.value_counts()

Unnamed: 0_level_0,count
Cuisines,Unnamed: 1_level_1
North Indian,270
Italian,237
Chinese,200
Continental,199
Cafe,177
...,...
Pub Food,1
Durban,1
Irish,1
Persian,1


In [None]:
# Restaurant x cuisine membership matrix (rows: restaurant names, columns:
# individual cuisines, values: 0/1).
#
# The raw 'Cuisines' column holds one comma-separated string per listing
# (e.g. "French, Japanese, Desserts"). Cross-tabulating that column directly
# yields one column per *combination* of cuisines, so two restaurants only
# overlap when their entire cuisine string is identical. Splitting and
# exploding first gives every individual cuisine its own column.
cuisine_rows = (
    dataset[['Restaurant Name', 'Cuisines']]
    .dropna()  # crosstab ignores missing keys anyway; dropping early keeps .str.split on strings only
    .assign(Cuisines=lambda frame: frame['Cuisines'].str.split(', '))
    .explode('Cuisines')
    .reset_index(drop=True)  # explode repeats index labels; give crosstab a unique index
)

# A name that appears in several listings would be counted more than once per
# cuisine; clip to a 0/1 indicator so the matrix expresses set membership.
restoXcuisines = pd.crosstab(
    cuisine_rows['Restaurant Name'],
    cuisine_rows['Cuisines'],
).clip(upper=1)
restoXcuisines

Cuisines,Afghani,"Afghani, Mughlai, Chinese","Afghani, North Indian","Afghani, North Indian, Pakistani, Arabian",African,"African, Portuguese",American,"American, Asian, Burger","American, Asian, European, Seafood","American, Asian, Italian, Seafood",...,"Turkish, Arabian, Middle Eastern","Turkish, Arabian, Moroccan, Lebanese","Turkish, Mediterranean, Middle Eastern",Vietnamese,"Vietnamese, Fish and Chips","Western, Asian, Cafe","Western, Fusion, Fast Food",World Cuisine,"World Cuisine, Mexican, Italian","World Cuisine, Patisserie, Cafe"
Restaurant Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
#45,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
#Dilliwaala6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
#InstaFreeze,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
#OFF Campus,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
#Urban Caf��,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
t Lounge by Dilmah,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tashas,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
wagamama,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
{Niche} - Cafe & Bar,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Reproducible draw (fixed random_state) of 20 restaurant names from the raw dataset.
dataset['Restaurant Name'].sample(n=20, random_state=194)

Unnamed: 0,Restaurant Name
8324,Super Cake Shop
8443,Imly
7250,Mitthu Tikki Wala
1861,Downtown - Diners & Living Beer Cafe
346,HI Lite Bar & Lounge
2061,Shri Ram Restaurant
788,Brooklyn Central
7014,Grill Master
9284,Satoo - Hotel Shangri-La
5476,New Arjun Bombay Pav Bhaji


In [None]:
# Spot-check the cuisine overlap of two restaurants with scikit-learn.
#
# The crosstab rows may hold counts, and average="macro" treats every distinct
# value as its own class and averages the per-class scores - including the
# score of the "0" class (columns neither restaurant has), which dominates the
# result. Binarise both rows and score only the positive class, so the printed
# number is the set Jaccard index |A & B| / |A | B| of the two rows.
olive_bistro = (restoXcuisines.loc["Olive Bistro"].values > 0).astype(int)
rose_cafe = (restoXcuisines.loc["Rose Cafe"].values > 0).astype(int)
print(jaccard_score(olive_bistro, rose_cafe, average="binary"))

0.3329680365296804


In [None]:
# Pairwise Jaccard similarity between all rows of restoXcuisines.
#
# Compare on boolean membership rather than on the raw cell values: the matrix
# can contain counts, and older SciPy releases compare numeric (non-boolean)
# input to the 'jaccard' metric by value, so a count of 2 vs. 1 in the same
# column would be treated as a disagreement.
membership = restoXcuisines.values > 0

# pdist returns the condensed (upper-triangle) distance vector ...
jaccardDist = pdist(membership, metric='jaccard')
# ... squareform expands it to the full symmetric n x n distance matrix ...
jaccardMatrix = squareform(jaccardDist)
# ... and similarity is the complement of the distance.
jaccardSim = 1 - jaccardMatrix

# Label both axes with the restaurant names so rows can be looked up by name.
dfJaccard = pd.DataFrame(
    jaccardSim,
    index=restoXcuisines.index,
    columns=restoXcuisines.index)

dfJaccard

Restaurant Name,#45,#Dilliwaala6,#InstaFreeze,#OFF Campus,#Urban Caf��,#hashtag,'Ohana,10 Downing Street,10 To 10 In Delhi,11th Avenue Cafe Bistro,...,feel ALIVE,hug!,iGNiTE,iKitchen,sketch Gallery,t Lounge by Dilmah,tashas,wagamama,{Niche} - Cafe & Bar,�ukura��a Sofras۱
Restaurant Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
#45,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#Dilliwaala6,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#InstaFreeze,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#OFF Campus,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#Urban Caf��,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
t Lounge by Dilmah,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
tashas,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
wagamama,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
{Niche} - Cafe & Bar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [None]:
# Draw 20 restaurant names from the refined set. The seed is fixed (same value
# as the sample of the raw dataset) so the draw is identical on every
# Restart & Run All instead of changing with each execution.
refined_dataset['Restaurant Name'].sample(20, random_state=194)

In [None]:
# Recommend restaurants similar to `resto`.
resto = 'Ooma'
MIN_SIM = 0.7   # minimum Jaccard similarity for a candidate to be considered
TOP_N = 5       # number of recommendations to return

# Similarity of every restaurant to `resto`, as a two-column frame.
sim = dfJaccard.loc[resto].sort_values(ascending=False)
sim = pd.DataFrame({'Restaurant Name': sim.index, 'simScore': sim.values})
# Exclude the query restaurant itself and anything below the similarity threshold.
sim = sim[(sim['Restaurant Name'] != resto) & (sim['simScore'] >= MIN_SIM)]

# refined_dataset can hold several rows per restaurant (one per cuisine);
# reduce it to one rating row per name so the merge does not multiply rows.
ratings = (
    refined_dataset[['Restaurant Name', 'Aggregate rating']]
    .drop_duplicates('Restaurant Name', keep='first')
)

# Join with the rated set BEFORE truncating to TOP_N. Truncating first (as a
# plain .head(5) on `sim` would) can discard every candidate that exists in
# refined_dataset and leave fewer than TOP_N - possibly zero - recommendations.
RestoRec = pd.merge(sim, ratings, how='inner', on='Restaurant Name')

# Pick the TOP_N most similar candidates (rating breaks ties), then present
# them best-rated first.
FinalRestoRec = (
    RestoRec
    .sort_values(['simScore', 'Aggregate rating'], ascending=False)
    .head(TOP_N)
    .sort_values('Aggregate rating', ascending=False)
)
FinalRestoRec