In [2]:
import pandas as pd

# Configure display before any frame is rendered so wide tables show up to 50 columns.
pd.set_option('display.max_columns', 50)

# Load the processed dataset; every cell below queries this frame.
# (A previous mid-cell `df.sample(1)` was removed: its result was discarded because it
# was not the cell's last expression, so it never produced any output.)
df = pd.read_csv('../data/processed/data.csv')

# General

### 01. How many unique restaurants?

In [3]:
# Number of distinct restaurant identifiers in the dataset.
df.loc[:, 'restaurant_id'].nunique()

6929

### 02. How many unique countries?

In [4]:
# Number of distinct country codes in the dataset.
df.loc[:, 'country_code'].nunique()

15

### 03. How many unique cities?

In [5]:
# Number of distinct city names in the dataset.
df.loc[:, 'city'].nunique()

125

### 04. How many ratings?

In [6]:
# Total of the `votes` column across all rows (used here as the number of ratings).
df.loc[:, 'votes'].sum()

4194533

### 05. How many types of cuisines?

In [7]:
# Number of distinct values in the `cuisines` column.
df.loc[:, 'cuisines'].nunique()

165

# Country

### 1. Which country has the most cities registered?

In [8]:
# Distinct cities per country, ranked from the country with the most cities downwards.
(
    df[['country', 'city']]
    .groupby('country')
    .nunique()
    .sort_values('city', ascending=False)
    .reset_index()
)

Unnamed: 0,country,city
0,India,49
1,United States of America,22
2,Philippines,12
3,South Africa,12
4,England,5
5,New Zeland,4
6,United Arab Emirates,4
7,Australia,3
8,Brazil,3
9,Canada,3


### 2. Which country has the most restaurants registered?

In [9]:
# Non-null restaurant_id entries per country, largest count first.
(
    df[['restaurant_id', 'country']]
    .groupby('country')
    .count()
    .sort_values('restaurant_id', ascending=False)
    .reset_index()
)

Unnamed: 0,country,restaurant_id
0,India,3111
1,United States of America,1374
2,England,400
3,South Africa,346
4,United Arab Emirates,300
5,Brazil,240
6,New Zeland,239
7,Australia,180
8,Canada,180
9,Turkey,159


### 3. Which country has the most restaurants with a price range of 4?

In [10]:
# Keep only rows whose price_range equals 4, then count them per country (largest first).
(
    df.loc[df['price_range'].eq(4), ['country', 'price_range']]
    .groupby('country')
    .count()
    .sort_values('price_range', ascending=False)
    .reset_index()
)

Unnamed: 0,country,price_range
0,United States of America,415
1,South Africa,221
2,India,195
3,Brazil,148
4,United Arab Emirates,102
5,England,79
6,New Zeland,79
7,Turkey,59
8,Singapure,58
9,Canada,54


### 4. Which country has the most different types of cuisines?

In [11]:
# Distinct cuisine values per country, ranked from most to fewest.
(
    df[['country', 'cuisines']]
    .groupby('country')
    .nunique()
    .sort_values('cuisines', ascending=False)
    .reset_index()
)

Unnamed: 0,country,cuisines
0,India,77
1,United States of America,74
2,England,52
3,United Arab Emirates,46
4,Australia,43
5,Brazil,43
6,Canada,42
7,South Africa,42
8,New Zeland,37
9,Qatar,31


### 5. Which country has the most number of ratings?

In [12]:
# Sum of votes per country, largest total first.
(
    df[['country', 'votes']]
    .groupby('country')
    .sum()
    .sort_values('votes', ascending=False)
    .reset_index()
)

Unnamed: 0,country,votes
0,India,2800164
1,United States of America,522348
2,United Arab Emirates,177564
3,Australia,130357
4,Canada,105018
5,Turkey,100193
6,Indonesia,89026
7,South Africa,81939
8,New Zeland,52532
9,Philippines,48398


### 6. Which country has the most number of restaurants with delivery?

In [13]:
# Restaurants flagged has_online_delivery == 1, counted per country (largest first).
(
    df.loc[df['has_online_delivery'].eq(1), ['country', 'restaurant_id']]
    .groupby('country')
    .count()
    .sort_values('restaurant_id', ascending=False)
    .reset_index()
)

Unnamed: 0,country,restaurant_id
0,India,2177
1,United Arab Emirates,205
2,Qatar,37
3,Philippines,9


### 7. Which country has the most number of restaurants that accept reservations?

In [14]:
# Restaurants flagged has_table_booking == 1, counted per country (largest first).
(
    df.loc[df['has_table_booking'].eq(1), ['country', 'has_table_booking']]
    .groupby('country')
    .count()
    .sort_values('has_table_booking', ascending=False)
    .reset_index()
)

Unnamed: 0,country,has_table_booking
0,India,256
1,England,55
2,Australia,29
3,Philippines,22
4,New Zeland,19
5,Indonesia,14
6,United Arab Emirates,10
7,Qatar,4
8,Turkey,4
9,South Africa,3


### 8. Which country has the most number of ratings on average?

In [15]:
# Mean votes per restaurant row within each country, highest average first.
(
    df[['country', 'votes']]
    .groupby('country')
    .mean()
    .sort_values('votes', ascending=False)
    .reset_index()
)

Unnamed: 0,country,votes
0,Indonesia,1112.825
1,India,900.08486
2,Australia,724.205556
3,Turkey,630.144654
4,Philippines,604.975
5,United Arab Emirates,591.88
6,Canada,583.433333
7,United States of America,380.165939
8,Qatar,376.325
9,South Africa,236.817919


### 9. Which country has the highest grade, on average?

In [16]:
# Mean aggregate_rating per country, best-rated country first.
(
    df[['country', 'aggregate_rating']]
    .groupby('country')
    .mean()
    .sort_values('aggregate_rating', ascending=False)
    .reset_index()
)

Unnamed: 0,country,aggregate_rating
0,Indonesia,4.60125
1,Philippines,4.46375
2,Singapure,4.435
3,United States of America,4.402693
4,Australia,4.372778
5,Canada,4.321667
6,Turkey,4.310063
7,Qatar,4.24
8,New Zeland,4.164435
9,South Africa,4.061561


### 10. Which country has the lowest grade, on average?

In [17]:
# Mean aggregate_rating per country, worst-rated country first.
(
    df[['country', 'aggregate_rating']]
    .groupby('country')
    .mean()
    .sort_values('aggregate_rating', ascending=True)
    .reset_index()
)

Unnamed: 0,country,aggregate_rating
0,Brazil,3.321667
1,United Arab Emirates,4.023667
2,India,4.037126
3,England,4.04175
4,Sri Lanka,4.0575
5,South Africa,4.061561
6,New Zeland,4.164435
7,Qatar,4.24
8,Turkey,4.310063
9,Canada,4.321667


### 11. What is the average meal cost for two in each country?

In [18]:
# Mean average_cost_for_two per country, most expensive first.
# NOTE(review): the values are presumably in each country's own currency and are not
# normalised here; confirm before comparing countries against each other.
(
    df[['country', 'average_cost_for_two']]
    .groupby('country')
    .mean()
    .sort_values('average_cost_for_two', ascending=False)
    .reset_index()
)

Unnamed: 0,country,average_cost_for_two
0,Indonesia,303000.0
1,Australia,138959.783333
2,Sri Lanka,2579.375
3,Philippines,1227.825
4,India,704.400514
5,South Africa,339.228324
6,Qatar,174.0
7,United Arab Emirates,153.716667
8,Singapure,141.4375
9,Brazil,138.8125


# Cities

### 1. Which city has the most number of restaurants registered?

In [19]:
# Non-null restaurant_id entries per city, largest count first.
(
    df[['city', 'restaurant_id']]
    .groupby('city')
    .count()
    .sort_values('restaurant_id', ascending=False)
    .reset_index()
)

Unnamed: 0,city,restaurant_id
0,Abu Dhabi,80
1,Mysore,80
2,Glasgow,80
3,Goa,80
4,Guwahati,80
...,...,...
120,San Juan City,1
121,Roodepoort,1
122,Muntinlupa City,1
123,Johannesburg South,1


### 2. Which city has the most number of restaurants with aggregate rating above 4?

In [20]:
# Restaurants with aggregate_rating strictly above 4, counted per city (largest first).
(
    df.loc[df['aggregate_rating'].gt(4), ['city', 'aggregate_rating']]
    .groupby('city')
    .count()
    .sort_values('aggregate_rating', ascending=False)
    .reset_index()
)

Unnamed: 0,city,aggregate_rating
0,Bangalore,79
1,London,78
2,Houston,75
3,Jakarta,74
4,Chennai,72
...,...,...
116,Ghaziabad,1
117,San Juan City,1
118,Roodepoort,1
119,Zirakpur,1


### 3. Which city has the most number of restaurants with rating below 2.5?

In [21]:
# Restaurants with aggregate_rating strictly below 2.5, counted per city (largest first).
(
    df.loc[df['aggregate_rating'].lt(2.5), ['city', 'aggregate_rating']]
    .groupby('city')
    .count()
    .sort_values('aggregate_rating', ascending=False)
    .reset_index()
)

Unnamed: 0,city,aggregate_rating
0,Gangtok,33
1,Ooty,19
2,São Paulo,16
3,Brasília,15
4,Rio de Janeiro,12
5,Manchester,7
6,Clarens,4
7,Shimla,4
8,Edinburgh,2
9,Hamilton,2


### 4. Which city has, on average, the most expensive cost of meal for two?

In [22]:
# Mean average_cost_for_two per city, most expensive first.
(
    df[['city', 'average_cost_for_two']]
    .groupby('city')
    .mean()
    .sort_values('average_cost_for_two', ascending=False)
    .reset_index()
)

Unnamed: 0,city,average_cost_for_two
0,Adelaide,416734.133333
1,Jakarta,309605.263158
2,Tangerang,180000.000000
3,Bogor,175000.000000
4,Pasay City,4000.000000
...,...,...
120,Calgary,32.500000
121,Los Angeles,30.000000
122,Denver,29.916667
123,Phoenix,28.000000


### 5. Which city has the most number of unique cuisines?

In [23]:
# Distinct cuisine values per city, ranked from most to fewest.
(
    df[['city', 'cuisines']]
    .groupby('city')
    .nunique()
    .sort_values('cuisines', ascending=False)
    .reset_index()
)

Unnamed: 0,city,cuisines
0,Birmingham,32
1,Doha,31
2,Montreal,30
3,São Paulo,30
4,Manchester,30
...,...,...
120,Las Piñas City,1
121,Marikina City,1
122,Roodepoort,1
123,Muntinlupa City,1


### 6. Which city has the most number of restaurants that have table booking?

In [24]:
# Restaurants flagged has_table_booking == 1, counted per city (largest first).
(
    df.loc[df['has_table_booking'].eq(1), ['city', 'has_table_booking']]
    .groupby('city')
    .count()
    .sort_values('has_table_booking', ascending=False)
    .reset_index()
)

Unnamed: 0,city,has_table_booking
0,Bangalore,42
1,Chennai,28
2,Pune,25
3,Mumbai,18
4,Brisbane,17
...,...,...
64,Pasig City,1
65,Fujairah,1
66,Nashik,1
67,Johannesburg,1


### 7. Which city has the most number of restaurants with delivery?

In [25]:
# Restaurants flagged is_delivering_now == 1, counted per city (largest first).
(
    df.loc[df['is_delivering_now'].eq(1), ['city', 'is_delivering_now']]
    .groupby('city')
    .count()
    .sort_values('is_delivering_now', ascending=False)
    .reset_index()
)

Unnamed: 0,city,is_delivering_now
0,Vadodara,48
1,Amritsar,48
2,Aurangabad,47
3,Ludhiana,46
4,Bhopal,46
5,Dehradun,45
6,Ranchi,42
7,Jaipur,42
8,Ahmedabad,41
9,Varanasi,40


### 8. Which city has the most number of restaurants that have online ordering?

In [26]:
# Restaurants flagged has_online_delivery == 1, counted per city (largest first).
(
    df.loc[df['has_online_delivery'].eq(1), ['city', 'has_online_delivery']]
    .groupby('city')
    .count()
    .sort_values('has_online_delivery', ascending=False)
    .reset_index()
)

Unnamed: 0,city,has_online_delivery
0,Bhopal,75
1,Vadodara,74
2,Abu Dhabi,71
3,Sharjah,71
4,Nagpur,69
5,Aurangabad,69
6,Patna,69
7,Ranchi,69
8,Coimbatore,68
9,Allahabad,68


# Restaurants

### 1. Which restaurant has the most number of ratings?

In [27]:
# Rank restaurants by votes (descending); ties are broken by ascending restaurant_id.
(
    df[['restaurant_id', 'restaurant_name', 'votes']]
    .sort_values(by=['votes', 'restaurant_id'], ascending=[False, True])
)

Unnamed: 0,restaurant_id,restaurant_name,votes
3678,90896,Bawarchi,41333
2901,58882,Byg Brewski Brewing Company,17394
2853,51705,Toit,15270
2867,51040,Truffles,14984
4697,308322,Hauz Khas Social,13627
...,...,...,...
6221,18756337,Alston Bar & Beef,0
6222,18756343,The Pasta Factory,0
6218,18756659,Veeno,0
4776,18971344,Kabab Corner,0


### 2. Which restaurant has the highest average rating?

In [28]:
# Rank restaurants by aggregate_rating (descending); ties are broken by ascending restaurant_id.
(
    df[['restaurant_id', 'restaurant_name', 'aggregate_rating']]
    .sort_values(by=['aggregate_rating', 'restaurant_id'], ascending=[False, True])
)

Unnamed: 0,restaurant_id,restaurant_name,aggregate_rating
3533,7528,Indian Grill Room,4.9
3526,8913,Pirates of Grill,4.9
4995,10021,Darshan,4.9
4067,20842,Barbeque Nation,4.9
2883,56618,AB's - Absolute Barbecues,4.9
...,...,...,...
4783,18994548,V. Krishnan Bakery,0.0
4730,18997511,Jain Restaurant,0.0
5166,19034826,Republic of Chicken,0.0
5174,19035941,City Point,0.0


### 3. Which restaurant has the most expensive meal for two? 

In [71]:
# Five restaurants with the highest average_cost_for_two; ties broken by ascending restaurant_id.
(
    df[['restaurant_id', 'restaurant_name', 'average_cost_for_two']]
    .sort_values(by=['average_cost_for_two', 'restaurant_id'], ascending=[False, True])
    .head()
)

Unnamed: 0,restaurant_id,restaurant_name,average_cost_for_two
356,16608070,d'Arry's Verandah Restaurant,25000017
5567,7403667,The Café - Hotel Mulia,1200000
5603,7423164,GIA Restaurant & Bar,700000
5562,7402935,SKYE,600000
5573,7425027,Shabu-Shabu Shaburi,600000


### 4. Which restaurant with Brazilian cuisine has the lowest average rating?

In [70]:
# Five lowest-rated restaurants whose cuisine is exactly 'Brazilian' (any country);
# ties are broken by ascending restaurant_id.
# NOTE(review): an aggregate_rating of 0.0 may denote an unrated restaurant rather than
# a genuinely low score; confirm whether such rows should be excluded.
(
    df.loc[df['cuisines'] == 'Brazilian', ['restaurant_id', 'restaurant_name', 'aggregate_rating']]
    .sort_values(by=['aggregate_rating', 'restaurant_id'], ascending=[True, True])
    .head()
)

Unnamed: 0,restaurant_id,restaurant_name,aggregate_rating
117,6600100,Loca Como tu Madre,0.0
96,6600119,Café Savana,0.0
116,6600124,Inácia Poulet Rôti,0.0
139,6600456,Galeteria Beira Lago,0.0
120,6601251,Severina,0.0


### 5. Which restaurant with Brazilian cuisine, from Brazil, has the highest average rating?

In [69]:
# Five highest-rated restaurants whose cuisine is exactly 'Brazilian' and whose country
# is 'Brazil'; ties are broken by ascending restaurant_id.
(
    df.loc[
        (df['cuisines'] == 'Brazilian') & (df['country'] == 'Brazil'),
        ['restaurant_id', 'restaurant_name', 'aggregate_rating'],
    ]
    .sort_values(by=['aggregate_rating', 'restaurant_id'], ascending=[False, True])
    .head()
)

Unnamed: 0,restaurant_id,restaurant_name,aggregate_rating
191,7300955,Braseiro da Gávea,4.9
230,7302898,Aprazível,4.9
317,6709740,Mocotó,4.8
197,7300515,Garota de Ipanema,4.8
165,7301525,Fogo de Chão,4.8


### 6. Do the restaurants with online delivery have, on average, the most number of ratings?

In [58]:
# Mean votes for each value of the has_online_delivery flag.
(
    df[['has_online_delivery', 'votes']]
    .groupby('has_online_delivery')
    .mean()
    .reset_index()
)

Unnamed: 0,has_online_delivery,votes
0,0,479.421018
1,1,838.821664


### 7. Do the restaurants with table booking have, on average, the most expensive meal for two?

In [46]:
# Mean average_cost_for_two for each value of the has_table_booking flag.
(
    df[['has_table_booking', 'average_cost_for_two']]
    .groupby('has_table_booking')
    .mean()
    .reset_index()
)

Unnamed: 0,has_table_booking,average_cost_for_two
0,0,3488.596866
1,1,69998.42381


### 8. Do restaurants with Japanese cuisine in the USA have, on average, a more expensive meal for two than American barbecue restaurants?

In [57]:
# Within 'United States of America', compare the mean average_cost_for_two of rows whose
# cuisine is 'Japanese' against those whose cuisine is 'BBQ'.
(
    df.loc[
        df['country'].eq('United States of America') & df['cuisines'].isin(['Japanese', 'BBQ']),
        ['cuisines', 'average_cost_for_two'],
    ]
    .groupby('cuisines')
    .mean()
    .reset_index()
)

Unnamed: 0,cuisines,average_cost_for_two
0,BBQ,39.642857
1,Japanese,56.40625


# Cuisines

### 1. Which restaurant with Italian cuisine has the highest average rating?

In [73]:
# Restaurants whose cuisine is exactly 'Italian', best aggregate_rating first;
# ties are broken by ascending restaurant_id.
(
    df.loc[df['cuisines'].eq('Italian'), ['restaurant_id', 'restaurant_name', 'aggregate_rating']]
    .sort_values(by=['aggregate_rating', 'restaurant_id'], ascending=[False, True])
)

Unnamed: 0,restaurant_id,restaurant_name,aggregate_rating
4995,10021,Darshan,4.9
6651,6501298,Cafe Del Sol Classico,4.9
5810,7100171,Ombra,4.9
6054,7700796,Celino's,4.9
337,16587684,Andre's Cucina & Polenta Bar,4.9
...,...,...,...
6195,6800666,Le Delicatezze Di Bruno,0.0
220,7304910,Bene - Sheraton Rio Hotel,0.0
170,18334183,La Bocca Bar e Trattoria,0.0
6222,18756343,The Pasta Factory,0.0


### 2. Which restaurant with Italian cuisine has the lowest average rating?

In [74]:
# Restaurants whose cuisine is exactly 'Italian', worst aggregate_rating first;
# ties are broken by ascending restaurant_id.
(
    df.loc[df['cuisines'].eq('Italian'), ['restaurant_id', 'restaurant_name', 'aggregate_rating']]
    .sort_values(by=['aggregate_rating', 'restaurant_id'], ascending=[True, True])
)

Unnamed: 0,restaurant_id,restaurant_name,aggregate_rating
138,6600542,Avenida Paulista,0.0
278,6714499,Ristorantino,0.0
297,6715707,Più,0.0
6195,6800666,Le Delicatezze Di Bruno,0.0
220,7304910,Bene - Sheraton Rio Hotel,0.0
...,...,...,...
1755,17156258,Cerroni's Purple Garlic,4.9
1769,17156955,Guillermo's,4.9
1348,17242351,Central Grocery,4.9
3796,18209498,Zolocrust - Hotel Clarks Amer,4.9


### 3. Which restaurant with American cuisine has the highest average rating?

In [75]:
# Restaurants whose cuisine is exactly 'American', best aggregate_rating first;
# ties are broken by ascending restaurant_id.
(
    df.loc[df['cuisines'].eq('American'), ['restaurant_id', 'restaurant_name', 'aggregate_rating']]
    .sort_values(by=['aggregate_rating', 'restaurant_id'], ascending=[False, True])
)

Unnamed: 0,restaurant_id,restaurant_name,aggregate_rating
6123,6102616,Burger & Lobster,4.9
6228,6801374,Solita,4.9
694,16631515,OEB Breakfast Co.,4.9
790,16737455,Portillo's Hot Dogs,4.9
1410,16776778,S'MAC,4.9
...,...,...,...
5850,7100971,Thunderbird Cafe,3.2
6677,6502573,Hard Rock Cafe,2.8
228,7303748,Bob's,2.6
6625,18445965,Guy Fieri's Kitchen & Bar,2.2


### 4. Which restaurant with American cuisine has the lowest average rating?

In [76]:
# Restaurants whose cuisine is exactly 'American', worst aggregate_rating first;
# ties are broken by ascending restaurant_id.
(
    df.loc[df['cuisines'].eq('American'), ['restaurant_id', 'restaurant_name', 'aggregate_rating']]
    .sort_values(by=['aggregate_rating', 'restaurant_id'], ascending=[True, True])
)

Unnamed: 0,restaurant_id,restaurant_name,aggregate_rating
6221,18756337,Alston Bar & Beef,0.0
6625,18445965,Guy Fieri's Kitchen & Bar,2.2
228,7303748,Bob's,2.6
6677,6502573,Hard Rock Cafe,2.8
5850,7100971,Thunderbird Cafe,3.2
...,...,...,...
1861,17069832,Hodad's,4.9
1868,17070293,Kono's Surf Club Cafe,4.9
1802,17153940,The Magnolia Pancake Haus,4.9
6309,18416901,The Cheesecake Factory,4.9


### 5. Which restaurant with Arabian cuisine has the highest average rating?

In [86]:
# Five best-rated restaurants whose cuisine is exactly 'Arabian';
# ties are broken by ascending restaurant_id.
(
    df.loc[df['cuisines'].eq('Arabian'), ['restaurant_id', 'restaurant_name', 'aggregate_rating']]
    .sort_values(by=['aggregate_rating', 'restaurant_id'], ascending=[False, True])
    .head()
)

Unnamed: 0,restaurant_id,restaurant_name,aggregate_rating
3647,18535007,Mandi@36,4.7
2375,5600517,Madfoon Al Khaimah,4.5
6336,6200166,Wok of Fame,4.5
3031,18770281,Three Kings,4.5
2235,201824,Aroos Damascus,4.4


### 6. Which restaurant with Arabian cuisine has the lowest average rating?

In [85]:
# Five worst-rated restaurants whose cuisine is exactly 'Arabian';
# ties are broken by ascending restaurant_id.
(
    df.loc[df['cuisines'].eq('Arabian'), ['restaurant_id', 'restaurant_name', 'aggregate_rating']]
    .sort_values(by=['aggregate_rating', 'restaurant_id'], ascending=[True, True])
    .head()
)

Unnamed: 0,restaurant_id,restaurant_name,aggregate_rating
310,6706513,Raful,0.0
155,6601535,Empório Árabe,2.7
135,6600547,Salim Sou Eu,3.1
4767,18698592,V Empire,3.2
4733,18698769,Abad Hot Chicken,3.2


### 7. Which restaurant with Japanese cuisine has the highest average rating?

In [None]:
# Five best-rated restaurants whose cuisine is exactly 'Japanese';
# ties are broken by ascending restaurant_id.
(
    df.loc[df['cuisines'].eq('Japanese'), ['restaurant_id', 'restaurant_name', 'aggregate_rating']]
    .sort_values(by=['aggregate_rating', 'restaurant_id'], ascending=[False, True])
    .head()
)

Unnamed: 0,restaurant_id,restaurant_name,aggregate_rating
6109,6107336,Sushi Samba,4.9
6134,6116563,Chotto Matte,4.9
31,6309831,Ramen Yushoken,4.9
16,6316125,Mendokoro Ramenba,4.9
20,6318506,Ooma,4.9


### 8. Which restaurant with Japanese cuisine has the lowest average rating?

In [88]:
# Five worst-rated restaurants whose cuisine is exactly 'Japanese';
# ties are broken by ascending restaurant_id.
(
    df.loc[df['cuisines'].eq('Japanese'), ['restaurant_id', 'restaurant_name', 'aggregate_rating']]
    .sort_values(by=['aggregate_rating', 'restaurant_id'], ascending=[True, True])
    .head()
)

Unnamed: 0,restaurant_id,restaurant_name,aggregate_rating
114,6600203,Banzai Sushi,0.0
167,7300930,Orienthai,2.4
168,7304077,Hachiko,2.5
103,6600314,Haná,2.7
107,6600379,Sushi Loko,2.9


### 9. Which restaurant with Home-made cuisine has the highest average rating?

In [89]:
# Up to five best-rated restaurants whose cuisine is exactly 'Home-made';
# ties are broken by ascending restaurant_id.
(
    df.loc[df['cuisines'].eq('Home-made'), ['restaurant_id', 'restaurant_name', 'aggregate_rating']]
    .sort_values(by=['aggregate_rating', 'restaurant_id'], ascending=[False, True])
    .head()
)

Unnamed: 0,restaurant_id,restaurant_name,aggregate_rating
6927,5914190,Kanaat Lokantası,4.0
6781,6007184,GurMekan Restaurant,3.7


### 10. Which restaurant with Home-made cuisine has the lowest average rating?

In [90]:
# Up to five worst-rated restaurants whose cuisine is exactly 'Home-made';
# ties are broken by ascending restaurant_id.
(
    df.loc[df['cuisines'].eq('Home-made'), ['restaurant_id', 'restaurant_name', 'aggregate_rating']]
    .sort_values(by=['aggregate_rating', 'restaurant_id'], ascending=[True, True])
    .head()
)

Unnamed: 0,restaurant_id,restaurant_name,aggregate_rating
6781,6007184,GurMekan Restaurant,3.7
6927,5914190,Kanaat Lokantası,4.0


### 11. Which type of cuisine has the most expensive meal for two on average?

In [95]:
# Mean average_cost_for_two per cuisine, most expensive first.
(
    df[['cuisines', 'average_cost_for_two']]
    .groupby('cuisines')
    .mean()
    .sort_values('average_cost_for_two', ascending=False)
    .reset_index()
)

Unnamed: 0,cuisines,average_cost_for_two
0,Modern Australian,1.470693e+06
1,Western,3.333333e+05
2,Indonesian,2.000000e+05
3,Sunda,2.000000e+05
4,Dimsum,1.500000e+05
...,...,...
160,Donuts,1.800000e+01
161,Russian,1.500000e+01
162,Polish,1.000000e+01
163,Others,1.000000e+01


### 12. Which type of cuisine has the highest average rating?

In [111]:
# Mean aggregate_rating per cuisine, best-rated first.
(
    df[['cuisines', 'aggregate_rating']]
    .groupby('cuisines')
    .mean()
    .sort_values('aggregate_rating', ascending=False)
    .reset_index()
)

Unnamed: 0,cuisines,aggregate_rating
0,Others,4.900000
1,Ramen,4.800000
2,Ottoman,4.800000
3,Egyptian,4.800000
4,Fresh Fish,4.750000
...,...,...
160,Tea,2.460000
161,Bengali,2.133333
162,Afghan,2.050000
163,Drinks Only,0.000000


### 13. Which type of cuisine has the most restaurants that have online ordering and are delivering now?

In [108]:
# Restaurants that both offer online ordering and are delivering now, counted per cuisine
# (largest first). The flags are compared with `== 1`, the same convention every other
# cell in this notebook uses for these columns, instead of `== True`.
(
    df.loc[
        (df['has_online_delivery'] == 1) & (df['is_delivering_now'] == 1),
        ['cuisines', 'restaurant_id'],
    ]
    .groupby(['cuisines'])
    .count()
    .sort_values(by='restaurant_id', ascending=False)
    .reset_index()
)

Unnamed: 0,cuisines,restaurant_id
0,North Indian,317
1,Cafe,131
2,South Indian,79
3,Burger,76
4,Fast Food,72
5,Pizza,55
6,Chinese,49
7,Biryani,38
8,Italian,35
9,Continental,34
