In [178]:
import pandas as pd
import numpy as np
import plotly.figure_factory as ff
from plotly.offline import iplot

import plotly.express as px
from plotly.subplots import make_subplots

import plotly.graph_objects as go
import warnings
warnings.filterwarnings('ignore')

Analyzing Dataframe

In [180]:
# Reading dataframe 
df = pd.read_csv('restaurants.csv')
df.head()

Unnamed: 0,Name,Location,Locality,City,Cuisine,Rating,Votes,Cost
0,Local,"Scindia House,Connaught Place, Central Delhi",Central Delhi,Delhi,"North Indian, Finger Food, Continental",4.1,2415,2000
1,The G.T. ROAD,"M-Block,Connaught Place, Central Delhi",Central Delhi,Delhi,North Indian,4.3,2363,1500
2,Tamasha,"Connaught Place, Central Delhi",Central Delhi,Delhi,"Finger Food, North Indian, Italian, Contine...",4.2,5016,2000
3,The Junkyard Cafe,"Connaught Place, Central Delhi",Central Delhi,Delhi,"North Indian, Mediterranean, Asian, Italian...",4.2,2821,1800
4,Chili's American Grill and Bar,"M-Block,Connaught Place, Central Delhi",Central Delhi,Delhi,"Mexican, American, Italian",4.4,1094,2000


In [181]:
# Evaluating dataframe
print('* Size of dataframe: {}\n'.format(df.shape))
print('* Datatype of columns are:\n {}\n'.format(df.info()))

* Size of dataframe: (6593, 8)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6593 entries, 0 to 6592
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Name      6593 non-null   object 
 1   Location  6593 non-null   object 
 2   Locality  6593 non-null   object 
 3   City      6593 non-null   object 
 4   Cuisine   6593 non-null   object 
 5   Rating    6593 non-null   float64
 6   Votes     6593 non-null   int64  
 7   Cost      6593 non-null   int64  
dtypes: float64(1), int64(2), object(5)
memory usage: 412.2+ KB
* Datatype of columns are:
 None



In [182]:
df.describe()

Unnamed: 0,Rating,Votes,Cost
count,6593.0,6593.0,6593.0
mean,4.0882,119.420143,1102.798271
std,0.670031,261.849704,716.935212
min,1.0,1.0,100.0
25%,3.9,6.0,500.0
50%,4.2,31.0,900.0
75%,4.4,115.0,1500.0
max,5.0,5016.0,8000.0


In [183]:
df['City'].value_counts()

Bangalore     1019
Delhi          734
Mumbai         727
Kolkata        712
Hyderabad      589
Ahmedabad      414
Chennai        388
Pune           351
Jaipur         268
Chandigarh     264
Indore         204
Gurgaon        184
Noida          146
Vadodara        91
Lucknow         80
Agra            75
Nagpur          66
Surat           64
Ludhiana        62
Goa             60
Ghaziabad       50
Udaipur         43
Kochi            2
Name: City, dtype: int64

Cities can be categorized in terms of State.

Adding State Column

In [184]:
df['State'] = df['City']
df['State'] = df['City'].replace({'Bangalore': 'Karnataka', 'Delhi':'Delhi NCR', 'Mumbai':'Maharashtra', 'Kolkata':'Bengal', 'Hyderabad':'Telangana', 'Ahmedabad':'Gujarat', 'Chennai':'Tamil Nadu', 'Pune':'Maharashtra', 'Jaipur':'Rajasthan', 'Chandigarh':'Punjab', 'Indore':'Madhya Pradesh', 'Gurgaon':'Delhi NCR', 'Noida':'Delhi NCR', 'Vadodara':'Gujarat', 'Lucknow':'Uttar Pradesh', 'Agra':'Uttar Pradesh', 'Nagpur':'Maharashtra', 'Surat':'Gujarat', 'Ludhiana':'Punjab', 'Goa':'Goa', 'Ghaziabad':'Delhi NCR', 'Udaipur':'Rajasthan', 'Kochi':'Kerala'})
df['State'].value_counts()

Maharashtra       1144
Delhi NCR         1114
Karnataka         1019
Bengal             712
Telangana          589
Gujarat            569
Tamil Nadu         388
Punjab             326
Rajasthan          311
Madhya Pradesh     204
Uttar Pradesh      155
Goa                 60
Kerala               2
Name: State, dtype: int64

Kochi has just two restaurants.

Removing Kochi

In [185]:
kochi_df = df[df['City']=='Kochi']
kochi_df.index
df = df.drop(kochi_df.index)
df['City'].value_counts()

Bangalore     1019
Delhi          734
Mumbai         727
Kolkata        712
Hyderabad      589
Ahmedabad      414
Chennai        388
Pune           351
Jaipur         268
Chandigarh     264
Indore         204
Gurgaon        184
Noida          146
Vadodara        91
Lucknow         80
Agra            75
Nagpur          66
Surat           64
Ludhiana        62
Goa             60
Ghaziabad       50
Udaipur         43
Name: City, dtype: int64

Distribution of restaurant ratings, cost and votes in India

In [251]:
fig = ff.create_distplot([df.Rating],['Rating'],bin_size = 0.1)
fig.update_layout(title_text = 'Distribution of Restaurant Ratings')

In [246]:
fig = ff.create_distplot([df.Cost],['Cost'], bin_size = 100)
fig.update_layout(title_text='Distribution of Restaurant Cost')

In [248]:
fig = ff.create_distplot([df.Votes], ['Votes'], bin_size = 200)
fig.update_layout(title_text='Distribution of Restaurant Votes')

The above distribution do not provide analysis in terms of states or cities. The region-wise restaurant performance is evaluated in following sections.

Inference 1: How are restaurants distributed across India?

In [189]:
# Forming dataframes in term of cities and state
city_restaurants = df.groupby('City').sum()
state_restaurants = df.groupby('State').sum()

# List of states
rest_states = df['State'].value_counts()
rest_states

Maharashtra       1144
Delhi NCR         1114
Karnataka         1019
Bengal             712
Telangana          589
Gujarat            569
Tamil Nadu         388
Punjab             326
Rajasthan          311
Madhya Pradesh     204
Uttar Pradesh      155
Goa                 60
Name: State, dtype: int64

In [249]:
fig = px.bar(x = rest_states.index, y = rest_states)
fig.update_layout(xaxis_title = 'States', yaxis_title = 'Total Restaurants', 
                  title_text = 'Restaurant Distribution Across States', 
                  title_x = 0.5)
fig.show()

In [250]:
restnt_city = df['City'].value_counts().sort_values(ascending = True) 

fig = px.bar(y = restnt_city.index, x=restnt_city, color=restnt_city, orientation = 'h',
            labels = {
                'color': 'Total' +'<br>'+ 'Restaurants'
            }) # color continuous scale
fig.update_layout(yaxis_title = 'States', xaxis_title = 'Total Restaurants', 
                  title_text='Restaurant Distribution Across Cities', 
                  title_x=0.5,
                  font=dict(
                      family="Courier New, monospace",
                      size=12,
                      color='rgb(12, 128, 128)'
                  )
                  )
fig.show()

Inference 2: How are average ratings distributed across India?

In [192]:
df.head()

Unnamed: 0,Name,Location,Locality,City,Cuisine,Rating,Votes,Cost,State
0,Local,"Scindia House,Connaught Place, Central Delhi",Central Delhi,Delhi,"North Indian, Finger Food, Continental",4.1,2415,2000,Delhi NCR
1,The G.T. ROAD,"M-Block,Connaught Place, Central Delhi",Central Delhi,Delhi,North Indian,4.3,2363,1500,Delhi NCR
2,Tamasha,"Connaught Place, Central Delhi",Central Delhi,Delhi,"Finger Food, North Indian, Italian, Contine...",4.2,5016,2000,Delhi NCR
3,The Junkyard Cafe,"Connaught Place, Central Delhi",Central Delhi,Delhi,"North Indian, Mediterranean, Asian, Italian...",4.2,2821,1800,Delhi NCR
4,Chili's American Grill and Bar,"M-Block,Connaught Place, Central Delhi",Central Delhi,Delhi,"Mexican, American, Italian",4.4,1094,2000,Delhi NCR


2.1 State-Wise Distribution

In [193]:
# Forming state-wise dataframe
df_state = df.groupby('State').mean()
df_state.reset_index(level = 0, inplace = True)
df_state

Unnamed: 0,State,Rating,Votes,Cost
0,Bengal,3.99059,108.025281,880.897472
1,Delhi NCR,4.196589,268.77289,1516.921005
2,Goa,4.153333,27.216667,1480.0
3,Gujarat,4.164675,43.01406,743.233743
4,Karnataka,4.029931,100.21001,924.288518
5,Madhya Pradesh,3.790686,55.382353,1017.892157
6,Maharashtra,4.146591,100.781469,1205.594406
7,Punjab,4.094785,47.055215,1033.742331
8,Rajasthan,4.090032,40.488746,1145.337621
9,Tamil Nadu,4.025258,80.074742,937.113402


In [194]:
fig = px.bar(df_state, x = 'State', y = 'Rating')
fig.update_layout(xaxis_title = 'States', yaxis_title = 'Average Rating', 
                  title_text = 'Rating Distribution Across States', 
                  title_x = 0.5)
fig.show()

The bar graph shows that rating variation is quite small for different states.

Comparing Ratings with Polar Bar Plot

In [195]:
labels = df_state['State']
x1 = df_state['Rating']

num_slices = len(x1)
theta = [(i+1.5)*360/num_slices for i in range(num_slices)]
r=x1
width = [360 / num_slices for _ in range(num_slices)]


barpolar_plots = [go.Barpolar(r=[r], theta=[t], width=[w], name=n)
for r, t, w, n in zip(r, theta, width, labels)]

fig = go.Figure(barpolar_plots)

fig.update_layout(polar = dict(
                        radialaxis = dict(range=[3.5, 4.25], showticklabels=True),
                        angularaxis = dict(showticklabels=False, ticks='')
                        ),
                    title_text='Comparison of Ratings Across States', 
                    title_x=0.45,
                    font=dict(
                      family="Courier New, monospace",
                      size=12,
                  )
)
fig.show()

2.2 City-Wise Distribution

In [196]:
df_city = df.groupby('City').mean()
df_city.reset_index(level = 0, inplace = True)
df_city

Unnamed: 0,City,Rating,Votes,Cost
0,Agra,4.238667,19.613333,1320.0
1,Ahmedabad,4.202899,50.422705,777.294686
2,Bangalore,4.029931,100.21001,924.288518
3,Chandigarh,4.1375,53.083333,984.848485
4,Chennai,4.025258,80.074742,937.113402
5,Delhi,4.176567,304.476839,1526.634877
6,Ghaziabad,4.086,128.8,1374.0
7,Goa,4.153333,27.216667,1480.0
8,Gurgaon,4.308696,218.353261,1610.326087
9,Hyderabad,4.040917,155.466893,1114.940577


In [197]:
fig = px.bar(df_city, x = 'City', y = 'Rating')
fig.update_layout(xaxis_title = 'Cities', yaxis_title = 'Average Rating', 
                  title_text='Rating Distribution Across Cities')
fig.show()

The bar graph shows that rating variation is small for different cities.

Comparing Ratings with Polar Bar Plot

In [198]:
labels = df_city['City']
x1 = df_city['Rating']

num_slices = len(x1)
theta = [(i+1.5)*360/num_slices for i in range(num_slices)]
r=x1
width = [360 / num_slices for _ in range(num_slices)]

barpolar_plots = [go.Barpolar(r=[r], theta=[t], width=[w], name=n)
for r, t, w, n in zip(r, theta, width, labels)]

fig = go.Figure(barpolar_plots)


fig.update_layout(#     template='ggplot2',
                    polar = dict(
                        radialaxis = dict(range=[3.5, 4.33], showticklabels=True),
                        angularaxis = dict(showticklabels=False, ticks='')
                        ),
                    yaxis_title = 'States', xaxis_title = 'Total Restaurants', 
                    title_text='Comparison of Ratings Across Cities', 
                    title_x=0.47,
                    font=dict(
                      family="Courier New, monospace",
                      size=12,
#                       color='rgb(12, 128, 128)'
                  )
)
fig.show()

The bar graph shows that rating variation is small for different cities.

Inference 3: How is cost distributed across India?

3.1 State-wise Distribution

In [199]:
df_state

Unnamed: 0,State,Rating,Votes,Cost
0,Bengal,3.99059,108.025281,880.897472
1,Delhi NCR,4.196589,268.77289,1516.921005
2,Goa,4.153333,27.216667,1480.0
3,Gujarat,4.164675,43.01406,743.233743
4,Karnataka,4.029931,100.21001,924.288518
5,Madhya Pradesh,3.790686,55.382353,1017.892157
6,Maharashtra,4.146591,100.781469,1205.594406
7,Punjab,4.094785,47.055215,1033.742331
8,Rajasthan,4.090032,40.488746,1145.337621
9,Tamil Nadu,4.025258,80.074742,937.113402


Cost distribution across states

In [200]:
df_state.sort_values(by = ['Cost'], inplace = True)

fig = px.bar(df_state, x = 'Cost', y='State', color = 'Cost', orientation = 'h',
            labels = {
                'Cost': 'Average' +'<br>'+ 'Cost'
            })
fig.update_layout(yaxis_title = 'States', xaxis_title = 'Average Cost', 
                  title_text='Average Cost Distribution Across States', 
                  title_x=0.5)
fig.show()

3.2 City-wise Distribution

In [201]:
df_city

Unnamed: 0,City,Rating,Votes,Cost
0,Agra,4.238667,19.613333,1320.0
1,Ahmedabad,4.202899,50.422705,777.294686
2,Bangalore,4.029931,100.21001,924.288518
3,Chandigarh,4.1375,53.083333,984.848485
4,Chennai,4.025258,80.074742,937.113402
5,Delhi,4.176567,304.476839,1526.634877
6,Ghaziabad,4.086,128.8,1374.0
7,Goa,4.153333,27.216667,1480.0
8,Gurgaon,4.308696,218.353261,1610.326087
9,Hyderabad,4.040917,155.466893,1114.940577


Cost distribution across cities

In [202]:
df_city.sort_values(by = ['Cost'], inplace = True)
df_city
fig = px.bar(df_city, x = 'Cost', y = 'City', color = 'Cost', orientation = 'h',
            labels = {
                'Cost': 'Average' +'<br>'+ 'Cost'
            })
fig.update_layout(yaxis_title = 'States', xaxis_title = 'Average Cost', 
                  title_text = 'Average Cost Distribution Across Cities', 
                  title_x = 0.5)

fig.show()

Inference 4: How are votes distributed across India?

4.1 State-wise Distribution

In [203]:
df_state

Unnamed: 0,State,Rating,Votes,Cost
3,Gujarat,4.164675,43.01406,743.233743
0,Bengal,3.99059,108.025281,880.897472
4,Karnataka,4.029931,100.21001,924.288518
9,Tamil Nadu,4.025258,80.074742,937.113402
5,Madhya Pradesh,3.790686,55.382353,1017.892157
7,Punjab,4.094785,47.055215,1033.742331
10,Telangana,4.040917,155.466893,1114.940577
8,Rajasthan,4.090032,40.488746,1145.337621
6,Maharashtra,4.146591,100.781469,1205.594406
11,Uttar Pradesh,4.123871,36.180645,1280.645161


Cost distribution across states

In [204]:
df_state.sort_values(by=['Votes'], inplace = True)

fig = px.bar(df_state, x = 'Votes', y = 'State', color = 'Votes', orientation = 'h',
            labels = {
                'Votes': 'Average' +'<br>'+ 'Votes'
            })
fig.update_layout(yaxis_title = 'States', xaxis_title = 'Average Votes', 
                  title_text = 'Votes Distribution Across States', 
                  title_x=0.5)
                  
fig.show()

4.2 City-wise Distribution

Votes distribuion across cities

In [205]:
df_city.sort_values(by=['Votes'], inplace=True)

fig = px.bar(df_city, x = 'Votes', y='City', color = 'Votes', orientation = 'h',
            labels = {
                'Votes': 'Average' +'<br>'+ 'Votes'
            })
fig.update_layout(yaxis_title = 'Cities', xaxis_title = 'Average Votes', 
                  title_text='Votes Distribution Across Cities', 
                  title_x=0.5)
                
fig.show()

Inference 5: How is the overall performance of restaurants across different states?

5.1 Adding Attributes to State Dataframe

In [206]:
rest_states

Maharashtra       1144
Delhi NCR         1114
Karnataka         1019
Bengal             712
Telangana          589
Gujarat            569
Tamil Nadu         388
Punjab             326
Rajasthan          311
Madhya Pradesh     204
Uttar Pradesh      155
Goa                 60
Name: State, dtype: int64

Extracting total restaurants in each state and forming its dataframe

In [207]:
a = rest_states.index
b = rest_states
df_state_restnts = pd.DataFrame(list(zip(a,b)))
df_state_restnts.columns = ['State', 'Total Restaurants']
df_state_restnts = df_state_restnts.set_index('State')
display(df_state_restnts)

Unnamed: 0_level_0,Total Restaurants
State,Unnamed: 1_level_1
Maharashtra,1144
Delhi NCR,1114
Karnataka,1019
Bengal,712
Telangana,589
Gujarat,569
Tamil Nadu,388
Punjab,326
Rajasthan,311
Madhya Pradesh,204


In [208]:
df_state

Unnamed: 0,State,Rating,Votes,Cost
2,Goa,4.153333,27.216667,1480.0
11,Uttar Pradesh,4.123871,36.180645,1280.645161
8,Rajasthan,4.090032,40.488746,1145.337621
3,Gujarat,4.164675,43.01406,743.233743
7,Punjab,4.094785,47.055215,1033.742331
5,Madhya Pradesh,3.790686,55.382353,1017.892157
9,Tamil Nadu,4.025258,80.074742,937.113402
4,Karnataka,4.029931,100.21001,924.288518
6,Maharashtra,4.146591,100.781469,1205.594406
0,Bengal,3.99059,108.025281,880.897472


In [209]:
# Taking State column in dataframe as index
df_state = df_state.set_index('State')
df_state

Unnamed: 0_level_0,Rating,Votes,Cost
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Goa,4.153333,27.216667,1480.0
Uttar Pradesh,4.123871,36.180645,1280.645161
Rajasthan,4.090032,40.488746,1145.337621
Gujarat,4.164675,43.01406,743.233743
Punjab,4.094785,47.055215,1033.742331
Madhya Pradesh,3.790686,55.382353,1017.892157
Tamil Nadu,4.025258,80.074742,937.113402
Karnataka,4.029931,100.21001,924.288518
Maharashtra,4.146591,100.781469,1205.594406
Bengal,3.99059,108.025281,880.897472


In [210]:
# Matching indices of df_state_restnts with df_state 
df_state_restnts.reindex(df_state.index)

Unnamed: 0_level_0,Total Restaurants
State,Unnamed: 1_level_1
Goa,60
Uttar Pradesh,155
Rajasthan,311
Gujarat,569
Punjab,326
Madhya Pradesh,204
Tamil Nadu,388
Karnataka,1019
Maharashtra,1144
Bengal,712


In [211]:
# Adding total restaurants column to state dataframe  
df_state['Total Restaurants'] = df_state_restnts['Total Restaurants']
df_state

Unnamed: 0_level_0,Rating,Votes,Cost,Total Restaurants
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Goa,4.153333,27.216667,1480.0,60
Uttar Pradesh,4.123871,36.180645,1280.645161,155
Rajasthan,4.090032,40.488746,1145.337621,311
Gujarat,4.164675,43.01406,743.233743,569
Punjab,4.094785,47.055215,1033.742331,326
Madhya Pradesh,3.790686,55.382353,1017.892157,204
Tamil Nadu,4.025258,80.074742,937.113402,388
Karnataka,4.029931,100.21001,924.288518,1019
Maharashtra,4.146591,100.781469,1205.594406,1144
Bengal,3.99059,108.025281,880.897472,712


In [212]:
# Normalizing columns with integer values
df_state_normalized = df_state.copy()
columns = ['Rating', 'Votes', 'Cost', 'Total Restaurants']

# apply normalization techniques
for column in columns:
    df_state_normalized[column] = (df_state_normalized[column] / df_state_normalized[column].abs().max())

# view normalized data
df_state_normalized.reset_index(level=0, inplace=True)
display(df_state_normalized)

Unnamed: 0,State,Rating,Votes,Cost,Total Restaurants
0,Goa,0.989693,0.101263,0.975661,0.052448
1,Uttar Pradesh,0.982672,0.134614,0.84424,0.13549
2,Rajasthan,0.974609,0.150643,0.755041,0.271853
3,Gujarat,0.992395,0.160039,0.489962,0.497378
4,Punjab,0.975741,0.175074,0.681474,0.284965
5,Madhya Pradesh,0.903278,0.206056,0.671025,0.178322
6,Tamil Nadu,0.959174,0.297927,0.617773,0.339161
7,Karnataka,0.960287,0.372843,0.609319,0.890734
8,Maharashtra,0.988086,0.374969,0.794764,1.0
9,Bengal,0.950913,0.40192,0.580714,0.622378


5.2 Comparing Attributes of all States

In [213]:
# Comparing attributes of all states using polar scatter plots

fig = make_subplots(rows=6, cols=2, specs=[[{'type': 'polar'}]*2]*6, column_widths=[0.45, 0.45])

for index, state in enumerate(df_state_normalized['State']):
    if index % 2 == 0:
        row = int((index+2)/2)
        col = 1
    else: 
        row = int((index+1)/2) 
        col = 2
      
    fig.add_trace(go.Scatterpolar(
          name = df_state_normalized['State'][index],
          r = [df_state_normalized['Rating'][index], df_state_normalized['Votes'][index], df_state_normalized['Cost'][index], df_state_normalized['Total Restaurants'][index]],
          theta = ['Rating', 'Votes', 'Cost', 'Total Restaurants'],
          fill = 'toself'    
        ), row, col)

fig.update_layout(height = 2000, width = 900, title_text = "Comparison of Restaurants in Different States of India", title_x=0.5, title_font_color = '#4B0082')
fig.show()

Inference 6: What are top cuisines in India?

6.1 Forming Cuisines Dataframe

In [214]:
df.head()

Unnamed: 0,Name,Location,Locality,City,Cuisine,Rating,Votes,Cost,State
0,Local,"Scindia House,Connaught Place, Central Delhi",Central Delhi,Delhi,"North Indian, Finger Food, Continental",4.1,2415,2000,Delhi NCR
1,The G.T. ROAD,"M-Block,Connaught Place, Central Delhi",Central Delhi,Delhi,North Indian,4.3,2363,1500,Delhi NCR
2,Tamasha,"Connaught Place, Central Delhi",Central Delhi,Delhi,"Finger Food, North Indian, Italian, Contine...",4.2,5016,2000,Delhi NCR
3,The Junkyard Cafe,"Connaught Place, Central Delhi",Central Delhi,Delhi,"North Indian, Mediterranean, Asian, Italian...",4.2,2821,1800,Delhi NCR
4,Chili's American Grill and Bar,"M-Block,Connaught Place, Central Delhi",Central Delhi,Delhi,"Mexican, American, Italian",4.4,1094,2000,Delhi NCR


In [215]:
cuisines = df['Cuisine'].str.split(',').explode().unique().tolist()

In [216]:
# Forming cuisine dataframe
data = []
df_filtered = pd.DataFrame()
columns = ['Cuisine', 'Total Restaurants', 'Rating']
df_cuisine = pd.DataFrame(columns = columns)

for cuisine in cuisines:
    
    df['Cuisine Verification'] = df['Cuisine'].str.contains(cuisine, case=False, na=False).astype(int)
    df_filtered = df[df['Cuisine Verification'] == 1]
    total_restnt = len(df_filtered.index)
    df = df.drop(['Cuisine Verification'], axis=1)
    
    avg_rating = df_filtered['Rating'].sum()/total_restnt
    df_cuisine = df_cuisine.append({'Cuisine': cuisine, 'Total Restaurants': total_restnt, 'Rating':avg_rating, }, ignore_index=True)

In [217]:
df_cuisine.head()

Unnamed: 0,Cuisine,Total Restaurants,Rating
0,North Indian,3296,4.070995
1,Finger Food,155,4.083871
2,Continental,1391,4.148311
3,Finger Food,312,4.087821
4,North Indian,1068,4.108989


In [218]:
df_cuisine.shape

(170, 3)

6.2 Identifying Top Cuisines

In [219]:
fig = go.Figure(data = [
    go.Bar(name = 'Total Restaurants', x = df_cuisine['Cuisine'], y = df_cuisine['Total Restaurants'])
])
fig.update_layout(xaxis_title = 'Cuisines', yaxis_title = 'Total Restaurants', 
                  title_text='Cuisine Distribution Across Restaurants', 
                  title_x=0.5)
fig.show()

Many cuisines are served in very few restaurants.

Filtering cuisines

In [220]:
# Taking cuisines that are atleast served in over 300 restaurants 
df_cuisine = df_cuisine[df_cuisine['Total Restaurants'] > 300]
df_cuisine.shape

(26, 3)

In [221]:
fig = go.Figure(data = [
    go.Bar(name = 'Total Restaurants', x = df_cuisine['Cuisine'], y = df_cuisine['Total Restaurants'])
])

fig.update_layout(xaxis_title = 'Cuisines', yaxis_title = 'Total Restaurants', 
                  title_text = 'Distribution of Top Cuisines Across Restaurants', 
                  title_x = 0.5)
fig.show()

Cuisines dataframe consists of duplicate values.
Multi-Cuisine is not a valid category.

In [222]:
# Printing some duplicate categories
df_cuisine.Cuisine[0], df_cuisine.Cuisine[13], df_cuisine.Cuisine[0], df_cuisine.Cuisine[64]

('North Indian', '  Seafood', 'North Indian', 'Seafood')

Double spacing before text is resulting in dupicates.

In [223]:
#  Reseting index and removing double space 
df_cuisine = df_cuisine.reset_index(drop = True)   
df_cuisine.Cuisine = df_cuisine.Cuisine.str.replace('  ', '')

# Verifying double space removal
df_cuisine.Cuisine[5], df_cuisine.Cuisine[13], df_cuisine.Cuisine[0], df_cuisine.Cuisine[3]

('Asian', 'Asian', 'North Indian', 'North Indian')

In [224]:
#  Identifying with duplicate values
duplicate_cuisine = df_cuisine.duplicated(subset = ['Cuisine'])

In [225]:
duplicate_cuisines = []
duplicate_cuisines = df_cuisine.loc[duplicate_cuisine]['Cuisine']
duplicate_cuisines

3     North Indian
13           Asian
14         Italian
15     Continental
16         Chinese
17         Mughlai
18    South Indian
20       Fast Food
21        Desserts
22         Seafood
Name: Cuisine, dtype: object

In [226]:
duplicate_indices = []

# Identifying indices dulplicate cuisines 
duplicate_bool = []
count = 0
for index, cuisine in enumerate(duplicate_cuisines):
    duplicate_bool = df_cuisine['Cuisine'].str.find(cuisine)

    for index, value in enumerate(duplicate_bool):
        if value == 0:
            duplicate_indices.append(index)
duplicate_indices

[0, 3, 5, 13, 4, 14, 1, 15, 8, 16, 11, 17, 10, 18, 12, 20, 19, 21, 9, 22]

In [227]:
# Removing duplicate indices and updating attributes
i = 0
for index in duplicate_indices:
    
    if (i) % 2 == 0:
        count = 0
        # Updating attributes in first duplicate index (or Original Index)
        total_restnt_1 = (df_cuisine['Total Restaurants'][index])
        avg_rating_1 = df_cuisine['Rating'][index]
    
    else:
        count = 2
        total_restnt_2 = (df_cuisine['Total Restaurants'][index])
        avg_rating_2 = df_cuisine['Rating'][index]
    
    i += 1
    if count == 2:
        df_cuisine['Total Restaurants'][(index-1)] = (total_restnt_1 + total_restnt_2)
        df_cuisine['Rating'][(index-1)] = ((total_restnt_1*avg_rating_1) + (total_restnt_2*avg_rating_2))/(total_restnt_1 + total_restnt_2)
        
        # Removing second duplicate index
        df_cuisine = df_cuisine.drop(index)

In [228]:
df_cuisine = df_cuisine.reset_index(drop = True)
df_cuisine

Unnamed: 0,Cuisine,Total Restaurants,Rating
0,North Indian,3296,4.070995
1,Continental,1391,4.148311
2,Finger Food,4364,4.080293
3,Italian,1093,4.163861
4,Asian,379,4.221108
5,Mexican,332,4.220783
6,Multi-Cuisine,689,4.146444
7,Chinese,2007,4.056951
8,Seafood,558,4.117563
9,South Indian,583,4.020583


In [229]:
# Dropping Multi-cuisine
df_cuisine = df_cuisine.drop(index = 6)
df_cuisine = df_cuisine.reset_index(drop = True)
df_cuisine

Unnamed: 0,Cuisine,Total Restaurants,Rating
0,North Indian,3296,4.070995
1,Continental,1391,4.148311
2,Finger Food,4364,4.080293
3,Italian,1093,4.163861
4,Asian,379,4.221108
5,Mexican,332,4.220783
6,Chinese,2007,4.056951
7,Seafood,558,4.117563
8,South Indian,583,4.020583
9,Mughlai,425,4.152235


In [230]:
# Plotting cuisine with total restaurants
fig = go.Figure(data = [
    go.Bar(name = 'Total Restaurants', x = df_cuisine['Cuisine'], y = df_cuisine['Total Restaurants'])
])

fig.update_layout(xaxis_title = 'Cuisines', yaxis_title = 'Total Restaurants', 
                  title_text='Cuisine Distribution Across Restaurants', 
                  title_x=0.5)

fig.show()

In [231]:
# Plotting rating with cuisines 
fig = go.Figure(data = [
    go.Bar(name='Rating', x=df_cuisine['Cuisine'], y=df_cuisine['Rating']),
])

fig.update_traces(marker_color ='rgb(12, 128, 128)', opacity=1)
fig.update_layout(xaxis_title = 'Cuisines', yaxis_title = 'Average Rating', 
                  title_text = 'Rating Distribution of Top Cuisines', 
                  title_x = 0.5)

fig.show()

In [232]:
# Analysing with polar plot 
labels = df_cuisine['Cuisine']
x1 = df_cuisine['Rating']
num_slices = len(x1)
theta = [(i+1.5)*360/num_slices for i in range(num_slices)]
r=x1
width = [360 / num_slices for _ in range(num_slices)]

barpolar_plots = [go.Barpolar(r=[r], theta=[t], width=[w], name=n)
for r, t, w, n in zip(r, theta, width, labels)]

fig = go.Figure(barpolar_plots)

fig.update_layout(
                    polar = dict(
                        radialaxis = dict(range=[3.8, 4.25], showticklabels = True),
                        angularaxis = dict(showticklabels = False, ticks = '')
                        ),
                    yaxis_title = 'States', xaxis_title = 'Total Restaurants', 
                    title_text = 'Comparison of Ratings of Different Cuisines', 
                    title_x = 0.46
)
fig.show()

Inference 7: How are the cuisines distributed among states?

In [233]:
df.head()

Unnamed: 0,Name,Location,Locality,City,Cuisine,Rating,Votes,Cost,State
0,Local,"Scindia House,Connaught Place, Central Delhi",Central Delhi,Delhi,"North Indian, Finger Food, Continental",4.1,2415,2000,Delhi NCR
1,The G.T. ROAD,"M-Block,Connaught Place, Central Delhi",Central Delhi,Delhi,North Indian,4.3,2363,1500,Delhi NCR
2,Tamasha,"Connaught Place, Central Delhi",Central Delhi,Delhi,"Finger Food, North Indian, Italian, Contine...",4.2,5016,2000,Delhi NCR
3,The Junkyard Cafe,"Connaught Place, Central Delhi",Central Delhi,Delhi,"North Indian, Mediterranean, Asian, Italian...",4.2,2821,1800,Delhi NCR
4,Chili's American Grill and Bar,"M-Block,Connaught Place, Central Delhi",Central Delhi,Delhi,"Mexican, American, Italian",4.4,1094,2000,Delhi NCR


In [234]:
df.tail()

Unnamed: 0,Name,Location,Locality,City,Cuisine,Rating,Votes,Cost,State
6588,Shree Rathnam,"Wave Mall,BRS Nagar, West Ludhiana",West Ludhiana,Ludhiana,South Indian,4.1,20,600,Punjab
6589,Cafe Delish,"Radisson Blu,Rajguru Nagar, West Ludhiana",West Ludhiana,Ludhiana,"Multi-Cuisine, North Indian, Continental, A...",4.4,48,3100,Punjab
6590,3 K Resorts,"BRS Nagar, West Ludhiana",West Ludhiana,Ludhiana,North Indian,1.0,1,1100,Punjab
6591,Dovka Bar,"Nirvana Hotel Banquets Club,PAU, Central Ludhiana",Central Ludhiana,Ludhiana,"North Indian, Chinese, Finger Food",4.3,3,1500,Punjab
6592,Urban Vibes,"Aggar Nagar, West Ludhiana",West Ludhiana,Ludhiana,"Multi-Cuisine, North Indian, Italian, Chine...",3.8,12,2000,Punjab


7.1 Declaring Function for Obtaining Cuisine Information

In [237]:
df_state = pd.DataFrame()

def cuisine_info(state):
    state_cuisines_clean =[]   


In [238]:
# Forming state dataframe
    filter = (df['State'] == state)
    df_state = df[filter].copy() 
    


IndentationError: unexpected indent (4026314972.py, line 2)

In [239]:
# Filtering cuisines
    state_cuisines = df_state['Cuisine'].str.split(',').explode().unique().tolist()    


IndentationError: unexpected indent (1799673863.py, line 2)

In [240]:
df_state = pd.DataFrame()

def cuisine_info(state):
    state_cuisines_clean =[]


In [241]:
frames = [top_cuisine_uttar, top_cuisine_madhya, top_cuisine_rajasthan, top_cuisine_punjab, top_cuisine_tamil, top_cuisine_gujarat, top_cuisine_telangana, top_cuisine_bengal, top_cuisine_karnataka, top_cuisine_delhi, top_cuisine_maharashtra]
top_cuisine_india = pd.concat(frames)
display(top_cuisine_india)

NameError: name 'top_cuisine_uttar' is not defined

In [242]:
top_cuisine_india['Country'] = 'India'
fig = px.treemap(top_cuisine_india, 
                 path = ['Country', 'State', 'Cuisine', 'Total Votes'], 
                 values = 'Rating',
                 color = 'Rating'
                )
fig.show()

NameError: name 'top_cuisine_india' is not defined

In [243]:
df_maharashtra = df[df['State'] == 'Maharashtra']
df_delhi = df[df['State'] == 'Delhi NCR']
df_karnataka = df[df['State'] == 'Karnataka']
df_maharashtra

Unnamed: 0,Name,Location,Locality,City,Cuisine,Rating,Votes,Cost,State
1114,Pop Tate's,"Mayfair Sonata Green CHS,Vikhroli West, Centra...",Central Suburbs,Mumbai,"Chinese, Italian, Continental",4.3,333,2000,Maharashtra
1115,Global Fusion,"Times Square Tech Park,Sakinaka, Andheri East",Andheri East,Mumbai,"North Indian, Chinese, Sushi",4.5,716,2000,Maharashtra
1116,FOO,"Vikhroli West, Central Suburbs",Central Suburbs,Mumbai,"Asian, Chinese, Japanese, Sushi",4.7,50,1100,Maharashtra
1117,Kake Da Hotel,"Powai, Powai",Powai,Mumbai,"North Indian, Chinese, Mughlai",5.0,1,900,Maharashtra
1118,PizzaExpress,"Hiranandani Business Park,Powai, Powai",Powai,Mumbai,"European, Fast Food, Italian",4.5,590,1400,Maharashtra
...,...,...,...,...,...,...,...,...,...
6460,Roof Zero,"Zero Degree,Hingna T Point, South Nagpur",South Nagpur,Nagpur,"North Indian, Chinese, Continental",1.0,1,2200,Maharashtra
6461,Dublin 88 - The Irish Hub,"Vayu Sena, West Nagpur",West Nagpur,Nagpur,"North Indian, Continental",3.6,5,1300,Maharashtra
6462,Wonders Of World,"Vayu Sena, West Nagpur",West Nagpur,Nagpur,"North Indian, Chinese, Continental",3.7,36,900,Maharashtra
6463,The Habitat,"Hingna T Point, South Nagpur",South Nagpur,Nagpur,"North Indian, Chinese, Continental",5.0,3,1000,Maharashtra


8.2 Defining Function to Return Votes in a Locality

In [244]:
def total_votes(locality):
    df_x = df[df['Locality'] == locality]
    total_votes = df_x['Votes'].sum()
    return total_votes

In [245]:
#     Adding attributes to the localities dataframe
df_location = pd.DataFrame()
rating_list = []
cost_list = []
location_rating_list = []
location_cost_list = []
for index, location in enumerate(karnataka_locations_df['Location']):
    df_location = df[df['Locality'] == location]

#     Calculating average rating

    for rating in df_location["Rating"]:
        rating_list.append(rating)
    avg_rating = sum(rating_list)/len(rating_list)
    location_rating_list.append(avg_rating)
    
#     Calculating average cost

    for cost in df_location["Cost"]:
        cost_list.append(cost)
    

NameError: name 'karnataka_locations_df' is not defined

In [None]:
df.tail()