## Importing the libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.io as pio
pio.renderers.default='svg'
import warnings
warnings.filterwarnings("ignore")

## Importing the dataset

In [4]:
raw_df = pd.read_csv('Zomato_Chennai_Dataset.csv')

In [5]:
raw_df.head()

Unnamed: 0,Zomato URL,Name of Restaurant,Address,Location,Cuisine,Top Dishes,Price for 2,Dining Rating,Dining Rating Count,Delivery Rating,Delivery Rating Count,Features
0,https://www.zomato.com/chennai/yaa-mohaideen-b...,Yaa Mohaideen Briyani,"336 & 338, Main Road, Pallavaram, Chennai",Pallavaram,['Biryani'],"['Bread Halwa', ' Chicken 65', ' Mutton Biryan...",500.0,4.3,1500,4.3,9306,"['Home Delivery', 'Indoor Seating']"
1,https://www.zomato.com/chennai/sukkubhai-biriy...,Sukkubhai Biriyani,"New 14, Old 11/3Q, Railway Station Road, MKN ...",Alandur,"['Biryani', ' North Indian', ' Mughlai', ' Des...","['Beef Biryani', ' Beef Fry', ' Paratha', ' Pa...",1000.0,4.4,3059,4.1,39200,"['Home Delivery', 'Free Parking', 'Table booki..."
2,https://www.zomato.com/chennai/ss-hyderabad-bi...,SS Hyderabad Biryani,"98/339, Arcot Road, Opposite Gokulam Chit Fun...",Kodambakkam,"['Biryani', ' North Indian', ' Chinese', ' Ara...","['Brinjal Curry', ' Tandoori Chicken', ' Chick...",500.0,4.3,1361,4.4,10500,"['Home Delivery', 'Indoor Seating']"
3,https://www.zomato.com/chennai/kfc-perambur,KFC,"10, Periyar Nagar, 70 Feet Road, Near Sheeba ...",Perambur,"['Burger', ' Fast Food', ' Finger Food', ' Bev...",['Zinger Burger'],500.0,4.0,1101,4.0,11200,"['Home Delivery', 'Free Parking', 'Card Upon D..."
4,https://www.zomato.com/chennai/tasty-kitchen-p...,Tasty Kitchen,"135B, SRP Colony, Peravallur, Near Perambur, ...",Perambur,"['Chinese', ' Biryani', ' North Indian', ' Che...","['Mutton Biryani', ' Chicken Rice', ' Tomato R...",450.0,4.2,617,4.1,22400,"['Home Delivery', 'Indoor Seating']"


## Getting Basic Information about the Dataset

In [7]:
raw_df.shape

(12032, 12)

In [8]:
raw_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12032 entries, 0 to 12031
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Zomato URL             12032 non-null  object 
 1   Name of Restaurant     12032 non-null  object 
 2   Address                12032 non-null  object 
 3   Location               12032 non-null  object 
 4   Cuisine                12032 non-null  object 
 5   Top Dishes             12032 non-null  object 
 6   Price for 2            12032 non-null  float64
 7   Dining Rating          6681 non-null   float64
 8   Dining Rating Count    11812 non-null  object 
 9   Delivery Rating        6181 non-null   float64
 10  Delivery Rating Count  11812 non-null  object 
 11  Features               12032 non-null  object 
dtypes: float64(3), object(9)
memory usage: 1.1+ MB


In [9]:
raw_df.describe()

Unnamed: 0,Price for 2,Dining Rating,Delivery Rating
count,12032.0,6681.0,6181.0
mean,397.61137,3.387756,3.80529
std,332.045938,0.558181,0.374213
min,40.0,0.3,0.3
25%,200.0,3.1,3.6
50%,300.0,3.5,3.9
75%,450.0,3.8,4.0
max,5000.0,4.9,4.7


## Cleaning the Dataset

In [11]:
raw_df.isnull().sum() 

Zomato URL                  0
Name of Restaurant          0
Address                     0
Location                    0
Cuisine                     0
Top Dishes                  0
Price for 2                 0
Dining Rating            5351
Dining Rating Count       220
Delivery Rating          5851
Delivery Rating Count     220
Features                    0
dtype: int64

In [12]:
# Dropping columns which are not required for further analusis

raw_df.drop(['Dining Rating','Delivery Rating','Zomato URL'],axis=1,inplace=True)

In [13]:
raw_df.head()

Unnamed: 0,Name of Restaurant,Address,Location,Cuisine,Top Dishes,Price for 2,Dining Rating Count,Delivery Rating Count,Features
0,Yaa Mohaideen Briyani,"336 & 338, Main Road, Pallavaram, Chennai",Pallavaram,['Biryani'],"['Bread Halwa', ' Chicken 65', ' Mutton Biryan...",500.0,1500,9306,"['Home Delivery', 'Indoor Seating']"
1,Sukkubhai Biriyani,"New 14, Old 11/3Q, Railway Station Road, MKN ...",Alandur,"['Biryani', ' North Indian', ' Mughlai', ' Des...","['Beef Biryani', ' Beef Fry', ' Paratha', ' Pa...",1000.0,3059,39200,"['Home Delivery', 'Free Parking', 'Table booki..."
2,SS Hyderabad Biryani,"98/339, Arcot Road, Opposite Gokulam Chit Fun...",Kodambakkam,"['Biryani', ' North Indian', ' Chinese', ' Ara...","['Brinjal Curry', ' Tandoori Chicken', ' Chick...",500.0,1361,10500,"['Home Delivery', 'Indoor Seating']"
3,KFC,"10, Periyar Nagar, 70 Feet Road, Near Sheeba ...",Perambur,"['Burger', ' Fast Food', ' Finger Food', ' Bev...",['Zinger Burger'],500.0,1101,11200,"['Home Delivery', 'Free Parking', 'Card Upon D..."
4,Tasty Kitchen,"135B, SRP Colony, Peravallur, Near Perambur, ...",Perambur,"['Chinese', ' Biryani', ' North Indian', ' Che...","['Mutton Biryani', ' Chicken Rice', ' Tomato R...",450.0,617,22400,"['Home Delivery', 'Indoor Seating']"


## Removing the Null Records

In [15]:
# Checking for Null Records

raw_df.isnull().sum()

Name of Restaurant         0
Address                    0
Location                   0
Cuisine                    0
Top Dishes                 0
Price for 2                0
Dining Rating Count      220
Delivery Rating Count    220
Features                   0
dtype: int64

In [16]:
# Checking for a null row

raw_df[raw_df['Dining Rating Count'].isnull()]

Unnamed: 0,Name of Restaurant,Address,Location,Cuisine,Top Dishes,Price for 2,Dining Rating Count,Delivery Rating Count,Features
1603,Erode Amman Mess,"163, LB Road, SMS Building, Thiruvanmiyur, Ch...",Thiruvanmiyur,"['Chettinad', ' South Indian', ' Kerala', ' Ch...",Invalid,600.0,,,"['Home Delivery', 'Indoor Seating', 'Table boo..."
2159,The Ultimate Brownie And Chocolate Place,Delivery Only,Sholinganallur,"['Bakery', ' Desserts']",Invalid,350.0,,,"['Delivery Only', 'Desserts and Bakes']"
2419,Amudha Aunty's kitchen,Delivery Only,Sholinganallur,"['Tamil', ' South Indian']",Invalid,250.0,,,"['Breakfast', 'Delivery Only']"
2537,TN 03 CAFE,Delivery Only,Tiruvottiyur,"['Sandwich', ' Fast Food', ' Ice Cream', ' Bev...",Invalid,200.0,,,['Delivery Only']
2542,Maggi Point,"10, Manimegalai Street, New Perungalathur, Ta...",Tambaram,"['Fast Food', ' Street Food', ' Beverages']",['Masala Maggi'],500.0,,,"['Home Delivery', 'Indoor Seating']"
...,...,...,...,...,...,...,...,...,...
12027,CK's Sandwiches,Delivery Only,Porur,['Sandwich'],Invalid,350.0,,,"['Breakfast', 'Delivery Only']"
12028,CK's Sandwiches,Delivery Only,Kolathur,['Sandwich'],Invalid,350.0,,,"['Breakfast', 'Delivery Only']"
12029,CK's Sandwiches,Delivery Only,Anna Nagar East,['Sandwich'],Invalid,350.0,,,"['Breakfast', 'Delivery Only']"
12030,CK's Sandwiches,Delivery Only,Ramapuram,['Sandwich'],Invalid,350.0,,,"['Breakfast', 'Delivery Only']"


In [17]:
# 1. Drop rows where both ratings are missing
raw_df = raw_df[~(raw_df['Dining Rating Count'].isnull() & raw_df['Delivery Rating Count'].isnull())]

# 2. Reset index 
raw_df.reset_index(drop=True, inplace=True)

In [18]:
# Confirming all the null records are correct
raw_df.isnull().sum()

Name of Restaurant       0
Address                  0
Location                 0
Cuisine                  0
Top Dishes               0
Price for 2              0
Dining Rating Count      0
Delivery Rating Count    0
Features                 0
dtype: int64

In [19]:
raw_df.columns  # To see all column names

Index(['Name of Restaurant', 'Address', 'Location', 'Cuisine', 'Top Dishes',
       'Price for 2', 'Dining Rating Count', 'Delivery Rating Count',
       'Features'],
      dtype='object')

In [20]:
# Converting selected columns to numeric type
cols_to_convert = ['Price for 2', 'Dining Rating Count', 'Delivery Rating Count']
for col in cols_to_convert:
    raw_df[col] = pd.to_numeric(raw_df[col], errors='coerce')

In [21]:
raw_df.dtypes

Name of Restaurant        object
Address                   object
Location                  object
Cuisine                   object
Top Dishes                object
Price for 2              float64
Dining Rating Count      float64
Delivery Rating Count    float64
Features                  object
dtype: object

## Working Location Column

In [23]:
raw_df['Location'].value_counts()

Location
Porur                                  409
Velachery                              365
Ambattur                               326
T. Nagar                               308
Perungudi                              284
                                      ... 
Keys Hotel, Thiruvanmiyur                1
Jade Resorts, East Coast Road (ECR)      1
Novotel Chennai, OMR                     1
Hotel Blue Diamond, Kilpauk              1
Chandra Metro Mall, Virugambakkam        1
Name: count, Length: 268, dtype: int64

## Removing Duplicate Records

In [25]:
# Finding all the duplicte rows
raw_df [raw_df.duplicated()]

Unnamed: 0,Name of Restaurant,Address,Location,Cuisine,Top Dishes,Price for 2,Dining Rating Count,Delivery Rating Count,Features
11298,Parantha Pundit,"44, Vanigar Street, Kovalam, Chennai",Kovalam,"['North Indian', ' Beverages']",Invalid,200.0,,,['Delivery Only']
11496,Hyku Foods,"11/597, Arunachalam Avenue, Baraniputhur Road...",Porur,"['Healthy Food', ' South Indian']",Invalid,300.0,,,"['Breakfast', 'Home Delivery']"
11530,Wangs Express,Delivery Only,Nandanam,"['Chinese', ' Beverages']",Invalid,600.0,,,['Delivery Only']
11531,Wangs Express,"Plot 4/553, MIG Type, Mogappair, Chennai",Mogappair,"['Chinese', ' Beverages']",Invalid,600.0,,,['Delivery Only']
11788,Sardarji Kulfi,Delivery Only,Anna Nagar East,['Ice Cream'],Invalid,150.0,,,['Delivery Only']


## Copying the Cleaned data into a new DataFrame

In [27]:
zomato_df = raw_df.copy()

In [28]:
zomato_df.head()

Unnamed: 0,Name of Restaurant,Address,Location,Cuisine,Top Dishes,Price for 2,Dining Rating Count,Delivery Rating Count,Features
0,Yaa Mohaideen Briyani,"336 & 338, Main Road, Pallavaram, Chennai",Pallavaram,['Biryani'],"['Bread Halwa', ' Chicken 65', ' Mutton Biryan...",500.0,1500.0,9306.0,"['Home Delivery', 'Indoor Seating']"
1,Sukkubhai Biriyani,"New 14, Old 11/3Q, Railway Station Road, MKN ...",Alandur,"['Biryani', ' North Indian', ' Mughlai', ' Des...","['Beef Biryani', ' Beef Fry', ' Paratha', ' Pa...",1000.0,3059.0,39200.0,"['Home Delivery', 'Free Parking', 'Table booki..."
2,SS Hyderabad Biryani,"98/339, Arcot Road, Opposite Gokulam Chit Fun...",Kodambakkam,"['Biryani', ' North Indian', ' Chinese', ' Ara...","['Brinjal Curry', ' Tandoori Chicken', ' Chick...",500.0,1361.0,10500.0,"['Home Delivery', 'Indoor Seating']"
3,KFC,"10, Periyar Nagar, 70 Feet Road, Near Sheeba ...",Perambur,"['Burger', ' Fast Food', ' Finger Food', ' Bev...",['Zinger Burger'],500.0,1101.0,11200.0,"['Home Delivery', 'Free Parking', 'Card Upon D..."
4,Tasty Kitchen,"135B, SRP Colony, Peravallur, Near Perambur, ...",Perambur,"['Chinese', ' Biryani', ' North Indian', ' Che...","['Mutton Biryani', ' Chicken Rice', ' Tomato R...",450.0,617.0,22400.0,"['Home Delivery', 'Indoor Seating']"


## Performing Exploratory Data Analysis

In [30]:
import plotly.express as px

fig = px.histogram(zomato_df,
                   x='Location',color='Cuisine',
                   title='Number of Restaurants in Each Location')

fig.update_xaxes(categoryorder='total descending', tickangle=-45)

fig.show(renderer="colab")

In [31]:
zomato_df = zomato_df.groupby(by=['Location', 'Cuisine'])['Price for 2'].mean().reset_index() 
zomato_df.head() 

Unnamed: 0,Location,Cuisine,Price for 2
0,InterContinental Chennai Mahabalipuram Resort...,['Finger Food'],1500.0
1,Abhiramapuram,"['Bakery', ' Desserts']",300.0
2,Abhiramapuram,"['Beverages', ' Juices']",250.0
3,Abhiramapuram,"['Fast Food', ' Pizza', ' Sandwich', ' Beverag...",200.0
4,Abhiramapuram,"['Fast Food', ' Sandwich', ' Beverages']",300.0


In [61]:
fig = px.scatter(zomato_df, x="Location", y="Price for 2", color="Cuisine", symbol="Cuisine",  
           title=' Avg Price Distibution of High rated restaurant for each Cuisine Type').update_traces(marker_size=3)
 
fig.show(renderer="colab")

In [62]:
fingerfood_df = zomato_df[zomato_df['Cuisine'].str.contains('Finger Food')] 
fingerfood_df.sort_values(by='Price for 2',ascending=False).head(10) 

Unnamed: 0,Location,Cuisine,Price for 2
6817,"Taj Club House, Thousand Lights",['Finger Food'],5000.0
992,Anna Salai,['Finger Food'],3500.0
7140,"The Park, Nungambakkam",['Finger Food'],3250.0
2354,"Hyatt Regency, Teynampet",['Finger Food'],3200.0
8273,"WelcomHotel, RK Salai (Cathedral Road)",['Finger Food'],3000.0
5782,"Radisson Blu Temple Bay, Mamallapuram",['Finger Food'],3000.0
2363,"ITC Grand Chola, Guindy",['Finger Food'],3000.0
7135,"The Leela Palace, MRC Nagar",['Finger Food'],3000.0
7113,"The Accord Metropolitan, T. Nagar","['Finger Food', ' North Indian']",3000.0
2276,"Hablis Hotel, Guindy","['Irish', ' Finger Food']",2500.0


In [65]:
price_rating_df = zomato_df.groupby(['Cuisine','Location'])['Price for 2'].mean().reset_index()
price_rating_df

Unnamed: 0,Cuisine,Location,Price for 2
0,"['American', ' Andhra', ' Asian', ' Beverages'...",Kovalam,800.0
1,"['American', ' BBQ']",Kodambakkam,400.0
2,"['American', ' Continental', ' European', ' It...",Besant Nagar,700.0
3,"['American', ' Continental', ' Italian', ' Nor...",Anna Nagar East,800.0
4,"['American', ' Continental']",Anna Nagar East,400.0
...,...,...,...
8320,"['Tibetan', ' Momos']",Thuraipakkam,400.0
8321,['Tibetan'],Nungambakkam,200.0
8322,"['Turkish', ' Kebab']",Nungambakkam,500.0
8323,"['Wraps', ' Fast Food']",Ambattur,300.0


In [71]:
fig = px.line(price_rating_df, y="Price for 2", x="Location",color='Cuisine') 
fig.show(renderer="colab")

In [73]:
Location_price_df = zomato_df.groupby(['Location'])['Price for 2'].mean().reset_index() 
Location_price_df 

Unnamed: 0,Location,Price for 2
0,InterContinental Chennai Mahabalipuram Resort...,1500.000000
1,Abhiramapuram,228.571429
2,"Abu Sarovar Portico, Egmore",1000.000000
3,Adambakkam,303.936342
4,Adyar,496.779025
...,...,...
263,"Vivira Mall, Navallur",500.000000
264,Washermenpet,325.469055
265,"WelcomHotel, RK Salai (Cathedral Road)",2075.000000
266,West Mambalam,273.496377


In [77]:
fig = px.scatter(Location_price_df, x="Location", y="Price for 2").update_traces(marker_size=5) 
fig.show(renderer="colab")

In [79]:
max_price = zomato_df['Price for 2'].max() 
one_fourth_price = max_price/4 
one_fourth_price 

1250.0

In [83]:
 # Finding list of restaurants that have price less than and equal to 1/4th of the max price i.e Finding Cheap Restaurant
 
aff_rest_df = zomato_df[['Price for 2', 'Cuisine', 'Location']] 
aff_rest_df = aff_rest_df[aff_rest_df['Price for 2'] <= 1250] 
aff_rest_df.sort_values(by='Price for 2', inplace=True) 
aff_rest_df 

Unnamed: 0,Price for 2,Cuisine,Location
8095,50.0,"['Beverages', ' Juices']",Vepery
4396,50.0,"['Beverages', ' Juices']","OMR Food Street, Kandanchavadi"
8296,60.0,['Healthy Food'],West Mambalam
7173,100.0,"['Beverages', ' Cafe', ' Desserts', ' Fast Foo...",Thiruvallur
8209,100.0,['Beverages'],Washermenpet
...,...,...,...
5856,1200.0,"['Chinese', ' Thai']",Ramapuram
5070,1200.0,"['Biryani', ' North Indian', ' BBQ', ' South I...",Perungudi
6750,1200.0,"['North Indian', ' Chinese', ' Italian']",T. Nagar
7701,1200.0,['Japanese'],Vadapalani


In [85]:
# Finding the highest price list of restaurants 
 
highrate_rest_df = zomato_df[['Price for 2', 'Cuisine', 'Location']] 
highrate_rest_df = highrate_rest_df[highrate_rest_df['Price for 2'] >= 1250] 
highrate_rest_df.sort_values(by='Price for 2', inplace=True) 
highrate_rest_df

Unnamed: 0,Price for 2,Cuisine,Location
2368,1300.0,"['Seafood', ' Chinese', ' North Indian', ' Con...","Ideal Beach Resort, East Coast Road (ECR)"
2332,1300.0,"['Chettinad', ' Bar Food']","Hotel Ranjith, Nungambakkam"
7158,1300.0,"['Continental', ' European', ' Chinese', ' Sea...","The Savera Hotel, RK Salai (Cathedral Road)"
6406,1300.0,"['Thai', ' Chinese', ' Asian', ' Momos', ' Sou...",Semmancheri
2362,1300.0,"['Desserts', ' Ice Cream']","ITC Grand Chola, Guindy"
...,...,...,...
5788,4000.0,"['Kebab', ' Mughlai', ' Biryani']","Radisson Blu, GST Road"
2364,4500.0,"['North Indian', ' Mughlai']","ITC Grand Chola, Guindy"
2365,4500.0,"['South Indian', ' Chinese', ' Desserts']","ITC Grand Chola, Guindy"
5786,4800.0,"['Continental', ' Asian', ' Mexican', ' Italian']","Radisson Blu, GST Road"
