**Problem Statement**

Zomatoe is an online food delivery company delivering food across over 20 countries.

It is estimated that approximately 6 orders will happen every seconds.

So, they need to analyse their data to come out with insights that will increase sales.

In [62]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### so that i dont have warnings
from warnings import filterwarnings
filterwarnings('ignore')

In [63]:
#code to mout my googledrive since the dataset is there
from google.colab import drive
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [64]:
#reading in my dataset
data = pd.read_csv("/content/gdrive/MyDrive/Zomato.csv")

# **Data Preprocessing**

In [65]:
data.head(5)

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,https://www.zomato.com/bangalore/grand-village...,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,+91 8026612447\r\n+91 9901210005,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [66]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 17 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   url                          51717 non-null  object
 1   address                      51717 non-null  object
 2   name                         51717 non-null  object
 3   online_order                 51717 non-null  object
 4   book_table                   51717 non-null  object
 5   rate                         43942 non-null  object
 6   votes                        51717 non-null  int64 
 7   phone                        50509 non-null  object
 8   location                     51696 non-null  object
 9   rest_type                    51490 non-null  object
 10  dish_liked                   23639 non-null  object
 11  cuisines                     51672 non-null  object
 12  approx_cost(for two people)  51371 non-null  object
 13  reviews_list                 51

In [67]:
data.shape

(51717, 17)

In [68]:
#checkig for missing data
data.isna().sum()

url                                0
address                            0
name                               0
online_order                       0
book_table                         0
rate                            7775
votes                              0
phone                           1208
location                          21
rest_type                        227
dish_liked                     28078
cuisines                          45
approx_cost(for two people)      346
reviews_list                       0
menu_item                          0
listed_in(type)                    0
listed_in(city)                    0
dtype: int64

In [69]:
#since we are more concerned with the locations colums, we need to drop the null values in that column

data.dropna( subset=["location"], inplace= True)

In [70]:
data.shape

(51696, 17)

In [71]:
#to know the number of unique locations we have in the data
len(data["location"].unique())

93

In [72]:
locations = pd.DataFrame()

In [73]:
locations["Name"] = data["location"].unique()

In [74]:
locations.head()

Unnamed: 0,Name
0,Banashankari
1,Basavanagudi
2,Mysore Road
3,Jayanagar
4,Kumaraswamy Layout


# **Extracting the Coordinates From Text File**

In [75]:
#geopy(geographical Python) helps us extract the lat and long

!pip install geopy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [76]:
#nominatim helps to figure out the lat and log with respect to each location

from geopy.geocoders import Nominatim

In [77]:
#initialising the nominatim

geolocator = Nominatim(user_agent= "app")

In [78]:
#function to get our lat and long through Nominatim provided y openStreetMap

lat = []
lon = []
for location in locations["Name"]:
    location = geolocator.geocode(location, timeout =60)

    if location is None:
        lat.append(np.nan)
        lon.append(np.nan)
    else:
        lat.append(location.latitude)
        lon.append(location.longitude)

In [79]:
locations["lat"] = lat
locations["lon"] = lon

In [80]:
locations.head()

Unnamed: 0,Name,lat,lon
0,Banashankari,15.887678,75.704678
1,Basavanagudi,12.941726,77.575502
2,Mysore Road,12.946662,77.53009
3,Jayanagar,27.643927,83.052805
4,Kumaraswamy Layout,12.908149,77.555318


In [81]:
locations.shape

(93, 3)

In [82]:
locations.to_csv("Zomatoe coordinate output.csv", index = False)

We have found the latitude and longitude of each location listed in the dataset using geopy.

In [83]:
#weeed to kowthe value counts for each location. so, we create another dataframe
R_locations = pd.DataFrame(data['location'].value_counts().reset_index())
R_locations.head(4)

Unnamed: 0,index,location
0,BTM,5124
1,HSR,2523
2,Koramangala 5th Block,2504
3,JP Nagar,2235


In [84]:
#changing the column names
R_locations.columns=['Name','count']
R_locations.head(5)

Unnamed: 0,Name,count
0,BTM,5124
1,HSR,2523
2,Koramangala 5th Block,2504
3,JP Nagar,2235
4,Whitefield,2144


In [85]:
locations.shape

(93, 3)

In [86]:
R_locations.shape

(93, 2)

In [87]:
#merging the R_locations dataframe with the locations data frame

Resturant_locations= R_locations.merge(locations, on= "Name", how= "left")
Resturant_locations.head(5)

Unnamed: 0,Name,count,lat,lon
0,BTM,5124,45.954851,-112.496595
1,HSR,2523,18.1475,41.538889
2,Koramangala 5th Block,2504,12.934843,77.618977
3,JP Nagar,2235,12.265594,76.64654
4,Whitefield,2144,53.553368,-2.296902


In [88]:
Resturant_locations.isna().sum()

Name     0
count    0
lat      1
lon      1
dtype: int64

In [89]:
Resturant_locations.dropna( subset=["lat", "lon"], inplace= True)

In [90]:
Resturant_locations.shape

(92, 4)

# **SPATIAL ANALYSIS**

**CREATING A BASEMAP**

In [91]:
#folium is a library used formapping and visulaisation. it helps to create an interactive map
!pip install folium

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [92]:
#creating a funtion that helps us generate the
def generateBaseMap(default_location=[12.97, 77.59], default_zoom_start=12):
  base_map = folium.Map(location=default_location, zoom_start=default_zoom_start)
  return base_map

In [93]:
import folium
basemap=generateBaseMap()
basemap

**Creating a Heatmap**

In [94]:
from folium.plugins import HeatMap

In [95]:
HeatMap(Resturant_locations[['lat','lon','count']],zoom=20,radius=15, gradient={0.2: 'blue', 0.4: 'green', 1: 'red'}).add_to(basemap)

<folium.plugins.heat_map.HeatMap at 0x7fb8509f0d90>

In [96]:
basemap

From the above, we can see that there are more resturants in the central part of Bangaluru.

**Marker Cluster Analysis**

In [97]:
from folium.plugins import FastMarkerCluster

In [98]:
FastMarkerCluster(data=Resturant_locations[['lat','lon','count']].values.tolist()).add_to(basemap)

basemap

**Looking at resturants with average ratings.**

In [99]:
#checking for unique rating
data["rate"].unique()

array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
       '3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
       '4.3/5', 'NEW', '2.9/5', '3.5/5', nan, '2.6/5', '3.8 /5', '3.4/5',
       '4.5/5', '2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5',
       '3.4 /5', '-', '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5',
       '4.1 /5', '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5',
       '3.5 /5', '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5',
       '4.3 /5', '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5',
       '4.9 /5', '3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5',
       '2.1 /5', '2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)

In [100]:
#dropping the nana value
data.dropna(axis= 0, subset= ["rate"], inplace = True)

In [101]:
#function to split the ratings

def split(x):
  return x.split("/")[0]

In [102]:
data["rating"] = data["rate"].apply(split)
data

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city),rating
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari,4.1
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari,4.1
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari,3.8
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari,3.7
4,https://www.zomato.com/bangalore/grand-village...,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,+91 8026612447\r\n+91 9901210005,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari,3.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51709,https://www.zomato.com/bangalore/the-farm-hous...,"136, SAP Labs India, KIADB Export Promotion In...",The Farm House Bar n Grill,No,No,3.7 /5,34,+91 9980121279\n+91 9900240646,Whitefield,"Casual Dining, Bar",,"North Indian, Continental",800,"[('Rated 4.0', 'RATED\n Ambience- Big and spa...",[],Pubs and bars,Whitefield,3.7
51711,https://www.zomato.com/bangalore/bhagini-2-whi...,"139/C1, Next To GR Tech Park, Pattandur Agraha...",Bhagini,No,No,2.5 /5,81,080 65951222,Whitefield,"Casual Dining, Bar","Biryani, Andhra Meal","Andhra, South Indian, Chinese, North Indian",800,"[('Rated 4.0', 'RATED\n A fine place to chill...",[],Pubs and bars,Whitefield,2.5
51712,https://www.zomato.com/bangalore/best-brews-fo...,"Four Points by Sheraton Bengaluru, 43/3, White...",Best Brews - Four Points by Sheraton Bengaluru...,No,No,3.6 /5,27,080 40301477,Whitefield,Bar,,Continental,1500,"[('Rated 5.0', ""RATED\n Food and service are ...",[],Pubs and bars,Whitefield,3.6
51715,https://www.zomato.com/bangalore/chime-sherato...,Sheraton Grand Bengaluru Whitefield Hotel & Co...,Chime - Sheraton Grand Bengaluru Whitefield Ho...,No,Yes,4.3 /5,236,080 49652769,"ITPL Main Road, Whitefield",Bar,"Cocktails, Pizza, Buttermilk",Finger Food,2500,"[('Rated 4.0', 'RATED\n Nice and friendly pla...",[],Pubs and bars,Whitefield,4.3


In [103]:
data["rating"].unique()

array(['4.1', '3.8', '3.7', '3.6', '4.6', '4.0', '4.2', '3.9', '3.1',
       '3.0', '3.2', '3.3', '2.8', '4.4', '4.3', 'NEW', '2.9', '3.5',
       '2.6', '3.8 ', '3.4', '4.5', '2.5', '2.7', '4.7', '2.4', '2.2',
       '2.3', '3.4 ', '-', '3.6 ', '4.8', '3.9 ', '4.2 ', '4.0 ', '4.1 ',
       '3.7 ', '3.1 ', '2.9 ', '3.3 ', '2.8 ', '3.5 ', '2.7 ', '2.5 ',
       '3.2 ', '2.6 ', '4.5 ', '4.3 ', '4.4 ', '4.9', '2.1', '2.0', '1.8',
       '4.6 ', '4.9 ', '3.0 ', '4.8 ', '2.3 ', '4.7 ', '2.4 ', '2.1 ',
       '2.2 ', '2.0 ', '1.8 '], dtype=object)

In [104]:
#to rempve the New and hyphen
data.replace("NEW", 0, inplace = True)
data.replace("-", 0, inplace = True)

In [105]:
data["rating"].unique()

array(['4.1', '3.8', '3.7', '3.6', '4.6', '4.0', '4.2', '3.9', '3.1',
       '3.0', '3.2', '3.3', '2.8', '4.4', '4.3', 0, '2.9', '3.5', '2.6',
       '3.8 ', '3.4', '4.5', '2.5', '2.7', '4.7', '2.4', '2.2', '2.3',
       '3.4 ', '3.6 ', '4.8', '3.9 ', '4.2 ', '4.0 ', '4.1 ', '3.7 ',
       '3.1 ', '2.9 ', '3.3 ', '2.8 ', '3.5 ', '2.7 ', '2.5 ', '3.2 ',
       '2.6 ', '4.5 ', '4.3 ', '4.4 ', '4.9', '2.1', '2.0', '1.8', '4.6 ',
       '4.9 ', '3.0 ', '4.8 ', '2.3 ', '4.7 ', '2.4 ', '2.1 ', '2.2 ',
       '2.0 ', '1.8 '], dtype=object)

In [106]:
#converting the rating column to interger
data["rating"]= pd.to_numeric(data["rating"])

In [107]:
#to find the average rating of each location
data.groupby("location")["rating"].mean().sort_values(ascending = False)

location
Lavelle Road             4.042886
St. Marks Road           4.017201
Koramangala 3rd Block    3.978756
Sankey Road              3.965385
Church Street            3.963091
                           ...   
Electronic City          3.041909
Bommanahalli             2.926752
Hebbal                   2.880000
North Bangalore          2.385714
West Bangalore           2.020000
Name: rating, Length: 92, dtype: float64

In [108]:
avg_rating = data.groupby("location")["rating"].mean().sort_values(ascending = False).values

In [109]:
location_list = data.groupby("location")["rating"].mean().sort_values(ascending = False).index

In [110]:
location_list.shape

(92,)

In [111]:
rating= pd.DataFrame()

In [125]:
lat2 = []
lon2 = []
for loc in location_list:
  loc = geolocator.geocode(loc, timeout =60)

  if loc is not None:
    lat2.append(loc.latitude)
    lon2.append(loc.longitude)

In [131]:
len(avg_rating)

92

In [133]:
min_length = min(len(location_list), len(lat2), len(lon2), len(avg_rating))

# Create a DataFrame with the shortest length
rating = pd.DataFrame({'location': location_list[:min_length],
                       'lat': lat2[:min_length],
                       'lon': lon2[:min_length],
                       'avg_rating': avg_rating[:min_length]})

# Pad longer columns with NaN values
rating = rating.reindex(range(min_length))

# Output the resulting DataFrame
rating.head()

Unnamed: 0,location,lat,lon,avg_rating
0,Lavelle Road,40.765284,-76.373824,4.042886
1,St. Marks Road,54.133938,-4.60971,4.017201
2,Koramangala 3rd Block,12.928292,77.625403,3.978756
3,Sankey Road,38.780108,-121.505644,3.965385
4,Church Street,40.715353,-74.007621,3.963091


In [136]:
HeatMap(rating[["lat", "lon", "avg_rating"]]).add_to(basemap)
basemap

From theabove,we cansee thattheceter area has thehigest density. Meaning the zone has the maximum restuarats with higest ratings.