# 1. Importing Libraries

In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import os
import folium
import json

# 2. Importing Data

In [7]:
# Importing data set zomato_cleaned.csv
df_zomato_cleaned = pd.read_csv(r'/Users/I589637/Documents/Zomato Analysis/02 - Data/Prepared Data/zomato_cleaned.csv', index_col = False)

In [9]:
# This command propts matplotlib visuals to appear in the notebook 
%matplotlib inline

In [11]:
# Import ".json" file for Bengaluru
bengaluru_geo = r'/Users/I589637/Documents/Zomato Analysis/02 - Data/Prepared Data/BBMP.geojson'

# https://github.com/datameet/Municipal_Spatial_Data/blob/master/Bangalore/BBMP.geojson

In [13]:
# Look at the JSON file contents

f = open(r'/Users/I589637/Documents/Zomato Analysis/02 - Data/Prepared Data/BBMP.geojson',)
  
# returns JSON object asa dictionary
data = json.load(f)
  
# Iterating through the json list
for i in data['features']:
    print(i)

{'type': 'Feature', 'properties': {'KGISWardID': 4878, 'KGISWardCode': '2003001', 'LGD_WardCode': 1303139, 'KGISWardNo': '1', 'KGISWardName': 'Kempegowda Ward', 'KGISTownCode': '2003'}, 'geometry': {'type': 'Polygon', 'coordinates': [[[77.6154462, 13.1302802], [77.6154925, 13.1304035], [77.6155631, 13.1305913], [77.6157095, 13.1309813], [77.6157824, 13.1312134], [77.6157106, 13.1312004], [77.615692, 13.131197], [77.615531, 13.131168], [77.6153992, 13.1312877], [77.6153732, 13.1313113], [77.6153552, 13.1313276], [77.615321, 13.1313585], [77.6152925, 13.1313843], [77.615275, 13.1314001], [77.6151037, 13.1314614], [77.6150778, 13.1314706], [77.615071, 13.1314731], [77.6148439, 13.1315544], [77.6146395, 13.1316275], [77.6145838, 13.1316623], [77.614416, 13.1317669], [77.614284, 13.1318492], [77.6142494, 13.1318708], [77.6140883, 13.1319717], [77.6126413, 13.1328783], [77.6124821, 13.1329767], [77.6124352, 13.1330057], [77.6124335, 13.1330059], [77.6116829, 13.1331098], [77.6109345, 13.1332

# 3. Cleaning & Wrangling Data

In [83]:
# create a narrowed down subset for the geo analysis
df_zomato_geo = df_zomato_cleaned[
    [
        "name",
        "rate_float",
        "votes",
        "approx_cost_2ppl",
        "location",
        "listed_in_city",
        "rest_type",
        "book_table",
        "online_order",
        "cuisines",
    ]
].copy()

In [85]:
# rename column rate_float
df_zomato_geo.rename(columns={"rate_float": "rate"}, inplace=True)

In [87]:
df_zomato_geo

Unnamed: 0,name,rate,votes,approx_cost_2ppl,location,listed_in_city,rest_type,book_table,online_order,cuisines
0,Jalsa,4.100000,775,800,Banashankari,Banashankari,Casual Dining,Yes,Yes,"North Indian, Mughlai, Chinese"
1,Spice Elephant,4.100000,787,800,Banashankari,Banashankari,Casual Dining,No,Yes,"Chinese, North Indian, Thai"
2,San Churro Cafe,3.800000,918,800,Banashankari,Banashankari,"Cafe, Casual Dining",No,Yes,"Cafe, Mexican, Italian"
3,Addhuri Udupi Bhojana,3.700000,88,300,Banashankari,Banashankari,Quick Bites,No,No,"South Indian, North Indian"
4,Grand Village,3.800000,166,600,Basavanagudi,Banashankari,Casual Dining,No,No,"North Indian, Rajasthani"
...,...,...,...,...,...,...,...,...,...,...
51143,Best Brews - Four Points by Sheraton Bengaluru...,3.600000,27,1500,Whitefield,Whitefield,Bar,No,No,Continental
51144,Vinod Bar And Restaurant,3.700449,0,600,Whitefield,Whitefield,Bar,No,No,Finger Food
51145,Plunge - Sheraton Grand Bengaluru Whitefield H...,3.700449,0,2000,Whitefield,Whitefield,Bar,No,No,Finger Food
51146,Chime - Sheraton Grand Bengaluru Whitefield Ho...,4.300000,236,2500,"ITPL Main Road, Whitefield",Whitefield,Bar,Yes,No,Finger Food


In [89]:
# Round values in the 'rate' column to 1 decimal place
df_zomato_geo['rate'] = df_zomato_geo['rate'].round(1)

In [91]:
df_zomato_geo

Unnamed: 0,name,rate,votes,approx_cost_2ppl,location,listed_in_city,rest_type,book_table,online_order,cuisines
0,Jalsa,4.1,775,800,Banashankari,Banashankari,Casual Dining,Yes,Yes,"North Indian, Mughlai, Chinese"
1,Spice Elephant,4.1,787,800,Banashankari,Banashankari,Casual Dining,No,Yes,"Chinese, North Indian, Thai"
2,San Churro Cafe,3.8,918,800,Banashankari,Banashankari,"Cafe, Casual Dining",No,Yes,"Cafe, Mexican, Italian"
3,Addhuri Udupi Bhojana,3.7,88,300,Banashankari,Banashankari,Quick Bites,No,No,"South Indian, North Indian"
4,Grand Village,3.8,166,600,Basavanagudi,Banashankari,Casual Dining,No,No,"North Indian, Rajasthani"
...,...,...,...,...,...,...,...,...,...,...
51143,Best Brews - Four Points by Sheraton Bengaluru...,3.6,27,1500,Whitefield,Whitefield,Bar,No,No,Continental
51144,Vinod Bar And Restaurant,3.7,0,600,Whitefield,Whitefield,Bar,No,No,Finger Food
51145,Plunge - Sheraton Grand Bengaluru Whitefield H...,3.7,0,2000,Whitefield,Whitefield,Bar,No,No,Finger Food
51146,Chime - Sheraton Grand Bengaluru Whitefield Ho...,4.3,236,2500,"ITPL Main Road, Whitefield",Whitefield,Bar,Yes,No,Finger Food


In [93]:
# save out subset for later
df_zomato_geo.to_csv(os.path.join(r'/Users/I589637/Documents/Zomato Analysis/02 - Data/Prepared Data/zomato_geo.csv'))

# **How to fix matching issues between data set geo data and geoJSON** 

**geoJSON area data:**
'KGISWardName': 'Chowdeswari Ward'

**Data Set area data:**
'location'
'Banashankari'


## **1. Standardizing Area Names**
One of the biggest reasons for mismatches is inconsistencies in name formatting. To improve alignment:

- Convert all area names to lowercase and strip spaces to ensure consistency.
- Replace abbreviations or alternative spellings (e.g., "Indiranagar" vs. "Indira Nagar").

### **Python Code**
import pandas as pd
import geopandas as gpd

#### Load the dataset  
df = pd.read_csv("zomato_cleaned.csv")

#### Load the Bengaluru GeoJSON file
geojson_path = "bengaluru_wards.geojson"  # Update with correct path
gdf = gpd.read_file(geojson_path)

#### Standardize names
df["location"] = df["location"].str.lower().str.strip()
gdf["ward_name"] = gdf["ward_name"].str.lower().str.strip()


## **2. Checking for Unmatched Areas**

### **Python Code**
#### Get unique names
dataset_areas = set(df["location"].unique())
geojson_areas = set(gdf["ward_name"].unique())

#### Find names in dataset that are not in the GeoJSON
unmatched_areas = dataset_areas - geojson_areas
print("Unmatched Areas:", unmatched_areas) 


## **3. Using Fuzzy Matching for Name Alignment**

### **Python Code**
from thefuzz import process

#### Function to find the best match
def find_closest_match(area_name, possible_names):
    match, score = process.extractOne(area_name, possible_names)
    return match if score > 80 else None  # Adjust threshold if needed

#### Apply fuzzy matching
df["matched_ward"] = df["location"].apply(lambda x: find_closest_match(x, geojson_areas))

#### Check unmatched areas after fuzzy matching
unmatched_after_fuzzy = df[df["matched_ward"].isna()]["location"].unique()
print("Still unmatched after fuzzy matching:", unmatched_after_fuzzy)  


## **4. Creating a Custom GeoJSON (If Necessary)**

### **Python Code**
from shapely.geometry import Point

#### Assuming 'latitude' and 'longitude' exist in the dataset  
df["geometry"] = df.apply(lambda row: Point(row["longitude"], row["latitude"]), axis=1)

#### Convert to GeoDataFrame  
custom_gdf = gpd.GeoDataFrame(df[["location", "geometry"]].drop_duplicates(), geometry="geometry")

#### Save as a new GeoJSON file  
custom_gdf.to_file("custom_bengaluru.geojson", driver="GeoJSON")
print("Custom GeoJSON created successfully!")



In [185]:
# create the new column "ward_name_location" from location column
df_zomato_geo['ward_name_location'] = df_zomato_geo['location']

In [187]:
# create the new column "ward_name_city" from location column
df_zomato_geo['ward_name_city'] = df_zomato_geo['listed_in_city']

In [101]:
df_zomato_geo

Unnamed: 0,name,rate,votes,approx_cost_2ppl,location,listed_in_city,rest_type,book_table,online_order,cuisines,ward_name_location,ward_name_city
0,Jalsa,4.1,775,800,Banashankari,Banashankari,Casual Dining,Yes,Yes,"North Indian, Mughlai, Chinese",Banashankari,Banashankari
1,Spice Elephant,4.1,787,800,Banashankari,Banashankari,Casual Dining,No,Yes,"Chinese, North Indian, Thai",Banashankari,Banashankari
2,San Churro Cafe,3.8,918,800,Banashankari,Banashankari,"Cafe, Casual Dining",No,Yes,"Cafe, Mexican, Italian",Banashankari,Banashankari
3,Addhuri Udupi Bhojana,3.7,88,300,Banashankari,Banashankari,Quick Bites,No,No,"South Indian, North Indian",Banashankari,Banashankari
4,Grand Village,3.8,166,600,Basavanagudi,Banashankari,Casual Dining,No,No,"North Indian, Rajasthani",Basavanagudi,Banashankari
...,...,...,...,...,...,...,...,...,...,...,...,...
51143,Best Brews - Four Points by Sheraton Bengaluru...,3.6,27,1500,Whitefield,Whitefield,Bar,No,No,Continental,Whitefield,Whitefield
51144,Vinod Bar And Restaurant,3.7,0,600,Whitefield,Whitefield,Bar,No,No,Finger Food,Whitefield,Whitefield
51145,Plunge - Sheraton Grand Bengaluru Whitefield H...,3.7,0,2000,Whitefield,Whitefield,Bar,No,No,Finger Food,Whitefield,Whitefield
51146,Chime - Sheraton Grand Bengaluru Whitefield Ho...,4.3,236,2500,"ITPL Main Road, Whitefield",Whitefield,Bar,Yes,No,Finger Food,"ITPL Main Road, Whitefield",Whitefield


In [103]:
# display all unique values in the "location" column
# location = df_zomato_geo["location"].unique()

# Get unique names
dataset_locations = set(df_zomato_geo['location'].unique())
dataset_cities = set(df_zomato_geo['listed_in_city'].unique())

In [105]:
dataset_locations

{'BTM',
 'Banashankari',
 'Banaswadi',
 'Bannerghatta Road',
 'Basavanagudi',
 'Basaveshwara Nagar',
 'Bellandur',
 'Bommanahalli',
 'Brigade Road',
 'Brookefield',
 'CV Raman Nagar',
 'Central Bangalore',
 'Church Street',
 'City Market',
 'Commercial Street',
 'Cunningham Road',
 'Domlur',
 'East Bangalore',
 'Ejipura',
 'Electronic City',
 'Frazer Town',
 'HBR Layout',
 'HSR',
 'Hebbal',
 'Hennur',
 'Hosur Road',
 'ITPL Main Road, Whitefield',
 'Indiranagar',
 'Infantry Road',
 'JP Nagar',
 'Jakkur',
 'Jalahalli',
 'Jayanagar',
 'Jeevan Bhima Nagar',
 'KR Puram',
 'Kaggadasapura',
 'Kalyan Nagar',
 'Kammanahalli',
 'Kanakapura Road',
 'Kengeri',
 'Koramangala',
 'Koramangala 1st Block',
 'Koramangala 2nd Block',
 'Koramangala 3rd Block',
 'Koramangala 4th Block',
 'Koramangala 5th Block',
 'Koramangala 6th Block',
 'Koramangala 7th Block',
 'Koramangala 8th Block',
 'Kumaraswamy Layout',
 'Langford Town',
 'Lavelle Road',
 'MG Road',
 'Magadi Road',
 'Majestic',
 'Malleshwaram',
 'M

In [107]:
dataset_cities

{'BTM',
 'Banashankari',
 'Bannerghatta Road',
 'Basavanagudi',
 'Bellandur',
 'Brigade Road',
 'Brookefield',
 'Church Street',
 'Electronic City',
 'Frazer Town',
 'HSR',
 'Indiranagar',
 'JP Nagar',
 'Jayanagar',
 'Kalyan Nagar',
 'Kammanahalli',
 'Koramangala 4th Block',
 'Koramangala 5th Block',
 'Koramangala 6th Block',
 'Koramangala 7th Block',
 'Lavelle Road',
 'MG Road',
 'Malleshwaram',
 'Marathahalli',
 'New BEL Road',
 'Old Airport Road',
 'Rajajinagar',
 'Residency Road',
 'Sarjapur Road',
 'Whitefield'}

In [70]:
# Look at unique areas in the geoJSON

import json

# Load the GeoJSON file
with open(r'/Users/I589637/Documents/Zomato Analysis/02 - Data/Prepared Data/BBMP.geojson', encoding="utf-8") as file:
    data = json.load(file)

# Extract unique KGISWardName values
ward_names = set()
for feature in data.get("features", []):
    properties = feature.get("properties", {})
    ward_name = properties.get("KGISWardName")
    if ward_name:
        ward_names.add(ward_name)

# Display the unique ward names
ward_names

{'A Narayanapura',
 'AECS Layout',
 'Adugodi',
 'Agara',
 'Agaram',
 'Agrahara Dasarahalli',
 'Amrutahalli',
 'Anjanapura',
 'Arakere',
 'Aramane Nagara',
 'Ashoka Pillar',
 'Attiguppe',
 'Atturu Layout',
 'Avalahalli',
 'Azad Nagar',
 'BTM Layout',
 'Babusab Palya',
 'Bagalakunte',
 'Banasavadi',
 'Banashankari Temple ward',
 'Bande Mutt',
 'Bapuji Nagar',
 'Basavanagudi',
 'Basavanapura',
 'Basaveshwara Nagar',
 'Begur',
 'Belathur',
 'Bellanduru',
 'Bharathi Nagar',
 'Bilekhalli',
 'Binnipete',
 'Bommanahalli',
 'Byatarayanapura',
 'Byrasandra',
 'C V Raman Nagar',
 'Chalavadipalya',
 'Chamrajapet',
 'Chamundi Nagara',
 'Chanakya',
 'Chandra Layout',
 'Chatrapati Shivaji',
 'Chickpete',
 'Chokkasandra',
 'Chowdeswari Ward',
 'Chunchaghatta',
 'Cottonpete',
 'Dattatreya Temple',
 'Dayananda Nagar',
 'Deen Dayalu Ward',
 'Defence Colony',
 'Devara Jeevanahalli',
 'Devarachikkanahalli',
 'Devaraj Urs Nagar',
 'Devasandra',
 'Dharmaraya Swamy Temple Ward',
 'Dodda Bidarakallu',
 'Dodda 

# Matches between location in Data Set and ward name geoJSON

### Exact Matches:
1. 'Basavanagudi' - Exact match.
2. 'Basaveshwara Nagar' - Exact match.
3. 'Bommanahalli' - Exact match.
4. 'Domlur' - Exact match.
5. 'Ejipura' - Exact match.
6. 'Marathahalli' - Exact match.
7. 'Peenya' - Exact match.
8. 'Rajarajeshwari Nagar' - Exact match.
9. 'Ulsoor' - Exact match.
10. 'Uttarahalli' - Exact match.
11. 'Vasanth Nagar' - Exact match.
12. 'Whitefield' - Exact match.
13. 'Yelahanka' - Exact match.

### Potential Matches:
(with Slightly Different Spelling)
1. 'BTM' (Data Set 1) vs 'BTM Layout' (Data Set 2)
2. 'Banaswadi' (Data Set 1) vs 'Banasavadi' (Data Set 2)
3. 'Bellandur' (Data Set 1) vs 'Bellanduru' (Data Set 2)
4. 'CV Raman Nagar' (Data Set 1) vs 'C V Raman Nagar' (Data Set 2)
5. 'HSR' (Data Set 1) vs 'HSR - Singasandra' (Data Set 2)
6. 'JP Nagar' (Data Set 1) vs 'J P Nagar' (Data Set 2)
7. 'KR Puram' (Data Set 1) vs 'K R Puram' (Data Set 2)
8. 'Malleshwaram' (Data Set 1) vs 'Malleswaram' (Data Set 2)
9. 'Nagarbhavi' (Data Set 1) vs 'Nagarabhavi' (Data Set 2)
10. 'Rammurthy Nagar' (Data Set 1) vs 'Ramamurthy Nagara' (Data Set 2)
11. 'Sanjay Nagar' (Data Set 1) vs 'Sanjaya Nagar' (Data Set 2)
12. 'Varthur Main Road, Whitefield' (Data Set 1) vs 'Varthuru' (Data Set 2)
13. 'Vijay Nagar' (Data Set 1) vs 'Vijayanagar' (Data Set 2)

# Matches between city in Data Set and ward name geoJSON

### Exact Matches:
1. 'Basavanagudi' - Exact match.
2. 'Bellandur' - Exact match.
3. 'JP Nagar' - Exact match.
4. 'Kammanahalli' - Exact match.
5. 'Malleshwaram' (spelling varies in Dataset 4 as 'Malleswaram') - Exact match.
6. 'Marathahalli' - Exact match.
7. 'Rajajinagar' - Exact match.
8. 'Whitefield' - Exact match.

### Potential Matches with Slightly Different Spelling:
1. 'BTM' (Data Set) vs 'BTM Layout' (geoJSON)
2. 'HSR' (Data Set) vs 'HSR - Singasandra' (geoJSON)
3. 'JP Nagar' (Data Set) vs 'J P Nagar' (geoJSON)
4. 'Malleshwaram' (Data Set) vs 'Malleswaram' (geoJSON)



# Conclusion

#### I will match the potential matches from the geoJSON in both data set columns 'locations' and 'listed_in_city' by creating new columns in the data set.

In [124]:
df_zomato_geo

Unnamed: 0,name,rate,votes,approx_cost_2ppl,location,listed_in_city,rest_type,book_table,online_order,cuisines,ward_name_location,ward_name_city
0,Jalsa,4.1,775,800,Banashankari,Banashankari,Casual Dining,Yes,Yes,"North Indian, Mughlai, Chinese",Banashankari,Banashankari
1,Spice Elephant,4.1,787,800,Banashankari,Banashankari,Casual Dining,No,Yes,"Chinese, North Indian, Thai",Banashankari,Banashankari
2,San Churro Cafe,3.8,918,800,Banashankari,Banashankari,"Cafe, Casual Dining",No,Yes,"Cafe, Mexican, Italian",Banashankari,Banashankari
3,Addhuri Udupi Bhojana,3.7,88,300,Banashankari,Banashankari,Quick Bites,No,No,"South Indian, North Indian",Banashankari,Banashankari
4,Grand Village,3.8,166,600,Basavanagudi,Banashankari,Casual Dining,No,No,"North Indian, Rajasthani",Basavanagudi,Banashankari
...,...,...,...,...,...,...,...,...,...,...,...,...
51143,Best Brews - Four Points by Sheraton Bengaluru...,3.6,27,1500,Whitefield,Whitefield,Bar,No,No,Continental,Whitefield,Whitefield
51144,Vinod Bar And Restaurant,3.7,0,600,Whitefield,Whitefield,Bar,No,No,Finger Food,Whitefield,Whitefield
51145,Plunge - Sheraton Grand Bengaluru Whitefield H...,3.7,0,2000,Whitefield,Whitefield,Bar,No,No,Finger Food,Whitefield,Whitefield
51146,Chime - Sheraton Grand Bengaluru Whitefield Ho...,4.3,236,2500,"ITPL Main Road, Whitefield",Whitefield,Bar,Yes,No,Finger Food,"ITPL Main Road, Whitefield",Whitefield


In [129]:
# Define the mapping of location values to ward name values
ward_name_mapping = {
    'BTM': 'BTM Layout',
    'Banaswadi': 'Banasavadi',
    'Bellandur': 'Bellanduru',
    'CV Raman Nagar': 'C V Raman Nagar',
    'HSR': 'HSR - Singasandra',
    'JP Nagar': 'J P Nagar',
    'KR Puram': 'K R Puram',
    'Malleshwaram': 'Malleswaram',
    'Nagarbhavi': 'Nagarabhavi',
    'Rammurthy Nagar': 'Ramamurthy Nagara',
    'Sanjay Nagar': 'Sanjaya Nagar',
    'Varthur Main Road, Whitefield': 'Varthuru',
    'Vijay Nagar': 'Vijayanagar'
}

# Apply the mapping to the "ward_names_location" column
df_zomato_geo["ward_name_location"] = df_zomato_geo["ward_name_location"].replace(ward_name_mapping)


In [131]:
# Define the mapping of old values to new values
ward_name_city_mapping = {
    'BTM': 'BTM Layout',
    'HSR': 'HSR - Singasandra',
    'JP Nagar': 'J P Nagar',
    'Malleshwaram': 'Malleswaram'
}

# Apply the mapping to the "ward_name_city" column
df_zomato_geo["ward_name_city"] = df_zomato_geo["ward_name_city"].replace(ward_name_city_mapping)


In [133]:

# Get unique names
dataset_locations_upd = set(df_zomato_geo['ward_name_location'].unique())
dataset_cities_upd = set(df_zomato_geo['ward_name_city'].unique())

In [135]:
dataset_locations_upd

{'BTM Layout',
 'Banasavadi',
 'Banashankari',
 'Bannerghatta Road',
 'Basavanagudi',
 'Basaveshwara Nagar',
 'Bellanduru',
 'Bommanahalli',
 'Brigade Road',
 'Brookefield',
 'C V Raman Nagar',
 'Central Bangalore',
 'Church Street',
 'City Market',
 'Commercial Street',
 'Cunningham Road',
 'Domlur',
 'East Bangalore',
 'Ejipura',
 'Electronic City',
 'Frazer Town',
 'HBR Layout',
 'HSR - Singasandra',
 'Hebbal',
 'Hennur',
 'Hosur Road',
 'ITPL Main Road, Whitefield',
 'Indiranagar',
 'Infantry Road',
 'J P Nagar',
 'Jakkur',
 'Jalahalli',
 'Jayanagar',
 'Jeevan Bhima Nagar',
 'K R Puram',
 'Kaggadasapura',
 'Kalyan Nagar',
 'Kammanahalli',
 'Kanakapura Road',
 'Kengeri',
 'Koramangala',
 'Koramangala 1st Block',
 'Koramangala 2nd Block',
 'Koramangala 3rd Block',
 'Koramangala 4th Block',
 'Koramangala 5th Block',
 'Koramangala 6th Block',
 'Koramangala 7th Block',
 'Koramangala 8th Block',
 'Kumaraswamy Layout',
 'Langford Town',
 'Lavelle Road',
 'MG Road',
 'Magadi Road',
 'Majes

# Re-checking Matches between Data Set and geoJSON for 'ward_name_location'

### Exact Matches:
1. BTM Layout
2. Banasavadi
3. Basavanagudi
4. Basaveshwara Nagar
5. Bellanduru
6. Bommanahalli
7. C V Raman Nagar
8. Domlur
9. Ejipura
10. HSR - Singasandra
11. Hennur
12. J P Nagar
13. Kammanahalli
14. Kengeri
15. Koramangala
16. Kumaraswamy Layout
17. Malleswaram
18. Marathahalli
19. Nagarabhavi
20. Nagavara
21. Peenya
22. Rajarajeshwari Nagar
23. Ramamurthy Nagara
24. Sanjaya Nagar
25. Shanthi Nagar
26. Ulsoor
27. Uttarahalli
28. Varthuru
29. Vasanth Nagar
30. Vijayanagar
31. Whitefield

### Potential Matches (with slightly different spelling or additional descriptors):
1. Banashankari → Banashankari Temple ward
2. Hebbal → Hebbala
3. Indiranagar → Jeevanbhima Nagar 
4. Jeevan Bhima Nagar → Jeevanbhima Nagar
5. K R Puram → K R Puram
6. Rajajinagar → Rajaji Nagar
7. Richmond Road → Shanti Nagar
8. Sadashiv Nagar → Shivanagara
9. Shantinagar → Shanthi Nagar
10. Yelahanka → Yelahanka Satellite Town
11. Yesvantpur → Yeshwantpur
12. Bannerghatta Road → Bannergatta Road
13. Bellanduru → Bellandur
14. Koramangala 1st Block → Koramangala
15. Koramangala 2nd Block → Koramangala
16. Koramangala 3rd Block → Koramangala
17. Koramangala 4th Block → Koramangala
18. Koramangala 5th Block → Koramangala
19. Koramangala 6th Block → Koramangala
20. Koramangala 7th Block → Koramangala
21. Koramangala 8th Block → Koramangala

In [156]:
# Define the mapping of location values to ward name values
ward_name_mapping2 = {
    'Banashankari': 'Banashankari Temple ward',
    'Hebbal': 'Hebbala',
    'Jeevan Bhima Nagar': 'Jeevanbhima Nagar',
    'K R Puram': 'K R Puram',
    'Rajajinagar': 'Rajaji Nagar',
    'Shantinagar': 'Shanti Nagar',
    'Yelahanka': 'Yelahanka Satellite Town',
    'Bannerghatta Road': 'Bannergatta Road',
    'Bellanduru': 'Bellandur',
    'Koramangala 1st Block': 'Koramangala',
    'Koramangala 2nd Block': 'Koramangala',
    'Koramangala 3rd Block': 'Koramangala',
    'Koramangala 4th Block': 'Koramangala',
    'Koramangala 5th Block': 'Koramangala',
    'Koramangala 6th Block': 'Koramangala',
    'Koramangala 7th Block': 'Koramangala',
    'Koramangala 8th Block': 'Koramangala',
    'Yesvantpur': 'Yeshwantpur',
    'Jakkur': 'Jakkuru',
}

# Apply the mapping to the "ward_names_location" column
df_zomato_geo["ward_name_location"] = df_zomato_geo["ward_name_location"].replace(ward_name_mapping2)


In [138]:
dataset_cities_upd

{'BTM Layout',
 'Banashankari',
 'Bannerghatta Road',
 'Basavanagudi',
 'Bellandur',
 'Brigade Road',
 'Brookefield',
 'Church Street',
 'Electronic City',
 'Frazer Town',
 'HSR - Singasandra',
 'Indiranagar',
 'J P Nagar',
 'Jayanagar',
 'Kalyan Nagar',
 'Kammanahalli',
 'Koramangala 4th Block',
 'Koramangala 5th Block',
 'Koramangala 6th Block',
 'Koramangala 7th Block',
 'Lavelle Road',
 'MG Road',
 'Malleswaram',
 'Marathahalli',
 'New BEL Road',
 'Old Airport Road',
 'Rajajinagar',
 'Residency Road',
 'Sarjapur Road',
 'Whitefield'}

# Re-checking Matches between Data Set and geoJSON for 'ward_name_city'¶

### Exact Matches:
1. BTM Layout
2. Basavanagudi
3. HSR - Singasandra
4. J P Nagar
5. Kammanahalli
6. Malleswaram
7. Marathahalli
8. Whitefield

### Potential Matches (with slightly different spelling or additional descriptors):
1. Bellandur → Bellanduru
2. Banashankari → Banashankari Temple ward
4. Rajajinagar → Rajaji Nagar
2. Koramangala 4th Block → Koramangala
3. Koramangala 5th Block → Koramangala
4. Koramangala 6th Block → Koramangala
5. Koramangala 7th Block → Koramangala

In [147]:
# Define the mapping of city values to ward name values
ward_name_mapping3 = {
    'Banashankari': 'Banashankari Temple ward',
    'Bellanduru': 'Bellandur',
    'Rajajinagar': 'Rajaji Nagar',
    'Koramangala 4th Block': 'Koramangala',
    'Koramangala 5th Block': 'Koramangala',
    'Koramangala 6th Block': 'Koramangala',
    'Koramangala 7th Block': 'Koramangala',
}

# Apply the mapping to the "ward_names_location" column
df_zomato_geo["ward_name_city"] = df_zomato_geo["ward_name_city"].replace(ward_name_mapping3)


In [152]:

# Get unique names
df_locations = set(df_zomato_geo['ward_name_location'].unique())
df_cities = set(df_zomato_geo['ward_name_city'].unique())

In [154]:
df_locations

{'BTM Layout',
 'Banasavadi',
 'Banashankari Temple ward',
 'Bannergatta Road',
 'Basavanagudi',
 'Basaveshwara Nagar',
 'Bellandur',
 'Bommanahalli',
 'Brigade Road',
 'Brookefield',
 'C V Raman Nagar',
 'Central Bangalore',
 'Church Street',
 'City Market',
 'Commercial Street',
 'Cunningham Road',
 'Domlur',
 'East Bangalore',
 'Ejipura',
 'Electronic City',
 'Frazer Town',
 'HBR Layout',
 'HSR - Singasandra',
 'Hebbala',
 'Hennur',
 'Hosur Road',
 'ITPL Main Road, Whitefield',
 'Indiranagar',
 'Infantry Road',
 'J P Nagar',
 'Jakkur',
 'Jalahalli',
 'Jayanagar',
 'Jeevanbhima Nagar',
 'K R Puram',
 'Kaggadasapura',
 'Kalyan Nagar',
 'Kammanahalli',
 'Kanakapura Road',
 'Kengeri',
 'Koramangala',
 'Kumaraswamy Layout',
 'Langford Town',
 'Lavelle Road',
 'MG Road',
 'Magadi Road',
 'Majestic',
 'Malleswaram',
 'Marathahalli',
 'Mysore Road',
 'Nagarabhavi',
 'Nagawara',
 'New BEL Road',
 'North Bangalore',
 'Old Airport Road',
 'Old Madras Road',
 'Peenya',
 'RT Nagar',
 'Race Cours

In [158]:
df_cities

{'BTM Layout',
 'Banashankari Temple ward',
 'Bannerghatta Road',
 'Basavanagudi',
 'Bellandur',
 'Brigade Road',
 'Brookefield',
 'Church Street',
 'Electronic City',
 'Frazer Town',
 'HSR - Singasandra',
 'Indiranagar',
 'J P Nagar',
 'Jayanagar',
 'Kalyan Nagar',
 'Kammanahalli',
 'Koramangala',
 'Lavelle Road',
 'MG Road',
 'Malleswaram',
 'Marathahalli',
 'New BEL Road',
 'Old Airport Road',
 'Rajaji Nagar',
 'Residency Road',
 'Sarjapur Road',
 'Whitefield'}

In [160]:

# Define the mapping of city values to ward name values
ward_name_mapping5 = {
    'Bannerghatta Road': 'Bannergatta Road',
    'Hebbala': 'Hebbala',
    'Ulsoor': 'Ulsoor',
}

# Apply the mapping to the "ward_names_location" column
df_zomato_geo["ward_name_city"] = df_zomato_geo["ward_name_city"].replace(ward_name_mapping5)


# 4. Plotting a Choropleth

In [165]:
# Create a data frame with just the area_names and the values for rating we want plotted

data_to_plot_city = df_zomato_geo[['ward_name_city','rate']]
data_to_plot_city.head()

Unnamed: 0,ward_name_city,rate
0,Banashankari Temple ward,4.1
1,Banashankari Temple ward,4.1
2,Banashankari Temple ward,3.8
3,Banashankari Temple ward,3.7
4,Banashankari Temple ward,3.8


In [179]:
import folium

# Setup a folium map at a high-level zoom
map = folium.Map(location=[12.9716, 77.5946], zoom_start=11)  # Bengaluru coordinates

# Choropleth maps bind Pandas DataFrames and GeoJSON geometries
folium.Choropleth(
    geo_data=bengaluru_geo, 
    data=data_to_plot_city,
    columns=["ward_name_city", "rate"],
    key_on="feature.properties.KGISWardName",
    fill_color="YlOrBr", 
    fill_opacity=0.6, 
    line_opacity=0.1,
    legend_name="Average Rating"
).add_to(map)

folium.LayerControl().add_to(map)

map

In [169]:
# Create a data frame with just the area_names and the values for rating we want plotted

data_to_plot_location = df_zomato_geo[['ward_name_location','rate']]
data_to_plot_location.head()

Unnamed: 0,ward_name_location,rate
0,Banashankari Temple ward,4.1
1,Banashankari Temple ward,4.1
2,Banashankari Temple ward,3.8
3,Banashankari Temple ward,3.7
4,Basavanagudi,3.8


In [171]:
import folium

# Setup a folium map at a high-level zoom
map = folium.Map(location=[12.9716, 77.5946], zoom_start=11)  # Bengaluru coordinates

# Choropleth maps bind Pandas DataFrames and GeoJSON geometries
folium.Choropleth(
    geo_data=bengaluru_geo, 
    data=data_to_plot_location,
    columns=["ward_name_location", "rate"],
    key_on="feature.properties.KGISWardName",
    fill_color="YlOrBr", 
    fill_opacity=0.6, 
    line_opacity=0.1,
    legend_name="Average Rating"
).add_to(map)

folium.LayerControl().add_to(map)

map

# Conclusion

Testing a different geoJSON did not yield better results

In [181]:
# Create a data frame with just the area_names and the values for cost we want plotted

data_to_plot_cost = df_zomato_geo[['ward_name_location','approx_cost_2ppl']]
data_to_plot_cost.head()

Unnamed: 0,ward_name_location,approx_cost_2ppl
0,Banashankari Temple ward,800
1,Banashankari Temple ward,800
2,Banashankari Temple ward,800
3,Banashankari Temple ward,300
4,Basavanagudi,600


In [183]:
import folium

# Setup a folium map at a high-level zoom
map = folium.Map(location=[12.9716, 77.5946], zoom_start=11)  # Bengaluru coordinates

# Choropleth maps bind Pandas DataFrames and GeoJSON geometries
folium.Choropleth(
    geo_data=bengaluru_geo, 
    data=data_to_plot_cost,
    columns=["ward_name_location", "approx_cost_2ppl"],
    key_on="feature.properties.KGISWardName",
    fill_color="YlOrBr", 
    fill_opacity=0.6, 
    line_opacity=0.1,
    legend_name="Approx. Cost for 2 People"
).add_to(map)

folium.LayerControl().add_to(map)

map