In [1]:
import pandas as pd
import numpy as np
import scipy 
import matplotlib.pyplot as plt
import seaborn as sns 
import warnings 
import os
import googlemaps
from datetime import datetime

# Setting the working directory
os.chdir(r'../')

# Pandas option:
pd.set_option('max_colwidth',40)  # This allows us to see the data upto 40 characters per each column

pd.options.display.max_columns = None   # This allows us to see all the columns without collapsing 


# The below code is for for showing all expressions, not just the result of the last expression
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# Table styles, this below code is for decorating the dataframe no functional use.
table_styles = {
        'cerulean_palette' : [
        dict(selector="th", props=[("color", "#FFFFFF"), ("background", "#004D80"), ("text-transform", "capitalize")]),
        dict(selector="td", props=[("color", "#333333")]),
        dict(selector="table", props=[("font-family", 'Arial'), ("border-collapse", "collapse")]),
        dict(selector='tr:nth-child(even)', props=[('background', '#D3EEFF')]),
        dict(selector='tr:nth-child(odd)', props=[('background', '#FFFFFF')]),
        dict(selector="th", props=[("border", "1px solid #0070BA")]),
        dict(selector="td", props=[("border", "1px solid #0070BA")]),
        dict(selector="tr:hover", props=[("background", "#80D0FF")])
    ]
}

# Filtering all the warnings:
warnings.filterwarnings('ignore')

In [30]:
from pyproj import Geod
from shapely.geometry import Point, LineString, Polygon

In [2]:
with open(r'../my api token/googlemaps.txt', 'r') as f:
    api_token = f.read()
    f.close()
gmaps = googlemaps.Client(key=api_token)

In [27]:
df = pd.read_csv(r'../data/zomato.csv', na_values=['-','',' ','NEW','[]'])
drop_df = df.dropna(subset=['location'])
locations = pd.unique(drop_df['location']).tolist()
location_ids = range(1, len(locations)+1)
location_ids = [str(i).zfill(3) for i in location_ids]
location_id_map_dict = {
    str(i): str(j) for i,j in zip(locations, location_ids)
}
# df['location_id'] = df['location'].apply(lambda x: location_id_map_dict[str(x)])

In [28]:
print(df['location'].nunique(), drop_df['location'].nunique())

93 93


In [25]:
location_df = pd.DataFrame({'location': locations, 'location_id': location_ids})

In [26]:
location_df

Unnamed: 0,location,location_id
0,Banashankari,001
1,Basavanagudi,002
2,Mysore Road,003
3,Jayanagar,004
4,Kumaraswamy Layout,005
...,...,...
88,West Bangalore,089
89,Magadi Road,090
90,Yelahanka,091
91,Sahakara Nagar,092


In [43]:
location_df['latitude'] = np.nan
location_df['longitude'] = np.nan
location_df['northeast_lat'] = np.nan
location_df['northeast_lng'] = np.nan
location_df['southwest_lat'] = np.nan
location_df['northeast_lng'] = np.nan
location_df

Unnamed: 0,location,location_id,latitude,longitude,northeast_lat,northeast_lng,southwest_lat
0,Banashankari,001,,,,,
1,Basavanagudi,002,,,,,
2,Mysore Road,003,,,,,
3,Jayanagar,004,,,,,
4,Kumaraswamy Layout,005,,,,,
...,...,...,...,...,...,...,...
88,West Bangalore,089,,,,,
89,Magadi Road,090,,,,,
90,Yelahanka,091,,,,,
91,Sahakara Nagar,092,,,,,


In [101]:
def get_geoinfo(gmaps, address):
    try:
        if address.split(',')[-1].strip() == 'India':
            address = address
        else:
            address = address + ', Bangalore, India'
        # print(address)
        geocode_result = gmaps.geocode(address)
        try:
            lat = geocode_result[0]['geometry']['location']['lat']
            lng = geocode_result[0]['geometry']['location']['lng']
            bounds = geocode_result[0]['geometry']['bounds']
            northeast = bounds['northeast']
            northeast_lat = northeast['lat']
            northeast_lng = northeast['lng']
            southwest = bounds['southwest']
            southwest_lat = southwest['lat']
            southwest_lng = southwest['lng']
        except KeyError:
            lat = geocode_result[0]['geometry']['location']['lat']
            lng = geocode_result[0]['geometry']['location']['lng']
            viewport = geocode_result[0]['geometry']['viewport']
            northeast_lat = viewport['northeast']['lat']
            northeast_lng = viewport['northeast']['lng']
            southwest_lat = viewport['southwest']['lat']
            southwest_lng = viewport['southwest']['lng']
        return lat, lng, southwest_lat, southwest_lng, northeast_lat, northeast_lng
    except Exception as e:
        print(f'Error: {e}')
        return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan

In [102]:
location_df['latitude'], location_df['longitude'], location_df['southwest_lat'], location_df['southwest_lng'], location_df['northeast_lat'], location_df['northeast_lng'] = zip(*location_df['location'].apply(lambda x: get_geoinfo(gmaps, x)))

In [103]:
def compute_area(northeast_lat, northeast_lng, southwest_lat, southwest_lng):
    geod = Geod(ellps='WGS84')
    polygon = Polygon([
        (southwest_lng, southwest_lat),  # 西南角
        (southwest_lng, northeast_lat),  # 西北角
        (northeast_lng, northeast_lat),  # 东北角
        (northeast_lng, southwest_lat),   # 东南角
        (southwest_lng, southwest_lat)   # 闭合环，重复第一个点
    ])
    poly_area, _ = geod.geometry_area_perimeter(polygon)
    return abs(poly_area)

In [104]:
location_df['area'] = np.nan

In [105]:
for i in range(location_df.shape[0]):
    try:
        area = compute_area(location_df['northeast_lat'][i], location_df['northeast_lng'][i], location_df['southwest_lat'][i], location_df['southwest_lng'][i])
        location_df.loc[i,'area'] = area
    except Exception as e:
        print(f'Error: {i}')

In [107]:
location_df['area_proportion'] = location_df['area']/location_df['area'].sum()

In [110]:
location_df

Unnamed: 0,location,location_id,latitude,longitude,northeast_lat,northeast_lng,southwest_lat,southwest_lng,area,area_proportion
0,Banashankari,001,12.925453,77.546757,12.956544,77.574414,12.912888,77.531272,2.260970e+07,0.002952
1,Basavanagudi,002,12.940600,77.573763,12.957252,77.580142,12.928614,77.561375,6.451724e+06,0.000842
2,Mysore Road,003,12.901548,77.468285,12.966943,77.587932,12.792788,77.379910,4.350043e+08,0.056791
3,Jayanagar,004,12.930811,77.583858,12.946204,77.600701,12.912221,77.573609,1.105272e+07,0.001443
4,Kumaraswamy Layout,005,12.904438,77.564928,12.912197,77.573425,12.893798,77.552433,4.637250e+06,0.000605
...,...,...,...,...,...,...,...,...,...,...
88,West Bangalore,089,12.971599,77.594563,13.173706,77.882681,12.734289,77.379198,2.655736e+09,0.346712
89,Magadi Road,090,12.986927,77.476277,12.991974,77.564558,12.964547,77.378785,6.115682e+07,0.007984
90,Yelahanka,091,13.115466,77.606998,13.141923,77.617303,13.094701,77.580825,2.066397e+07,0.002698
91,Sahakara Nagar,092,13.062342,77.587103,13.077169,77.600632,13.055129,77.573844,7.083918e+06,0.000925


In [137]:
population_df = pd.read_csv(r'../data/bangalore_population.csv')
population_df['Year'] = population_df['Year'].astype(int)
population_df = population_df[(population_df['Year']>=2011) & (population_df['Year']<=2022)]
population_df.reset_index(drop=True, inplace=True)
population_df

Unnamed: 0,Year,Population,Growth Rate
0,2022,13193000,3.35%
1,2021,12765000,3.55%
2,2020,12327000,3.74%
3,2019,11883000,3.87%
4,2018,11440000,4.09%
5,2017,10990000,4.10%
6,2016,10557000,4.10%
7,2015,10141000,4.10%
8,2014,9742000,4.10%
9,2013,9358000,4.09%


In [146]:
visitors_df = pd.read_csv(r'../data/bangalore_visitors.csv')
visitors_df['Year'] = visitors_df['Year'].astype(int)
visitors_df

Unnamed: 0,Year,Local Visitors,Foreign Visitors
0,2011,84107390,574005
1,2012,94052729,595359
2,2013,98010140,636378
3,2014,118283220,561970
4,2015,119863942,636502
5,2016,129762600,461752
6,2017,179980191,498148
7,2018,214306456,543716
8,2019,227934714,608754
9,2020,77453339,165325


In [152]:
# local_tourists = 38014015/365
local_tourists = visitors_df['Local Visitors'].mean()/365   # daily average local tourists
foreign_tourists = visitors_df['Foreign Visitors'].mean()/365   # daily average foreign tourists
local_residents = population_df['Population'].mean()*0.066  # 6.6% of the population choose to eat out
area_population = local_residents + foreign_tourists + local_tourists

location_df['area_population'] = area_population
location_df['area_customer_flow'] = location_df['area_proportion']*location_df['area_population']

In [153]:
location_df[['area_population', 'area_customer_flow']] = location_df[['area_population', 'area_customer_flow']].applymap(lambda x: round(x, 4))

In [154]:
location_df

Unnamed: 0,location,location_id,latitude,longitude,northeast_lat,northeast_lng,southwest_lat,southwest_lng,area,area_proportion,area_population,area_customer_flow
0,Banashankari,001,12.925453,77.546757,12.956544,77.574414,12.912888,77.531272,2.260970e+07,0.002952,1.083382e+06,3197.8646
1,Basavanagudi,002,12.940600,77.573763,12.957252,77.580142,12.928614,77.561375,6.451724e+06,0.000842,1.083382e+06,912.5172
2,Mysore Road,003,12.901548,77.468285,12.966943,77.587932,12.792788,77.379910,4.350043e+08,0.056791,1.083382e+06,61526.0160
3,Jayanagar,004,12.930811,77.583858,12.946204,77.600701,12.912221,77.573609,1.105272e+07,0.001443,1.083382e+06,1563.2719
4,Kumaraswamy Layout,005,12.904438,77.564928,12.912197,77.573425,12.893798,77.552433,4.637250e+06,0.000605,1.083382e+06,655.8821
...,...,...,...,...,...,...,...,...,...,...,...,...
88,West Bangalore,089,12.971599,77.594563,13.173706,77.882681,12.734289,77.379198,2.655736e+09,0.346712,1.083382e+06,375621.2581
89,Magadi Road,090,12.986927,77.476277,12.991974,77.564558,12.964547,77.378785,6.115682e+07,0.007984,1.083382e+06,8649.8822
90,Yelahanka,091,13.115466,77.606998,13.141923,77.617303,13.094701,77.580825,2.066397e+07,0.002698,1.083382e+06,2922.6654
91,Sahakara Nagar,092,13.062342,77.587103,13.077169,77.600632,13.055129,77.573844,7.083918e+06,0.000925,1.083382e+06,1001.9332


In [155]:
location_df.loc[0, 'area_population']

np.float64(1083382.2959)

In [135]:
location_df.to_csv(r'../data/location_info.csv', index=False)