In [1]:
import numpy as np
import pandas as pd

import requests
import json
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans
import folium

In [2]:
#reading the data into a pandas dataframe
df=pd.read_csv('all_india_PO_list_without_APS_offices_ver2_lat_long.csv')
df.head()

Unnamed: 0,officename,pincode,officeType,Deliverystatus,divisionname,regionname,circlename,Taluk,Districtname,statename,Telephone,Related Suboffice,Related Headoffice,longitude,latitude
0,Achalapur B.O,504273,B.O,Delivery,Adilabad,Hyderabad,Andhra Pradesh,Asifabad,Adilabad,TELANGANA,,Rechini S.O,Mancherial H.O,,
1,Ada B.O,504293,B.O,Delivery,Adilabad,Hyderabad,Andhra Pradesh,Asifabad,Adilabad,TELANGANA,,Asifabad S.O,Mancherial H.O,,
2,Adegaon B.O,504307,B.O,Delivery,Adilabad,Hyderabad,Andhra Pradesh,Boath,Adilabad,TELANGANA,,Echoda S.O,Adilabad H.O,,
3,Adilabad Collectorate S.O,504001,S.O,Non-Delivery,Adilabad,Hyderabad,Andhra Pradesh,Adilabad,Adilabad,TELANGANA,08732-226703,,Adilabad H.O,,
4,Adilabad H.O,504001,H.O,Delivery,Adilabad,Hyderabad,Andhra Pradesh,Adilabad,Adilabad,TELANGANA,08732-226738,,,,


In [3]:
#cleaning the data
df.rename(columns={'officename':'Neighborhood'}, inplace=True)
df=df[['Neighborhood','pincode','regionname','longitude','latitude']]
df.head()

Unnamed: 0,Neighborhood,pincode,regionname,longitude,latitude
0,Achalapur B.O,504273,Hyderabad,,
1,Ada B.O,504293,Hyderabad,,
2,Adegaon B.O,504307,Hyderabad,,
3,Adilabad Collectorate S.O,504001,Hyderabad,,
4,Adilabad H.O,504001,Hyderabad,,


In [4]:
#Neighborhood names with the same Pincode will be combined as a single row.
#Firstly join neighborhoods based on pincode
temp_df=df.groupby('pincode')['Neighborhood'].apply(lambda x: "%s" % ', '.join(x))
temp_df=temp_df.reset_index(drop=False)
temp_df.rename(columns={'Neighborhood':'Neighborhood_joined'},inplace=True)

#Then merge the original dataframe along with joined neighborhoods
df_merge = pd.merge(df, temp_df, on='pincode')

#Drop the extra neighborhood rows
df_merge.drop(['Neighborhood'],axis=1,inplace=True)
#Drop duplicates
df_merge.drop_duplicates(inplace=True)

df_merge.rename(columns={'Neighborhood_joined':'Neighborhood'},inplace=True)

df_merge.head()

Unnamed: 0,pincode,regionname,longitude,latitude,Neighborhood
0,504273,Hyderabad,,,"Achalapur B.O, Ainam B.O, Bhimini B.O, Bibra B..."
14,504293,Hyderabad,,,"Ada B.O, Asifabad S.O, Babapur B.O, Burguda B...."
27,504307,Hyderabad,,,"Adegaon B.O, Boregaon B.O, Echoda S.O, Girjam ..."
44,504001,Hyderabad,,,"Adilabad Collectorate S.O, Adilabad H.O, Bhagy..."
51,504251,Hyderabad,,,"Agm Colony S.O, Aknepalli B.O, Bellampalli R.S..."


In [5]:
df_merge=df_merge[['pincode','Neighborhood','regionname','latitude','longitude']]
df_merge.reset_index(inplace=True, drop=True)
df_merge.head()

Unnamed: 0,pincode,Neighborhood,regionname,latitude,longitude
0,504273,"Achalapur B.O, Ainam B.O, Bhimini B.O, Bibra B...",Hyderabad,,
1,504293,"Ada B.O, Asifabad S.O, Babapur B.O, Burguda B....",Hyderabad,,
2,504307,"Adegaon B.O, Boregaon B.O, Echoda S.O, Girjam ...",Hyderabad,,
3,504001,"Adilabad Collectorate S.O, Adilabad H.O, Bhagy...",Hyderabad,,
4,504251,"Agm Colony S.O, Aknepalli B.O, Bellampalli R.S...",Hyderabad,,


In [6]:
#Mumbai dataset
df_mumbai=df_merge[df_merge['regionname']=='Mumbai']
df_mumbai.reset_index(inplace=True, drop=True)
df_mumbai

Unnamed: 0,pincode,Neighborhood,regionname,latitude,longitude
0,400037,"Antop Hill S.O, B P T Colony S.O, C G S Colony...",Mumbai,,
1,400003,"B.P.Lane S.O, Mandvi S.O (Mumbai), Masjid S.O,...",Mumbai,,
2,400012,"BEST STaff Quarters S.O, Chamarbaug S.O, Haffk...",Mumbai,,
3,400009,"Chinchbunder H.O, Noor Baug S.O, Princess Dock...",Mumbai,,
4,400033,"Cotton Exchange S.O, Kalachowki S.O, L B S N E...",Mumbai,,
...,...,...,...,...,...
235,401404,Palghar H.O,Mumbai,,
236,401504,"Parnali B.O, Salwad B.O, Tarapur App S.O",Mumbai,,
237,401609,Suryanagar S.O (Thane),Mumbai,,
238,401506,Tarapur Ti S.O,Mumbai,,


In [7]:
#Delhi dataset
df_delhi=df_merge[df_merge['regionname']=='Delhi']
df_delhi.reset_index(inplace=True, drop=True)
df_delhi

Unnamed: 0,pincode,Neighborhood,regionname,latitude,longitude
0,110092,"Anand Vihar S.O, IP Extension S.O, Laxmi Nagar...",Delhi,,
1,110051,"Azad Nagar S.O (East Delhi), Govind Pura S.O, ...",Delhi,,
2,110032,"Babarpur S.O (North East Delhi), Balbir Nagar ...",Delhi,,
3,110090,"Badarpur Khadar B.O, Karawal Nagar S.O, Shahee...",Delhi,,
4,110053,"Bhajan Pura S.O, Brahampuri S.O, Garhi Mandu B...",Delhi,,
...,...,...,...,...,...
90,110027,"Janta Market S.O, Rajouri Garden J-6 S.O, Rajo...",Delhi,,
91,110063,"Jawala Heri B.O, Madipur Slum Quarter S.O, Mad...",Delhi,,
92,110087,"Jwala Puri S.O, Sunder Vihar S.O",Delhi,,
93,110028,"Kair B.O, Khaira B.O, Naraina Industrial Estat...",Delhi,,


In [8]:
#finding the latitude and longitude for each neighborhood in mumbai
for index,row in df_mumbai.iterrows():
    address=row['pincode']
    print(address)
    geolocator = Nominatim(user_agent="ny_explorer")
    location = geolocator.geocode(address)
    if(location==None):
        continue
    df_mumbai.at[index,'latitude']=location.latitude
    df_mumbai.at[index,'longitude']=location.longitude
df_mumbai.head()

400037
400003
400012
400009
400033
400014
400010
400031
400015
400027
400029
400099
400069
400053
400058
400051
400050
400093
400052
400057
400056
400059
400049
400061
400054
400055
400096
400098
400094
400085
400084
400075
400082
400042
400078
400074
400071
400089
400022
400043
400086
400088
400083
400070
400019
400081
400080
400024
400087
400076
400079
400077
400072
400065
400104
400066
400091
400092
400067
400068
400063
400095
400060
400102
400101
400064
400097
400103
400004
400005
400001
400020
400032
400002
400021
400011
400007
400028
400030
400026
400013
400017
400008
400034
400016
400006
400025
400035
400018
410206
400708
410221
410201
410207
400614
400702
410101
400706
410208
410203
400701
410216
400707
400703
410218
410202
410210
410204
400709
410102
410220
400710
410222
400704
402305
402303
402403
402103
402401
402203
402126
402204
402201
402307
402107
402101
402301
402106
402113
402109
410205
402208
402114
402104
402306
402120
402404
402302
402308
402111
402110
402202
402122

Unnamed: 0,pincode,Neighborhood,regionname,latitude,longitude
0,400037,"Antop Hill S.O, B P T Colony S.O, C G S Colony...",Mumbai,19.023074,72.867622
1,400003,"B.P.Lane S.O, Mandvi S.O (Mumbai), Masjid S.O,...",Mumbai,18.951606,72.834797
2,400012,"BEST STaff Quarters S.O, Chamarbaug S.O, Haffk...",Mumbai,18.996311,72.842493
3,400009,"Chinchbunder H.O, Noor Baug S.O, Princess Dock...",Mumbai,18.958095,72.83659
4,400033,"Cotton Exchange S.O, Kalachowki S.O, L B S N E...",Mumbai,18.98178,72.840388


In [9]:
#neighborhoods for which geopy package could not find latitude and longitude
df_mumbai[df_mumbai['latitude'].isnull()]

Unnamed: 0,pincode,Neighborhood,regionname,latitude,longitude
101,410216,Jagdish Nagar S.O,Mumbai,,
107,410204,Khopoli Power House S.O,Mumbai,,
131,402208,"Awas B.O, Chondhi B.O, Dhokawade B.O, Kihim B....",Mumbai,,
132,402114,"Bagmandla S.O, Kolmandla B.O",Mumbai,,
136,402404,"Bharadkhol B.O, Dandguri B.O, Diveagar S.O, Na...",Mumbai,,
142,402122,"Chandore B.O, Govele B.O, Manjaravane B.O, Sai...",Mumbai,,
144,415213,Chimbhave S.O,Mumbai,,
150,402115,"Jui Budruk B.O, Narvan B.O, Ravdhal B.O, Upper...",Mumbai,,
154,402125,PCTS Nagothane S.O,Mumbai,,
156,402207,Thal S.O (Raigarh(MH)),Mumbai,,


In [10]:
#removing the rows which contain nan values
df_mumbai=df_mumbai.dropna()
df_mumbai

Unnamed: 0,pincode,Neighborhood,regionname,latitude,longitude
0,400037,"Antop Hill S.O, B P T Colony S.O, C G S Colony...",Mumbai,19.023074,72.867622
1,400003,"B.P.Lane S.O, Mandvi S.O (Mumbai), Masjid S.O,...",Mumbai,18.951606,72.834797
2,400012,"BEST STaff Quarters S.O, Chamarbaug S.O, Haffk...",Mumbai,18.996311,72.842493
3,400009,"Chinchbunder H.O, Noor Baug S.O, Princess Dock...",Mumbai,18.958095,72.836590
4,400033,"Cotton Exchange S.O, Kalachowki S.O, L B S N E...",Mumbai,18.981780,72.840388
...,...,...,...,...,...
235,401404,Palghar H.O,Mumbai,19.636472,72.961391
236,401504,"Parnali B.O, Salwad B.O, Tarapur App S.O",Mumbai,19.803563,72.723248
237,401609,Suryanagar S.O (Thane),Mumbai,43.749472,-79.839834
238,401506,Tarapur Ti S.O,Mumbai,19.781606,72.788900


In [11]:
#finding the latitude and longitude for each neighborhood in delhi
for index,row in df_delhi.iterrows():
    address=row['pincode']
    print(address)
    geolocator = Nominatim(user_agent="ny_explorer")
    location = geolocator.geocode(address)
    if(location==None):
        continue
    df_delhi.at[index,'latitude']=location.latitude
    df_delhi.at[index,'longitude']=location.longitude
df_delhi.head()

110092
110051
110032
110090
110053
110091
110094
110095
110031
110096
110093
110006
110033
110036
110034
110054
110052
110039
110085
110042
110040
110086
110084
110007
110081
110009
110035
110088
110082
110083
110056
110089
110002
110055
110005
110001
110008
110003
110012
110011
110060
110004
110069
110025
110062
110019
110076
110024
110049
110044
110020
110013
110017
110065
110048
110014
110080
110010
110038
110021
110029
110047
110061
110074
110030
110016
110037
110068
110067
110097
110023
110070
110066
110022
110057
110075
110018
110026
110071
110077
110041
110043
110072
110059
110058
110045
110015
110073
110078
110064
110027
110063
110087
110028
110046


Unnamed: 0,pincode,Neighborhood,regionname,latitude,longitude
0,110092,"Anand Vihar S.O, IP Extension S.O, Laxmi Nagar...",Delhi,28.553967,77.414816
1,110051,"Azad Nagar S.O (East Delhi), Govind Pura S.O, ...",Delhi,28.636566,77.296432
2,110032,"Babarpur S.O (North East Delhi), Balbir Nagar ...",Delhi,28.676474,77.285675
3,110090,"Badarpur Khadar B.O, Karawal Nagar S.O, Shahee...",Delhi,-8.01663,-34.926281
4,110053,"Bhajan Pura S.O, Brahampuri S.O, Garhi Mandu B...",Delhi,28.693288,77.270634


In [12]:
#something seems wrong about the data at index 3
address='Badarpur, Delhi'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
df_delhi.at[3,'latitude']=location.latitude
df_delhi.at[3,'longitude']=location.longitude
df_delhi

Unnamed: 0,pincode,Neighborhood,regionname,latitude,longitude
0,110092,"Anand Vihar S.O, IP Extension S.O, Laxmi Nagar...",Delhi,28.553967,77.414816
1,110051,"Azad Nagar S.O (East Delhi), Govind Pura S.O, ...",Delhi,28.636566,77.296432
2,110032,"Babarpur S.O (North East Delhi), Balbir Nagar ...",Delhi,28.676474,77.285675
3,110090,"Badarpur Khadar B.O, Karawal Nagar S.O, Shahee...",Delhi,28.493170,77.303024
4,110053,"Bhajan Pura S.O, Brahampuri S.O, Garhi Mandu B...",Delhi,28.693288,77.270634
...,...,...,...,...,...
90,110027,"Janta Market S.O, Rajouri Garden J-6 S.O, Rajo...",Delhi,28.644449,77.116296
91,110063,"Jawala Heri B.O, Madipur Slum Quarter S.O, Mad...",Delhi,28.671194,77.106597
92,110087,"Jwala Puri S.O, Sunder Vihar S.O",Delhi,28.663571,77.089019
93,110028,"Kair B.O, Khaira B.O, Naraina Industrial Estat...",Delhi,28.629716,77.142217


In [13]:
#Define Foursquare Credentials and Version
CLIENT_ID = '23CKFIQ2Y0FFIGMLDUMPJIEQGWSZ2L55MN4IIXQHPMLQPUCA' # your Foursquare ID
CLIENT_SECRET = 'KYWR3BLG5KEZA313Y45QTVN4SDZG1BIGIMIOZKD1J2D3I02S' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 23CKFIQ2Y0FFIGMLDUMPJIEQGWSZ2L55MN4IIXQHPMLQPUCA
CLIENT_SECRET:KYWR3BLG5KEZA313Y45QTVN4SDZG1BIGIMIOZKD1J2D3I02S


In [14]:
LIMIT=100
radius=500

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [15]:
mumbai_venues = getNearbyVenues(names=df_mumbai['Neighborhood'],
                                   latitudes=df_mumbai['latitude'],
                                   longitudes=df_mumbai['longitude']
                                  )


Antop Hill S.O, B P T Colony S.O, C G S Colony S.O, Wadala Truck Terminal S.O
B.P.Lane S.O, Mandvi S.O (Mumbai), Masjid S.O, Null Bazar S.O
BEST STaff Quarters S.O, Chamarbaug S.O, Haffkin Institute S.O, Lal Baug S.O, Parel Naka S.O, Parel Rly Work Shop S.O, Parel S.O
Chinchbunder H.O, Noor Baug S.O, Princess Dock S.O
Cotton Exchange S.O, Kalachowki S.O, L B S N E collage S.O, Reay Road S.O, Tank Road S.O
Dadar Colony S.O, Dadar H.O, Naigaon S.O (Mumbai)
Dockyard Road S.O, Mazgaon Dock S.O, Mazgaon Road S.O, Mazgaon S.O, V K Bhavan S.O
Kidwai Nagar S.O (Mumbai), Wadala Rs S.O, Wadala S.O
Sewri S.O
V J B Udyan S.O
A I Staff Colony S.O, Santacruz P&t Colony S.O
Airport S.O (Mumbai), International Airport S.O, Sahar P & T Colony S.O, Sahargaon B.O
Andheri East S.O, Nagardas Road S.O
Andheri H.O, Azad Nagar S.O (Mumbai)
Andheri Railway Station S.O, H.M.P. School S.O
Audit Bhavan S.O, B.N. Bhavan S.O, Bandra(East) S.O, Government Colony S.O, Kherwadi S.O
Bandra West S.O
Chakala Midc S.O
Dan

Bamnoli B.O, Hodgaon B.O, Kavil Vahal B.O, Kharavali B.O, Lonshi B.O, Mangaon S.O (Raigarh(MH)), Palasgaon Khurd B.O, Pen Tarfe Tale B.O, Talegaon Tarfe Goregaon B.O, Tempale B.O, Unegaon B.O, Usar Khurd B.O
Barasgaon B.O, Kumbheshivthar B.O, Mazeri B.O, Padvi B.O, Taliye B.O, Varandh S.O
Bhale B.O, Borwadi B.O, Jite B.O, Koste Khurd B.O, Nizampur S.O (Raigarh(MH)), Shirawali B.O
Bhave B.O, Birwadi S.O, Dahivad B.O, Dapoli B.O, Dhamne B.O, Jite B.O, Mangrun B.O, Mohot B.O, Nigade B.O, Wagholi B.O, Waki Budruk B.O, Walan Budruk B.O, Walan Khurd B.O
Bhira S.O (Raigarh(MH)), Jamgaon B.O, Kudli B.O, Patnus B.O, Vile B.O
Bhonang B.O, Borghar Haveli B.O, Mandad B.O, Pitsai B.O, Rahatad B.O, Tala S.O (Raigarh(MH)), Talegaon B.O, Tokarde B.O
Bhoste B.O, Galsure B.O, Hareshwar B.O, Jasawali B.O, Ranawali B.O, Saigaon B.O, Shriwardhan S.O, Vadghar B.O, Vadshetvave B.O
Borlimandla B.O, Korlai B.O, Revdanda S.O, Vikram Baug B.O, Walke B.O
Chikhlap B.O, Gondghar B.O, Kelte B.O, Khamgaon B.O, Kharsa

In [16]:
mumbai_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Antop Hill S.O, B P T Colony S.O, C G S Colony...",19.023074,72.867622,Decent Restaurant,19.026312,72.865045,Asian Restaurant
1,"Antop Hill S.O, B P T Colony S.O, C G S Colony...",19.023074,72.867622,naaz supermarket,19.022164,72.863284,Grocery Store
2,"B.P.Lane S.O, Mandvi S.O (Mumbai), Masjid S.O,...",18.951606,72.834797,Patel Restaurant,18.949798,72.834655,Restaurant
3,"B.P.Lane S.O, Mandvi S.O (Mumbai), Masjid S.O,...",18.951606,72.834797,Gulshan-E-Iran,18.948118,72.835427,Middle Eastern Restaurant
4,"B.P.Lane S.O, Mandvi S.O (Mumbai), Masjid S.O,...",18.951606,72.834797,Bhagat Tarachand Restaurant,18.951802,72.830486,Indian Restaurant
...,...,...,...,...,...,...,...
3258,Tarapur Ti S.O,19.781606,72.788900,D-Mart,19.781531,72.788952,Department Store
3259,Virar East S.O,19.452024,72.817016,Vavtewadi Cricket Club,19.452798,72.817329,Playground
3260,Virar East S.O,19.452024,72.817016,RJ hotel,19.454709,72.818562,Diner
3261,Virar East S.O,19.452024,72.817016,balaji chinese,19.448584,72.818075,Indian Chinese Restaurant


In [17]:
mumbai_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"A I Staff Colony S.O, Santacruz P&t Colony S.O",17,17,17,17,17,17
"Abitghar B.O, Abje B.O, Alonde B.O, Baliwali B.O, Dohe B.O, Gargoan B.O, Gorhe B.O, Hamarapur B.O, Kalambe B.O, Kanchad B.O, Khare Ambiwali B.O, Khariwali B.O, Kone B.O, Mala B.O, Malwada B.O, Maniwali B.O, Moj B.O, Parali B.O, Pik B.O, Posheri B.O, Sonale B.O, Tuse B.O, Utawali B.O, Varale B.O, Vilkos B.O, Wada S.O, Waki B.O",1,1,1,1,1,1
Additional Ambernath S.O,4,4,4,4,4,4
"Adgaon B.O, Borlipanchatan S.O, Sarve B.O, Velas B.O, Wadwali B.O",1,1,1,1,1,1
"Agarwadi B.O, Dahisar tymanor B.O, Datiware B.O, Dhekale B.O, Edwan B.O, Navghar B.O, Pargaon B.O, Tandulwadi B.O, Tembhikhodave B.O, Umbarpada S.O, Usarani B.O, Virathan Budruk B.O",1,1,1,1,1,1
...,...,...,...,...,...,...
Vidyashram S.O,1,1,1,1,1,1
Virar East S.O,4,4,4,4,4,4
Vishnunagar S.O,3,3,3,3,3,3
Wagle I.E. S.O,24,24,24,24,24,24


In [19]:
# one hot encoding
mumbai_onehot = pd.get_dummies(mumbai_venues[['Venue Category']], prefix="", prefix_sep="")
#mumbai_onehot.drop(['Neighborhood'],axis=1,inplace=True) 
mumbai_onehot.insert(loc=0, column='Neighborhood', value=mumbai_venues['Neighborhood'] )

mumbai_onehot

Unnamed: 0,Neighborhood,ATM,Accessories Store,Airport Service,American Restaurant,Antique Shop,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,...,Tram Station,Travel & Transport,Tree,Turkish Restaurant,Vegetarian / Vegan Restaurant,Whisky Bar,Wine Bar,Women's Store,Yoga Studio,Zoo
0,"Antop Hill S.O, B P T Colony S.O, C G S Colony...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Antop Hill S.O, B P T Colony S.O, C G S Colony...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"B.P.Lane S.O, Mandvi S.O (Mumbai), Masjid S.O,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"B.P.Lane S.O, Mandvi S.O (Mumbai), Masjid S.O,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"B.P.Lane S.O, Mandvi S.O (Mumbai), Masjid S.O,...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3258,Tarapur Ti S.O,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3259,Virar East S.O,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3260,Virar East S.O,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3261,Virar East S.O,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
#let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
mumbai_grouped = mumbai_onehot.groupby('Neighborhood').mean().reset_index()
mumbai_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,Airport Service,American Restaurant,Antique Shop,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,...,Tram Station,Travel & Transport,Tree,Turkish Restaurant,Vegetarian / Vegan Restaurant,Whisky Bar,Wine Bar,Women's Store,Yoga Studio,Zoo
0,"A I Staff Colony S.O, Santacruz P&t Colony S.O",0.000000,0.0,0.0,0.0,0.0,0.000000,0.058824,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.058824,0.0,0.000000,0.0,0.0,0.0
1,"Abitghar B.O, Abje B.O, Alonde B.O, Baliwali B...",0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0
2,Additional Ambernath S.O,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0
3,"Adgaon B.O, Borlipanchatan S.O, Sarve B.O, Vel...",0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0
4,"Agarwadi B.O, Dahisar tymanor B.O, Datiware B....",0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157,Vidyashram S.O,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0
158,Virar East S.O,0.000000,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0
159,Vishnunagar S.O,0.333333,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,...,0.0,0.333333,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0
160,Wagle I.E. S.O,0.000000,0.0,0.0,0.0,0.0,0.041667,0.000000,0.0,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.041667,0.0,0.0,0.0


In [21]:
#write a function to sort the venues in descending order.
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


In [22]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
mumbai_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
mumbai_neighborhoods_venues_sorted['Neighborhood'] = mumbai_grouped['Neighborhood']

for ind in np.arange(mumbai_grouped.shape[0]):
    mumbai_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(mumbai_grouped.iloc[ind, :], num_top_venues)

mumbai_neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"A I Staff Colony S.O, Santacruz P&t Colony S.O",Restaurant,Park,Football Stadium,Recreation Center,Café
1,"Abitghar B.O, Abje B.O, Alonde B.O, Baliwali B...",Bus Station,Zoo,Food & Drink Shop,Flower Shop,Flea Market
2,Additional Ambernath S.O,Indian Restaurant,Platform,Burger Joint,Dumpling Restaurant,Flower Shop
3,"Adgaon B.O, Borlipanchatan S.O, Sarve B.O, Vel...",Bus Station,Zoo,Food & Drink Shop,Flower Shop,Flea Market
4,"Agarwadi B.O, Dahisar tymanor B.O, Datiware B....",Train Station,Zoo,Dumpling Restaurant,Food,Flower Shop
...,...,...,...,...,...,...
157,Vidyashram S.O,Snack Place,Food & Drink Shop,Food,Flower Shop,Flea Market
158,Virar East S.O,Playground,Indian Chinese Restaurant,Diner,Multiplex,Eastern European Restaurant
159,Vishnunagar S.O,ATM,Harbor / Marina,Travel & Transport,Dumpling Restaurant,Flower Shop
160,Wagle I.E. S.O,Gym / Fitness Center,Restaurant,Bakery,Pizza Place,Italian Restaurant


In [23]:
#Run k-means to cluster the neighborhood into 5 clusters.


# set number of clusters
kclusters = 5

mumbai_grouped_clustering = mumbai_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(mumbai_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 1, 4, 1, 1, 0, 0, 0, 0, 0], dtype=int32)

In [24]:
# add clustering labels
mumbai_neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

mumbai_merged = df_mumbai

mumbai_merged = mumbai_merged.join(mumbai_neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
mumbai_merged=mumbai_merged.dropna()
mumbai_merged


Unnamed: 0,pincode,Neighborhood,regionname,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,400037,"Antop Hill S.O, B P T Colony S.O, C G S Colony...",Mumbai,19.023074,72.867622,0.0,Grocery Store,Asian Restaurant,Zoo,Dumpling Restaurant,Flower Shop
1,400003,"B.P.Lane S.O, Mandvi S.O (Mumbai), Masjid S.O,...",Mumbai,18.951606,72.834797,4.0,Indian Restaurant,Restaurant,Electronics Store,Café,American Restaurant
2,400012,"BEST STaff Quarters S.O, Chamarbaug S.O, Haffk...",Mumbai,18.996311,72.842493,0.0,Coffee Shop,Chinese Restaurant,Hotel,Roof Deck,Hotel Pool
3,400009,"Chinchbunder H.O, Noor Baug S.O, Princess Dock...",Mumbai,18.958095,72.836590,4.0,Indian Restaurant,Hotel,Furniture / Home Store,Chinese Restaurant,Zoo
4,400033,"Cotton Exchange S.O, Kalachowki S.O, L B S N E...",Mumbai,18.981780,72.840388,0.0,Hookah Bar,Arts & Crafts Store,Furniture / Home Store,Pizza Place,Convenience Store
...,...,...,...,...,...,...,...,...,...,...,...
233,401207,"Naigaon B.O, Papdi S.O",Mumbai,19.342852,72.823456,0.0,Bakery,Spa,Chinese Restaurant,Zoo,Food
234,401209,Nallosapare E S.O,Mumbai,19.417110,72.827193,4.0,Platform,Indian Restaurant,Drugstore,Flower Shop,Flea Market
236,401504,"Parnali B.O, Salwad B.O, Tarapur App S.O",Mumbai,19.803563,72.723248,0.0,Mobile Phone Shop,Food & Drink Shop,Food,Flower Shop,Flea Market
238,401506,Tarapur Ti S.O,Mumbai,19.781606,72.788900,0.0,Department Store,Zoo,Eastern European Restaurant,Food,Flower Shop


In [25]:
address = 'Mumbai, India'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Mumbai are {}, {}.'.format(latitude, longitude))


The geograpical coordinate of Mumbai are 19.0759899, 72.8773928.


In [26]:
# create map for mumbai
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(mumbai_merged['latitude'], mumbai_merged['longitude'], mumbai_merged['Neighborhood'], mumbai_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [27]:
#Cluster label 0
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 0, mumbai_merged.columns[[1] + list(range(5, mumbai_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Antop Hill S.O, B P T Colony S.O, C G S Colony...",0.0,Grocery Store,Asian Restaurant,Zoo,Dumpling Restaurant,Flower Shop
2,"BEST STaff Quarters S.O, Chamarbaug S.O, Haffk...",0.0,Coffee Shop,Chinese Restaurant,Hotel,Roof Deck,Hotel Pool
4,"Cotton Exchange S.O, Kalachowki S.O, L B S N E...",0.0,Hookah Bar,Arts & Crafts Store,Furniture / Home Store,Pizza Place,Convenience Store
5,"Dadar Colony S.O, Dadar H.O, Naigaon S.O (Mumbai)",0.0,Cricket Ground,Concert Hall,Movie Theater,Indian Restaurant,Bar
6,"Dockyard Road S.O, Mazgaon Dock S.O, Mazgaon R...",0.0,Snack Place,Café,Ice Cream Shop,Indian Restaurant,Garden
...,...,...,...,...,...,...,...
230,"Kelwa Road B.O, Kelwa S.O (Thane)",0.0,Hotel,Hardware Store,Turkish Restaurant,Bus Station,Zoo
233,"Naigaon B.O, Papdi S.O",0.0,Bakery,Spa,Chinese Restaurant,Zoo,Food
236,"Parnali B.O, Salwad B.O, Tarapur App S.O",0.0,Mobile Phone Shop,Food & Drink Shop,Food,Flower Shop,Flea Market
238,Tarapur Ti S.O,0.0,Department Store,Zoo,Eastern European Restaurant,Food,Flower Shop


In [28]:
#Cluster label 1
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 1, mumbai_merged.columns[[1] + list(range(5, mumbai_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
100,Ghansoli S.O,1.0,Bus Station,Train Station,Zoo,Dumpling Restaurant,Flower Shop
116,"Adgaon B.O, Borlipanchatan S.O, Sarve B.O, Vel...",1.0,Bus Station,Zoo,Food & Drink Shop,Flower Shop,Flea Market
197,"Abitghar B.O, Abje B.O, Alonde B.O, Baliwali B...",1.0,Bus Station,Zoo,Food & Drink Shop,Flower Shop,Flea Market
199,"Agarwadi B.O, Dahisar tymanor B.O, Datiware B....",1.0,Train Station,Zoo,Dumpling Restaurant,Food,Flower Shop
204,"Asangaon B.O, Bavada B.O, Chandigaon B.O, Gowa...",1.0,Train Station,Zoo,Dumpling Restaurant,Food,Flower Shop


In [29]:
# Cluster label 2
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 2, mumbai_merged.columns[[1] + list(range(5, mumbai_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
34,"Bhandup Ind. Estate S.O, Bhandup West S.O, J.M...",2.0,ATM,Restaurant,Snack Place,Dumpling Restaurant,Flower Shop
39,"D.M. Colony S.O, Shivaji Nagar S.O (Mumbai)",2.0,ATM,Indian Restaurant,Dumpling Restaurant,Food,Flower Shop
95,"Bokadvira B.O, Chanje B.O, Karanja B.O, Kegaon...",2.0,ATM,Creperie,Food,Flower Shop,Flea Market
159,"Amane B.O, Angaon B.O, Anjur B.O, Chimbipada B...",2.0,ATM,Creperie,Food,Flower Shop,Flea Market
177,"Jambhul B.O, Netaji Bazar S.O",2.0,ATM,Multiplex,Food & Drink Shop,Flower Shop,Flea Market
195,Vishnunagar S.O,2.0,ATM,Harbor / Marina,Travel & Transport,Dumpling Restaurant,Flower Shop


In [30]:
#Cluster label 3
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 3, mumbai_merged.columns[[1] + list(range(5, mumbai_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
189,O.E.Ambernath S.O,3.0,Ice Cream Shop,Zoo,Dumpling Restaurant,Food,Flower Shop


In [33]:
#Cluster label 4
mumbai_merged.loc[mumbai_merged['Cluster Labels'] == 4, mumbai_merged.columns[[1] + list(range(5, mumbai_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,"B.P.Lane S.O, Mandvi S.O (Mumbai), Masjid S.O,...",4.0,Indian Restaurant,Restaurant,Electronics Store,Café,American Restaurant
3,"Chinchbunder H.O, Noor Baug S.O, Princess Dock...",4.0,Indian Restaurant,Hotel,Furniture / Home Store,Chinese Restaurant,Zoo
17,Chakala Midc S.O,4.0,Indian Restaurant,Chinese Restaurant,Cosmetics Shop,Seafood Restaurant,Fast Food Restaurant
40,"Ghatkopar West S.O, Rifle Range S.O, Sahakar B...",4.0,Indian Restaurant,Men's Store,Electronics Store,Restaurant,Bakery
73,"Kalbadevi H.O, Ramwadi S.O, S. C. Court S.O, T...",4.0,Indian Restaurant,Chinese Restaurant,Hotel,Juice Bar,Dessert Shop
88,"Worli Naka S.O, Worli S.O",4.0,Indian Restaurant,Bakery,Seafood Restaurant,Zoo,Flower Shop
106,Kharghar S.O,4.0,Indian Restaurant,Ice Cream Shop,Fast Food Restaurant,Outdoor Sculpture,Zoo
111,Millenium Business Park S.O,4.0,Indian Restaurant,Food Court,Restaurant,Hotel,Flower Shop
123,"Ambavade B.O, Kavale B.O, Pangari B.O, Tamhane...",4.0,Indian Restaurant,Zoo,Dumpling Restaurant,Food,Flower Shop
157,Additional Ambernath S.O,4.0,Indian Restaurant,Platform,Burger Joint,Dumpling Restaurant,Flower Shop


In [34]:
#lets analyse the delhi dataset
df_delhi


Unnamed: 0,pincode,Neighborhood,regionname,latitude,longitude
0,110092,"Anand Vihar S.O, IP Extension S.O, Laxmi Nagar...",Delhi,28.553967,77.414816
1,110051,"Azad Nagar S.O (East Delhi), Govind Pura S.O, ...",Delhi,28.636566,77.296432
2,110032,"Babarpur S.O (North East Delhi), Balbir Nagar ...",Delhi,28.676474,77.285675
3,110090,"Badarpur Khadar B.O, Karawal Nagar S.O, Shahee...",Delhi,28.493170,77.303024
4,110053,"Bhajan Pura S.O, Brahampuri S.O, Garhi Mandu B...",Delhi,28.693288,77.270634
...,...,...,...,...,...
90,110027,"Janta Market S.O, Rajouri Garden J-6 S.O, Rajo...",Delhi,28.644449,77.116296
91,110063,"Jawala Heri B.O, Madipur Slum Quarter S.O, Mad...",Delhi,28.671194,77.106597
92,110087,"Jwala Puri S.O, Sunder Vihar S.O",Delhi,28.663571,77.089019
93,110028,"Kair B.O, Khaira B.O, Naraina Industrial Estat...",Delhi,28.629716,77.142217


In [35]:
delhi_venues = getNearbyVenues(names=df_delhi['Neighborhood'],
                                   latitudes=df_delhi['latitude'],
                                   longitudes=df_delhi['longitude']
                                  )


Anand Vihar S.O, IP Extension S.O, Laxmi Nagar S.O (East Delhi), Mandawali Fazalpur S.O, Nirman Vihar S.O, Shakarpur S.O, Surajmal Vihar S.O, Yozna Vihar S.O
Azad Nagar S.O (East Delhi), Govind Pura S.O, Krishna Nagar H.O, Ram Nagar S.O (East Delhi)
Babarpur S.O (North East Delhi), Balbir Nagar S.O, Bhola Nath Nagar S.O, Distt. Court (KKD) S.O, Goverdhan Bihari Colony S.O, Loni Road S.O, Man Sarovar Park S.O, Rohtash Nagar S.O, Shahdara Mandi S.O, Shahdara S.O, Shivaji Park S.O (East Delhi), Telewara S.O, Vishwas Nagar S.O
Badarpur Khadar B.O, Karawal Nagar S.O, Shaheed Bhagat Singh Colony S.O, Shriram Colony Rajeev Nagar S.O, Sonia Vihar S.O
Bhajan Pura S.O, Brahampuri S.O, Garhi Mandu B.O, Ghonda B.O, Jafrabad S.O (East Delhi), Jagjit Nagar S.O, Maujpur S.O, New Usmanpur B.O, Seelampur S.O, Yamuna Vihar S.O
Chilla B.O, Himmatpuri S.O, Kalyanpuri S.O, Kalyanvas S.O, Mayur Vihar Ph-I S.O, Patparganj S.O, Trilok Puri S.O
Dayalpur B.O, Gokal Puri S.O, Johripur B.O, Khazuri Khas B.O, Sabh

Masood Pur B.O, Vasant Kunj Pkt-A S.O, Vasant Kunj S.O
R K Puram (Main) S.O, R K Puram West S.O
R K Puram Sect-1 S.O, R K Puram Sect-12 S.O, R K Puram Sect-3 S.O, R K Puram Sect-4 S.O, R K Puram Sect-5 S.O, R K Puram Sect7 S.O, R K Puram Sect-8 S.O, R K Puram Sector - 6 Postal SB S.O
Vasant Vihar-1 S.O, Vasant Vihar-2 S.O
Amberhai B.O, District Court Complex Dwarka S.O, Dwarka Sec-6 S.O
Ashok Nagar S.O (West Delhi), Chand Nagar S.O, Chaukhandi B.O, Fateh Nagar S.O, Khyala Phase -  I S.O, Khyala Phase - II S.O, M.B.S. Nagar S.O, Mahabir Nagar S.O, Tilak Nagar East S.O, Tilak Nagar S.O (West Delhi), Vikas Puri S.O, Vishnu Garden S.O
Ashoka Park Extn. S.O, Punjabi Bagh S.O, Punjabi Bagh Sec - III S.O, Shivaji Park S.O (West Delhi)
Badusarai B.O, Chhawla S.O, DC Goyla B.O, Kangan Heri B.O
Bagdola B.O, Barthal B.O, Dhulsiras B.O, Palam Extn (Harijan Basti) S.O, Raj Nagar - II S.O
Bakkarwala B.O, Hirankudna B.O, Mundka B.O, Nangloi - II B.O, Nangloi - III S.O, Nangloi S.O, Nilothi B.O, Ranho

In [36]:
delhi_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Azad Nagar S.O (East Delhi), Govind Pura S.O, ...",28.636566,77.296432,Domino's Pizza,28.639000,77.293000,Pizza Place
1,"Azad Nagar S.O (East Delhi), Govind Pura S.O, ...",28.636566,77.296432,"DDA Market, IP Extn.",28.636093,77.299462,Shop & Service
2,"Azad Nagar S.O (East Delhi), Govind Pura S.O, ...",28.636566,77.296432,Takle Lala Ki Dukan,28.635093,77.299170,Grocery Store
3,"Azad Nagar S.O (East Delhi), Govind Pura S.O, ...",28.636566,77.296432,"Purbasha Kali Bari, Delhi, India",28.634302,77.298804,Arts & Entertainment
4,"Babarpur S.O (North East Delhi), Balbir Nagar ...",28.676474,77.285675,Gym Box Health Care,28.676403,77.282890,Gym / Fitness Center
...,...,...,...,...,...,...,...
419,"Kair B.O, Khaira B.O, Naraina Industrial Estat...",28.629716,77.142217,PVR Naraina,28.632789,77.138895,Multiplex
420,"Kair B.O, Khaira B.O, Naraina Industrial Estat...",28.629716,77.142217,McDonald's,28.632558,77.138786,Fast Food Restaurant
421,"Kair B.O, Khaira B.O, Naraina Industrial Estat...",28.629716,77.142217,Domino,28.631226,77.138393,Pizza Place
422,"Kair B.O, Khaira B.O, Naraina Industrial Estat...",28.629716,77.142217,Fusion Cafe,28.632988,77.138700,Café


In [37]:
delhi_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"505 A B Workshop S.O, A F Palam S.O, Aps Colony B.O, Bazar Road S.O, C.V.D. S.O, COD S.O (South West Delhi), Delhi Cantt S.O, Dhaula Kuan S.O, Kirby Place S.O, Pinto Park S.O, R R Hospital S.O, Signal Enclave S.O, Station Road S.O (South West Delhi), Subroto Park S.O",3,3,3,3,3,3
"A.G.C.R. S.O, Ajmeri Gate Extn. S.O, Darya Ganj S.O, Gandhi Smarak Nidhi S.O, I.P.Estate S.O, Indraprastha H.O, Minto Road S.O",4,4,4,4,4,4
"A.K.Market S.O, Multani Dhanda S.O, Pahar Ganj S.O, Swami Ram Tirth Nagar S.O",2,2,2,2,2,2
"Abul Fazal Enclave-I S.O, Jamia Nagar S.O, New Friends Colony S.O, Sukhdev Vihar S.O, Zakir Nagar S.O",5,5,5,5,5,5
"Air Force Station Tugalkabad S.O, BSF Camp Tigri S.O, Dakshinpuri Phase-I S.O, Dakshinpuri Phase-II S.O, Dakshinpuri Phase-III S.O, Deoli B.O, Dr. Ambedkar Nagar S.O (South Delhi), Hamdard Nagar S.O, Khanpur S.O (South Delhi), Pushpa Bhawan S.O, Talimabad S.O",2,2,2,2,2,2
...,...,...,...,...,...,...
Rashtrapati Bhawan S.O,3,3,3,3,3,3
Rohini Sector 15 S.O,4,4,4,4,4,4
Sangam Vihar S.O,1,1,1,1,1,1
Union Public Service Commission S.O,6,6,6,6,6,6


In [40]:
# one hot encoding
delhi_onehot = pd.get_dummies(delhi_venues[['Venue Category']], prefix="", prefix_sep="")
#delhi_onehot.drop(['Neighborhood'],axis=1,inplace=True) 
delhi_onehot.insert(loc=0, column='Neighborhood', value=delhi_venues['Neighborhood'] )

delhi_onehot

Unnamed: 0,Neighborhood,ATM,Accessories Store,Arcade,Art Gallery,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Austrian Restaurant,BBQ Joint,...,Steakhouse,Supermarket,Sushi Restaurant,Tapas Restaurant,Tennis Court,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Women's Store
0,"Azad Nagar S.O (East Delhi), Govind Pura S.O, ...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Azad Nagar S.O (East Delhi), Govind Pura S.O, ...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Azad Nagar S.O (East Delhi), Govind Pura S.O, ...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Azad Nagar S.O (East Delhi), Govind Pura S.O, ...",0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Babarpur S.O (North East Delhi), Balbir Nagar ...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
419,"Kair B.O, Khaira B.O, Naraina Industrial Estat...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
420,"Kair B.O, Khaira B.O, Naraina Industrial Estat...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
421,"Kair B.O, Khaira B.O, Naraina Industrial Estat...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
422,"Kair B.O, Khaira B.O, Naraina Industrial Estat...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [41]:
#let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
delhi_grouped = delhi_onehot.groupby('Neighborhood').mean().reset_index()
delhi_grouped

Unnamed: 0,Neighborhood,ATM,Accessories Store,Arcade,Art Gallery,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Austrian Restaurant,BBQ Joint,...,Steakhouse,Supermarket,Sushi Restaurant,Tapas Restaurant,Tennis Court,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Women's Store
0,"505 A B Workshop S.O, A F Palam S.O, Aps Colon...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"A.G.C.R. S.O, Ajmeri Gate Extn. S.O, Darya Gan...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"A.K.Market S.O, Multani Dhanda S.O, Pahar Ganj...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Abul Fazal Enclave-I S.O, Jamia Nagar S.O, New...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Air Force Station Tugalkabad S.O, BSF Camp Tig...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,Rashtrapati Bhawan S.O,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
74,Rohini Sector 15 S.O,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75,Sangam Vihar S.O,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76,Union Public Service Commission S.O,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
delhi_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
delhi_neighborhoods_venues_sorted['Neighborhood'] = delhi_grouped['Neighborhood']

for ind in np.arange(delhi_grouped.shape[0]):
    delhi_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(delhi_grouped.iloc[ind, :], num_top_venues)

delhi_neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"505 A B Workshop S.O, A F Palam S.O, Aps Colon...",Cricket Ground,Playground,Multiplex,Eastern European Restaurant,Food
1,"A.G.C.R. S.O, Ajmeri Gate Extn. S.O, Darya Gan...",Road,Cricket Ground,History Museum,Soccer Stadium,Women's Store
2,"A.K.Market S.O, Multani Dhanda S.O, Pahar Ganj...",Hotel,Dhaba,Women's Store,Food Court,Department Store
3,"Abul Fazal Enclave-I S.O, Jamia Nagar S.O, New...",Pizza Place,Lounge,Soccer Field,Restaurant,Deli / Bodega
4,"Air Force Station Tugalkabad S.O, BSF Camp Tig...",Indian Restaurant,Diner,Women's Store,Food Court,Department Store
...,...,...,...,...,...,...
73,Rashtrapati Bhawan S.O,Noodle House,Japanese Restaurant,Chinese Restaurant,Eastern European Restaurant,Food
74,Rohini Sector 15 S.O,Pizza Place,Burger Joint,Dessert Shop,Café,Electronics Store
75,Sangam Vihar S.O,Athletics & Sports,Women's Store,Food Stand,Dessert Shop,Dhaba
76,Union Public Service Commission S.O,Breakfast Spot,Nightclub,Bus Station,Café,Restaurant


In [43]:
#Run k-means to cluster the neighborhood into 5 clusters.


# set number of clusters
kclusters = 5

delhi_grouped_clustering = delhi_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(delhi_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 0, 1, 1, 1, 1, 1], dtype=int32)

In [44]:
# add clustering labels
delhi_neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

delhi_merged = df_delhi

delhi_merged = delhi_merged.join(delhi_neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
delhi_merged=delhi_merged.dropna()
delhi_merged


Unnamed: 0,pincode,Neighborhood,regionname,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,110051,"Azad Nagar S.O (East Delhi), Govind Pura S.O, ...",Delhi,28.636566,77.296432,1.0,Pizza Place,Arts & Entertainment,Grocery Store,Shop & Service,Dessert Shop
2,110032,"Babarpur S.O (North East Delhi), Balbir Nagar ...",Delhi,28.676474,77.285675,1.0,ATM,Vegetarian / Vegan Restaurant,Gym / Fitness Center,Light Rail Station,Eastern European Restaurant
3,110090,"Badarpur Khadar B.O, Karawal Nagar S.O, Shahee...",Delhi,28.493170,77.303024,1.0,IT Services,Indian Restaurant,Train Station,Art Gallery,Asian Restaurant
4,110053,"Bhajan Pura S.O, Brahampuri S.O, Garhi Mandu B...",Delhi,28.693288,77.270634,2.0,ATM,Wine Bar,Department Store,Dessert Shop,Dhaba
5,110091,"Chilla B.O, Himmatpuri S.O, Kalyanpuri S.O, Ka...",Delhi,28.613859,77.289806,1.0,Food Truck,Indian Restaurant,Market,Moving Target,Women's Store
...,...,...,...,...,...,...,...,...,...,...,...
90,110027,"Janta Market S.O, Rajouri Garden J-6 S.O, Rajo...",Delhi,28.644449,77.116296,1.0,BBQ Joint,Cosmetics Shop,Pub,Café,Snack Place
91,110063,"Jawala Heri B.O, Madipur Slum Quarter S.O, Mad...",Delhi,28.671194,77.106597,1.0,Indian Restaurant,ATM,Sandwich Place,Market,Food & Drink Shop
92,110087,"Jwala Puri S.O, Sunder Vihar S.O",Delhi,28.663571,77.089019,1.0,Hotel,Indian Restaurant,Food Truck,Coffee Shop,Fast Food Restaurant
93,110028,"Kair B.O, Khaira B.O, Naraina Industrial Estat...",Delhi,28.629716,77.142217,1.0,Pizza Place,Café,Indian Restaurant,Sandwich Place,Fast Food Restaurant


In [45]:
address = 'Delhi, India'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Delhi are {}, {}.'.format(latitude, longitude))


The geograpical coordinate of Delhi are 28.6517178, 77.2219388.


In [46]:
# create map for delhi
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(delhi_merged['latitude'], delhi_merged['longitude'], delhi_merged['Neighborhood'], delhi_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [47]:
#Cluster label 0
delhi_merged.loc[delhi_merged['Cluster Labels'] == 0, delhi_merged.columns[[1] + list(range(5, delhi_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
44,"Air Force Station Tugalkabad S.O, BSF Camp Tig...",0.0,Indian Restaurant,Diner,Women's Store,Food Court,Department Store
70,"Kidwai Nagar East S.O, Kidwai Nagar West S.O, ...",0.0,Indian Restaurant,Fast Food Restaurant,Women's Store,Restaurant,Market
85,"Dabri B.O, Indira Park B.O, Nasirpur B.O, Pala...",0.0,Indian Restaurant,Cosmetics Shop,Bar,Women's Store,Electronics Store
94,"Nangal Raya S.O, Sagarpur S.O",0.0,Indian Restaurant,Women's Store,Food Court,Department Store,Dessert Shop


In [48]:
#Cluster label 1
delhi_merged.loc[delhi_merged['Cluster Labels'] == 1, delhi_merged.columns[[1] + list(range(5, delhi_merged.shape[1]))]]



Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,"Azad Nagar S.O (East Delhi), Govind Pura S.O, ...",1.0,Pizza Place,Arts & Entertainment,Grocery Store,Shop & Service,Dessert Shop
2,"Babarpur S.O (North East Delhi), Balbir Nagar ...",1.0,ATM,Vegetarian / Vegan Restaurant,Gym / Fitness Center,Light Rail Station,Eastern European Restaurant
3,"Badarpur Khadar B.O, Karawal Nagar S.O, Shahee...",1.0,IT Services,Indian Restaurant,Train Station,Art Gallery,Asian Restaurant
5,"Chilla B.O, Himmatpuri S.O, Kalyanpuri S.O, Ka...",1.0,Food Truck,Indian Restaurant,Market,Moving Target,Women's Store
7,"Dilshad Garden S.O, G.T.B. Hospital S.O, Jhilm...",1.0,Metro Station,Indian Restaurant,Business Service,Flower Shop,Electronics Store
...,...,...,...,...,...,...,...
89,"Hari Nagar BE Block S.O, Hari Nagar Dadb Block...",1.0,Restaurant,Arcade,Garden,Women's Store,Flower Shop
90,"Janta Market S.O, Rajouri Garden J-6 S.O, Rajo...",1.0,BBQ Joint,Cosmetics Shop,Pub,Café,Snack Place
91,"Jawala Heri B.O, Madipur Slum Quarter S.O, Mad...",1.0,Indian Restaurant,ATM,Sandwich Place,Market,Food & Drink Shop
92,"Jwala Puri S.O, Sunder Vihar S.O",1.0,Hotel,Indian Restaurant,Food Truck,Coffee Shop,Fast Food Restaurant


In [49]:
#Cluster label 2
delhi_merged.loc[delhi_merged['Cluster Labels'] == 2, delhi_merged.columns[[1] + list(range(5, delhi_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
4,"Bhajan Pura S.O, Brahampuri S.O, Garhi Mandu B...",2.0,ATM,Wine Bar,Department Store,Dessert Shop,Dhaba
6,"Dayalpur B.O, Gokal Puri S.O, Johripur B.O, Kh...",2.0,ATM,Bus Station,Deli / Bodega,Dessert Shop,Dhaba
10,"Harsh Vihar S.O, Loni Road Housing Complex S.O...",2.0,ATM,Smoke Shop,Cricket Ground,Department Store,Dessert Shop
16,"Ashok Vihar H.O, Nimri S.O, Satyawati Nagar S....",2.0,ATM,Indian Restaurant,Market,Electronics Store,Food & Drink Shop
72,"R K Puram (Main) S.O, R K Puram West S.O",2.0,ATM,Home Service,Multiplex,Food & Drink Shop,Department Store
78,"Badusarai B.O, Chhawla S.O, DC Goyla B.O, Kang...",2.0,ATM,Wine Bar,Department Store,Dessert Shop,Dhaba
83,"D. K. Mohan Garden S.O, Hastal Village B.O, Je...",2.0,ATM,Train Station,Food Court,Department Store,Dessert Shop


In [50]:
#Cluster label 3
delhi_merged.loc[delhi_merged['Cluster Labels'] == 3, delhi_merged.columns[[1] + list(range(5, delhi_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
49,"Badarpur S.O (South Delhi), BTPS S.O (South De...",3.0,Bus Station,Women's Store,Deli / Bodega,Dessert Shop,Dhaba


In [51]:
#Cluster label 4
delhi_merged.loc[delhi_merged['Cluster Labels'] == 4, delhi_merged.columns[[1] + list(range(5, delhi_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
30,"New Multan Nagar B.O, Shakur Basti Depot S.O",4.0,Train Station,Women's Store,Food Court,Department Store,Dessert Shop
