In [50]:
#!conda install -c conda-forge folium=0.5.0 --yes

In [51]:

import os
from io import StringIO

import folium
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

print("Imported some libraries.")

Imported some libraries.


Using Beautiful Soup to scrape data about Neighbourhoods in Mumbai, India

In [52]:
# Wikipedia page that lists Mumbai neighbourhoods together with their coordinates
url = "https://en.wikipedia.org/wiki/List_of_neighbourhoods_in_Mumbai#Mumbai_neighbourhood_coordintes"

# Bound the wait and fail loudly on an HTTP error, so that an error page
# is never handed to the HTML parser as if it were the neighbourhood table.
req = requests.get(url, timeout=30)
req.raise_for_status()

In [53]:
# Parse the downloaded page with Python's built-in HTML parser
page_html = req.text
soup = BeautifulSoup(page_html, features="html.parser")

# Sanity check: show the page's <title> element
print(soup.title)

<title>List of neighbourhoods in Mumbai - Wikipedia</title>


In [54]:
# Read the first <table> on the page into a dataframe
tables = soup.find_all('table')
if not tables:
    # Give a readable error instead of an IndexError on tables[0]
    raise ValueError("No <table> element found on the page; its layout may have changed.")
table = tables[0]

# Wrap the markup in StringIO: passing literal HTML to read_html is deprecated in recent pandas
lst = pd.read_html(StringIO(str(table)))

# read_html already returns DataFrames, so no re-wrapping is needed
df_mum = lst[0]

In [55]:
# Preview the first rows of the scraped neighbourhood table
df_mum.head()

Unnamed: 0,Area,Location,Latitude,Longitude
0,Amboli,"Andheri,Western Suburbs",19.1293,72.8434
1,"Chakala, Andheri",Western Suburbs,19.111388,72.860833
2,D.N. Nagar,"Andheri,Western Suburbs",19.124085,72.831373
3,Four Bungalows,"Andheri,Western Suburbs",19.124714,72.82721
4,Lokhandwala,"Andheri,Western Suburbs",19.130815,72.82927


In [56]:
# (rows, columns) of the neighbourhood table
df_mum.shape

(93, 4)

Visualising the locations of the areas in Mumbai using Folium

In [57]:
# Reference coordinates for Mumbai (degrees north / degrees east)
latitude, longitude = 19.0760, 72.8777
print('The geographical coordinate of Mumbai, India : {}\u00b0N, {}\u00b0E'.format(latitude, longitude))

The geographical coordinate of Mumbai, India : 19.076°N, 72.8777°E


In [58]:
# Base map centred on the reference coordinates
map_mum = folium.Map(location=[latitude, longitude], zoom_start=11)

# Appearance shared by every neighbourhood marker
area_marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
)

# One circle per neighbourhood; the popup shows the area name
for area_lat, area_lng, area_name in zip(df_mum['Latitude'], df_mum['Longitude'], df_mum['Area']):
    area_popup = folium.Popup('{}'.format(area_name), parse_html=True)
    folium.CircleMarker([area_lat, area_lng], popup=area_popup, **area_marker_style).add_to(map_mum)

map_mum


Using the Foursquare API to query venues around each neighbourhood

In [61]:
# Foursquare API credentials.
# Secrets must not be stored in the notebook: they are read from the environment,
# and a missing variable raises a KeyError naming it.
# NOTE(review): the key pair that used to be hardcoded here has been exposed and should be rotated.
Client_Id = os.environ['FOURSQUARE_CLIENT_ID']

Client_Secret = os.environ['FOURSQUARE_CLIENT_SECRET']

# API version date sent as the `v` query parameter
Version = '20180604'

In [62]:
# Search radius of 2 km around each area centre, and at most 100 venues per request
radius = 2000
limit = 100

# Endpoint queried once per neighbourhood
explore_endpoint = "https://api.foursquare.com/v2/venues/explore"

# One tuple per venue:
# (area, area latitude, area longitude, venue name, venue latitude, venue longitude, venue category)
venues = []

for lat, long, area in zip(df_mum['Latitude'], df_mum['Longitude'], df_mum['Area']):

    # Let requests build and URL-encode the query string instead of formatting it by hand
    query = {
        'client_id': Client_Id,
        'client_secret': Client_Secret,
        'v': Version,
        'll': '{},{}'.format(lat, long),
        'radius': radius,
        'limit': limit,
    }

    # Bound the wait and stop on an HTTP error (bad credentials, exhausted quota, ...)
    # rather than failing later with an unrelated KeyError.
    response = requests.get(explore_endpoint, params=query, timeout=30)
    response.raise_for_status()
    getresults = response.json()

    # An area can come back without any recommendation group; treat that as "no venues"
    groups = getresults.get('response', {}).get('groups', [])
    results = groups[0]['items'] if groups else []

    # Keep the name, the coordinates and the first listed category of every venue
    for venue in results:
        details = venue['venue']
        categories = details.get('categories') or []
        if not categories:
            # A venue without a category cannot take part in the category analysis
            continue
        venues.append((
            area,
            lat,
            long,
            details['name'],
            details['location']['lat'],
            details['location']['lng'],
            categories[0]['name']))
    

In [63]:
# Turn the collected venue tuples into a dataframe.
# Passing the names to the constructor also works when no venue was collected,
# whereas assigning `.columns` afterwards raises on an empty frame.
venue_columns = ['Area', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
df_venues = pd.DataFrame(venues, columns=venue_columns)
df_venues.head()

Unnamed: 0,Area,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Amboli,19.1293,72.8434,Shawarma Factory,19.124591,72.840398,Falafel Restaurant
1,Amboli,19.1293,72.8434,Merwans Cake shop,19.1193,72.845418,Bakery
2,Amboli,19.1293,72.8434,Jaffer Bhai's Delhi Darbar,19.137714,72.845909,Mughlai Restaurant
3,Amboli,19.1293,72.8434,Hard Rock Cafe Andheri,19.135995,72.835335,American Restaurant
4,Amboli,19.1293,72.8434,"5 Spice , Bandra",19.130421,72.847206,Chinese Restaurant


In [64]:
# How many distinct venue categories were returned
distinct_categories = pd.unique(df_venues['VenueCategory'])
print('There are {} uniques categories.'.format(len(distinct_categories)))

There are 221 uniques categories.


In [65]:
# Print every distinct venue category found across all areas
print(df_venues['VenueCategory'].unique())

['Falafel Restaurant' 'Bakery' 'Mughlai Restaurant' 'American Restaurant'
 'Chinese Restaurant' 'Pizza Place' 'Brewery' 'Pub' 'Multiplex'
 'Ice Cream Shop' 'Café' 'Mediterranean Restaurant' 'Indian Restaurant'
 'BBQ Joint' 'Sandwich Place' 'Lounge' 'Diner' 'Coffee Shop' 'Juice Bar'
 'Spa' "Women's Store" 'Jewelry Store' 'Fast Food Restaurant' 'Gym'
 'Comfort Food Restaurant' 'Bar' 'Movie Theater' 'Dessert Shop'
 'Residential Building (Apartment / Condo)' 'Italian Restaurant'
 'Vegetarian / Vegan Restaurant' 'Deli / Bodega' 'Asian Restaurant'
 'Seafood Restaurant' 'Snack Place' 'Fish Market' 'Clothing Store' 'Hotel'
 'Camera Store' 'Shopping Mall' 'Bus Station' 'Department Store' 'Market'
 'Boutique' 'Light Rail Station' 'Restaurant' 'Maharashtrian Restaurant'
 'Food Truck' 'Airport Service' 'Nightclub' 'Cocktail Bar' 'College Gym'
 'Tea Room' 'Martial Arts Dojo' 'Pool' 'Resort' 'Hotel Bar' 'Beach'
 'Donut Shop' 'South Indian Restaurant' 'Liquor Store'
 'Gym / Fitness Center' 'Recreatio

In [66]:
# Check that "Italian Restaurant" occurs among the venue categories
# before the analysis below filters on that exact label
"Italian Restaurant" in df_venues['VenueCategory'].unique()

True

In [67]:
# Distinct areas for which at least one venue was collected
df_venues['Area'].unique()

array(['Amboli', 'Chakala, Andheri', 'D.N. Nagar', 'Four Bungalows',
       'Lokhandwala', 'Marol', 'Sahar', 'Seven Bungalows', 'Versova',
       'Mira Road', 'Bhayandar', 'Uttan', 'Bandstand Promenade',
       'Kherwadi', 'Pali Hill', 'I.C. Colony', 'Gorai', 'Dahisa',
       'Aarey Milk Colony', 'Bangur Nagar', 'Jogeshwari West', 'Juhu',
       'Charkop', 'Poisar', 'Mahavir Nagar', 'Thakur village',
       'Pali Naka', 'Khar Danda', 'Dindoshi', 'Sunder Nagar', 'Kalina',
       'Naigaon', 'Nalasopara', 'Virar', 'Irla', 'Vile Parle', 'Bhandup',
       'Amrut Nagar', 'Asalfa', 'Pant Nagar', 'Kanjurmarg', 'Nehru Nagar',
       'Nahur', 'Chandivali', 'Hiranandani Gardens',
       'Indian Institute of Technology Bombay\xa0campus', 'Vidyavihar',
       'Vikhroli', 'Chembur', 'Deonar', 'Mankhurd', 'Mahul', 'Agripada',
       'Altamount Road', 'Bhuleshwar', 'Breach Candy', 'Carmichael Road',
       'Cavel', 'Churchgate', 'Cotton Green', 'Cuffe Parade',
       'Cumbala Hill', 'Currey Road', 'Dh

In [68]:
# Number of venue rows categorised as Italian restaurants
italian_row_count = (df_venues['VenueCategory'] == 'Italian Restaurant').sum()
print(italian_row_count)

141


In [69]:
# Every venue row whose category is "Italian Restaurant".
# A venue is listed once for each area whose search returned it.
df_venues[df_venues['VenueCategory']=='Italian Restaurant']

Unnamed: 0,Area,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
51,Amboli,19.129300,72.843400,Apicius,19.136980,72.833115,Italian Restaurant
56,Amboli,19.129300,72.843400,Indigo Deli,19.136430,72.827553,Italian Restaurant
111,"Chakala, Andheri",19.111388,72.860833,Le Cirque Signature,19.109986,72.873555,Italian Restaurant
116,"Chakala, Andheri",19.111388,72.860833,Stax,19.103659,72.871614,Italian Restaurant
141,"Chakala, Andheri",19.111388,72.860833,Sun Moon & Potatoes,19.124097,72.869883,Italian Restaurant
156,"Chakala, Andheri",19.111388,72.860833,Romano's,19.103115,72.877408,Italian Restaurant
248,D.N. Nagar,19.124085,72.831373,Indigo Deli,19.136430,72.827553,Italian Restaurant
250,D.N. Nagar,19.124085,72.831373,Olio,19.108844,72.823929,Italian Restaurant
267,D.N. Nagar,19.124085,72.831373,Levo,19.133069,72.823040,Italian Restaurant
288,D.N. Nagar,19.124085,72.831373,Apicius,19.136980,72.833115,Italian Restaurant


In [70]:
# Keep the Italian-restaurant rows under their own name for the map below
is_italian = df_venues['VenueCategory'] == 'Italian Restaurant'
df_mumItalian = df_venues.loc[is_italian]

In [71]:
# Preview the Italian-restaurant subset
df_mumItalian.head()

Unnamed: 0,Area,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
51,Amboli,19.1293,72.8434,Apicius,19.13698,72.833115,Italian Restaurant
56,Amboli,19.1293,72.8434,Indigo Deli,19.13643,72.827553,Italian Restaurant
111,"Chakala, Andheri",19.111388,72.860833,Le Cirque Signature,19.109986,72.873555,Italian Restaurant
116,"Chakala, Andheri",19.111388,72.860833,Stax,19.103659,72.871614,Italian Restaurant
141,"Chakala, Andheri",19.111388,72.860833,Sun Moon & Potatoes,19.124097,72.869883,Italian Restaurant


Visualising the distribution of Italian restaurants

In [73]:
# Map of the Italian restaurants found in Mumbai
map_mumItalian = folium.Map(location=[latitude, longitude], zoom_start=11)

# The same restaurant is returned for every area whose search radius contains it;
# keep one row per physical venue so that markers are not stacked on top of each other.
italian_venues = df_mumItalian.drop_duplicates(subset=['VenueName', 'VenueLatitude', 'VenueLongitude'])

# Plot each restaurant at its own coordinates. The 'Latitude'/'Longitude' columns hold
# the centre of the area that was searched, not the position of the restaurant.
for lat, lng, name in zip(italian_venues['VenueLatitude'], italian_venues['VenueLongitude'], italian_venues['VenueName']):
    label = '{}'.format(name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#FA8072',
        fill_opacity=0.7).add_to(map_mumItalian)

map_mumItalian

In [74]:
# One indicator column per venue category (no prefix on the column names),
# with the area of each venue appended as the last column
category_dummies = pd.get_dummies(df_venues[['VenueCategory']], prefix="", prefix_sep="")
mum_onehot = category_dummies.assign(Area=df_venues['Area'])

# Rotate the column order so that the last column comes first
column_order = mum_onehot.columns.tolist()
mum_onehot = mum_onehot[column_order[-1:] + column_order[:-1]]

mum_onehot.head(10)

Unnamed: 0,Area,Afghan Restaurant,Airport,Airport Lounge,Airport Service,American Restaurant,Antique Shop,Arcade,Art Gallery,Arts & Crafts Store,...,Track,Track Stadium,Train Station,Vegetarian / Vegan Restaurant,Water Park,Whisky Bar,Wine Bar,Women's Store,Yoga Studio,Zoo
0,Amboli,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Amboli,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Amboli,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Amboli,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Amboli,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Amboli,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Amboli,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Amboli,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Amboli,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Amboli,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [75]:
# Average the indicators per area: each value becomes the share of that
# area's venues that fall in the given category
mum_group = mum_onehot.groupby("Area", as_index=False).mean()
mum_group.shape

(92, 222)

In [76]:
# Only the Italian-restaurant share is needed from here on
mum_group = mum_group.loc[:, ["Area", "Italian Restaurant"]]

# Keep the areas that have at least one Italian restaurant among their venues
has_italian = mum_group['Italian Restaurant'].gt(0)
mum_Ita = mum_group[has_italian]
mum_Ita.head()

Unnamed: 0,Area,Italian Restaurant
1,Agripada,0.03
2,Altamount Road,0.02
3,Amboli,0.022222
6,Ballard Estate,0.010753
7,Bandstand Promenade,0.012987


k-means Clustering

In [77]:
# set number of clusters
k = 7

# working copy that will receive the cluster labels
mumIta_clusters = mum_Ita.copy()

# Feature matrix: everything except the area name.
# Keyword form is required: pandas 2.x rejects the positional axis in drop(['Area'], 1).
mumIta_kmeans = mum_Ita.drop(columns=['Area'])

# run k-means clustering
# n_init is pinned so the labels do not depend on the scikit-learn version's default.
kmeans = KMeans(n_clusters=k, random_state=0, n_init=10).fit(mumIta_kmeans)

# check cluster labels generated for the first rows of the dataframe
kmeans.labels_[0:10]

array([4, 1, 1, 3, 3, 1, 1, 4, 4, 1], dtype=int32)

In [78]:
# Preview the working copy before the cluster labels are attached
mumIta_clusters.head()

Unnamed: 0,Area,Italian Restaurant
1,Agripada,0.03
2,Altamount Road,0.02
3,Amboli,0.022222
6,Ballard Estate,0.010753
7,Bandstand Promenade,0.012987


In [79]:
# Attach the k-means label of each row as a new 'Cluster' column
mumIta_clusters = mumIta_clusters.assign(Cluster=kmeans.labels_)
mumIta_clusters.head(10)

Unnamed: 0,Area,Italian Restaurant,Cluster
1,Agripada,0.03,4
2,Altamount Road,0.02,1
3,Amboli,0.022222,1
6,Ballard Estate,0.010753,3
7,Bandstand Promenade,0.012987,3
8,Bangur Nagar,0.02,1
11,Bhuleshwar,0.02,1
12,Breach Candy,0.03,4
14,Carmichael Road,0.03,4
15,Cavel,0.02,1


In [80]:
# Bring back the location and coordinates of each area by looking it up in the neighbourhood table
area_lookup = df_mum.set_index("Area")
df_mumIta_clusters = mumIta_clusters.join(area_lookup, on="Area")
df_mumIta_clusters.shape

(63, 6)

In [81]:
# Preview the clustered areas together with their location and coordinates
df_mumIta_clusters.head(10)

Unnamed: 0,Area,Italian Restaurant,Cluster,Location,Latitude,Longitude
1,Agripada,0.03,4,South Mumbai,18.9777,72.8273
2,Altamount Road,0.02,1,South Mumbai,18.9681,72.8095
3,Amboli,0.022222,1,"Andheri,Western Suburbs",19.1293,72.8434
6,Ballard Estate,0.010753,3,"Fort,South Mumbai",18.95,72.84
7,Bandstand Promenade,0.012987,3,"Bandra,Western Suburbs",19.042718,72.819132
8,Bangur Nagar,0.02,1,"Goregaon,Western Suburbs",19.167362,72.832252
11,Bhuleshwar,0.02,1,South Mumbai,18.95,72.83
12,Breach Candy,0.03,4,South Mumbai,18.967,72.805
14,Carmichael Road,0.03,4,South Mumbai,18.9722,72.8113
15,Cavel,0.02,1,South Mumbai,18.9474,72.8272


In [82]:
# Order the rows by cluster label so that each cluster is listed contiguously
df_mumIta_clusters = df_mumIta_clusters.sort_values("Cluster")

In [83]:
# Display the clustered areas, now ordered by cluster label
df_mumIta_clusters

Unnamed: 0,Area,Italian Restaurant,Cluster,Location,Latitude,Longitude
91,Worli,0.040000,0,South Mumbai,19.000000,72.815000
27,D.N. Nagar,0.040000,0,"Andheri,Western Suburbs",19.124085,72.831373
28,Dagdi Chawl,0.040000,0,"Byculla,South Mumbai",18.977129,72.829131
62,Marol,0.040000,0,"Andheri,Western Suburbs",19.119219,72.882743
16,"Chakala, Andheri",0.040000,0,Western Suburbs,19.111388,72.860833
78,Sahar,0.040000,0,"Andheri,Western Suburbs",19.098889,72.867222
79,Seven Bungalows,0.040000,0,"Andheri,Western Suburbs",19.129052,72.817018
76,Poisar,0.038961,0,"Kandivali West,Western Suburbs",19.204511,72.837639
74,Pant Nagar,0.016667,1,"Ghatkopar,Eastern Suburbs",19.080000,72.910000
72,Pali Hill,0.020000,1,"Bandra,Western Suburbs",19.068000,72.826000


Visualising the different clusters

In [84]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map for Italian restaurants
map_mumItalianRes = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters:
# k evenly spaced colours from the rainbow colormap, one per cluster label 0 .. k-1
colors_array = cm.rainbow(np.linspace(0, 1, k))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add one marker per area, coloured by its cluster
for lat, lon, poi, cluster in zip(df_mumIta_clusters['Latitude'], df_mumIta_clusters['Longitude'], df_mumIta_clusters['Area'], df_mumIta_clusters['Cluster']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    # Labels start at 0, so index the palette directly;
    # `cluster - 1` sent cluster 0 to the last colour through a negative index.
    cluster_color = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=10,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_mumItalianRes)

map_mumItalianRes

In [85]:
# Object array with one slot per cluster label; slot n holds the rows of cluster n
res_list = np.empty(k, dtype=object)
for cluster_id in range(k):
    in_cluster = df_mumIta_clusters['Cluster'] == cluster_id
    res_list[cluster_id] = df_mumIta_clusters.loc[in_cluster]

In [87]:
# Areas assigned to cluster 0
res_list[0]

Unnamed: 0,Area,Italian Restaurant,Cluster,Location,Latitude,Longitude
91,Worli,0.04,0,South Mumbai,19.0,72.815
27,D.N. Nagar,0.04,0,"Andheri,Western Suburbs",19.124085,72.831373
28,Dagdi Chawl,0.04,0,"Byculla,South Mumbai",18.977129,72.829131
62,Marol,0.04,0,"Andheri,Western Suburbs",19.119219,72.882743
16,"Chakala, Andheri",0.04,0,Western Suburbs,19.111388,72.860833
78,Sahar,0.04,0,"Andheri,Western Suburbs",19.098889,72.867222
79,Seven Bungalows,0.04,0,"Andheri,Western Suburbs",19.129052,72.817018
76,Poisar,0.038961,0,"Kandivali West,Western Suburbs",19.204511,72.837639


In [88]:
# Areas assigned to cluster 1
res_list[1]

Unnamed: 0,Area,Italian Restaurant,Cluster,Location,Latitude,Longitude
74,Pant Nagar,0.016667,1,"Ghatkopar,Eastern Suburbs",19.08,72.91
72,Pali Hill,0.02,1,"Bandra,Western Suburbs",19.068,72.826
69,Nariman Point,0.02,1,South Mumbai,18.926,72.823
65,Mumbai Central,0.02,1,South Mumbai,18.9697,72.8194
77,Prabhadevi,0.02,1,South Mumbai,19.0166,72.8295
61,Marine Lines,0.02,1,South Mumbai,18.9447,72.8244
60,Marine Drive,0.02,1,South Mumbai,18.944,72.823
58,Malabar Hill,0.022727,1,South Mumbai,18.95,72.795
49,Kemps Corner,0.02,1,South Mumbai,18.9629,72.8054
40,Hiranandani Gardens,0.024096,1,"Powai,Eastern Suburbs",19.118986,72.911767


In [89]:
# Areas assigned to cluster 2
res_list[2]

Unnamed: 0,Area,Italian Restaurant,Cluster,Location,Latitude,Longitude
70,Navy Nagar,0.1,2,"Colaba,South Mumbai",18.9012,72.8101


In [90]:
# Areas assigned to cluster 3
res_list[3]

Unnamed: 0,Area,Italian Restaurant,Cluster,Location,Latitude,Longitude
45,Juhu,0.01,3,Western Suburbs,19.1,72.83
43,Irla,0.01,3,"Vile Parle,Western Suburbs",19.108056,72.838056
6,Ballard Estate,0.010753,3,"Fort,South Mumbai",18.95,72.84
7,Bandstand Promenade,0.012987,3,"Bandra,Western Suburbs",19.042718,72.819132
55,Mahavir Nagar,0.012821,3,"Kandivali West,Western Suburbs",19.211319,72.842737
56,Mahim,0.01,3,South Mumbai,19.035,72.84
41,I.C. Colony,0.0125,3,"Borivali (West),Western Suburbs",19.247039,72.84983
86,Vidyavihar,0.01,3,Eastern Suburbs,19.08,72.896
63,Matunga,0.01,3,South Mumbai,19.01798,72.844763
73,Pali Naka,0.01,3,"Khar,Western Suburbs",19.062742,72.829396


In [91]:
# Areas assigned to cluster 4
res_list[4]

Unnamed: 0,Area,Italian Restaurant,Cluster,Location,Latitude,Longitude
81,Sunder Nagar,0.03,4,"Malad,Western Suburbs",19.175,72.842
75,Parel,0.03,4,South Mumbai,18.99,72.84
83,Thane,0.03125,4,Mumbai,19.2,72.97
85,Versova,0.03,4,"Andheri,Western Suburbs",19.12,72.82
33,Dhobitalao,0.03,4,South Mumbai,18.9433,72.8286
14,Carmichael Road,0.03,4,South Mumbai,18.9722,72.8113
17,Chandivali,0.03,4,"Powai,Eastern Suburbs",19.11,72.9
54,Mahalaxmi,0.026316,4,South Mumbai,18.983333,72.8
53,Lower Parel,0.03,4,South Mumbai,18.995278,72.83
52,Lokhandwala,0.03,4,"Andheri,Western Suburbs",19.130815,72.82927


In [92]:
# Areas assigned to cluster 5
res_list[5]

Unnamed: 0,Area,Italian Restaurant,Cluster,Location,Latitude,Longitude
24,Cuffe Parade,0.074074,5,South Mumbai,18.91,72.81


In [93]:
# Areas assigned to cluster 6
res_list[6]

Unnamed: 0,Area,Italian Restaurant,Cluster,Location,Latitude,Longitude
47,Kalina,0.048193,6,"Sanctacruz,Western Suburbs",19.081667,72.841389
42,Indian Institute of Technology Bombay campus,0.046154,6,"Powai,Eastern Suburbs",19.133636,72.915358
