**Let's import relevant libraries**

In [52]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium 
import json
import requests
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

print('Libraries imported.')

Libraries imported.


In [53]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

**I've prepared an excel file for the data and then used pandas library to read the data.**

**Website link: https://mumbai7.com/postal-codes-in-mumbai/**

In [54]:
df=pd.read_excel('Mumbai_Data.xlsx')
df.head()

Unnamed: 0,Neighborhood,PostalCode
0,August Kranti Marg,400036
1,Aarey Milk Colony,400065
2,Andheri (East),400069
3,Andheri (West),400058
4,Antop Hill,400037


In [55]:
df.describe(include='all')

Unnamed: 0,Neighborhood,PostalCode
count,106,106.0
unique,106,
top,Malabar Hill,
freq,1,
mean,,400122.830189
std,,200.502629
min,,400001.0
25%,,400027.25
50%,,400061.5
75%,,400088.75


In [56]:
df.shape

(106, 2)

In [57]:
df.isna().sum()

Neighborhood    0
PostalCode      0
dtype: int64

In [58]:
print('Unique entries in PostalCode columns are', df.PostalCode.nunique())
print('Unique entries in Neighborhood columns are',df.Neighborhood.nunique())

Unique entries in PostalCode columns are 106
Unique entries in Neighborhood columns are 106


**Let's now define a function to use geocoder and get us desired co-ordinates.**

In [59]:
import geocoder

In [60]:
def get_coordinates(PostalCode):
    coordinates = None
    while(coordinates is None):
        g = geocoder.arcgis('{}, Mumbai, Maharastra'.format(PostalCode))
        coordinates = g.latlng
    return coordinates
    
get_coordinates('400069')

[19.11929808000008, 72.85110000000003]

**Let's now use the above function to get co-ordinates for all the enries in our dataframe.**

In [61]:
postal_codes = df.PostalCode    
coords = [ get_coordinates(i) for i in postal_codes.tolist() ]

**Let's finally concatinate these new columns in our datframe**

In [62]:
df_latlng = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
df['Latitude'] = df_latlng['Latitude']
df['Longitude'] = df_latlng['Longitude']

In [63]:
df.head(10)

Unnamed: 0,Neighborhood,PostalCode,Latitude,Longitude
0,August Kranti Marg,400036,18.964005,72.807983
1,Aarey Milk Colony,400065,19.161085,72.884394
2,Andheri (East),400069,19.119298,72.8511
3,Andheri (West),400058,19.122935,72.84061
4,Antop Hill,400037,19.020313,72.86828
5,Anu Shakti Nagar,400094,19.033945,72.9252
6,B A R C,400085,19.016345,72.926988
7,Ballard Estate,400038,18.94017,72.83483
8,Bandra (East),400051,19.060715,72.854564
9,Bandra (West),400050,19.052259,72.829405


In [64]:
df.tail(10)

Unnamed: 0,Neighborhood,PostalCode,Latitude,Longitude
96,Ghansoli,400701,19.131355,73.005961
97,JNPT Town Ship,400707,18.94017,72.83483
98,Konkan Bhawan,400614,19.02344,73.040135
99,Krishi Utpanna Bazar,400705,19.074753,73.021375
100,Mumbra,400612,18.94017,72.83483
101,N A D Karanja,400704,18.94017,72.83483
102,Nerul Mode,400706,19.029846,73.02676
103,Turbhe,400613,18.94017,72.83483
104,Uran,400702,18.94017,72.83483
105,Vashi,400703,19.077505,72.993795


In [65]:
df.shape

(106, 4)

In [66]:
address = 'Mumbai, India'

geolocator = Nominatim(user_agent="mumbai_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Mumbai City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Mumbai City are 19.0759899, 72.8773928.


In [242]:
map_mumbai = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_mumbai)  
    
map_mumbai

Next, we are going to start utilizing the Foursquare API to explore the neighborhoods and segment them.


#### Define Foursquare Credentials and Version


In [83]:
CLIENT_ID = 'M3ULYLY1P5ZFJEZT13YE34HAQMNXKAOROWM3EXXCBSERZTCP' 
CLIENT_SECRET = '301CTKGQJ23NU3YIYYOYVE11JBQKGBECKQVO3ICX0B2RTE1W' 
VERSION = '20201201'
LIMIT = 10000 

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: M3ULYLY1P5ZFJEZT13YE34HAQMNXKAOROWM3EXXCBSERZTCP
CLIENT_SECRET:301CTKGQJ23NU3YIYYOYVE11JBQKGBECKQVO3ICX0B2RTE1W


#### Let's create a function to get near by venues for all neighborhoods of Mumbai


In [93]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng,
            radius, 
            LIMIT)
            
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue Name', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

#### Now write the code to run the above function on each neighborhood and create a new dataframe called _mumbai_venues_.


In [94]:
mumbai_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

August Kranti Marg
Aarey Milk Colony
Andheri (East)
Andheri (West)
Antop Hill
Anu Shakti Nagar
B A R C
Ballard Estate
Bandra (East)
Bandra (West)
Bangur Nagar
Barve Nagar
Bhandup
Bhandup (East)
Bhavani Shankar Road
Mumbai Central
Mumbai G P O
Borivli (East)
Borivli (West)
Borivli HO
Chakala MIDC
Chembur
Chinch Bunder
Colaba
Council Hall
Cumballa Hill
Dadar
Dahisar
Delisle Road
Dharavi
F C I Mumbai
Ghatkopar (West)
Girgaon
Goregaon (East)
Goregaon (West)
Grant Road
Hutatma Chowk
I I T Mumbai
J B Nagar
Jacob Circle
Jogeshwari (East)
Jogeshwari (West)
Juhu
Kalbadevi
Kandivli (East)
Kandivli (West)
Khar
Kharodi
Kurla
Mahim
Malabar Hill
Malad (East)
Malad (West)
Mandpeshwar
Mandvi
Mantralaya
Marine Lines
Matunga
Mazgaon
Motilal Nagar
Mulund (East)
Mulund (West)
Mulund Colony
N I T I E
Nariman Point
Nehru Nagar
Pant Nagar
Parel
Poonam Ngr Jogeshwari (E)
Prabhadevi
Rajawadi
Rajbhavan
SEEPZ
Sahar
Saki Naka
Santacruz (East)
Santacruz (West)
Santacruz P&T Colony
Sewri
Shivaji Nagar (Kurla)
Sion


#### Let's check the size of the resulting dataframe


In [95]:
print(mumbai_venues.shape)
mumbai_venues.head()

(3973, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
0,August Kranti Marg,18.964005,72.807983,Doolally Taproom,18.963809,72.807695,Brewery
1,August Kranti Marg,18.964005,72.807983,symphony,18.963347,72.810251,Restaurant
2,August Kranti Marg,18.964005,72.807983,Crossword,18.963474,72.807773,Bookstore
3,August Kranti Marg,18.964005,72.807983,Swati Snacks,18.966442,72.813531,Indian Restaurant
4,August Kranti Marg,18.964005,72.807983,Francesco's Pizzeria,18.96478,72.80463,Pizza Place


Let's check how many venues were returned for each neighborhood


In [96]:
mumbai_venues.groupby('Neighborhood').count().sort_values(by='Venue Name', ascending=False)

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Hutatma Chowk,100,100,100,100,100,100
Khar,100,100,100,100,100,100
Delisle Road,100,100,100,100,100,100
Mantralaya,100,100,100,100,100,100
Marine Lines,100,100,100,100,100,100
Nariman Point,100,100,100,100,100,100
Bandra (West),95,95,95,95,95,95
Grant Road,80,80,80,80,80,80
Juhu,80,80,80,80,80,80
Malad (West),80,80,80,80,80,80


#### Let's find out how many unique categories can be curated from all the returned venues


In [97]:
print('There are {} unique categories.'.format(len(mumbai_venues['Venue Category'].unique())))

There are 238 unique categories.


In [110]:
mumbai_venues['Venue Category'].value_counts()

Indian Restaurant                           573
Café                                        232
Coffee Shop                                 164
Fast Food Restaurant                        158
Chinese Restaurant                          129
Bakery                                      127
Bar                                         113
Ice Cream Shop                               95
Pizza Place                                  92
Restaurant                                   91
Hotel                                        91
Seafood Restaurant                           74
Dessert Shop                                 70
Lounge                                       65
Train Station                                57
Italian Restaurant                           57
Sandwich Place                               53
Vegetarian / Vegan Restaurant                52
Snack Place                                  52
Clothing Store                               51
Multiplex                               

In [192]:
mumbai_venues['Venue Category'].unique()

array(['Brewery', 'Restaurant', 'Bookstore', 'Indian Restaurant',
       'Pizza Place', 'Bakery', 'History Museum', 'Fast Food Restaurant',
       'Hotel', 'Bar', 'Donut Shop', 'Sandwich Place',
       'Salon / Barbershop', 'Japanese Restaurant', 'Coffee Shop',
       'Snack Place', 'Italian Restaurant', 'Dessert Shop', "Men's Store",
       'Gastropub', 'Other Great Outdoors', 'Theater', 'Café', 'Stadium',
       'Breakfast Spot', 'Gym / Fitness Center', 'Department Store',
       'Concert Hall', 'Park', 'Chinese Restaurant', 'Deli / Bodega',
       'Vegetarian / Vegan Restaurant', 'Clothing Store',
       'Food & Drink Shop', 'Bus Station', 'Smoke Shop', 'Salad Place',
       'Soccer Field', 'Yoga Studio', 'Event Space', 'Farm', 'Resort',
       'Golf Course', 'Camera Store', 'Shopping Mall', 'Food Court',
       'Electronics Store', 'Platform', 'Ice Cream Shop',
       'Seafood Restaurant', 'Fish Market', 'Falafel Restaurant', 'Pub',
       'Athletics & Sports', 'Burger Joint', 'Bow

Now we will filter this dataframe using 2 generalize categories of venues.
    1. Outing and recreational places
    2. Food venues

Then we will see how clustering differs as the parameters we take in consideration differ.
We will also check the effect of imbalanced frequency distribution of venue categories on clustering.

## 1. Fun Category


In [193]:
fun_categories=['Arts & Entertainment','Amphitheater','Aquarium','Amphitheater','Aquarium','Arcade','Art Gallery',
                'Bowling Alley','Casino','Circus','Comedy Club','Concert Hall','Country Dance Club','Disc Golf',
                'Exhibit','General Entertainment','Go Kart Track','Historic Site','Karaoke Box','Laser Tag',
                'Memorial Site','Mini Golf','Movie Theater','Drive-in Theater','Indie Movie Theater','Multiplex',
                'Museum','Art Museum','Erotic Museum','History Museum','Planetarium','Science Museum','Music Venue',
                'Jazz Club','Piano Bar','Rock Club','Pachinko Parlor','Performing Arts Venue','Dance Studio','Indie Theater',
                'Opera House','Theater','Pool Hall','Public Art','Outdoor Sculpture','Street Art','Racecourse','Racetrack',
                'Roller Rink','Salsa Club','Samba School','Stadium','Baseball Stadium','Basketball Stadium','Cricket Ground',
                'Football Stadium','Hockey Arena','Rugby Stadium','Soccer Stadium','Tennis Stadium','Track Stadium',
                'Theme Park','Theme Park Ride / Attraction','Tour Provider','Water Park','Zoo','Zoo Exhibit']

Above are the categories I've snatched out from total 238 unique categories in resultant dataframe of FourSquare request. These categories are what I think should be put under this section.

In [194]:
mumbai_fun_places=mumbai_venues[mumbai_venues['Venue Category'].isin (fun_categories)]
print(mumbai_fun_places.shape)
mumbai_fun_places.head(40)

(220, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
7,August Kranti Marg,18.964005,72.807983,Mani Bhavan Gandhi Sangrahalaya (Gandhi Museum),18.959689,72.811475,History Museum
27,August Kranti Marg,18.964005,72.807983,Sophia Bhabha Hall,18.969819,72.807082,Theater
31,August Kranti Marg,18.964005,72.807983,Sports Complex @ NSCI,18.972295,72.806262,Stadium
37,August Kranti Marg,18.964005,72.807983,Tejpal Hall,18.962769,72.809078,Concert Hall
52,August Kranti Marg,18.964005,72.807983,Bharatiya Vidya Bhavan,18.957283,72.810485,Theater
128,Andheri (West),19.122935,72.84061,Andheri sports complex,19.129168,72.835627,Bowling Alley
143,Antop Hill,19.020313,72.86828,Durgha,19.028652,72.866355,Historic Site
160,Ballard Estate,18.94017,72.83483,Wankhede Stadium,18.938792,72.825944,Cricket Ground
162,Ballard Estate,18.94017,72.83483,Sterling Cineplex,18.938296,72.833104,Multiplex
174,Ballard Estate,18.94017,72.83483,Sachin Tendulkar Stand,18.939601,72.825633,Cricket Ground


In [195]:
print('There are {} unique categories in fun places.'.format(len(mumbai_fun_places['Venue Category'].unique())))

There are 28 unique categories in fun places.


In [196]:
mumbai_fun_places.groupby('Neighborhood').count().sort_values(by='Venue Name', ascending=False)

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Nariman Point,11,11,11,11,11,11
Marine Lines,10,10,10,10,10,10
Worli,8,8,8,8,8,8
Council Hall,7,7,7,7,7,7
N A D Karanja,7,7,7,7,7,7
Mumbra,7,7,7,7,7,7
SEEPZ,7,7,7,7,7,7
Mantralaya,7,7,7,7,7,7
JNPT Town Ship,7,7,7,7,7,7
Turbhe,7,7,7,7,7,7


In [197]:
mumbai_fun_places_onehot = pd.get_dummies(mumbai_fun_places[['Venue Category']], prefix="", prefix_sep="")

mumbai_fun_places_onehot['Neighborhood'] = mumbai_fun_places['Neighborhood'] 

fixed_columns = [mumbai_fun_places_onehot.columns[-1]] + list(mumbai_fun_places_onehot.columns[:-1])
mumbai_fun_places_onehot = mumbai_fun_places_onehot[fixed_columns]

mumbai_fun_places_onehot.head()

Unnamed: 0,Neighborhood,Aquarium,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall,Cricket Ground,Dance Studio,General Entertainment,Historic Site,History Museum,Hockey Arena,Indie Movie Theater,Movie Theater,Multiplex,Music Venue,Opera House,Performing Arts Venue,Planetarium,Racetrack,Soccer Stadium,Stadium,Theater,Theme Park,Theme Park Ride / Attraction,Track Stadium,Water Park,Zoo
7,August Kranti Marg,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
27,August Kranti Marg,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
31,August Kranti Marg,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
37,August Kranti Marg,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
52,August Kranti Marg,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0


And let's examine the new dataframe size.


In [198]:
mumbai_fun_places_onehot.shape

(220, 29)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category


In [199]:
mumbai_fun_places_grouped = mumbai_fun_places_onehot.groupby('Neighborhood').mean().reset_index()
mumbai_fun_places_grouped

Unnamed: 0,Neighborhood,Aquarium,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall,Cricket Ground,Dance Studio,General Entertainment,Historic Site,History Museum,Hockey Arena,Indie Movie Theater,Movie Theater,Multiplex,Music Venue,Opera House,Performing Arts Venue,Planetarium,Racetrack,Soccer Stadium,Stadium,Theater,Theme Park,Theme Park Ride / Attraction,Track Stadium,Water Park,Zoo
0,Andheri (West),0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Antop Hill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,August Kranti Marg,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.4,0.0,0.0,0.0,0.0,0.0
3,Ballard Estate,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.142857,0.142857,0.142857,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Bandra (East),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Bandra (West),0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
6,Belapur,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.142857,0.142857,0.142857,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Bhandup (East),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Bhavani Shankar Road,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0
9,Borivli (East),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Let's confirm the new size


In [200]:
mumbai_fun_places_grouped.shape

(65, 29)

The above number shows that out of 106 different Neighborhoods in Mumbai, only 65 have returned some venues for this category.

#### Let's print each neighborhood along with the top 5 most common venues


In [201]:
num_top_venues = 5

for hood in mumbai_fun_places_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = mumbai_fun_places_grouped[mumbai_fun_places_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Andheri (West)----
           venue  freq
0  Bowling Alley   1.0
1       Aquarium   0.0
2    Music Venue   0.0
3     Water Park   0.0
4  Track Stadium   0.0


----Antop Hill----
           venue  freq
0  Historic Site   1.0
1       Aquarium   0.0
2    Music Venue   0.0
3     Water Park   0.0
4  Track Stadium   0.0


----August Kranti Marg----
            venue  freq
0         Theater   0.4
1    Concert Hall   0.2
2         Stadium   0.2
3  History Museum   0.2
4        Aquarium   0.0


----Ballard Estate----
                 venue  freq
0            Multiplex  0.29
1       Cricket Ground  0.29
2       History Museum  0.14
3         Hockey Arena  0.14
4  Indie Movie Theater  0.14


----Bandra (East)----
                 venue  freq
0  Indie Movie Theater   1.0
1             Aquarium   0.0
2          Music Venue   0.0
3           Water Park   0.0
4        Track Stadium   0.0


----Bandra (West)----
                   venue  freq
0                Theater  0.25
1  Performing Arts Venue

            venue  freq
0         Theater  0.27
1  Cricket Ground  0.18
2   Movie Theater  0.18
3     Art Gallery  0.09
4         Stadium  0.09


----Nerul Mode----
           venue  freq
0    Music Venue   1.0
1       Aquarium   0.0
2         Arcade   0.0
3     Water Park   0.0
4  Track Stadium   0.0


----Poonam Ngr Jogeshwari (E)----
                 venue  freq
0            Multiplex  0.29
1       Cricket Ground  0.29
2       History Museum  0.14
3         Hockey Arena  0.14
4  Indie Movie Theater  0.14


----Prabhadevi----
           venue  freq
0        Theater   0.5
1  Movie Theater   0.5
2       Aquarium   0.0
3    Music Venue   0.0
4     Water Park   0.0


----Rajawadi----
                          venue  freq
0                     Multiplex   1.0
1                        Arcade   0.0
2                    Water Park   0.0
3                 Track Stadium   0.0
4  Theme Park Ride / Attraction   0.0


----SEEPZ----
                 venue  freq
0            Multiplex  0.29
1      

#### Let's put that into a _pandas_ dataframe


First, let's write a function to sort the venues in descending order.


In [202]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.


In [203]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = mumbai_fun_places_grouped['Neighborhood']

for ind in np.arange(mumbai_fun_places_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(mumbai_fun_places_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Andheri (West),Bowling Alley,Zoo,Water Park,Arcade,Art Gallery,Comedy Club,Concert Hall,Cricket Ground,Dance Studio,General Entertainment
1,Antop Hill,Historic Site,Zoo,Water Park,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall,Cricket Ground,Dance Studio
2,August Kranti Marg,Theater,Stadium,Concert Hall,History Museum,Zoo,Indie Movie Theater,Arcade,Art Gallery,Bowling Alley,Comedy Club
3,Ballard Estate,Cricket Ground,Multiplex,Indie Movie Theater,History Museum,Hockey Arena,Zoo,Arcade,Art Gallery,Bowling Alley,Comedy Club
4,Bandra (East),Indie Movie Theater,Zoo,Water Park,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall,Cricket Ground,Dance Studio


<a id='item4'></a>


### Cluster Neighborhoods


Run _k_-means to cluster the neighborhood into 10 clusters.


In [204]:

kclusters = 10

mumbai_fun_places_grouped_clustering = mumbai_fun_places_grouped.drop('Neighborhood', 1)


kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(mumbai_fun_places_grouped_clustering)

kmeans.labels_[0:10] 

array([8, 5, 6, 4, 7, 2, 4, 0, 0, 5])

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.


In [205]:

neighborhoods_venues_sorted.insert(0, 'Cluster-Labels', kmeans.labels_)

mumbai_fun_places_merged = df

mumbai_fun_places_merged = mumbai_fun_places_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

mumbai_fun_places_merged.head() 

Unnamed: 0,Neighborhood,PostalCode,Latitude,Longitude,Cluster-Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,August Kranti Marg,400036,18.964005,72.807983,6.0,Theater,Stadium,Concert Hall,History Museum,Zoo,Indie Movie Theater,Arcade,Art Gallery,Bowling Alley,Comedy Club
1,Aarey Milk Colony,400065,19.161085,72.884394,,,,,,,,,,,
2,Andheri (East),400069,19.119298,72.8511,,,,,,,,,,,
3,Andheri (West),400058,19.122935,72.84061,8.0,Bowling Alley,Zoo,Water Park,Arcade,Art Gallery,Comedy Club,Concert Hall,Cricket Ground,Dance Studio,General Entertainment
4,Antop Hill,400037,19.020313,72.86828,5.0,Historic Site,Zoo,Water Park,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall,Cricket Ground,Dance Studio


We can see below that 41 Neighborhood which had 0 venues in this category has a null value in Cluster column.
Let's replace null with a value to treat them as a cluster later on.

In [206]:
mumbai_fun_places_merged.isna().sum()

Neighborhood               0
PostalCode                 0
Latitude                   0
Longitude                  0
Cluster-Labels            41
1st Most Common Venue     41
2nd Most Common Venue     41
3rd Most Common Venue     41
4th Most Common Venue     41
5th Most Common Venue     41
6th Most Common Venue     41
7th Most Common Venue     41
8th Most Common Venue     41
9th Most Common Venue     41
10th Most Common Venue    41
dtype: int64

In [210]:
mumbai_fun_places_merged['Cluster-Labels'].replace(np.NaN, 11.0, inplace=True)

In [243]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(mumbai_fun_places_merged['Latitude'], mumbai_fun_places_merged['Longitude'], mumbai_fun_places_merged['Neighborhood'], mumbai_fun_places_merged['Cluster-Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=6,
        popup=label,
        color='black',
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<a id='item5'></a>


### Examine Clusters


Let's first see Neighborhood counts for each Clusters.


In [215]:
mumbai_fun_places_merged['Cluster-Labels'].value_counts()

11.0    41
4.0     16
6.0     14
0.0      9
1.0      7
9.0      5
2.0      4
3.0      3
7.0      3
5.0      3
8.0      1
Name: Cluster-Labels, dtype: int64

We can see that cluster 11 has 41 Neighborhhods. Let's ignore this as of now. 
Cluster 4 and 6 are having larger counts.
Let's also focus on Category frequency ditribution.

In [217]:
mumbai_fun_places['Venue Category'].value_counts()

Multiplex                       46
Cricket Ground                  29
Movie Theater                   22
History Museum                  19
Indie Movie Theater             17
Theater                         17
Hockey Arena                    11
Arcade                           9
Stadium                          7
Performing Arts Venue            6
Music Venue                      5
Art Gallery                      5
Dance Studio                     5
Historic Site                    3
Comedy Club                      3
Opera House                      2
Aquarium                         2
General Entertainment            2
Concert Hall                     1
Track Stadium                    1
Soccer Stadium                   1
Water Park                       1
Planetarium                      1
Theme Park                       1
Racetrack                        1
Zoo                              1
Theme Park Ride / Attraction     1
Bowling Alley                    1
Name: Venue Category

In [219]:
mumbai_fun_places_merged.loc[mumbai_fun_places_merged['Cluster-Labels'] == 4]

Unnamed: 0,Neighborhood,PostalCode,Latitude,Longitude,Cluster-Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Ballard Estate,400038,18.94017,72.83483,4.0,Cricket Ground,Multiplex,Indie Movie Theater,History Museum,Hockey Arena,Zoo,Arcade,Art Gallery,Bowling Alley,Comedy Club
16,Mumbai G P O,400001,18.939031,72.837345,4.0,History Museum,Multiplex,Zoo,Indie Movie Theater,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall,Cricket Ground
21,Chembur,400071,19.056035,72.89704,4.0,General Entertainment,Performing Arts Venue,Multiplex,Zoo,Indie Movie Theater,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall
24,Council Hall,400039,18.94017,72.83483,4.0,Cricket Ground,Multiplex,Indie Movie Theater,History Museum,Hockey Arena,Zoo,Arcade,Art Gallery,Bowling Alley,Comedy Club
39,Jacob Circle,400011,18.983709,72.826845,4.0,Racetrack,History Museum,Multiplex,Zoo,Indie Movie Theater,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall
43,Kalbadevi,400002,18.947905,72.826741,4.0,Multiplex,Aquarium,Indie Movie Theater,Cricket Ground,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall,Dance Studio
56,Marine Lines,400020,18.935266,72.825745,4.0,Cricket Ground,Movie Theater,Multiplex,General Entertainment,Hockey Arena,Indie Movie Theater,Arcade,Art Gallery,Bowling Alley,Comedy Club
68,Poonam Ngr Jogeshwari (E),400041,18.94017,72.83483,4.0,Cricket Ground,Multiplex,Indie Movie Theater,History Museum,Hockey Arena,Zoo,Arcade,Art Gallery,Bowling Alley,Comedy Club
72,SEEPZ,400096,18.94017,72.83483,4.0,Cricket Ground,Multiplex,Indie Movie Theater,History Museum,Hockey Arena,Zoo,Arcade,Art Gallery,Bowling Alley,Comedy Club
86,Veer Jijamata Bhosle Udyan,400027,18.979079,72.83474,4.0,Zoo,History Museum,Multiplex,Indie Movie Theater,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall,Cricket Ground


In [218]:
mumbai_fun_places_merged.loc[mumbai_fun_places_merged['Cluster-Labels'] == 6]

Unnamed: 0,Neighborhood,PostalCode,Latitude,Longitude,Cluster-Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,August Kranti Marg,400036,18.964005,72.807983,6.0,Theater,Stadium,Concert Hall,History Museum,Zoo,Indie Movie Theater,Arcade,Art Gallery,Bowling Alley,Comedy Club
19,Borivli HO,400091,19.237596,72.803,6.0,Theme Park Ride / Attraction,Theme Park,Water Park,Zoo,Hockey Arena,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall
25,Cumballa Hill,400026,18.971712,72.807475,6.0,Theater,Stadium,Zoo,Indie Movie Theater,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall,Cricket Ground
35,Grant Road,400007,18.960945,72.818459,6.0,Indie Movie Theater,Theater,Opera House,History Museum,Zoo,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall
36,Hutatma Chowk,400023,18.926536,72.832575,6.0,History Museum,Movie Theater,Art Gallery,Stadium,Indie Movie Theater,Arcade,Bowling Alley,Comedy Club,Concert Hall,Cricket Ground
42,Juhu,400049,19.113245,72.829384,6.0,Theater,Multiplex,Movie Theater,Indie Movie Theater,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall,Cricket Ground
55,Mantralaya,400032,18.929257,72.83018,6.0,Movie Theater,Cricket Ground,Art Gallery,Stadium,History Museum,Indie Movie Theater,Arcade,Bowling Alley,Comedy Club,Concert Hall
61,Mulund (West),400080,19.175222,72.951825,6.0,Theater,Multiplex,Zoo,Indie Movie Theater,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall,Cricket Ground
64,Nariman Point,400021,18.92535,72.824172,6.0,Theater,Movie Theater,Cricket Ground,Art Gallery,Stadium,Performing Arts Venue,History Museum,Hockey Arena,Arcade,Bowling Alley
69,Prabhadevi,400025,19.013685,72.827108,6.0,Movie Theater,Theater,Indie Movie Theater,Arcade,Art Gallery,Bowling Alley,Comedy Club,Concert Hall,Cricket Ground,Dance Studio


We can se that 'Multiplex', 'Cricket Ground' and 'Movie Theater' has the highest frequency and as it can be seen they are everywhere in Cluster 4 and 6 which are Clusters with maximum Neighborhoods.

Let's reattempt the same process for Food venues.

## 2. Food Venues

In [220]:
food_categories=['Brewery', 'Restaurant', 'Indian Restaurant','Pizza Place', 'Bakery', 'Fast Food Restaurant',
       'Hotel', 'Bar', 'Donut Shop', 'Sandwich Place', 'Japanese Restaurant', 'Coffee Shop',
       'Snack Place', 'Italian Restaurant', 'Dessert Shop', 'Café',
       'Breakfast Spot', 'Chinese Restaurant','Vegetarian / Vegan Restaurant', 'Food & Drink Shop', 'Salad Place', 'Food Court', 'Ice Cream Shop',
       'Seafood Restaurant', 'Falafel Restaurant', 'Pub','Burger Joint','Hotel Bar', 'Diner','Parsi Restaurant','Irani Cafe',
       'Middle Eastern Restaurant','Mughlai Restaurant','American Restaurant','BBQ Joint', 'Tea Room',
        'Dim Sum Restaurant', 'Thai Restaurant','Brazilian Restaurant', 'Noodle House',
        'South American Restaurant', 'French Restaurant','German Restaurant', 'Gourmet Shop',
       'Asian Restaurant','Gluten-free Restaurant',
       'Mediterranean Restaurant','Beer Garden','Fried Chicken Joint', 'Frozen Yogurt Shop',
        'Maharashtrian Restaurant','Buffet','Food Truck',
       'Cupcake Shop', 'Juice Bar','Malay Restaurant','Punjabi Restaurant','Food', 'Bengali Restaurant',
       'Cafeteria', 'Molecular Gastronomy Restaurant', 'Nightclub','Hookah Bar',
       'Hot Dog Joint', 'Mexican Restaurant','Multicuisine Indian Restaurant',
       'New American Restaurant', 'Cocktail Bar','Chaat Place','North Indian Restaurant', 'Sushi Restaurant',
       'Tex-Mex Restaurant', 'Dhaba', 'Bistro','Modern European Restaurant','Comfort Food Restaurant', 'Beer Bar', 'Greek Restaurant',
       'Burrito Place', 'Goan Restaurant','Dumpling Restaurant','South Indian Restaurant', 'Soup Place',
       'Moroccan Restaurant', 'Mountain', 'Halal Restaurant','Indian Chinese Restaurant']

In [221]:
mumbai_venues['Venue Category'].unique()

array(['Brewery', 'Restaurant', 'Bookstore', 'Indian Restaurant',
       'Pizza Place', 'Bakery', 'History Museum', 'Fast Food Restaurant',
       'Hotel', 'Bar', 'Donut Shop', 'Sandwich Place',
       'Salon / Barbershop', 'Japanese Restaurant', 'Coffee Shop',
       'Snack Place', 'Italian Restaurant', 'Dessert Shop', "Men's Store",
       'Gastropub', 'Other Great Outdoors', 'Theater', 'Café', 'Stadium',
       'Breakfast Spot', 'Gym / Fitness Center', 'Department Store',
       'Concert Hall', 'Park', 'Chinese Restaurant', 'Deli / Bodega',
       'Vegetarian / Vegan Restaurant', 'Clothing Store',
       'Food & Drink Shop', 'Bus Station', 'Smoke Shop', 'Salad Place',
       'Soccer Field', 'Yoga Studio', 'Event Space', 'Farm', 'Resort',
       'Golf Course', 'Camera Store', 'Shopping Mall', 'Food Court',
       'Electronics Store', 'Platform', 'Ice Cream Shop',
       'Seafood Restaurant', 'Fish Market', 'Falafel Restaurant', 'Pub',
       'Athletics & Sports', 'Burger Joint', 'Bow

In [222]:
mumbai_eats=mumbai_venues[mumbai_venues['Venue Category'].isin (food_categories)]
print(mumbai_eats.shape)
mumbai_eats.head(40)

(2773, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
0,August Kranti Marg,18.964005,72.807983,Doolally Taproom,18.963809,72.807695,Brewery
1,August Kranti Marg,18.964005,72.807983,symphony,18.963347,72.810251,Restaurant
3,August Kranti Marg,18.964005,72.807983,Swati Snacks,18.966442,72.813531,Indian Restaurant
4,August Kranti Marg,18.964005,72.807983,Francesco's Pizzeria,18.96478,72.80463,Pizza Place
5,August Kranti Marg,18.964005,72.807983,Love Sugar Dough,18.964757,72.804456,Bakery
6,August Kranti Marg,18.964005,72.807983,Soam,18.957492,72.808884,Indian Restaurant
8,August Kranti Marg,18.964005,72.807983,Theobroma,18.970735,72.809816,Bakery
9,August Kranti Marg,18.964005,72.807983,Santosh Sagar,18.961177,72.801425,Fast Food Restaurant
10,August Kranti Marg,18.964005,72.807983,Krishna Palace Residency Hotel,18.962266,72.81396,Hotel
11,August Kranti Marg,18.964005,72.807983,China Garden,18.963903,72.807052,Bar


In [223]:
print('There are {} unique categories in food places.'.format(len(mumbai_eats['Venue Category'].unique())))

There are 87 unique categories in food places.


In [224]:
mumbai_eats.groupby('Neighborhood').count().sort_values(by='Venue Name', ascending=False)

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue Name,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Hutatma Chowk,77,77,77,77,77,77
Khar,76,76,76,76,76,76
Nariman Point,74,74,74,74,74,74
Bandra (West),72,72,72,72,72,72
Delisle Road,71,71,71,71,71,71
Mantralaya,70,70,70,70,70,70
Marine Lines,66,66,66,66,66,66
Malad (West),63,63,63,63,63,63
J B Nagar,62,62,62,62,62,62
I I T Mumbai,58,58,58,58,58,58


In [225]:
mumbai_eats_onehot = pd.get_dummies(mumbai_eats[['Venue Category']], prefix="", prefix_sep="")

mumbai_eats_onehot['Neighborhood'] = mumbai_eats['Neighborhood'] 

fixed_columns = [mumbai_eats_onehot.columns[-1]] + list(mumbai_eats_onehot.columns[:-1])
mumbai_eats_onehot = mumbai_eats_onehot[fixed_columns]

mumbai_eats_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,BBQ Joint,Bakery,Bar,Beer Bar,Beer Garden,Bengali Restaurant,Bistro,Brazilian Restaurant,Breakfast Spot,Brewery,Buffet,Burger Joint,Burrito Place,Cafeteria,Café,Chaat Place,Chinese Restaurant,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Cupcake Shop,Dessert Shop,Dhaba,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Falafel Restaurant,Fast Food Restaurant,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,German Restaurant,Gluten-free Restaurant,Goan Restaurant,Gourmet Shop,Greek Restaurant,Halal Restaurant,Hookah Bar,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Irani Cafe,Italian Restaurant,Japanese Restaurant,Juice Bar,Maharashtrian Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,Moroccan Restaurant,Mountain,Mughlai Restaurant,Multicuisine Indian Restaurant,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Parsi Restaurant,Pizza Place,Pub,Punjabi Restaurant,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Snack Place,Soup Place,South American Restaurant,South Indian Restaurant,Sushi Restaurant,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant
0,August Kranti Marg,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,August Kranti Marg,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
3,August Kranti Marg,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,August Kranti Marg,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,August Kranti Marg,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


And let's examine the new dataframe size.


In [226]:
mumbai_eats_onehot.shape

(2773, 88)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category


In [227]:
mumbai_eats_grouped = mumbai_eats_onehot.groupby('Neighborhood').mean().reset_index()
mumbai_eats_grouped

Unnamed: 0,Neighborhood,American Restaurant,Asian Restaurant,BBQ Joint,Bakery,Bar,Beer Bar,Beer Garden,Bengali Restaurant,Bistro,Brazilian Restaurant,Breakfast Spot,Brewery,Buffet,Burger Joint,Burrito Place,Cafeteria,Café,Chaat Place,Chinese Restaurant,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Cupcake Shop,Dessert Shop,Dhaba,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Falafel Restaurant,Fast Food Restaurant,Food,Food & Drink Shop,Food Court,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,German Restaurant,Gluten-free Restaurant,Goan Restaurant,Gourmet Shop,Greek Restaurant,Halal Restaurant,Hookah Bar,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Irani Cafe,Italian Restaurant,Japanese Restaurant,Juice Bar,Maharashtrian Restaurant,Malay Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Molecular Gastronomy Restaurant,Moroccan Restaurant,Mountain,Mughlai Restaurant,Multicuisine Indian Restaurant,New American Restaurant,Nightclub,Noodle House,North Indian Restaurant,Parsi Restaurant,Pizza Place,Pub,Punjabi Restaurant,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Snack Place,Soup Place,South American Restaurant,South Indian Restaurant,Sushi Restaurant,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Vegetarian / Vegan Restaurant
0,Aarey Milk Colony,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Airoli Mode,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125
2,Andheri (East),0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.05,0.0,0.35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.05,0.0,0.05,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Andheri (West),0.0,0.0,0.0,0.035714,0.107143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.035714,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.071429,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.035714,0.0,0.035714,0.0,0.035714,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429
4,Antop Hill,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Anu Shakti Nagar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,August Kranti Marg,0.0,0.0,0.0,0.12766,0.021277,0.0,0.0,0.0,0.0,0.0,0.021277,0.021277,0.0,0.0,0.0,0.0,0.106383,0.0,0.042553,0.0,0.085106,0.0,0.0,0.042553,0.0,0.0,0.0,0.021277,0.0,0.0,0.042553,0.0,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.0,0.148936,0.0,0.021277,0.021277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.042553,0.0,0.0,0.042553,0.021277,0.06383,0.0,0.042553,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277
7,B A R C,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Ballard Estate,0.019608,0.0,0.019608,0.078431,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.137255,0.0,0.039216,0.0,0.058824,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.039216,0.0,0.019608,0.0,0.294118,0.039216,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0,0.0,0.0,0.019608,0.058824,0.0,0.0,0.0,0.0,0.0,0.019608,0.0,0.0,0.0
9,Bandra (East),0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.029412,0.0,0.029412,0.0,0.0,0.029412,0.0,0.029412,0.058824,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.147059,0.029412,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.117647,0.0,0.0,0.058824,0.0,0.0,0.029412,0.0,0.0,0.029412,0.0,0.0,0.029412,0.0,0.029412,0.0


#### Let's confirm the new size


In [228]:
mumbai_eats_grouped.shape

(101, 88)

#### Let's print each neighborhood along with the top 5 most common venues


In [229]:
num_top_venues = 5

for hood in mumbai_eats_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = mumbai_eats_grouped[mumbai_eats_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Aarey Milk Colony----
                 venue  freq
0           Restaurant  0.25
1                Hotel  0.25
2    Indian Restaurant  0.25
3                 Café  0.25
4  American Restaurant  0.00


----Airoli Mode----
                           venue  freq
0              Indian Restaurant  0.25
1           Fast Food Restaurant  0.25
2  Vegetarian / Vegan Restaurant  0.12
3                   Burger Joint  0.12
4                    Coffee Shop  0.12


----Andheri (East)----
                  venue  freq
0     Indian Restaurant  0.35
1  Fast Food Restaurant  0.10
2    Chinese Restaurant  0.10
3           Pizza Place  0.10
4            Food Court  0.05


----Andheri (West)----
                           venue  freq
0              Indian Restaurant  0.25
1                            Bar  0.11
2  Vegetarian / Vegan Restaurant  0.07
3           Fast Food Restaurant  0.07
4                    Pizza Place  0.07


----Antop Hill----
                 venue  freq
0                Diner   0.5
1

                  venue  freq
0     Indian Restaurant  0.28
1  Fast Food Restaurant  0.09
2    Chinese Restaurant  0.09
3                  Café  0.06
4           Snack Place  0.06


----Mazgaon----
               venue  freq
0       Dessert Shop  0.12
1     Ice Cream Shop  0.12
2             Bakery  0.12
3  Indian Restaurant  0.12
4        Pizza Place  0.12


----Motilal Nagar----
                venue  freq
0   Indian Restaurant   0.4
1  Seafood Restaurant   0.2
2              Bakery   0.2
3                Café   0.2
4    Malay Restaurant   0.0


----Mulund (East)----
                 venue  freq
0    Indian Restaurant   0.5
1   Punjabi Restaurant   0.5
2  American Restaurant   0.0
3     Malay Restaurant   0.0
4             Mountain   0.0


----Mulund (West)----
                  venue  freq
0  Fast Food Restaurant  0.19
1     Indian Restaurant  0.16
2           Snack Place  0.13
3           Pizza Place  0.10
4          Dessert Shop  0.06


----Mulund Colony----
                  venu

#### Let's put that into a _pandas_ dataframe


First, let's write a function to sort the venues in descending order.


In [230]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.


In [231]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = mumbai_eats_grouped['Neighborhood']

for ind in np.arange(mumbai_eats_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(mumbai_eats_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Aarey Milk Colony,Hotel,Café,Indian Restaurant,Restaurant,Vegetarian / Vegan Restaurant,Food,Dhaba,Dim Sum Restaurant,Diner,Donut Shop
1,Airoli Mode,Indian Restaurant,Fast Food Restaurant,Vegetarian / Vegan Restaurant,Restaurant,Burger Joint,Coffee Shop,Beer Bar,Food & Drink Shop,Dim Sum Restaurant,Diner
2,Andheri (East),Indian Restaurant,Chinese Restaurant,Fast Food Restaurant,Pizza Place,Hotel,Restaurant,Sandwich Place,Seafood Restaurant,Ice Cream Shop,Food Court
3,Andheri (West),Indian Restaurant,Bar,Vegetarian / Vegan Restaurant,Coffee Shop,Fast Food Restaurant,Pizza Place,Restaurant,Chinese Restaurant,Falafel Restaurant,Burger Joint
4,Antop Hill,Bar,Diner,Vegetarian / Vegan Restaurant,Food,Dessert Shop,Dhaba,Dim Sum Restaurant,Donut Shop,Dumpling Restaurant,Falafel Restaurant


<a id='item4'></a>


### Cluster Neighborhoods


Run _k_-means to cluster the neighborhood into 10 clusters.


In [232]:
kclusters = 10

mumbai_eats_grouped_clustering = mumbai_eats_grouped.drop('Neighborhood', 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(mumbai_eats_grouped_clustering)

kmeans.labels_[0:10] 

array([9, 0, 9, 0, 4, 3, 0, 0, 9, 0])

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.


In [233]:
neighborhoods_venues_sorted.insert(0, 'Cluster-Labels', kmeans.labels_)

mumbai_eats_merged = df

mumbai_eats_merged = mumbai_eats_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

mumbai_eats_merged.head() 

Unnamed: 0,Neighborhood,PostalCode,Latitude,Longitude,Cluster-Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,August Kranti Marg,400036,18.964005,72.807983,0.0,Indian Restaurant,Bakery,Café,Coffee Shop,Sandwich Place,Pizza Place,Dessert Shop,Restaurant,Chinese Restaurant,Fast Food Restaurant
1,Aarey Milk Colony,400065,19.161085,72.884394,9.0,Hotel,Café,Indian Restaurant,Restaurant,Vegetarian / Vegan Restaurant,Food,Dhaba,Dim Sum Restaurant,Diner,Donut Shop
2,Andheri (East),400069,19.119298,72.8511,9.0,Indian Restaurant,Chinese Restaurant,Fast Food Restaurant,Pizza Place,Hotel,Restaurant,Sandwich Place,Seafood Restaurant,Ice Cream Shop,Food Court
3,Andheri (West),400058,19.122935,72.84061,0.0,Indian Restaurant,Bar,Vegetarian / Vegan Restaurant,Coffee Shop,Fast Food Restaurant,Pizza Place,Restaurant,Chinese Restaurant,Falafel Restaurant,Burger Joint
4,Antop Hill,400037,19.020313,72.86828,4.0,Bar,Diner,Vegetarian / Vegan Restaurant,Food,Dessert Shop,Dhaba,Dim Sum Restaurant,Donut Shop,Dumpling Restaurant,Falafel Restaurant


Finally, let's visualize the resulting clusters


In [234]:
mumbai_eats_merged.isna().sum()

Neighborhood              0
PostalCode                0
Latitude                  0
Longitude                 0
Cluster-Labels            5
1st Most Common Venue     5
2nd Most Common Venue     5
3rd Most Common Venue     5
4th Most Common Venue     5
5th Most Common Venue     5
6th Most Common Venue     5
7th Most Common Venue     5
8th Most Common Venue     5
9th Most Common Venue     5
10th Most Common Venue    5
dtype: int64

In [235]:
mumbai_eats_merged['Cluster-Labels'].replace(np.NaN, 11.0, inplace=True)

In [236]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters+1)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(mumbai_eats_merged['Latitude'], mumbai_eats_merged['Longitude'], mumbai_eats_merged['Neighborhood'], mumbai_eats_merged['Cluster-Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=6,
        popup=label,
        color='black',
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=1.0).add_to(map_clusters)
       
map_clusters

### Examine Clusters


In [237]:
mumbai_eats_merged['Cluster-Labels'].value_counts()

0.0     61
9.0     29
11.0     5
1.0      2
3.0      2
4.0      2
8.0      1
5.0      1
2.0      1
7.0      1
6.0      1
Name: Cluster-Labels, dtype: int64

In [239]:
mumbai_eats['Venue Category'].value_counts()

Indian Restaurant                  573
Café                               232
Coffee Shop                        164
Fast Food Restaurant               158
Chinese Restaurant                 129
Bakery                             127
Bar                                113
Ice Cream Shop                      95
Pizza Place                         92
Restaurant                          91
Hotel                               91
Seafood Restaurant                  74
Dessert Shop                        70
Italian Restaurant                  57
Sandwich Place                      53
Vegetarian / Vegan Restaurant       52
Snack Place                         52
Asian Restaurant                    43
Juice Bar                           34
Pub                                 31
Diner                               30
Tea Room                            28
BBQ Joint                           27
Irani Cafe                          23
Middle Eastern Restaurant           20
Mughlai Restaurant       

In [240]:
mumbai_eats_merged.loc[mumbai_eats_merged['Cluster-Labels'] == 0]

Unnamed: 0,Neighborhood,PostalCode,Latitude,Longitude,Cluster-Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,August Kranti Marg,400036,18.964005,72.807983,0.0,Indian Restaurant,Bakery,Café,Coffee Shop,Sandwich Place,Pizza Place,Dessert Shop,Restaurant,Chinese Restaurant,Fast Food Restaurant
3,Andheri (West),400058,19.122935,72.84061,0.0,Indian Restaurant,Bar,Vegetarian / Vegan Restaurant,Coffee Shop,Fast Food Restaurant,Pizza Place,Restaurant,Chinese Restaurant,Falafel Restaurant,Burger Joint
6,B A R C,400085,19.016345,72.926988,0.0,Ice Cream Shop,Vegetarian / Vegan Restaurant,Food,Dessert Shop,Dhaba,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Falafel Restaurant
8,Bandra (East),400051,19.060715,72.854564,0.0,Indian Restaurant,Pizza Place,Restaurant,Café,Italian Restaurant,Bar,Diner,Fast Food Restaurant,Ice Cream Shop,Noodle House
9,Bandra (West),400050,19.052259,72.829405,0.0,Indian Restaurant,Café,Coffee Shop,Chinese Restaurant,Bakery,Bar,Pizza Place,Snack Place,Asian Restaurant,Tea Room
15,Mumbai Central,400008,18.967725,72.827071,0.0,Indian Restaurant,Fast Food Restaurant,Ice Cream Shop,Dessert Shop,Restaurant,Middle Eastern Restaurant,Chinese Restaurant,Diner,Pizza Place,Coffee Shop
18,Borivli (West),400092,19.22963,72.836607,0.0,Restaurant,Fast Food Restaurant,Gourmet Shop,Malay Restaurant,Pub,Coffee Shop,French Restaurant,Food Truck,Gluten-free Restaurant,Dessert Shop
20,Chakala MIDC,400093,19.12629,72.867028,0.0,Indian Restaurant,Asian Restaurant,Ice Cream Shop,Diner,Pizza Place,Restaurant,Seafood Restaurant,Bakery,Café,Italian Restaurant
23,Colaba,400005,18.915923,72.817565,0.0,Coffee Shop,Indian Restaurant,Italian Restaurant,Thai Restaurant,Bar,Chinese Restaurant,Food Truck,Hotel,Ice Cream Shop,Pizza Place
25,Cumballa Hill,400026,18.971712,72.807475,0.0,Chinese Restaurant,Bakery,Bar,Café,Pizza Place,Fast Food Restaurant,Indian Restaurant,Sandwich Place,Coffee Shop,Ice Cream Shop


In [241]:
mumbai_eats_merged.loc[mumbai_eats_merged['Cluster-Labels'] == 9]

Unnamed: 0,Neighborhood,PostalCode,Latitude,Longitude,Cluster-Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Aarey Milk Colony,400065,19.161085,72.884394,9.0,Hotel,Café,Indian Restaurant,Restaurant,Vegetarian / Vegan Restaurant,Food,Dhaba,Dim Sum Restaurant,Diner,Donut Shop
2,Andheri (East),400069,19.119298,72.8511,9.0,Indian Restaurant,Chinese Restaurant,Fast Food Restaurant,Pizza Place,Hotel,Restaurant,Sandwich Place,Seafood Restaurant,Ice Cream Shop,Food Court
7,Ballard Estate,400038,18.94017,72.83483,9.0,Indian Restaurant,Café,Bakery,Bar,Seafood Restaurant,Coffee Shop,Chinese Restaurant,Hotel,Irani Cafe,Parsi Restaurant
14,Bhavani Shankar Road,400028,19.020358,72.83628,9.0,Indian Restaurant,Chinese Restaurant,Ice Cream Shop,Fast Food Restaurant,Bar,Café,Coffee Shop,Breakfast Spot,Vegetarian / Vegan Restaurant,Hotel
16,Mumbai G P O,400001,18.939031,72.837345,9.0,Indian Restaurant,Café,Seafood Restaurant,Hotel,Coffee Shop,Chinese Restaurant,Bar,Irani Cafe,Parsi Restaurant,Fast Food Restaurant
21,Chembur,400071,19.056035,72.89704,9.0,Indian Restaurant,Café,Pizza Place,Seafood Restaurant,Fast Food Restaurant,Bar,Diner,Chinese Restaurant,Vegetarian / Vegan Restaurant,Asian Restaurant
22,Chinch Bunder,400009,18.957426,72.837665,9.0,Indian Restaurant,Dessert Shop,Restaurant,BBQ Joint,Chinese Restaurant,Hotel,Ice Cream Shop,Café,Food,Fast Food Restaurant
24,Council Hall,400039,18.94017,72.83483,9.0,Indian Restaurant,Café,Bakery,Bar,Seafood Restaurant,Coffee Shop,Chinese Restaurant,Hotel,Irani Cafe,Parsi Restaurant
30,F C I Mumbai,400074,19.022904,72.896845,9.0,Indian Restaurant,Vegetarian / Vegan Restaurant,Food,Dessert Shop,Dhaba,Dim Sum Restaurant,Diner,Donut Shop,Dumpling Restaurant,Falafel Restaurant
31,Ghatkopar (West),400086,19.09151,72.910692,9.0,Indian Restaurant,Vegetarian / Vegan Restaurant,Café,Coffee Shop,Diner,Donut Shop,Pizza Place,Restaurant,Bakery,Hot Dog Joint


Here also we can see that 'Indian Restaurant' and 'Cafe' categories have major impact on clusters with high count of Neighborhoods due to their higher frequency.

I'll conclude the results and findings in the report.
