<h3>Import Libraries</h3>

In [1]:
!pip install geopy
!pip install folium
!pip install BeautifulSoup4
!pip install geocoder

Collecting geopy
[?25l  Downloading https://files.pythonhosted.org/packages/80/93/d384479da0ead712bdaf697a8399c13a9a89bd856ada5a27d462fb45e47b/geopy-1.20.0-py2.py3-none-any.whl (100kB)
[K     |████████████████████████████████| 102kB 7.4MB/s ta 0:00:011
[?25hCollecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/8b/62/26ec95a98ba64299163199e95ad1b0e34ad3f4e176e221c40245f211e425/geographiclib-1.50-py3-none-any.whl
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-1.20.0
Collecting BeautifulSoup4
[?25l  Downloading https://files.pythonhosted.org/packages/cb/a1/c698cf319e9cfed6b17376281bd0efc6bfc8465698f54170ef60a485ab5d/beautifulsoup4-4.8.2-py3-none-any.whl (106kB)
[K     |████████████████████████████████| 112kB 30.6MB/s eta 0:00:01
[?25hCollecting soupsieve>=1.2 (from BeautifulSoup4)
  Downloading https://files.pythonhosted.org/packages/81/94/03c0f04471fc245d08d0a99f7946ac228ca98da4fa757

In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries imported.")

Libraries imported.


<h3>Scrape data from Wiki page</h3>

In [3]:
data = requests.get('https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Kolkata').text

In [4]:
soup = BeautifulSoup(data, 'html.parser')

In [5]:
neighborhoodList = []

In [6]:
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    if(row.text != 'Neighbourhoods in Kolkata Metropolitan Area'):
        neighborhoodList.append(row.text)
    

In [7]:
kol_df = pd.DataFrame({"Neighborhood": neighborhoodList})
kol_df.head()

Unnamed: 0,Neighborhood
0,Abhirampur
1,Agarpara
2,Ajoy Nagar
3,Alipore
4,Amodghata


In [8]:
kol_df.shape

(199, 1)

<h3>Get latitude and longitude of each neighbour</h3>

In [9]:
# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Kolkata, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [10]:
coords = [ get_latlng(neighborhood) for neighborhood in kol_df["Neighborhood"].tolist() ]

In [11]:
coords

[[22.530693771856434, 88.34650250291418],
 [22.68405000000007, 88.39165000000008],
 [22.48966000000007, 88.39640000000009],
 [22.52660000000003, 88.33510000000007],
 [22.98801000000003, 88.38838000000004],
 [22.482678048848754, 88.37658799417164],
 [22.512078083819034, 88.336961916181],
 [22.47640513266778, 88.38044069608748],
 [22.57054000000005, 88.37124000000006],
 [22.472170000000062, 88.25546000000008],
 [22.567620000000034, 88.34456000000006],
 [22.56729000000007, 88.34106000000008],
 [22.555079999999997, 88.24685148658381],
 [22.604020000000048, 88.36638000000005],
 [22.48395000000005, 88.37543000000005],
 [22.61674000000005, 88.42691000000008],
 [22.472510000000057, 88.36513000000008],
 [22.600192913959265, 88.41169347377229],
 [22.700780000000066, 88.38065000000006],
 [22.497570000000053, 88.23033000000004],
 [22.647860000000037, 88.34428000000008],
 [22.647860000000037, 88.34428000000008],
 [22.531780000000026, 88.36437000000006],
 [22.5303523517102, 88.35621146045604],
 [22.

In [15]:
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])
kol_df['Latitude'] = df_coords['Latitude']
kol_df['Longitude'] = df_coords['Longitude']

In [16]:
print(kol_df.shape)
kol_df

(199, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Abhirampur,22.530694,88.346503
1,Agarpara,22.68405,88.39165
2,Ajoy Nagar,22.48966,88.3964
3,Alipore,22.5266,88.3351
4,Amodghata,22.98801,88.38838
5,Amtala,22.482678,88.376588
6,"Anandapur, Kolkata",22.512078,88.336962
7,Ankurhati,22.476405,88.380441
8,Argari,22.57054,88.37124
9,Asuti,22.47217,88.25546


<h3>Create a map of Kolkata alongwith its neighbours</h3>

In [17]:
address = 'Kolkata, India'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Kolkata, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Kolkata, India 22.54541245, 88.3567751581234.


In [18]:
# create map of Kolkata using latitude and longitude values
map_kol = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(kol_df['Latitude'], kol_df['Longitude'], kol_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_kol)  
    
map_kol

<h3>Explore neighbourhood using Foursquare API</h3>

In [19]:
CLIENT_ID = 'JNOHPX1VT3FCJEOIGXCZQH50HSKGRD01OHM2JVAC3YGFZW2Z' # your Foursquare ID
CLIENT_SECRET = 'Q33TPF2P25S2K0MCOPQIXO23OU1YP0R4343TL0NUOFLXB35H' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: JNOHPX1VT3FCJEOIGXCZQH50HSKGRD01OHM2JVAC3YGFZW2Z
CLIENT_SECRET:Q33TPF2P25S2K0MCOPQIXO23OU1YP0R4343TL0NUOFLXB35H


<h5>Top 100 venues within a radius of 1 km</h5>

In [20]:
radius = 1000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(kol_df['Latitude'], kol_df['Longitude'], kol_df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(CLIENT_ID,CLIENT_SECRET,VERSION,lat,long,radius, LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [21]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(1716, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Abhirampur,22.530694,88.346503,Balaram Mullick & Radharaman Mullick,22.533097,88.347082,Indian Sweet Shop
1,Abhirampur,22.530694,88.346503,Jai Hind Dhaba,22.533109,88.353268,Dhaba
2,Abhirampur,22.530694,88.346503,Balwant Singh's Eating House,22.537714,88.34422,Dhaba
3,Abhirampur,22.530694,88.346503,Oh! Calcutta,22.538357,88.351406,Bengali Restaurant
4,Abhirampur,22.530694,88.346503,Red Hot Chilli Pepper,22.529016,88.355805,Chinese Restaurant


<h5>No. of venues returned for each neighborhood</h5>

In [22]:
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Abhirampur,22,22,22,22,22,22
Agarpara,5,5,5,5,5,5
Ajoy Nagar,6,6,6,6,6,6
Alipore,9,9,9,9,9,9
Amtala,5,5,5,5,5,5
"Anandapur, Kolkata",5,5,5,5,5,5
Ankurhati,7,7,7,7,7,7
Argari,11,11,11,11,11,11
B. B. D. Bagh,19,19,19,19,19,19
Babughat,9,9,9,9,9,9


<h5>No. of unique categories that can be curated from all the returned venues</h5>

In [23]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))
venues_df['VenueCategory'].unique()[:50]

There are 145 uniques categories.


array(['Indian Sweet Shop', 'Dhaba', 'Bengali Restaurant',
       'Chinese Restaurant', 'Café', 'Nightclub', 'Hotel',
       'Fast Food Restaurant', 'American Restaurant', 'Shopping Mall',
       'Multiplex', 'Italian Restaurant', 'Tea Room', 'Department Store',
       'Ice Cream Shop', 'ATM', 'Train Station', 'Pharmacy',
       'Grocery Store', 'Bus Station', 'Bakery', 'Dessert Shop',
       'Pizza Place', 'South Indian Restaurant', 'Athletics & Sports',
       'Clothing Store', 'Mobile Phone Shop', 'Asian Restaurant', 'Park',
       'Diner', 'Indian Restaurant', 'Juice Bar', 'Plaza', 'Platform',
       'Breakfast Spot', 'Pool', 'Cricket Ground', 'Stadium',
       'Neighborhood', 'Thai Restaurant', 'Mughlai Restaurant', 'River',
       'Boat or Ferry', 'Metro Station', 'Restaurant', 'Bank', 'Market',
       'Vegetarian / Vegan Restaurant', 'Bistro', 'Fried Chicken Joint'],
      dtype=object)

<h3>Analyze each neighbourhood</h3>

In [25]:
#one hot encoding
kol_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
kol_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [kol_onehot.columns[-1]] + list(kol_onehot.columns[:-1])
kol_onehot = kol_onehot[fixed_columns]

print(kol_onehot.shape)
kol_onehot.head()

(1716, 146)


Unnamed: 0,Neighborhoods,ATM,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,Awadhi Restaurant,BBQ Joint,Bakery,Bank,Bar,Bed & Breakfast,Beer Bar,Beer Garden,Bengali Restaurant,Bistro,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Bus Station,Bus Stop,Business Service,Café,Campground,Chinese Restaurant,Clothing Store,Coffee Shop,Convenience Store,Cosmetics Shop,Cricket Ground,Department Store,Dessert Shop,Dhaba,Diner,Discount Store,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Service,Fast Food Restaurant,Field,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,Fried Chicken Joint,Furniture / Home Store,Garden,Gastropub,Golf Course,Grocery Store,Gujarati Restaurant,Gym / Fitness Center,Harbor / Marina,Health & Beauty Service,Historic Site,History Museum,Hookah Bar,Hostel,Hotel,Hotel Pool,Housing Development,IT Services,Ice Cream Shop,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Indie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Juice Bar,Kerala Restaurant,Lounge,Market,Mattress Store,Men's Store,Metro Station,Mexican Restaurant,Military Base,Mobile Phone Shop,Motorcycle Shop,Movie Theater,Moving Target,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Museum,Music Venue,Neighborhood,Nightclub,Optical Shop,Park,Performing Arts Venue,Pet Store,Pharmacy,Photography Studio,Pizza Place,Planetarium,Platform,Plaza,Pool,Port,Pub,Resort,Restaurant,River,Sandwich Place,Scenic Lookout,Shoe Store,Shopping Mall,Snack Place,Soccer Field,Soccer Stadium,South Indian Restaurant,Spa,Sports Club,Stadium,Steakhouse,Supermarket,Taxi Stand,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Theme Park,Theme Restaurant,Tibetan Restaurant,Toll Booth,Tourist Information Center,Train Station,Tram Station,Used Bookstore,Vegetarian / Vegan Restaurant,Watch Shop,Zoo
0,Abhirampur,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Abhirampur,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Abhirampur,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Abhirampur,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Abhirampur,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [26]:
kol_grouped = kol_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(kol_grouped.shape)
kol_grouped

(171, 146)


Unnamed: 0,Neighborhoods,ATM,American Restaurant,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Workshop,Awadhi Restaurant,BBQ Joint,Bakery,Bank,Bar,Bed & Breakfast,Beer Bar,Beer Garden,Bengali Restaurant,Bistro,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Bus Station,Bus Stop,Business Service,Café,Campground,Chinese Restaurant,Clothing Store,Coffee Shop,Convenience Store,Cosmetics Shop,Cricket Ground,Department Store,Dessert Shop,Dhaba,Diner,Discount Store,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Service,Fast Food Restaurant,Field,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,Fried Chicken Joint,Furniture / Home Store,Garden,Gastropub,Golf Course,Grocery Store,Gujarati Restaurant,Gym / Fitness Center,Harbor / Marina,Health & Beauty Service,Historic Site,History Museum,Hookah Bar,Hostel,Hotel,Hotel Pool,Housing Development,IT Services,Ice Cream Shop,Indian Restaurant,Indian Sweet Shop,Indie Movie Theater,Indie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Juice Bar,Kerala Restaurant,Lounge,Market,Mattress Store,Men's Store,Metro Station,Mexican Restaurant,Military Base,Mobile Phone Shop,Motorcycle Shop,Movie Theater,Moving Target,Mughlai Restaurant,Multicuisine Indian Restaurant,Multiplex,Museum,Music Venue,Neighborhood,Nightclub,Optical Shop,Park,Performing Arts Venue,Pet Store,Pharmacy,Photography Studio,Pizza Place,Planetarium,Platform,Plaza,Pool,Port,Pub,Resort,Restaurant,River,Sandwich Place,Scenic Lookout,Shoe Store,Shopping Mall,Snack Place,Soccer Field,Soccer Stadium,South Indian Restaurant,Spa,Sports Club,Stadium,Steakhouse,Supermarket,Taxi Stand,Tea Room,Tex-Mex Restaurant,Thai Restaurant,Theme Park,Theme Restaurant,Tibetan Restaurant,Toll Booth,Tourist Information Center,Train Station,Tram Station,Used Bookstore,Vegetarian / Vegan Restaurant,Watch Shop,Zoo
0,Abhirampur,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.136364,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.136364,0.0,0.0,0.0,0.045455,0.0,0.090909,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agarpara,0.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0
2,Ajoy Nagar,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Alipore,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Amtala,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Anandapur, Kolkata",0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Ankurhati,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Argari,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.181818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.181818,0.090909,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0
8,B. B. D. Bagh,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.210526,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.052632,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Babughat,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [43]:
len(kol_grouped[kol_grouped["Multiplex"] > 0])

24

<h5>Create a new dataframe for Metro Stations only</h5>

In [44]:
kol_mul = kol_grouped[["Neighborhoods","Multiplex"]]

In [45]:
kol_mul.head()

Unnamed: 0,Neighborhoods,Multiplex
0,Abhirampur,0.045455
1,Agarpara,0.0
2,Ajoy Nagar,0.166667
3,Alipore,0.0
4,Amtala,0.0


<h3>Cluster neighbourhoods</h3>

In [46]:
# set number of clusters
kclusters = 3

kol_clustering = kol_mul.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kol_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 0, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [47]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
kol_merged = kol_mul.copy()

# add clustering labels
kol_merged["Cluster Labels"] = kmeans.labels_

In [48]:
kol_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
kol_merged.head()

Unnamed: 0,Neighborhood,Multiplex,Cluster Labels
0,Abhirampur,0.045455,2
1,Agarpara,0.0,0
2,Ajoy Nagar,0.166667,1
3,Alipore,0.0,0
4,Amtala,0.0,0


In [49]:
# add latitude/longitude for each neighborhood
kol_merged = kol_merged.join(kol_df.set_index("Neighborhood"), on="Neighborhood")

print(kol_merged.shape)
kol_merged.head() # check the last columns!

(171, 5)


Unnamed: 0,Neighborhood,Multiplex,Cluster Labels,Latitude,Longitude
0,Abhirampur,0.045455,2,22.530694,88.346503
1,Agarpara,0.0,0,22.68405,88.39165
2,Ajoy Nagar,0.166667,1,22.48966,88.3964
3,Alipore,0.0,0,22.5266,88.3351
4,Amtala,0.0,0,22.482678,88.376588


In [50]:
kol_merged.sort_values(["Cluster Labels"], inplace=True)
kol_merged

Unnamed: 0,Neighborhood,Multiplex,Cluster Labels,Latitude,Longitude
170,Smita Bakshi,0.0,0,22.600193,88.411693
101,Garshyamnagar,0.0,0,22.82282,88.38058
102,Garulia,0.0,0,22.57808,88.44307
103,Girish Park,0.0,0,22.58627,88.36351
104,"Gobindapur, Bhangar",0.0,0,22.5028,88.61526
161,Kumortuli,0.0,0,22.600891,88.362104
106,Gondalpara,0.0,0,22.656265,88.387024
107,Halisahar,0.0,0,22.93218,88.41859
108,Haltu,0.0,0,22.50364,88.39039
109,Hanspukuria,0.0,0,22.44855,88.28924


<h5>Visualizing the clusters</h5>

In [51]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(kol_merged['Latitude'], kol_merged['Longitude'], kol_merged['Neighborhood'], kol_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<h3>Examine Clusters</h3>

In [52]:
kol_merged.loc[kol_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Multiplex,Cluster Labels,Latitude,Longitude
170,Smita Bakshi,0.0,0,22.600193,88.411693
101,Garshyamnagar,0.0,0,22.82282,88.38058
102,Garulia,0.0,0,22.57808,88.44307
103,Girish Park,0.0,0,22.58627,88.36351
104,"Gobindapur, Bhangar",0.0,0,22.5028,88.61526
161,Kumortuli,0.0,0,22.600891,88.362104
106,Gondalpara,0.0,0,22.656265,88.387024
107,Halisahar,0.0,0,22.93218,88.41859
108,Haltu,0.0,0,22.50364,88.39039
109,Hanspukuria,0.0,0,22.44855,88.28924


In [53]:
kol_merged.loc[kol_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Multiplex,Cluster Labels,Latitude,Longitude
67,Chak Garia,0.166667,1,22.48386,88.39996
32,Barrackpur Cantonment,0.142857,1,22.76361,88.36019
69,Champdani,0.111111,1,22.60614,88.41106
73,Chetla,0.1,1,22.51621,88.341
2,Ajoy Nagar,0.166667,1,22.48966,88.3964
88,Dum Dum Park,0.111111,1,22.60947,88.41606


In [54]:
kol_merged.loc[kol_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Multiplex,Cluster Labels,Latitude,Longitude
162,Lake Gardens,0.038462,2,22.50491,88.35628
0,Abhirampur,0.045455,2,22.530694,88.346503
143,Kankurgachi,0.037037,2,22.57835,88.39385
128,Jodhpur Park,0.041667,2,22.50586,88.36366
105,Golf Green,0.045455,2,22.49417,88.36331
64,Calcutta International School,0.076923,2,22.5201,88.39951
58,Bowbazar,0.055556,2,22.56628,88.36116
56,Bow Barracks,0.052632,2,22.57036,88.35603
52,Bikramgarh,0.041667,2,22.4973,88.35919
48,Bhowanipore,0.027778,2,22.53548,88.34623


<h3>Observation</h3>

In [None]:
<body>It can be easily seen that cluster 0 has very low number to totally no multiplex in their neighbourhood. On the other
hand cluster 1 has a high number a multiplex nearby while cluster 2 has a moderate count of multiplex nearby. Hence property
developers should try opening a multiplex in areas represented by cluster 0 as their is hardly any competition from any other
multiplex. At the same time establishing a new multiplex in areas represented by cluster 1 can be a bit challenging. Finally 
this project recommends the property developers not to open multiplex in cluster 2  