# Kochi Cafe Project

**First we import all the modules**

In [166]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files
!pip install geocoder
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries imported.")

Libraries imported.


### Import the data from wikipedia page to get Neighborhoods

In [3]:
data = requests.get("https://en.wikipedia.org/wiki/Category:Suburbs_of_Kochi").text

In [4]:
soup = BeautifulSoup(data, 'html.parser')

In [5]:
# create a list to store neighborhood data
neighborhoodList = []

In [6]:

# append the data into the list
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)

#### Adding one more Neighborhood

In [116]:
neighborhoodList.append("Kakkanad")

#### Creating Data Frame

In [117]:
kl_df = pd.DataFrame({"Neighborhood": neighborhoodList})

kl_df.head()

Unnamed: 0,Neighborhood
0,Alangad
1,Angamaly
2,Aroor
3,Chellanam
4,Chendamangalam


In [118]:
# print the number of rows of the dataframe
kl_df.shape

(45, 1)

#### Function to get coordinates

In [119]:
# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Ernakulam, Kerala, India'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [120]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in kl_df["Neighborhood"].tolist() ]

In [121]:
coords

[[10.122220000000027, 76.31579000000005],
 [10.20366000000007, 76.38268000000005],
 [10.174710000000061, 76.31031000000007],
 [9.835260000000062, 76.27029000000005],
 [10.172920000000033, 76.23346000000004],
 [10.15354000000002, 76.34068000000008],
 [10.179920000000038, 76.47664000000003],
 [9.96118000000007, 76.30659000000009],
 [10.081320000000062, 76.34155000000004],
 [9.920160000000067, 76.38924000000003],
 [10.128150000000062, 76.37217000000004],
 [10.086410000000058, 76.38181000000003],
 [9.957580000000064, 76.24239000000006],
 [9.966870000000029, 76.35720000000003],
 [10.06352000000004, 76.24660000000006],
 [9.988452830057541, 76.30342643172278],
 [9.947600000000023, 76.26079000000004],
 [10.107690000000048, 76.26171000000005],
 [10.055610000000058, 76.27164000000005],
 [10.103150000000028, 76.24615000000006],
 [9.90220000000005, 76.31064000000003],
 [9.940510000000074, 76.32395000000008],
 [9.952060000000074, 76.25080000000008],
 [9.999140000000068, 76.26241000000005],
 [9.9307

In [122]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [123]:
# merge the coordinates into the original dataframe
kl_df['Latitude'] = df_coords['Latitude']
kl_df['Longitude'] = df_coords['Longitude']

In [124]:
# check the neighborhoods and the coordinates
print(kl_df.shape)
kl_df

(45, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Alangad,10.12222,76.31579
1,Angamaly,10.20366,76.38268
2,Aroor,10.17471,76.31031
3,Chellanam,9.83526,76.27029
4,Chendamangalam,10.17292,76.23346
5,"Chengamanad, Ernakulam district",10.15354,76.34068
6,Cheranallur,10.17992,76.47664
7,Chilavannoor,9.96118,76.30659
8,Choornikkara,10.08132,76.34155
9,Chottanikkara,9.92016,76.38924


#### save the DataFrame as CSV file

In [125]:
# save the DataFrame as CSV file
kl_df.to_csv("kl_df.csv", index=False)

In [126]:
# get the coordinates of Kochi
address = 'Edapally, Kerala, India'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Edapally, Kerala, India {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Edapally, Kerala, India 10.0250304, 76.3073744.


#### create map of Ernakulam using latitude and longitude values

In [127]:
# create map of Ernakulam using latitude and longitude values
map_kl = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(kl_df['Latitude'], kl_df['Longitude'], kl_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_kl)  
    
map_kl

In [128]:
# save the map as HTML file
map_kl.save('map_kl.html')

## Get FourSquare Credentials

In [129]:

# define Foursquare Credentials and Version
CLIENT_ID = 'U5K2KSNL1TWAUWYI01BISUVJNY0WYQXK3UZCM5ANATFTNPBC' # your Foursquare ID
CLIENT_SECRET = 'JULMAXF02HQ2OOURGR10AGPT0QIWSEON1RZUBOAWLPU5K0VK' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: U5K2KSNL1TWAUWYI01BISUVJNY0WYQXK3UZCM5ANATFTNPBC
CLIENT_SECRET:JULMAXF02HQ2OOURGR10AGPT0QIWSEON1RZUBOAWLPU5K0VK


### create the API request URL

In [130]:
radius = 3000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(kl_df['Latitude'], kl_df['Longitude'], kl_df['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [131]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(1327, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Alangad,10.12222,76.31579,@veliyathnadu rivers!,10.13389,76.323802,Bus Station
1,Alangad,10.12222,76.31579,Annalakshmi,10.122753,76.340661,Indian Restaurant
2,Alangad,10.12222,76.31579,"Desam, Aluva",10.129927,76.341062,Market
3,Alangad,10.12222,76.31579,Quality Bakers,10.119877,76.34273,Bakery
4,Angamaly,10.20366,76.38268,Carnival Cinemas,10.195147,76.386157,Multiplex


#### Count in each Venue Category

In [133]:
venues_df['VenueCategory'].value_counts()

Indian Restaurant                  179
Café                                90
Hotel                               81
Fast Food Restaurant                57
Restaurant                          54
Bakery                              52
Chinese Restaurant                  47
Vegetarian / Vegan Restaurant       43
Ice Cream Shop                      31
Clothing Store                      28
Asian Restaurant                    26
Shopping Mall                       22
Park                                20
Pizza Place                         19
Middle Eastern Restaurant           19
Hotel Bar                           19
Burger Joint                        19
Coffee Shop                         18
Juice Bar                           18
Fried Chicken Joint                 17
BBQ Joint                           16
Department Store                    16
Seafood Restaurant                  16
Multiplex                           16
Bar                                 15
Motorcycle Shop          

In [134]:
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alangad,4,4,4,4,4,4
Angamaly,7,7,7,7,7,7
Aroor,1,1,1,1,1,1
Chellanam,1,1,1,1,1,1
Chendamangalam,5,5,5,5,5,5
"Chengamanad, Ernakulam district",9,9,9,9,9,9
Cheranallur,2,2,2,2,2,2
Chilavannoor,89,89,89,89,89,89
Choornikkara,5,5,5,5,5,5
Chottanikkara,4,4,4,4,4,4


In [135]:

print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 120 uniques categories.


In [136]:
# print out the list of categories
venues_df['VenueCategory'].unique()[:50]

array(['Bus Station', 'Indian Restaurant', 'Market', 'Bakery',
       'Multiplex', 'Train Station', 'Hotel', 'Restaurant', 'Bar',
       'Fish Market', 'Boat or Ferry', 'Kerala Restaurant',
       'Asian Restaurant', 'Comfort Food Restaurant',
       'Fast Food Restaurant', 'Department Store', 'Movie Theater',
       'Café', 'Thai Restaurant', 'French Restaurant', 'Stadium',
       'Nightclub', 'Athletics & Sports', 'Motorcycle Shop', 'Donut Shop',
       'Ice Cream Shop', 'Sandwich Place', 'Gastropub', 'Park',
       'Gym / Fitness Center', 'Chinese Restaurant', 'Pizza Place',
       'Burger Joint', 'Art Gallery', 'Seafood Restaurant',
       'Middle Eastern Restaurant', 'Juice Bar', 'Garden',
       'Vegetarian / Vegan Restaurant', 'Resort', 'Electronics Store',
       'Multicuisine Indian Restaurant', 'Jewelry Store', 'Coffee Shop',
       'Diner', 'Liquor Store', 'South Indian Restaurant',
       'Soccer Stadium', 'Dessert Shop', 'BBQ Joint'], dtype=object)

In [137]:
# check if the results contain "Café"
"Café" in venues_df['VenueCategory'].unique()

True

#### one hot encoding

In [138]:
# one hot encoding
kl_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
kl_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [kl_onehot.columns[-1]] + list(kl_onehot.columns[:-1])
kl_onehot = kl_onehot[fixed_columns]

print(kl_onehot.shape)
kl_onehot.head()


(1327, 121)


Unnamed: 0,Neighborhoods,Accessories Store,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Arcade,Arepa Restaurant,Art Gallery,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auditorium,BBQ Joint,Bakery,Bar,Beach,Boat or Ferry,Bookstore,Burger Joint,Bus Line,Bus Station,Café,Cajun / Creole Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Department Store,Dessert Shop,Dhaba,Diner,Donut Shop,Duty-free Shop,Electronics Store,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Fishing Spot,Flea Market,Food,Food Court,French Restaurant,Fried Chicken Joint,Garden,Gastropub,Golf Course,Gym,Gym / Fitness Center,Harbor / Marina,Historic Site,History Museum,Hookah Bar,Hotel,Hotel Bar,Hotel Pool,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kerala Restaurant,Light Rail Station,Lighthouse,Liquor Store,Market,Mediterranean Restaurant,Men's Store,Metro Station,Middle Eastern Restaurant,Motorcycle Shop,Movie Theater,Moving Target,Multicuisine Indian Restaurant,Multiplex,Museum,Nightclub,North Indian Restaurant,Park,Performing Arts Venue,Pizza Place,Playground,Plaza,Pool,Pool Hall,Portuguese Restaurant,Punjabi Restaurant,Recreation Center,Resort,Restaurant,River,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Smoke Shop,Snack Place,Soccer Stadium,Soup Place,South Indian Restaurant,Southern / Soul Food Restaurant,Sporting Goods Shop,Sports Bar,Stadium,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Train Station,Vegetarian / Vegan Restaurant,Warehouse Store,Water Park
0,Alangad,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Alangad,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Alangad,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Alangad,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Angamaly,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [139]:
kl_grouped = kl_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(kl_grouped.shape)
kl_grouped

(45, 121)


Unnamed: 0,Neighborhoods,Accessories Store,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Arcade,Arepa Restaurant,Art Gallery,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Auditorium,BBQ Joint,Bakery,Bar,Beach,Boat or Ferry,Bookstore,Burger Joint,Bus Line,Bus Station,Café,Cajun / Creole Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Department Store,Dessert Shop,Dhaba,Diner,Donut Shop,Duty-free Shop,Electronics Store,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Fishing Spot,Flea Market,Food,Food Court,French Restaurant,Fried Chicken Joint,Garden,Gastropub,Golf Course,Gym,Gym / Fitness Center,Harbor / Marina,Historic Site,History Museum,Hookah Bar,Hotel,Hotel Bar,Hotel Pool,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kerala Restaurant,Light Rail Station,Lighthouse,Liquor Store,Market,Mediterranean Restaurant,Men's Store,Metro Station,Middle Eastern Restaurant,Motorcycle Shop,Movie Theater,Moving Target,Multicuisine Indian Restaurant,Multiplex,Museum,Nightclub,North Indian Restaurant,Park,Performing Arts Venue,Pizza Place,Playground,Plaza,Pool,Pool Hall,Portuguese Restaurant,Punjabi Restaurant,Recreation Center,Resort,Restaurant,River,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Smoke Shop,Snack Place,Soccer Stadium,Soup Place,South Indian Restaurant,Southern / Soul Food Restaurant,Sporting Goods Shop,Sports Bar,Stadium,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Train Station,Vegetarian / Vegan Restaurant,Warehouse Store,Water Park
0,Alangad,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Angamaly,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0
2,Aroor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Chellanam,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Chendamangalam,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Chengamanad, Ernakulam district",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Cheranallur,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Chilavannoor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011236,0.0,0.011236,0.011236,0.0,0.011236,0.05618,0.011236,0.0,0.0,0.0,0.011236,0.0,0.0,0.067416,0.0,0.033708,0.011236,0.022472,0.011236,0.0,0.0,0.0,0.011236,0.0,0.011236,0.022472,0.0,0.011236,0.033708,0.0,0.0,0.0,0.0,0.0,0.0,0.011236,0.0,0.011236,0.011236,0.0,0.0,0.011236,0.0,0.0,0.0,0.0,0.05618,0.0,0.0,0.033708,0.157303,0.0,0.0,0.0,0.0,0.011236,0.011236,0.0,0.0,0.0,0.011236,0.0,0.0,0.0,0.0,0.011236,0.033708,0.0,0.0,0.011236,0.011236,0.0,0.022472,0.0,0.022472,0.0,0.033708,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011236,0.05618,0.011236,0.022472,0.0,0.011236,0.0,0.0,0.0,0.0,0.011236,0.0,0.011236,0.0,0.0,0.0,0.011236,0.0,0.0,0.011236,0.0,0.0,0.0,0.011236,0.0,0.0
8,Choornikkara,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Chottanikkara,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### No: of Groups

In [140]:
len(kl_grouped[kl_grouped["Café"] > 0])

16

In [143]:
kl_caf = kl_grouped[["Neighborhoods","Café"]]

### KMEANS Clustering Implementation

In [152]:
# set number of clusters
kclusters = 3

kl_clustering = kl_caf.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kl_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 2, 1, 1])

#### create a new dataframe

In [153]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
kl_merged = kl_caf.copy()

# add clustering labels
kl_merged["Cluster Labels"] = kmeans.labels_

In [154]:

kl_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
kl_merged.head()

Unnamed: 0,Neighborhood,Café,Cluster Labels
0,Alangad,0.0,1
1,Angamaly,0.0,1
2,Aroor,0.0,1
3,Chellanam,0.0,1
4,Chendamangalam,0.0,1


In [155]:
kl_merged = kl_merged.join(kl_df.set_index("Neighborhood"), on="Neighborhood")

print(kl_merged.shape)
kl_merged.head() # check the last columns!

(45, 5)


Unnamed: 0,Neighborhood,Café,Cluster Labels,Latitude,Longitude
0,Alangad,0.0,1,10.12222,76.31579
1,Angamaly,0.0,1,10.20366,76.38268
2,Aroor,0.0,1,10.17471,76.31031
3,Chellanam,0.0,1,9.83526,76.27029
4,Chendamangalam,0.0,1,10.17292,76.23346


### results sorted by Cluster Labels

In [156]:
# sort the results by Cluster Labels
print(kl_merged.shape)
kl_merged.sort_values(["Cluster Labels"], inplace=True)
kl_merged

(45, 5)


Unnamed: 0,Neighborhood,Café,Cluster Labels,Latitude,Longitude
44,Willingdon Island,0.0875,0,9.94416,76.28356
17,Kochangadi,0.086207,0,9.9476,76.26079
16,Karanakodam,0.114943,0,9.988453,76.303426
15,Kakkanad,0.125,0,10.01687,76.34536
23,Mattancherry,0.095238,0,9.95206,76.2508
12,Fort Kochi,0.097561,0,9.95758,76.24239
31,Thammanam,0.1,0,9.98557,76.3113
28,Pachalam,0.129412,0,10.00347,76.28122
24,Mulavukad,0.0,1,9.99914,76.26241
27,Nettoor,0.0,1,9.92726,76.31181


### Creating Map

In [167]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(kl_merged['Latitude'], kl_merged['Longitude'], kl_merged['Neighborhood'], kl_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [158]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

### Cluster Labels- 0 Contains More number of Cafe (Red in color)

In [160]:
kl_merged.loc[kl_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Café,Cluster Labels,Latitude,Longitude
44,Willingdon Island,0.0875,0,9.94416,76.28356
17,Kochangadi,0.086207,0,9.9476,76.26079
16,Karanakodam,0.114943,0,9.988453,76.303426
15,Kakkanad,0.125,0,10.01687,76.34536
23,Mattancherry,0.095238,0,9.95206,76.2508
12,Fort Kochi,0.097561,0,9.95758,76.24239
31,Thammanam,0.1,0,9.98557,76.3113
28,Pachalam,0.129412,0,10.00347,76.28122


### Cluster Labels- 1 Contains very less number/No of Cafe (Violet in color)

In [161]:
kl_merged.loc[kl_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Café,Cluster Labels,Latitude,Longitude
24,Mulavukad,0.0,1,9.99914,76.26241
27,Nettoor,0.0,1,9.92726,76.31181
29,Palluruthy,0.0,1,9.91642,76.27567
30,Pathalam,0.0,1,10.07817,76.31857
33,Thiruvankulam,0.0,1,9.94635,76.36746
36,Thrippunithura,0.0,1,9.94111,76.34698
37,Twenty20 Kizhakkambalam,0.0,1,10.04626,76.40411
38,Vaduthala,0.0,1,10.01825,76.27586
39,Vallarpadam,0.0,1,9.99789,76.24981
40,Varappuzha,0.0,1,10.08261,76.27041


### Cluster Labels- 2 Contains moderate number of Cafe (Blue in color)

In [162]:
kl_merged.loc[kl_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Café,Cluster Labels,Latitude,Longitude
7,Chilavannoor,0.067416,2,9.96118,76.30659
34,Thrikkakkara,0.07,2,10.01736,76.31637
35,Thrikkakkara South,0.047619,2,10.03324,76.32519
26,Nedumbassery,0.038462,2,10.15669,76.3778
25,Mundamveli,0.076923,2,9.9307,76.2532
41,Vazhakkala,0.06,2,10.01783,76.32908
42,Vennala,0.070588,2,9.99538,76.32243
32,Thevara,0.043478,2,9.94209,76.29839
