In [None]:
# IBM Applied Data Science Capstone // Week 4 Part 1 A\n
### IBM Data Science Specialization \n
#### by Chan Jang

## Introduction
### A Fortune 500 company would like to set up a second headquarters in New York City to compare to their current location in Toronto, Canada
### The senior staff would like to analyze the two locations to validate if a relationship exists between the two cities
### Location data will be accessed through Foursquare API 
### We will collect the top venues around each location through the k-means clustering algorithm
### From there, we will compare and contrast the two locations to gain insight into the neighborhood demographics

## Segmenting and Clustering Neighborhoods in New York

In [230]:
#library for vectorized data
import numpy as np

#library for data analysis
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows',None)

#library for json files
import json

#convert address to longitude and latitude
#!pip install geopy
import geopy
from geopy.geocoders import Nominatim

#library for requests
import requests

#!pip install responses
import responses
from pandas.io.json import json_normalize

#library for graphing
import matplotlib.cm as cm
import matplotlib.colors as colors

#associated plotting modules
import seaborn as sns
#!pip install cufflinks
import cufflinks as cf
import plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff

#import k-means 
#!pip install scikit-learn
from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder

#library for maps
#!pip install folium
import folium
#!pip install foursquare
import foursquare as fs

#html data scraping
from bs4 import BeautifulSoup
import wikipedia as wp

In [128]:
### Toronto neighborhood data via webscrpaing

In [129]:
#Obtain Postal Code, Borough, and Neighborhood information from Wikipedia
table = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', header = 0)

#Obtain the first table
df_toronto = table[0]
df_toronto.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [130]:
#Organize the data

#Use the Notebook to build the code to scrape the following Wikipedia page, https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M, in order to obtain the data that is in the table of postal codes and to transform the data into a pandas dataframe
html = wp.page("List of postal codes of Canada: M").html().encode("UTF-8")
df = pd.read_html(html, header = 0)[0]

#Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
df = df[df.Borough != 'Not assigned']

#More than one neighborhood can exist in one postal code area. For example, in the table on the Wikipedia page, you will notice that M5A is listed twice and has two neighborhoods: Harbourfront and Regent Park. These two rows will be combined into one row with the neighborhoods separated with a comma as shown in row 11 in the above table.
df = df.groupby(['Postal code','Borough'])['Neighborhood'].apply(list).apply(lambda x:', '.join(x)).to_frame().reset_index()

#If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
for index, row in df.iterrows():
    if row['Neighborhood'] == 'Not assigned':
        row['Neighborhood'] = row['Borough']
        
df.rename(columns = {"Postal code": "PostalCode"}, inplace=True)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,Malvern / Rouge
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,Scarborough,Guildwood / Morningside / West Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [131]:
### Find longitude and latitude data

In [132]:
latlong = pd.read_csv("http://cocl.us/Geospatial_data")
latlong.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [133]:
#Join the two tables together
latlong.rename(columns = {"Postal Code": "PostalCode"}, inplace=True)

df.set_index("PostalCode")
latlong.set_index("PostalCode")
neighbor=pd.merge(df, latlong)
neighbor.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [134]:
print('Toronto has {} boroughs and {} neighborhoods.'.format(
        len(neighbor['Borough'].unique()),
        neighbor.shape[0]
    )
)

Toronto has 10 boroughs and 103 neighborhoods.


In [135]:
address = 'Toronto, CA'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The coordinates for Toronto, CA are {}, {}.'.format(latitude, longitude))


Using Nominatim with the default "geopy/1.21.0" `user_agent` is strongly discouraged, as it violates Nominatim's ToS https://operations.osmfoundation.org/policies/nominatim/ and may possibly cause 403 and 429 HTTP errors. Please specify a custom `user_agent` with `Nominatim(user_agent="my-application")` or by overriding the default `user_agent`: `geopy.geocoders.options.default_user_agent = "my-application"`. In geopy 2.0 this will become an exception.



The coordinates for Toronto, CA are 43.6534817, -79.3839347.


In [136]:
### Visualize the neighborhoods of Toronto

In [137]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighbor['Latitude'], neighbor['Longitude'], neighbor['Borough'], neighbor['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [138]:
#Using Folium, let us dive deeper into the neighborhood location data - Bayview Village

In [139]:
northyork_data = neighbor[neighbor['Borough'] == 'North York'].reset_index(drop=True)
northyork_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M2H,North York,Hillcrest Village,43.803762,-79.363452
1,M2J,North York,Fairview / Henry Farm / Oriole,43.778517,-79.346556
2,M2K,North York,Bayview Village,43.786947,-79.385975
3,M2L,North York,York Mills / Silver Hills,43.75749,-79.374714
4,M2M,North York,Willowdale / Newtonbrook,43.789053,-79.408493


In [140]:
northyork_data.shape

(24, 5)

In [141]:
#### Now let us get the coorindates of North York

In [142]:
address = 'North York, Toronto'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of North York, CA are {}, {}.'.format(latitude, longitude))


Using Nominatim with the default "geopy/1.21.0" `user_agent` is strongly discouraged, as it violates Nominatim's ToS https://operations.osmfoundation.org/policies/nominatim/ and may possibly cause 403 and 429 HTTP errors. Please specify a custom `user_agent` with `Nominatim(user_agent="my-application")` or by overriding the default `user_agent`: `geopy.geocoders.options.default_user_agent = "my-application"`. In geopy 2.0 this will become an exception.



The geograpical coordinate of North York, CA are 43.7543263, -79.44911696639593.


In [143]:
# create map of North York using latitude and longitude values
map_northyork = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(northyork_data['Latitude'], northyork_data['Longitude'], northyork_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_scarborough)  
    
map_northyork

In [144]:
### Explore venues in North York, Toronto using Foursqaure API

In [145]:
#### Validate Foursquare credentials

In [413]:
CLIENT_ID = 'NCRIZQSSMOIYVAI10YSPAA5IU2S1P3ZLXKLZZVIYQEVBL0FP'
CLIENT_SECRET = 'WKTI53HHOH5JN1PQO0KRXZ0NYKVEITIF4UEUTM5SZG1ECNHC'
VERSION = '20180605'

fs = foursquare.Foursquare(client_id=['CLIENT_ID'], client_secret=['CLIENT_SECRET'])

In [199]:
#### Write a function to explore neighborhoods

In [222]:
RADIUS = 500
def venues_nearby(latitude, longitude, category):
    results = venues.search(
        params = {
            'query': category,
            'll': '{},{}'.format(latitude, longitude),
            'radius':RADIUS,
            'categoryID': category
        }
    )
    df2 = json_normalize(results['venues'])
    cols = ['Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category']
    df2.columns = cols
    print('{} "{}" venues are found within {}m of location'.format(len(df2), category, RADIUS))
    return df2

In [223]:
venues_nearby = getNearbyVenues(names=northyork_data['Neighborhood'], latitudes=northyork_data['Latitude'], longitudes=northyork_data['Longitude'])

Found 240 venues in 24 neighborhoods.


In [224]:
print(venues_nearby.shape)
venues_nearby.head()

(240, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Hillcrest Village,43.803762,-79.363452,Eagle's Nest Golf Club,43.805455,-79.364186,Golf Course
1,Hillcrest Village,43.803762,-79.363452,New York Fries,43.803664,-79.363905,Fast Food Restaurant
2,Hillcrest Village,43.803762,-79.363452,AY Jackson Pool,43.804515,-79.366138,Pool
3,Hillcrest Village,43.803762,-79.363452,Villa Madina,43.801685,-79.363938,Mediterranean Restaurant
4,Hillcrest Village,43.803762,-79.363452,Duncan Creek Park,43.805539,-79.360695,Dog Run


In [None]:
#Venue types in North York

In [203]:
venues_nearby.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bathurst Manor / Wilson Heights / Downsview North,19,19,19,19,19,19
Bayview Village,4,4,4,4,4,4
Bedford Park / Lawrence Manor East,25,25,25,25,25,25
Don Mills,24,24,24,24,24,24
Downsview,14,14,14,14,14,14
Fairview / Henry Farm / Oriole,68,68,68,68,68,68
Glencairn,4,4,4,4,4,4
Hillcrest Village,5,5,5,5,5,5
Humber Summit,1,1,1,1,1,1
Humberlea / Emery,1,1,1,1,1,1


In [None]:
### Dive deeper into each neighborhood in North York

In [232]:
print('There are {} distinct venues in {} categories.'.format(
    len(venues_nearby['Venue'].unique()),len(venues_nearby['Venue Category'].unique())))

There are 198 distinct venues in 101 categories.


In [386]:
#one hot encoding
northyork_onehot = pd.get_dummies(venues_nearby[['Venue Category']], prefix="", prefix_sep="")

#add the neighborhood column into the dataframe
northyork_onehot['Neighborhood'] = venues_nearby['Neighborhood']

#make it the primary column
fixed_columns = [northyork_onehot.columns[-1]] + list(northyork_onehot.columns[:-1])
northyork_onehot = northyork_onehot[fixed_columns]

northyork_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beer Store,Bike Shop,Boutique,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Station,Butcher,Café,Caribbean Restaurant,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Electronics Store,Empanada Restaurant,Event Space,Fast Food Restaurant,Food & Drink Shop,Food Court,Food Truck,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gas Station,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hockey Arena,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Kids Store,Liquor Store,Lounge,Luggage Store,Massage Studio,Mediterranean Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Park,Pet Store,Pharmacy,Pizza Place,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Restaurant,Salon / Barbershop,Sandwich Place,Shoe Store,Shopping Mall,Spa,Sporting Goods Shop,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,Hillcrest Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Hillcrest Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Hillcrest Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Hillcrest Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Hillcrest Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
#### Grouped venue types

In [387]:
northyork_grouped = northyork_onehot.groupby('Neighborhood').mean().reset_index()
northyork_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beer Store,Bike Shop,Boutique,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Station,Butcher,Café,Caribbean Restaurant,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Electronics Store,Empanada Restaurant,Event Space,Fast Food Restaurant,Food & Drink Shop,Food Court,Food Truck,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gas Station,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hockey Arena,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Kids Store,Liquor Store,Lounge,Luggage Store,Massage Studio,Mediterranean Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Park,Pet Store,Pharmacy,Pizza Place,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Restaurant,Salon / Barbershop,Sandwich Place,Shoe Store,Shopping Mall,Spa,Sporting Goods Shop,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,Bathurst Manor / Wilson Heights / Downsview North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.052632,0.052632,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.052632,0.0,0.052632,0.0,0.0,0.0,0.052632,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bedford Park / Lawrence Manor East,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.08,0.04,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.08,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.08,0.0,0.0,0.0,0.04,0.0,0.08,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.0,0.0
3,Don Mills,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.041667,0.041667,0.0,0.041667,0.083333,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.041667,0.0,0.0,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Downsview,0.0,0.071429,0.0,0.0,0.0,0.071429,0.0,0.071429,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.214286,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Fairview / Henry Farm / Oriole,0.0,0.0,0.014706,0.0,0.014706,0.0,0.029412,0.029412,0.014706,0.014706,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.014706,0.014706,0.0,0.0,0.0,0.0,0.014706,0.117647,0.073529,0.0,0.0,0.0,0.014706,0.044118,0.014706,0.014706,0.0,0.0,0.0,0.014706,0.0,0.014706,0.0,0.0,0.058824,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044118,0.014706,0.014706,0.014706,0.0,0.014706,0.0,0.0,0.0,0.0,0.014706,0.0,0.0,0.014706,0.0,0.0,0.0,0.0,0.0,0.0,0.044118,0.014706,0.0,0.029412,0.014706,0.014706,0.029412,0.0,0.0,0.014706,0.0,0.014706,0.0,0.014706,0.029412,0.014706,0.0,0.029412
6,Glencairn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Hillcrest Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Humber Summit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Humberlea / Emery,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
#### Top 10 venues for each neighborhood in North York

In [388]:
num_top_venues = 10
for x in northyork_grouped['Neighborhood']:
    print("----"+x+"----")
    temp = northyork_grouped[northyork_grouped['Neighborhood'] == x].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq':2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor / Wilson Heights / Downsview North----
                       venue  freq
0                Coffee Shop  0.11
1                       Bank  0.11
2                Pizza Place  0.05
3                Supermarket  0.05
4                      Diner  0.05
5                   Pharmacy  0.05
6  Middle Eastern Restaurant  0.05
7                 Restaurant  0.05
8             Sandwich Place  0.05
9                Bridal Shop  0.05


----Bayview Village----
                       venue  freq
0         Chinese Restaurant  0.25
1                       Bank  0.25
2                       Café  0.25
3        Japanese Restaurant  0.25
4          Accessories Store  0.00
5               Liquor Store  0.00
6                       Park  0.00
7              Movie Theater  0.00
8         Miscellaneous Shop  0.00
9  Middle Eastern Restaurant  0.00


----Bedford Park / Lawrence Manor East----
                 venue  freq
0       Sandwich Place  0.08
1          Pizza Place  0.08
2          Cof

In [286]:
#### Then lets put this into a pandas dataframe

In [389]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [390]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

#organize columns to show top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

#showcase this in a new dataframe
venues_organized = pd.DataFrame(columns=columns)
venues_organized['Neighborhood'] = northyork_grouped['Neighborhood']

for ind in np.arange(northyork_grouped.shape[0]):
    venues_organized.iloc[ind, 1:] = return_most_common_venues(northyork_grouped.iloc[ind,:], num_top_venues)

venues_organized

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bathurst Manor / Wilson Heights / Downsview North,Coffee Shop,Bank,Shopping Mall,Middle Eastern Restaurant,Deli / Bodega,Pharmacy,Pizza Place,Ice Cream Shop,Bridal Shop,Diner
1,Bayview Village,Chinese Restaurant,Café,Bank,Japanese Restaurant,Women's Store,Distribution Center,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
2,Bedford Park / Lawrence Manor East,Sandwich Place,Italian Restaurant,Pizza Place,Restaurant,Coffee Shop,Café,Comfort Food Restaurant,Juice Bar,Butcher,Pharmacy
3,Don Mills,Beer Store,Coffee Shop,Japanese Restaurant,Gym,Restaurant,Chinese Restaurant,Clothing Store,Caribbean Restaurant,Café,Italian Restaurant
4,Downsview,Grocery Store,Park,Airport,Food Truck,Athletics & Sports,Bank,Discount Store,Baseball Field,Gym / Fitness Center,Liquor Store
5,Fairview / Henry Farm / Oriole,Clothing Store,Coffee Shop,Fast Food Restaurant,Restaurant,Japanese Restaurant,Cosmetics Shop,Sporting Goods Shop,Bank,Bakery,Shoe Store
6,Glencairn,Park,Pizza Place,Pub,Japanese Restaurant,Dim Sum Restaurant,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store
7,Hillcrest Village,Golf Course,Mediterranean Restaurant,Pool,Fast Food Restaurant,Dog Run,Diner,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store
8,Humber Summit,Empanada Restaurant,Women's Store,Distribution Center,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
9,Humberlea / Emery,Baseball Field,Women's Store,Dog Run,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant


In [396]:
northyork_grouped['Total'] = northyork_grouped .sum(axis=1)

In [290]:
### Cluster the neighborhoods in North York using k-means

In [391]:
# set number of clusters
kclusters = 3

north_york_clustering = northyork_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=2).fit(north_york_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 0, 2], dtype=int32)

In [395]:
north_york_results = pd.DataFrame(kmeans.cluster_centers_)
north_york_results.columns = north_york_clustering.columns
north_york_results.index = ['cluster0','cluster1','cluster2']
north_york_results['Total Sum'] = north_york_results.sum(axis = 1)
north_york_results

Unnamed: 0,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beer Store,Bike Shop,Boutique,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Station,Butcher,Café,Caribbean Restaurant,Chinese Restaurant,Chocolate Shop,Clothing Store,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Electronics Store,Empanada Restaurant,Event Space,Fast Food Restaurant,Food & Drink Shop,Food Court,Food Truck,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gas Station,Gift Shop,Golf Course,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hockey Arena,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Kids Store,Liquor Store,Lounge,Luggage Store,Massage Studio,Mediterranean Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Park,Pet Store,Pharmacy,Pizza Place,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Restaurant,Salon / Barbershop,Sandwich Place,Shoe Store,Shopping Mall,Spa,Sporting Goods Shop,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store,Total Sum
cluster0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
cluster1,0.010256,0.004762,0.003647,0.001626,0.003758,0.00989,0.018627,0.0487,0.012092,0.005742,0.016667,0.005556,0.002778,0.006109,0.003509,0.001626,0.00098,0.00098,0.00098,0.004293,0.025363,0.013889,0.019444,0.00098,0.020877,0.057259,0.002667,0.002778,0.016667,0.017647,0.005608,0.004489,0.00098,0.002778,0.003509,0.010792,0.00098,0.013333,0.002606,-6.938894e-18,0.005128,0.021548,0.033333,0.001961,0.004762,0.003509,0.003509,0.026496,0.003509,0.00098,0.013333,0.002667,0.018578,0.005556,0.00754,0.013333,0.001626,0.001626,0.005135,0.002667,0.001626,0.013333,0.008111,0.043456,0.005273,0.00098,0.008409,0.001626,0.00098,0.011111,0.013333,0.005135,0.016239,0.002606,0.109524,0.001626,0.008782,0.04372,0.001626,0.013333,0.013333,0.019333,0.004878,0.020591,0.00098,0.014872,0.001961,0.010877,0.00098,0.004739,0.001626,0.006287,0.00098,0.009427,0.00098,0.002667,0.00098,0.001961,0.00098,0.006754,0.001961,1.0
cluster2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [None]:
### Dataframe with Neighborhood,Cluster Number and Total Sum

In [397]:
north_york_results_merged = pd.DataFrame(northyork_grouped['Neighborhood'])

north_york_results_merged['Total'] = northyork_grouped['Total']
north_york_results_merged = north_york_results_merged.assign(Cluster_Labels = kmeans.labels_)

In [399]:
print(north_york_results_merged.shape)
north_york_results_merged

(17, 3)


Unnamed: 0,Neighborhood,Total,Cluster_Labels
0,Bathurst Manor / Wilson Heights / Downsview North,1.0,1
1,Bayview Village,1.0,1
2,Bedford Park / Lawrence Manor East,1.0,1
3,Don Mills,1.0,1
4,Downsview,1.0,1
5,Fairview / Henry Farm / Oriole,1.0,1
6,Glencairn,1.0,1
7,Hillcrest Village,1.0,1
8,Humber Summit,1.0,0
9,Humberlea / Emery,1.0,2


In [400]:
#add coordinates to above table
northyork_merged = northyork_merged.join(north_york_results_merged.set_index('Neighborhood'), on='Neighborhood')

print(northyork_merged.shape)
northyork_merged.head(10) # check the last columns!

(22, 17)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Total,Cluster_Labels
0,M2H,North York,Hillcrest Village,43.803762,-79.363452,Golf Course,Mediterranean Restaurant,Pool,Fast Food Restaurant,Dog Run,Diner,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,1.0,1
1,M2J,North York,Fairview / Henry Farm / Oriole,43.778517,-79.346556,Clothing Store,Coffee Shop,Fast Food Restaurant,Restaurant,Japanese Restaurant,Cosmetics Shop,Sporting Goods Shop,Bank,Bakery,Shoe Store,1.0,1
2,M2K,North York,Bayview Village,43.786947,-79.385975,Chinese Restaurant,Café,Bank,Japanese Restaurant,Women's Store,Distribution Center,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,1.0,1
5,M2N,North York,Willowdale,43.77012,-79.408493,Coffee Shop,Pizza Place,Ramen Restaurant,Sandwich Place,Sushi Restaurant,Café,Restaurant,Discount Store,Bubble Tea Shop,Pet Store,1.0,1
6,M2P,North York,York Mills West,43.752758,-79.400049,Park,Convenience Store,Bank,Women's Store,Distribution Center,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Cosmetics Shop,Deli / Bodega,1.0,1
7,M2R,North York,Willowdale,43.782736,-79.442259,Coffee Shop,Pizza Place,Ramen Restaurant,Sandwich Place,Sushi Restaurant,Café,Restaurant,Discount Store,Bubble Tea Shop,Pet Store,1.0,1
8,M3A,North York,Parkwoods,43.753259,-79.329656,Food & Drink Shop,Park,Women's Store,Discount Store,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,1.0,1
9,M3B,North York,Don Mills,43.745906,-79.352188,Beer Store,Coffee Shop,Japanese Restaurant,Gym,Restaurant,Chinese Restaurant,Clothing Store,Caribbean Restaurant,Café,Italian Restaurant,1.0,1
10,M3C,North York,Don Mills,43.7259,-79.340923,Beer Store,Coffee Shop,Japanese Restaurant,Gym,Restaurant,Chinese Restaurant,Clothing Store,Caribbean Restaurant,Café,Italian Restaurant,1.0,1
11,M3H,North York,Bathurst Manor / Wilson Heights / Downsview North,43.754328,-79.442259,Coffee Shop,Bank,Shopping Mall,Middle Eastern Restaurant,Deli / Bodega,Pharmacy,Pizza Place,Ice Cream Shop,Bridal Shop,Diner,1.0,1


In [None]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(northyork_merged['Latitude'], northyork_merged['Longitude'], northyork_merged['Neighborhood'], northyork_merged['Cluster_Labels']):
    label = folium.Popup(str(poi) + 'Cluster' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=black[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [None]:
### Now examine each cluster in North York

In [None]:
#### North York clusters 0,1,2

In [407]:
northyork_cluster_0 = northyork_merged.loc[northyork_merged['Cluster_Labels'] == 0, northyork_merged.columns[[1] + list(range(4, northyork_merged.shape[1]))]]

northyork_cluster_1 = northyork_merged.loc[northyork_merged['Cluster_Labels'] == 1, northyork_merged.columns[[1] + list(range(4, northyork_merged.shape[1]))]]

northyork_cluster_2 = northyork_merged.loc[northyork_merged['Cluster_Labels'] == 2, northyork_merged.columns[[1] + list(range(4, northyork_merged.shape[1]))]]


In [408]:
northyork_cluster_0

Unnamed: 0,Borough,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Total,Cluster_Labels
22,North York,-79.565963,Empanada Restaurant,Women's Store,Distribution Center,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,1.0,0


In [409]:
northyork_cluster_2

Unnamed: 0,Borough,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Total,Cluster_Labels
23,North York,-79.532242,Baseball Field,Women's Store,Dog Run,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dim Sum Restaurant,1.0,2


In [410]:
northyork_cluster_1

Unnamed: 0,Borough,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Total,Cluster_Labels
0,North York,-79.363452,Golf Course,Mediterranean Restaurant,Pool,Fast Food Restaurant,Dog Run,Diner,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,1.0,1
1,North York,-79.346556,Clothing Store,Coffee Shop,Fast Food Restaurant,Restaurant,Japanese Restaurant,Cosmetics Shop,Sporting Goods Shop,Bank,Bakery,Shoe Store,1.0,1
2,North York,-79.385975,Chinese Restaurant,Café,Bank,Japanese Restaurant,Women's Store,Distribution Center,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,1.0,1
5,North York,-79.408493,Coffee Shop,Pizza Place,Ramen Restaurant,Sandwich Place,Sushi Restaurant,Café,Restaurant,Discount Store,Bubble Tea Shop,Pet Store,1.0,1
6,North York,-79.400049,Park,Convenience Store,Bank,Women's Store,Distribution Center,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Cosmetics Shop,Deli / Bodega,1.0,1
7,North York,-79.442259,Coffee Shop,Pizza Place,Ramen Restaurant,Sandwich Place,Sushi Restaurant,Café,Restaurant,Discount Store,Bubble Tea Shop,Pet Store,1.0,1
8,North York,-79.329656,Food & Drink Shop,Park,Women's Store,Discount Store,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,1.0,1
9,North York,-79.352188,Beer Store,Coffee Shop,Japanese Restaurant,Gym,Restaurant,Chinese Restaurant,Clothing Store,Caribbean Restaurant,Café,Italian Restaurant,1.0,1
10,North York,-79.340923,Beer Store,Coffee Shop,Japanese Restaurant,Gym,Restaurant,Chinese Restaurant,Clothing Store,Caribbean Restaurant,Café,Italian Restaurant,1.0,1
11,North York,-79.442259,Coffee Shop,Bank,Shopping Mall,Middle Eastern Restaurant,Deli / Bodega,Pharmacy,Pizza Place,Ice Cream Shop,Bridal Shop,Diner,1.0,1


In [255]:
## Time to look at locations in New York, USA!
### Import New York data on boroughs and neighborhoods

In [30]:
#Using data from a previous module - https://ibm.box.com/shared/static/fbpwbovar7lf8p5sgddm06cgipa2rxpe.json
with open('nyu_2451_34572-geojson.json') as json_data:
    nyu_data = json.load(json_data)
print('New York data imported')

#Create empty pandas dataframe
new_york_neighborhoods = pd.DataFrame(columns=['Borough','Neighborhood','Latitude','Longitude'])

#Transfer json data into the pandas dataframe
for data in nyu_data['features']:
    borough = neighborhood = data['properties']['borough']
    neighborhood = data['properties']['name']
    neighborhood_coordinates = data['geometry']['coordinates']
    neighborhood_latitude = neighborhood_coordinates[1]
    neighborhood_longitude = neighborhood_coordinates[0]
    new_york_neighborhoods = new_york_neighborhoods.append({'Borough': borough,
                                                            'Neighborhood': neighborhood,
                                                            'Latitude': neighborhood_latitude,
                                                            'Longitude': neighborhood_longitude},
                                                           ignore_index=True)
print('New York City data converted to Pandas dataframe')

#Print file as csv
new_york_neighborhoods.to_csv('new_york_neighborhoods.csv', sep=',', encoding='utf-8')

new_york_neighborhoods.head()

New York data imported
New York City data converted to Pandas dataframe


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [25]:
new_york_neighborhoods.shape

(306, 4)

In [None]:
### Find the coordinates of New York City using Geolibrary

In [27]:
address = 'New York City, NY'

geolocator = Nominatim(user_agent = 'my-application')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The laitude and longitude of New York City are {}, {}'.format(latitude, longitude))

The laitude and longitude of New York City are 40.7127281, -74.0060152


In [32]:
#display a visual map of New York City using the coordinates
new_york_map = folium.Map(location=[latitude, longitude], zoom_start=10)

#add markers
for lat, lng, borough, neighborhood in zip(new_york_neighborhoods['Latitude'], new_york_neighborhoods['Longitude'], new_york_neighborhoods['Borough'], new_york_neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat ,lng],
    radius=5,
    popup=label,
    color='red',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.5,
    parse_html=False).add_to(new_york_map)

new_york_map

In [None]:
#In order simplify the map, let us segment and cluster the Bronx neighborhood area.

In [33]:
bronx_data = new_york_neighborhoods[new_york_neighborhoods['Borough'] == 'Bronx'].reset_index(drop=True)
bronx_data.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Bronx,Wakefield,40.894705,-73.847201
1,Bronx,Co-op City,40.874294,-73.829939
2,Bronx,Eastchester,40.887556,-73.827806
3,Bronx,Fieldston,40.895437,-73.905643
4,Bronx,Riverdale,40.890834,-73.912585


In [None]:
#Next we need the coordinates for Bronx, NY

In [420]:
address = 'Bronx, NY'

geolocator = Nominatim(user_agent = 'my-application')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The latitude and longitude of Bronx, NY are {}, {}.'.format(latitude, longitude))

The latitude and longitude of Bronx, NY are 40.8466508, -73.8785937.


In [421]:
### Going another level below the city level, let us analyze a neighborhood in the Bronx

In [422]:
bronx_data.loc[4, 'Neighborhood']

'Riverdale'

In [423]:
#Riverdale coordinates

riverdale_latitude = bronx_data.loc[4, 'Latitude']
riverdale_longitude = bronx_data.loc[4, 'Longitude']

neighborhood_name = bronx_data.loc[4, 'Neighborhood']
print('The latitude and longitude of {} are {}, {}.'.format(neighborhood_name,
                                                           riverdale_latitude,
                                                           riverdale_longitude))

The latitude and longitude of Riverdale are 40.890834493891305, -73.9125854610857.


In [424]:
### Let us find the Top 100 venues in Riverdale within a 500 meter radius
#### Validate Foursquare credentials

In [433]:
#### Function to explore venues in neighborhood

LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

url

'https://api.foursquare.com/v2/venues/explore?&client_id=NCRIZQSSMOIYVAI10YSPAA5IU2S1P3ZLXKLZZVIYQEVBL0FP&client_secret=WKTI53HHOH5JN1PQO0KRXZ0NYKVEITIF4UEUTM5SZG1ECNHC&v=20180605&ll=40.61731079252983,-74.08173992211962&radius=500&limit=100'

In [434]:
results = requests.get(url).json()

In [435]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [436]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,SUBWAY,Sandwich Place,40.618939,-74.082881
1,MTA Bus - Vanderbilt Av & Osgood Av (S76),Bus Stop,40.617809,-74.081111
2,MTA Bus - Targee St & Vanderbilt Av (S74/S76),Bus Stop,40.614856,-74.084598
3,MTA Bus - Tompkins Av & Vanderbilt Av (S52/S76...,Bus Stop,40.620052,-74.07718


In [437]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


In [438]:
### Analyze each neighborhood in the Bronx

In [439]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    print('Found {} venues in {} neighborhoods.'.format(nearby_venues.shape[0], len(venues_list)))
    
    return(nearby_venues)

In [440]:
bronx_venues = getNearbyVenues(names=bronx_data['Neighborhood'],
                                   latitudes=bronx_data['Latitude'],
                                   longitudes=bronx_data['Longitude'])

Found 1223 venues in 52 neighborhoods.


In [442]:
bronx_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Wakefield,40.894705,-73.847201,Lollipops Gelato,40.894123,-73.845892,Dessert Shop
1,Wakefield,40.894705,-73.847201,Rite Aid,40.896649,-73.844846,Pharmacy
2,Wakefield,40.894705,-73.847201,Carvel Ice Cream,40.890487,-73.848568,Ice Cream Shop
3,Wakefield,40.894705,-73.847201,Walgreens,40.896528,-73.8447,Pharmacy
4,Wakefield,40.894705,-73.847201,Dunkin',40.890459,-73.849089,Donut Shop


In [443]:
print(bronx_venues.shape)
bronx_venues.tail()

(1223, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
1218,Kingsbridge Heights,40.870392,-73.901523,Parada Tropical,40.872606,-73.905942,Food Truck
1219,Kingsbridge Heights,40.870392,-73.901523,Patie Food Cart,40.868198,-73.896826,Food Truck
1220,Kingsbridge Heights,40.870392,-73.901523,Caridad Spanish Restaurant,40.871832,-73.9067,Spanish Restaurant
1221,Kingsbridge Heights,40.870392,-73.901523,King's Farm,40.867376,-73.897498,Restaurant
1222,Kingsbridge Heights,40.870392,-73.901523,Lehman College Ballfield,40.871007,-73.895781,Baseball Field


In [444]:
#Venues per Neighborhood
bronx_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Allerton,31,31,31,31,31,31
Baychester,19,19,19,19,19,19
Bedford Park,35,35,35,35,35,35
Belmont,99,99,99,99,99,99
Bronxdale,15,15,15,15,15,15
Castle Hill,9,9,9,9,9,9
City Island,27,27,27,27,27,27
Claremont Village,18,18,18,18,18,18
Clason Point,9,9,9,9,9,9
Co-op City,16,16,16,16,16,16


In [445]:
print('There are {} distinct venues in {} categories.'.format(
    len(bronx_venues['Venue'].unique()),len(bronx_venues['Venue Category'].unique())))

There are 885 distinct venues in 169 categories.


In [446]:
# one hot encoding - same thing we did with North York in Toronto
bronx_onehot = pd.get_dummies(bronx_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
bronx_onehot['Neighborhood'] = bronx_venues['Neighborhood'] 


neighbor = bronx_onehot['Neighborhood']
bronx_onehot.drop(labels=['Neighborhood'], axis=1,inplace = True)
bronx_onehot.insert(0, 'Neighborhood', neighbor)

bronx_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,African Restaurant,American Restaurant,Arcade,Arepa Restaurant,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beer Bar,Board Shop,Boat or Ferry,Bookstore,Bowling Alley,Breakfast Spot,Brewery,Buffet,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Café,Candy Store,Caribbean Restaurant,Check Cashing Service,Cheese Shop,Chinese Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Distillery,Dive Bar,Doctor's Office,Donut Shop,Eastern European Restaurant,Electronics Store,Eye Doctor,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flower Shop,Food,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gas Station,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health & Beauty Service,Historic Site,History Museum,Hobby Shop,Home Service,Hookah Bar,Hotel,IT Services,Ice Cream Shop,Indian Restaurant,Indie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kids Store,Lake,Latin American Restaurant,Laundromat,Lawyer,Liquor Store,Lounge,Market,Martial Arts Dojo,Mattress Store,Medical School,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Music Venue,Nightclub,Optical Shop,Outdoor Sculpture,Outlet Store,Paella Restaurant,Paper / Office Supplies Store,Park,Performing Arts Venue,Peruvian Restaurant,Pet Store,Pharmacy,Piano Bar,Pizza Place,Platform,Playground,Plaza,Pool,Pub,Recreation Center,Rental Car Location,Restaurant,River,Salon / Barbershop,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Mall,Smoke Shop,Social Club,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Storage Facility,Supermarket,Supplement Shop,Sushi Restaurant,Tattoo Parlor,Tennis Court,Tennis Stadium,Thai Restaurant,Thrift / Vintage Store,Track,Trail,Train,Train Station,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Waste Facility,Wine Shop,Wings Joint,Women's Store
0,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Wakefield,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
#### Now let us group by each neighborhood in the Bronx

In [447]:
bronx_grouped = bronx_onehot.groupby('Neighborhood').mean().reset_index()
bronx_grouped

Unnamed: 0,Neighborhood,Accessories Store,African Restaurant,American Restaurant,Arcade,Arepa Restaurant,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Beer Bar,Board Shop,Boat or Ferry,Bookstore,Bowling Alley,Breakfast Spot,Brewery,Buffet,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Café,Candy Store,Caribbean Restaurant,Check Cashing Service,Cheese Shop,Chinese Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Distillery,Dive Bar,Doctor's Office,Donut Shop,Eastern European Restaurant,Electronics Store,Eye Doctor,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flower Shop,Food,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gas Station,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Harbor / Marina,Health & Beauty Service,Historic Site,History Museum,Hobby Shop,Home Service,Hookah Bar,Hotel,IT Services,Ice Cream Shop,Indian Restaurant,Indie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kids Store,Lake,Latin American Restaurant,Laundromat,Lawyer,Liquor Store,Lounge,Market,Martial Arts Dojo,Mattress Store,Medical School,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Music Venue,Nightclub,Optical Shop,Outdoor Sculpture,Outlet Store,Paella Restaurant,Paper / Office Supplies Store,Park,Performing Arts Venue,Peruvian Restaurant,Pet Store,Pharmacy,Piano Bar,Pizza Place,Platform,Playground,Plaza,Pool,Pub,Recreation Center,Rental Car Location,Restaurant,River,Salon / Barbershop,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Mall,Smoke Shop,Social Club,Soup Place,South American Restaurant,Southern / Soul Food Restaurant,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Storage Facility,Supermarket,Supplement Shop,Sushi Restaurant,Tattoo Parlor,Tennis Court,Tennis Stadium,Thai Restaurant,Thrift / Vintage Store,Track,Trail,Train,Train Station,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Waste Facility,Wine Shop,Wings Joint,Women's Store
0,Allerton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.064516,0.0,0.0,0.0,0.0,0.032258,0.0,0.064516,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064516,0.064516,0.032258,0.0,0.032258,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.032258,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.193548,0.0,0.032258,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.032258,0.0,0.0,0.0,0.064516,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Baychester,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.105263,0.0,0.052632,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.052632,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bedford Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.114286,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.114286,0.028571,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.028571,0.0,0.085714,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Belmont,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.050505,0.030303,0.020202,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.020202,0.0,0.0,0.0,0.010101,0.0,0.0,0.020202,0.0,0.0,0.010101,0.0,0.0,0.0,0.080808,0.010101,0.030303,0.010101,0.010101,0.0,0.0,0.0,0.030303,0.010101,0.010101,0.0,0.0,0.010101,0.0,0.020202,0.0,0.0,0.020202,0.0,0.0,0.010101,0.0,0.0,0.010101,0.0,0.010101,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.181818,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020202,0.0,0.010101,0.010101,0.0,0.0,0.0,0.0,0.040404,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.010101,0.010101,0.090909,0.0,0.0,0.010101,0.0,0.0,0.0,0.010101,0.010101,0.0,0.0,0.020202,0.0,0.0,0.0,0.020202,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010101,0.0
4,Bronxdale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.066667,0.066667,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Castle Hill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.111111,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.111111,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,City Island,0.0,0.0,0.037037,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.037037,0.037037,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.037037,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.074074,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.074074,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037037,0.0,0.0
7,Claremont Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.055556,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.111111,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Clason Point,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.444444,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Co-op City,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [450]:
#With the neighborhoods grouped, let us look at the top 5 venues in each neighborhood

num_top_venues = 5

for x in bronx_grouped['Neighborhood']:
    print("----"+x+"----")
    temp = bronx_grouped[bronx_grouped['Neighborhood'] == x].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Allerton----
                venue  freq
0         Pizza Place  0.19
1    Department Store  0.06
2       Deli / Bodega  0.06
3         Bus Station  0.06
4  Chinese Restaurant  0.06


----Baychester----
                  venue  freq
0            Donut Shop  0.11
1        Baseball Field  0.05
2        Discount Store  0.05
3    Mexican Restaurant  0.05
4  Gym / Fitness Center  0.05


----Bedford Park----
                venue  freq
0               Diner  0.11
1  Chinese Restaurant  0.11
2       Deli / Bodega  0.09
3         Pizza Place  0.09
4  Mexican Restaurant  0.09


----Belmont----
                venue  freq
0  Italian Restaurant  0.18
1         Pizza Place  0.09
2       Deli / Bodega  0.08
3              Bakery  0.05
4  Mexican Restaurant  0.04


----Bronxdale----
                venue  freq
0       Deli / Bodega  0.07
1                Bank  0.07
2         Supermarket  0.07
3      Breakfast Spot  0.07
4  Mexican Restaurant  0.07


----Castle Hill----
            venue  freq
0  

In [451]:
#Organize venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [454]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = bronx_grouped['Neighborhood']

for ind in np.arange(bronx_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(bronx_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Allerton,Pizza Place,Supermarket,Deli / Bodega,Department Store,Chinese Restaurant,Bus Station,Playground,Dessert Shop,Discount Store,Check Cashing Service
1,Baychester,Donut Shop,Convenience Store,Baseball Field,Pet Store,Discount Store,Sandwich Place,Electronics Store,Mexican Restaurant,Fast Food Restaurant,Men's Store
2,Bedford Park,Chinese Restaurant,Diner,Mexican Restaurant,Pizza Place,Deli / Bodega,Bus Station,Sandwich Place,Spanish Restaurant,Train Station,Grocery Store
3,Belmont,Italian Restaurant,Pizza Place,Deli / Bodega,Bakery,Mexican Restaurant,Grocery Store,Dessert Shop,Bank,Donut Shop,Sandwich Place
4,Bronxdale,Spanish Restaurant,Bank,Performing Arts Venue,Park,Paper / Office Supplies Store,Coffee Shop,Chinese Restaurant,Eastern European Restaurant,Mexican Restaurant,Breakfast Spot
5,Castle Hill,Pizza Place,Baseball Field,Cosmetics Shop,Deli / Bodega,Pharmacy,Diner,Park,Market,Bank,Fish & Chips Shop
6,City Island,Harbor / Marina,Seafood Restaurant,Thrift / Vintage Store,Ice Cream Shop,Baseball Field,Diner,Pizza Place,History Museum,Music Venue,Smoke Shop
7,Claremont Village,Chinese Restaurant,Park,Bakery,Grocery Store,Deli / Bodega,Flower Shop,Pizza Place,Bus Station,Caribbean Restaurant,Liquor Store
8,Clason Point,Park,South American Restaurant,Pool,Boat or Ferry,Grocery Store,Bus Stop,Women's Store,Donut Shop,Fast Food Restaurant,Farmers Market
9,Co-op City,Bus Station,Fast Food Restaurant,Ice Cream Shop,Bagel Shop,Pharmacy,Park,Restaurant,Discount Store,Optical Shop,Pizza Place


In [None]:
#### Time to cluster these venues using k-means

In [458]:
# set number of clusters
kclusters = 5

bronx_grouped_clustering = bronx_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=2).fit(bronx_grouped_clustering)

# check cluster labels generated for each row in the dataframe
#kmeans.labels_[0:10] 
kmeans.labels_

array([1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 1, 0, 1, 0, 2, 0, 0, 1, 0,
       1, 0, 1, 3, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0,
       0, 0, 1, 0, 0, 0, 0, 0], dtype=int32)

In [None]:
#### Now let us make a dataframe from this information

In [460]:
bronx_merged = bronx_data

# add clustering labels
bronx_merged['Cluster_Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
bronx_merged = bronx_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

bronx_merged.head() 

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bronx,Wakefield,40.894705,-73.847201,1,Pharmacy,Ice Cream Shop,Sandwich Place,Gas Station,Donut Shop,Laundromat,Caribbean Restaurant,Dessert Shop,Discount Store,Distillery
1,Bronx,Co-op City,40.874294,-73.829939,0,Bus Station,Fast Food Restaurant,Ice Cream Shop,Bagel Shop,Pharmacy,Park,Restaurant,Discount Store,Optical Shop,Pizza Place
2,Bronx,Eastchester,40.887556,-73.827806,0,Caribbean Restaurant,Diner,Deli / Bodega,Cosmetics Shop,Food & Drink Shop,Pizza Place,Platform,Convenience Store,Chinese Restaurant,Donut Shop
3,Bronx,Fieldston,40.895437,-73.905643,0,Plaza,Medical School,River,Bus Station,Donut Shop,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Eye Doctor,Electronics Store
4,Bronx,Riverdale,40.890834,-73.912585,0,Park,Bus Station,Plaza,Gym,Food Truck,Baseball Field,Bank,Playground,Home Service,Gym / Fitness Center


In [None]:
#### Since it is now in a dataframe, we can use it to map the venue clusters

In [461]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(bronx_merged['Latitude'], bronx_merged['Longitude'], bronx_merged['Neighborhood'], bronx_merged['Cluster_Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [None]:
#### With the clusters defined, now let us examine each one in detail

In [462]:
bronx_cluster_0 = bronx_merged.loc[bronx_merged['Cluster_Labels'] == 0, bronx_merged.columns[[1] + list(range(4, bronx_merged.shape[1]))]]

bronx_cluster_1 = bronx_merged.loc[bronx_merged['Cluster_Labels'] == 1, bronx_merged.columns[[1] + list(range(4, bronx_merged.shape[1]))]]

bronx_cluster_2 = bronx_merged.loc[bronx_merged['Cluster_Labels'] == 2, bronx_merged.columns[[1] + list(range(4, bronx_merged.shape[1]))]]

bronx_cluster_3 = bronx_merged.loc[bronx_merged['Cluster_Labels'] == 3, bronx_merged.columns[[1] + list(range(4, bronx_merged.shape[1]))]]

bronx_cluster_4 = bronx_merged.loc[bronx_merged['Cluster_Labels'] == 4, bronx_merged.columns[[1] + list(range(4, bronx_merged.shape[1]))]]

In [463]:
bronx_cluster_0

Unnamed: 0,Neighborhood,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Co-op City,0,Bus Station,Fast Food Restaurant,Ice Cream Shop,Bagel Shop,Pharmacy,Park,Restaurant,Discount Store,Optical Shop,Pizza Place
2,Eastchester,0,Caribbean Restaurant,Diner,Deli / Bodega,Cosmetics Shop,Food & Drink Shop,Pizza Place,Platform,Convenience Store,Chinese Restaurant,Donut Shop
3,Fieldston,0,Plaza,Medical School,River,Bus Station,Donut Shop,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Eye Doctor,Electronics Store
4,Riverdale,0,Park,Bus Station,Plaza,Gym,Food Truck,Baseball Field,Bank,Playground,Home Service,Gym / Fitness Center
5,Kingsbridge,0,Pizza Place,Bar,Sandwich Place,Supermarket,Latin American Restaurant,Mexican Restaurant,Spanish Restaurant,Pharmacy,Fried Chicken Joint,Donut Shop
6,Woodlawn,0,Pub,Pizza Place,Deli / Bodega,Playground,Bar,Donut Shop,Liquor Store,Food & Drink Shop,Food Truck,Rental Car Location
7,Norwood,0,Pizza Place,Bank,Park,Deli / Bodega,Pharmacy,Bus Station,Spanish Restaurant,Supermarket,Restaurant,Mobile Phone Shop
9,Baychester,0,Donut Shop,Convenience Store,Baseball Field,Pet Store,Discount Store,Sandwich Place,Electronics Store,Mexican Restaurant,Fast Food Restaurant,Men's Store
10,Pelham Parkway,0,Pizza Place,Chinese Restaurant,Italian Restaurant,Ice Cream Shop,Sushi Restaurant,Coffee Shop,Plaza,Donut Shop,Sandwich Place,Bus Station
11,City Island,0,Harbor / Marina,Seafood Restaurant,Thrift / Vintage Store,Ice Cream Shop,Baseball Field,Diner,Pizza Place,History Museum,Music Venue,Smoke Shop


In [464]:
bronx_cluster_1

Unnamed: 0,Neighborhood,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Wakefield,1,Pharmacy,Ice Cream Shop,Sandwich Place,Gas Station,Donut Shop,Laundromat,Caribbean Restaurant,Dessert Shop,Discount Store,Distillery
13,University Heights,1,Pizza Place,Fast Food Restaurant,Fried Chicken Joint,Bakery,Convenience Store,Latin American Restaurant,Chinese Restaurant,Donut Shop,Sandwich Place,Shoe Store
15,Fordham,1,Fast Food Restaurant,Mobile Phone Shop,Shoe Store,Bank,Gym / Fitness Center,Donut Shop,Pizza Place,Pharmacy,Spanish Restaurant,Supplement Shop
20,Mott Haven,1,Spanish Restaurant,Gym,Donut Shop,Grocery Store,Pizza Place,Fish & Chips Shop,Storage Facility,Burger Joint,Peruvian Restaurant,Bakery
22,Longwood,1,Sandwich Place,Latin American Restaurant,Wine Shop,Fast Food Restaurant,Diner,Train,Grocery Store,Donut Shop,Fish & Chips Shop,Farmers Market
24,Morrisania,1,Discount Store,Donut Shop,Grocery Store,Fast Food Restaurant,Ice Cream Shop,Liquor Store,Chinese Restaurant,Sandwich Place,Seafood Restaurant,Bus Stop
26,Clason Point,1,Park,South American Restaurant,Pool,Boat or Ferry,Grocery Store,Bus Stop,Women's Store,Donut Shop,Fast Food Restaurant,Farmers Market
28,Country Club,1,Sandwich Place,Playground,Chinese Restaurant,Doctor's Office,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Eye Doctor,Electronics Store
32,Morris Park,1,Pizza Place,Bakery,Burger Joint,Deli / Bodega,Buffet,Sandwich Place,Salon / Barbershop,Supermarket,Donut Shop,Juice Bar
34,Spuyten Duyvil,1,Pizza Place,Tennis Stadium,Pharmacy,Park,Scenic Lookout,Bank,Tennis Court,Thai Restaurant,Farmers Market,Eye Doctor


In [465]:
bronx_cluster_2

Unnamed: 0,Neighborhood,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,West Farms,2,Bus Station,Park,Metro Station,Playground,Bank,Diner,Convenience Store,Coffee Shop,Sandwich Place,Donut Shop


In [466]:
bronx_cluster_3

Unnamed: 0,Neighborhood,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Williamsbridge,3,Playground,Bar,Soup Place,Nightclub,Caribbean Restaurant,Convenience Store,Dessert Shop,Diner,Fish & Chips Shop,Fast Food Restaurant
25,Soundview,3,Chinese Restaurant,Grocery Store,Breakfast Spot,Pharmacy,Burger Joint,Discount Store,Video Store,Mobile Phone Shop,Bus Station,Bus Stop
42,Concourse,3,Grocery Store,Bakery,Ice Cream Shop,Spanish Restaurant,Italian Restaurant,Fried Chicken Joint,Food Truck,Metro Station,Donut Shop,Pharmacy


In [467]:
bronx_cluster_4

Unnamed: 0,Neighborhood,Cluster_Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Bedford Park,4,Chinese Restaurant,Diner,Mexican Restaurant,Pizza Place,Deli / Bodega,Bus Station,Sandwich Place,Spanish Restaurant,Train Station,Grocery Store
