# **City Analysis** #
We are exploring neighbourhoods to open new bakery shops. To this end, we are going to analyse three cities: New York City, Toronto, and Chicago.

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print('Libraries imported.')

Libraries imported.


## **New York City** ##

We will start with New York City data. We will import data as we did for our lab exercise.

In [2]:
# Import Data: download the New York dataset and save it in the working
# directory as 'newyork_data.json' (-q: no progress output, -O: output file name).
!wget -q -O 'newyork_data.json' https://cocl.us/new_york_dataset

In [3]:
# Load the downloaded JSON file and build one DataFrame row per neighbourhood.
with open('newyork_data.json') as json_data:
    newyork_data = json.load(json_data)

neighborhoods_data = newyork_data['features']

# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude']

# Collect every record first and build the frame once: growing a DataFrame
# row by row with DataFrame.append is quadratic and the method no longer
# exists in pandas 2.x.
# Each feature stores its coordinates as [longitude, latitude].
neighborhood_records = [
    {
        'Borough': data['properties']['borough'],
        'Neighborhood': data['properties']['name'],
        'Latitude': data['geometry']['coordinates'][1],
        'Longitude': data['geometry']['coordinates'][0],
    }
    for data in neighborhoods_data
]

# instantiate the dataframe (columns= fixes the column order, also when empty)
neighborhoods = pd.DataFrame(neighborhood_records, columns=column_names)

#### Use geopy library to get the latitude and longitude values of Manhattan.

In [4]:
# Look up the coordinates of Manhattan; they are used to centre the map below.
address = 'Manhattan, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 40.7900869, -73.9598295.


#### Create a map of New York Manhattan with neighborhoods superimposed on top. ####

We will use Manhattan data only.

In [5]:
# Keep only the Manhattan rows and centre a folium map on the geocoded point.
is_manhattan = neighborhoods['Borough'] == 'Manhattan'
manhattan_data = neighborhoods[is_manhattan].reset_index(drop=True)
map_manhattan = folium.Map(location=[latitude, longitude], zoom_start=11)

# One blue circle per neighbourhood; clicking it shows the neighbourhood name.
for lat, lng, label in zip(manhattan_data['Latitude'],
                           manhattan_data['Longitude'],
                           manhattan_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_manhattan)

print(manhattan_data.shape)
map_manhattan

(40, 4)


## **Toronto** ##

We will move on to Toronto data. We will import data as we did for our last exercise.

In [6]:
# Scrape the Toronto postal-code table from Wikipedia, clean it, and attach
# the coordinates stored in Geospatial_Coordinates.csv.
from bs4 import BeautifulSoup
import urllib3

http = urllib3.PoolManager()
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
toronto_postal = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
response = http.request('GET', toronto_postal)
# Name the parser explicitly so the result does not depend on which optional
# parser libraries happen to be installed (and bs4 does not warn about it).
soup = BeautifulSoup(response.data, 'html.parser')

My_table = soup.find('table', {'class': 'wikitable sortable'})

# Build one record per table row. Rows without <td> cells (the header row)
# or with an unexpected number of cells are skipped instead of leaving
# half-filled rows behind. Cell text is stripped so trailing newlines cannot
# break the 'Not assigned' comparisons or the postal-code merge below.
table_columns = ['PostalCode', 'Borough', 'Neighbourhood']
table_rows = []
for row in My_table.find_all('tr'):
    cells = [cell.get_text().strip() for cell in row.find_all('td')]
    if len(cells) == len(table_columns):
        table_rows.append(cells)

new_table = pd.DataFrame(table_rows, columns=table_columns)

# We will remove rows with a borough that is Not assigned and if there is a borough value but neighbourhood is not assigned, we will assign borough as neighbourhood.
new_table = new_table[new_table['Borough'] != 'Not assigned'].copy()
neighbourhood_missing = new_table['Neighbourhood'] == 'Not assigned'
new_table.loc[neighbourhood_missing, 'Neighbourhood'] = new_table.loc[neighbourhood_missing, 'Borough']

# Combine all neighbourhoods that share a postal code into one comma-separated row.
final_table = new_table.groupby(by=['PostalCode', 'Borough'])['Neighbourhood'].agg(', '.join).reset_index()

# Rename the CSV's key column so both frames share the 'PostalCode' join key.
df_Geospatial_Coordinate = pd.read_csv('Geospatial_Coordinates.csv').rename(columns={'Postal Code': 'PostalCode'})
merged_table = pd.merge(final_table, df_Geospatial_Coordinate, on='PostalCode')

#### Use geopy library to get the latitude and longitude values of Toronto. ####

In [7]:
# Keep only the boroughs whose name contains "Toronto".
in_toronto = merged_table['Borough'].str.contains("Toronto")
toronto_data = merged_table[in_toronto].reset_index(drop=True)

# Geocode the city to obtain the map centre.
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# One blue circle per postal-code area; the popup lists its neighbourhood(s).
for lat, lng, label in zip(toronto_data['Latitude'],
                           toronto_data['Longitude'],
                           toronto_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

print(toronto_data.shape)
map_toronto

The geograpical coordinate of Toronto are 43.653963, -79.387207.
(38, 5)


## **Chicago** ##

We will move on to Chicago data. We will import data from the spreadsheet to make this exercise easy.

In [8]:
# Sheet1 holds the neighbourhoods, Sheet2 the coordinates; both are keyed by
# the 'Community Area' column.
Chicago_Neighbourhood = pd.read_excel('Chicago.xlsx', sheet_name='Sheet1')
Chicago_geospatial = pd.read_excel('Chicago.xlsx', sheet_name='Sheet2')

# Collapse the neighbourhoods of each community area into one comma-separated string.
final_Chicago_Neighbourhood = (
    Chicago_Neighbourhood
    .groupby(by=['Community Area'])['Neighbourhood']
    .agg([('Neighbourhood', ', '.join)])
    .reset_index()
)

# Attach the coordinates of each community area.
merged_table_Chicago = final_Chicago_Neighbourhood.merge(
    Chicago_geospatial,
    left_on='Community Area',
    right_on='Community Area',
)
merged_table_Chicago.head()

Unnamed: 0,Community Area,Neighbourhood,Latitude,Longitude
0,Albany Park,"Albany Park, Mayfair, North Mayfair, Ravenswoo...",41.971937,-87.716174
1,Archer Heights,Archer Heights,41.81,-87.73
2,Armour Square,"Armour Square, Chinatown, Wentworth Gardens",41.840033,-87.633107
3,Ashburn,"Ashburn, Ashburn Estates, Beverly View, Crestl...",41.747533,-87.711163
4,Auburn Gresham,"Auburn Gresham, Gresham",41.750474,-87.664304


#### Use geopy library to get the latitude and longitude values of Chicago. ####

In [9]:
# All community areas are used for Chicago (no borough filter).
chicago_data = merged_table_Chicago

# Geocode the city to obtain the map centre.
address = 'Chicago, IL'

geolocator = Nominatim(user_agent="ch_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Chicago are {}, {}.'.format(latitude, longitude))

map_chicago = folium.Map(location=[latitude, longitude], zoom_start=11)

# One blue circle per community area; the popup shows the community area name.
for lat, lng, label in zip(chicago_data['Latitude'],
                           chicago_data['Longitude'],
                           chicago_data['Community Area']):
    label = folium.Popup(label, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_chicago)

print(chicago_data.shape)
map_chicago

The geograpical coordinate of Chicago are 41.8755616, -87.6244212.
(82, 4)


## **Create a function to load venues in neighbourhood** ##

We are going to use the same function, which we used in our previous exercise to load venues in neighbourhoods.

In [10]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Fetch the venues around each location from the Foursquare explore endpoint.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location to query; iterated in parallel.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but has these columns) when nothing was found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT at call
    time. Venues that carry no category are skipped.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string, and never wait forever.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        # Surface API errors (bad credentials, quota, ...) as an HTTP error
        # instead of a confusing KeyError on the payload.
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue.get('categories') or []
            if not categories:
                # Without a category the venue cannot be one-hot encoded later.
                continue
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name']))

    # Passing columns= keeps the schema intact even when no venue was collected.
    nearby_venues = pd.DataFrame(venue_rows, columns=venue_columns)

    return(nearby_venues)

We will define our Foursquare ID, Foursquare Secret, Version, Limit and radius.

In [11]:
import os

# Credentials are read from the environment so that they are never stored in
# the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel. NOTE(review): the key pair that used to be committed
# here should be considered leaked and be rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
if not CLIENT_ID or not CLIENT_SECRET:
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; Foursquare requests will fail.')
VERSION = '20190406' # API version date sent with every request
LIMIT = 100 # maximum number of venues requested per location
radius = 500 # search radius

Let's get venues with respect to neighbourhoods in three cities.

In [12]:
# Query the venues around every neighbourhood of each city.
# getNearbyVenues prints each location name as it goes.
manhattan_venues = getNearbyVenues(
    manhattan_data['Neighborhood'],
    manhattan_data['Latitude'],
    manhattan_data['Longitude'],
)
toronto_venues = getNearbyVenues(
    toronto_data['Neighbourhood'],
    toronto_data['Latitude'],
    toronto_data['Longitude'],
)
chicago_venues = getNearbyVenues(
    chicago_data['Neighbourhood'],
    chicago_data['Latitude'],
    chicago_data['Longitude'],
)

Marble Hill
Chinatown
Washington Heights
Inwood
Hamilton Heights
Manhattanville
Central Harlem
East Harlem
Upper East Side
Yorkville
Lenox Hill
Roosevelt Island
Upper West Side
Lincoln Square
Clinton
Midtown
Murray Hill
Chelsea
Greenwich Village
East Village
Lower East Side
Tribeca
Little Italy
Soho
West Village
Manhattan Valley
Morningside Heights
Gramercy
Battery Park City
Financial District
Carnegie Hill
Noho
Civic Center
Midtown South
Sutton Place
Turtle Bay
Tudor City
Stuyvesant Town
Flatiron
Hudson Yards
The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exch

## **Analyze each neighbourhood** ##

In [13]:
# Manhattan
# one hot encoding: one indicator column per venue category
manhattan_onehot = pd.get_dummies(manhattan_venues[['Venue Category']], prefix="", prefix_sep="")

# A venue category that is itself called "Neighborhood" can occur (it shows up
# in the Toronto results). Adding the key column under the same name would
# silently overwrite that category, so move the category aside first.
if 'Neighborhood' in manhattan_onehot.columns:
    manhattan_onehot = manhattan_onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# add neighborhood column back to dataframe
manhattan_onehot['Neighborhood'] = manhattan_venues['Neighborhood']

# move neighborhood column to the first column
fixed_columns1 = [manhattan_onehot.columns[-1]] + list(manhattan_onehot.columns[:-1])
manhattan_onehot = manhattan_onehot[fixed_columns1]

# Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
manhattan_grouped = manhattan_onehot.groupby('Neighborhood').mean().reset_index()

In [14]:
# Toronto
# Indicator (one-hot) column for every venue category.
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# Append the location label of each venue as the key column ...
toronto_onehot['Neighbourhood'] = toronto_venues['Neighborhood']

# ... and rotate that last column to the front.
fixed_columns2 = list(toronto_onehot.columns[-1:]) + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns2]

# The mean of the indicators per neighbourhood is the share of its venues in each category.
toronto_by_neighbourhood = toronto_onehot.groupby('Neighbourhood')
toronto_grouped = toronto_by_neighbourhood.mean().reset_index()

In [15]:
# Chicago
# Indicator (one-hot) column for every venue category.
chicago_onehot = pd.get_dummies(chicago_venues[['Venue Category']], prefix="", prefix_sep="")

# Append the location label of each venue as the key column ...
chicago_onehot['Neighbourhood'] = chicago_venues['Neighborhood']

# ... and rotate that last column to the front.
fixed_columns3 = list(chicago_onehot.columns[-1:]) + list(chicago_onehot.columns[:-1])
chicago_onehot = chicago_onehot[fixed_columns3]

# The mean of the indicators per neighbourhood is the share of its venues in each category.
chicago_by_neighbourhood = chicago_onehot.groupby('Neighbourhood')
chicago_grouped = chicago_by_neighbourhood.mean().reset_index()

We will use the same function to sort the venues in descending order.

In [16]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``, ignoring its first entry.

    ``row`` is a pandas Series whose first element is skipped (the callers in
    this notebook pass rows whose first element is the neighbourhood name).
    The remaining values are sorted in descending order and the index labels
    of the first ``num_top_venues`` of them are returned as a NumPy array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

Create the new dataframe and display the top 10 venues for each neighborhood.

In [17]:
# Manhattan
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    # Explicit bounds check instead of a bare `except:` (which would also
    # hide unrelated errors): ranks beyond the third get the 'th' suffix.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighbourhood
neighborhoods_venues_sorted1 = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted1['Neighbourhood'] = manhattan_grouped['Neighborhood']

# fill every row with that neighbourhood's most frequent venue categories
for ind in np.arange(manhattan_grouped.shape[0]):
    neighborhoods_venues_sorted1.iloc[ind, 1:] = return_most_common_venues(manhattan_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted1.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Battery Park City,Coffee Shop,Park,Hotel,Gym,Italian Restaurant,Shopping Mall,Clothing Store,Wine Shop,Sandwich Place,Pizza Place
1,Carnegie Hill,Pizza Place,Coffee Shop,Bar,Café,Cosmetics Shop,Yoga Studio,Spa,Bookstore,French Restaurant,Grocery Store
2,Central Harlem,African Restaurant,Cosmetics Shop,American Restaurant,French Restaurant,Gym / Fitness Center,Chinese Restaurant,Seafood Restaurant,Bookstore,Library,Beer Bar
3,Chelsea,Coffee Shop,Ice Cream Shop,Italian Restaurant,Nightclub,Bakery,Theater,Hotel,American Restaurant,Seafood Restaurant,French Restaurant
4,Chinatown,Chinese Restaurant,Dim Sum Restaurant,Cocktail Bar,American Restaurant,Vietnamese Restaurant,Bar,Bubble Tea Shop,Noodle House,Hotpot Restaurant,Salon / Barbershop


In [19]:
# Toronto
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    # Explicit bounds check instead of a bare `except:` (which would also
    # hide unrelated errors): ranks beyond the third get the 'th' suffix.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighbourhood
neighborhoods_venues_sorted2 = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted2['Neighbourhood'] = toronto_grouped['Neighbourhood']

# fill every row with that neighbourhood's most frequent venue categories
for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted2.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted2.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Thai Restaurant,Steakhouse,American Restaurant,Burger Joint,Restaurant,Sushi Restaurant,Asian Restaurant,Hotel
1,Berczy Park,Coffee Shop,Cocktail Bar,Cheese Shop,Bakery,Steakhouse,Seafood Restaurant,Farmers Market,Café,Pub,Restaurant
2,"Brockton, Exhibition Place, Parkdale Village",Breakfast Spot,Café,Coffee Shop,Yoga Studio,Bar,Burrito Place,Restaurant,Caribbean Restaurant,Climbing Gym,Gym
3,Business Reply Mail Processing Centre 969 Eastern,Light Rail Station,Yoga Studio,Park,Smoke Shop,Skate Park,Brewery,Burrito Place,Restaurant,Recording Studio,Comic Shop
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Lounge,Airport Terminal,Boat or Ferry,Sculpture Garden,Harbor / Marina,Plane,Airport Gate,Airport Food Court,Airport


In [20]:
# Chicago
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    # Explicit bounds check instead of a bare `except:` (which would also
    # hide unrelated errors): ranks beyond the third get the 'th' suffix.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighbourhood
neighborhoods_venues_sorted3 = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted3['Neighbourhood'] = chicago_grouped['Neighbourhood']

# fill every row with that neighbourhood's most frequent venue categories
for ind in np.arange(chicago_grouped.shape[0]):
    neighborhoods_venues_sorted3.iloc[ind, 1:] = return_most_common_venues(chicago_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted3.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Albany Park, Mayfair, North Mayfair, Ravenswoo...",Sandwich Place,Shoe Store,Fried Chicken Joint,Chinese Restaurant,Café,Korean Restaurant,Cocktail Bar,Bakery,Bus Station,Grocery Store
1,"Altgeld Gardens, Eden Green, Golden Gate, Rive...",Grocery Store,Park,Yoga Studio,Dry Cleaner,Flea Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Fast Food Restaurant
2,"Andersonville, Edgewater, Edgewater Beach, Edg...",Sandwich Place,Mexican Restaurant,Sushi Restaurant,Asian Restaurant,Restaurant,Coffee Shop,Pharmacy,Thai Restaurant,Deli / Bodega,Mobile Phone Shop
3,Archer Heights,Mexican Restaurant,Bakery,Food,Bar,Seafood Restaurant,Floating Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant
4,"Armour Square, Chinatown, Wentworth Gardens",Chinese Restaurant,Italian Restaurant,Cosmetics Shop,Sports Bar,Sandwich Place,Asian Restaurant,Breakfast Spot,Hot Dog Joint,Department Store,Electronics Store


## **Cluster neighbourhoods** ##

We will run *k*-means clustering with 5 clusters.

In [21]:
# set number of clusters (shared by the k-means runs of all three cities
# and by the colour palette of the cluster maps)
kclusters = 5

In [22]:
# Manhattan
# Features for clustering: the category frequencies without the name column.
# (`columns=` replaces the positional axis argument, which pandas 2.x rejects.)
manhattan_grouped_clustering = manhattan_grouped.drop(columns='Neighborhood')

# run k-means clustering
kmeans1 = KMeans(n_clusters=kclusters, random_state=0).fit(manhattan_grouped_clustering)

# add clustering labels; drop a previous copy first so that re-running this
# cell does not fail with "cannot insert Cluster Labels, already exists"
if 'Cluster Labels' in neighborhoods_venues_sorted1.columns:
    neighborhoods_venues_sorted1 = neighborhoods_venues_sorted1.drop(columns='Cluster Labels')
neighborhoods_venues_sorted1.insert(0, 'Cluster Labels', kmeans1.labels_)

# merge the labelled top-venue table into manhattan_data to add latitude/longitude for each neighborhood
manhattan_merged = manhattan_data.join(neighborhoods_venues_sorted1.set_index('Neighbourhood'), on='Neighborhood')

manhattan_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Manhattan,Marble Hill,40.876551,-73.91066,2,Coffee Shop,Discount Store,Sandwich Place,Tennis Stadium,Gym,Big Box Store,Supplement Shop,Shoe Store,Seafood Restaurant,Yoga Studio
1,Manhattan,Chinatown,40.715618,-73.994279,1,Chinese Restaurant,Dim Sum Restaurant,Cocktail Bar,American Restaurant,Vietnamese Restaurant,Bar,Bubble Tea Shop,Noodle House,Hotpot Restaurant,Salon / Barbershop
2,Manhattan,Washington Heights,40.851903,-73.9369,0,Café,Bakery,Grocery Store,Mobile Phone Shop,Latin American Restaurant,Mexican Restaurant,Spanish Restaurant,Clothing Store,Gym,Supermarket
3,Manhattan,Inwood,40.867684,-73.92121,0,Mexican Restaurant,Café,Lounge,Pizza Place,Park,Bakery,Frozen Yogurt Shop,Restaurant,Deli / Bodega,Chinese Restaurant
4,Manhattan,Hamilton Heights,40.823604,-73.949688,0,Mexican Restaurant,Coffee Shop,Pizza Place,Café,Yoga Studio,Indian Restaurant,Sushi Restaurant,Sandwich Place,Deli / Bodega,Liquor Store


In [23]:
# Toronto
# Features for clustering: the category frequencies without the name column.
# (`columns=` replaces the positional axis argument, which pandas 2.x rejects.)
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighbourhood')

# run k-means clustering
kmeans2 = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# add clustering labels; drop a previous copy first so that re-running this
# cell does not fail with "cannot insert Cluster Labels, already exists"
if 'Cluster Labels' in neighborhoods_venues_sorted2.columns:
    neighborhoods_venues_sorted2 = neighborhoods_venues_sorted2.drop(columns='Cluster Labels')
neighborhoods_venues_sorted2.insert(0, 'Cluster Labels', kmeans2.labels_)

# merge the labelled top-venue table into toronto_data to add latitude/longitude for each neighbourhood
toronto_merged = toronto_data.join(neighborhoods_venues_sorted2.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Coffee Shop,Pub,Neighborhood,Health Food Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dessert Shop,Donut Shop
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Furniture / Home Store,Italian Restaurant,Cosmetics Shop,Brewery,Bubble Tea Shop,Restaurant
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,0,Park,Sandwich Place,Pizza Place,Light Rail Station,Ice Cream Shop,Liquor Store,Burger Joint,Burrito Place,Fast Food Restaurant,Fish & Chips Shop
3,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Café,Coffee Shop,Italian Restaurant,American Restaurant,Bakery,Brewery,Coworking Space,Bookstore,Latin American Restaurant,Middle Eastern Restaurant
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,4,Park,Swim School,Bus Line,Yoga Studio,Diner,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


In [24]:
# Chicago
# Features for clustering: the category frequencies without the name column.
# (`columns=` replaces the positional axis argument, which pandas 2.x rejects.)
chicago_grouped_clustering = chicago_grouped.drop(columns='Neighbourhood')

# run k-means clustering
kmeans3 = KMeans(n_clusters=kclusters, random_state=0).fit(chicago_grouped_clustering)

# add clustering labels; drop a previous copy first so that re-running this
# cell does not fail with "cannot insert Cluster Labels, already exists"
if 'Cluster Labels' in neighborhoods_venues_sorted3.columns:
    neighborhoods_venues_sorted3 = neighborhoods_venues_sorted3.drop(columns='Cluster Labels')
neighborhoods_venues_sorted3.insert(0, 'Cluster Labels', kmeans3.labels_)

# merge the labelled top-venue table into chicago_data to add latitude/longitude for each neighbourhood.
# Rows of chicago_data without a match get NaN, so the labels show up as
# floats here; they are dropped and cast to integers before mapping.
chicago_merged = chicago_data.join(neighborhoods_venues_sorted3.set_index('Neighbourhood'), on='Neighbourhood')

chicago_merged.head() # check the last columns!

Unnamed: 0,Community Area,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Albany Park,"Albany Park, Mayfair, North Mayfair, Ravenswoo...",41.971937,-87.716174,2.0,Sandwich Place,Shoe Store,Fried Chicken Joint,Chinese Restaurant,Café,Korean Restaurant,Cocktail Bar,Bakery,Bus Station,Grocery Store
1,Archer Heights,Archer Heights,41.81,-87.73,0.0,Mexican Restaurant,Bakery,Food,Bar,Seafood Restaurant,Floating Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant
2,Armour Square,"Armour Square, Chinatown, Wentworth Gardens",41.840033,-87.633107,2.0,Chinese Restaurant,Italian Restaurant,Cosmetics Shop,Sports Bar,Sandwich Place,Asian Restaurant,Breakfast Spot,Hot Dog Joint,Department Store,Electronics Store
3,Ashburn,"Ashburn, Ashburn Estates, Beverly View, Crestl...",41.747533,-87.711163,2.0,Cosmetics Shop,Electronics Store,Light Rail Station,Italian Restaurant,Pizza Place,Cupcake Shop,Currency Exchange,Flea Market,Fish Market,Fish & Chips Shop
4,Auburn Gresham,"Auburn Gresham, Gresham",41.750474,-87.664304,2.0,Fast Food Restaurant,Lounge,Discount Store,Greek Restaurant,Cosmetics Shop,Pharmacy,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Dry Cleaner


#### Visualizing the resulting clusters ####

In [25]:
# Colour palette for the clusters: one hex colour per cluster, evenly spaced
# along matplotlib's rainbow colormap. `ys` only contributes its length.
x = np.arange(kclusters)
ys = [(i * x) ** 2 + x + i for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = list(map(colors.rgb2hex, colors_array))

In [26]:
# Manhattan
# Geocode the borough again so the cell does not depend on the latitude/longitude
# left behind by whichever city was processed last.
address = 'Manhattan, NY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude

# create map
map_clusters1 = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one marker per neighbourhood, coloured by its cluster label
markers_colors = []
for lat, lon, poi, cluster in zip(manhattan_merged['Latitude'],
                                  manhattan_merged['Longitude'],
                                  manhattan_merged['Neighborhood'],
                                  manhattan_merged['Cluster Labels']):
    cluster_color = rainbow[cluster-1]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7)
    marker.add_to(map_clusters1)

map_clusters1

In [27]:
# Toronto
# Geocode the city again so the cell does not depend on the latitude/longitude
# left behind by whichever city was processed last.
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude

# create map
map_clusters2 = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one marker per neighbourhood, coloured by its cluster label
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'],
                                  toronto_merged['Longitude'],
                                  toronto_merged['Neighbourhood'],
                                  toronto_merged['Cluster Labels']):
    cluster_color = rainbow[cluster-1]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7)
    marker.add_to(map_clusters2)

map_clusters2

In [28]:
# Remove every row that contains at least one missing value
# (dropna defaults: axis=0, how='any').
chicago_merged = chicago_merged.dropna()

In [29]:
# Chicago
# Geocode the city again so the cell does not depend on the latitude/longitude
# left behind by whichever city was processed last.
address = 'Chicago, IL'

geolocator = Nominatim(user_agent="ch_explorer")
location = geolocator.geocode(address)
latitude, longitude = location.latitude, location.longitude

# create map
map_clusters3 = folium.Map(location=[latitude, longitude], zoom_start=11)

# The labels are floats at this point; cast them to integers so they can be
# used as list indices below.
chicago_merged['Cluster Labels'] = chicago_merged['Cluster Labels'].astype(np.int64)

# add one marker per neighbourhood group, coloured by its cluster label
markers_colors = []
for lat, lon, poi, cluster in zip(chicago_merged['Latitude'],
                                  chicago_merged['Longitude'],
                                  chicago_merged['Neighbourhood'],
                                  chicago_merged['Cluster Labels']):
    cluster_color = rainbow[cluster-1]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7)
    marker.add_to(map_clusters3)

map_clusters3

#### **Finding Neighbourhood with Most Bakery, Coffee Shop, Café, Bubble Tea Shop, Juice Bar and Donut Shop** ####

Next, we will select the neighbourhood(s) in each city that have the most bakeries (weight = 4), coffee shops, cafés and bubble tea shops (weight = 3), juice bars (weight = 2) and donut shops (weight = 1). We will also apply rank weights from 10 down to 1 for the 1st most common venue through the 10th most common venue.

In [30]:
# Manhattan: score every neighbourhood by the bakery-like venues among its top venues.
# Select the ranked venue columns by name rather than by position, so the
# scoring keeps working if columns are added to or removed from the frame.
venue_columns1 = [c for c in manhattan_merged.columns if c.endswith('Most Common Venue')]
top_venues1 = manhattan_merged[venue_columns1]

IDX_Bakery1 = top_venues1 == 'Bakery'
IDX_Coffee1 = top_venues1 == 'Coffee Shop'
IDX_Cafe1 = top_venues1 == 'Café'
IDX_Bubble1 = top_venues1 == 'Bubble Tea Shop'
IDX_Juice1 = top_venues1 == 'Juice Bar'
IDX_Donut1 = top_venues1 == 'Donut Shop'  # was misspelled 'Dunut Shop', so donut shops never scored

# Rank weights: the 1st most common venue weighs most, the last one weighs 1.
RankWeightBase1 = np.arange(len(venue_columns1), 0, -1)
RankWeight1 = np.tile(RankWeightBase1, (manhattan_merged.shape[0], 1))

# Category weights: bakery 4, coffee shop / café / bubble tea 3, juice bar 2, donut shop 1.
# The masks are combined with `|` and cast to integers; adding boolean frames
# with `+` is what triggered the pandas warning.
CategoryWeight1 = (IDX_Bakery1.astype(int) * 4
                   + (IDX_Coffee1 | IDX_Cafe1 | IDX_Bubble1).astype(int) * 3
                   + IDX_Juice1.astype(int) * 2
                   + IDX_Donut1.astype(int))
Ranking1 = (CategoryWeight1 * RankWeight1).sum(axis=1)
Ranking_max1 = Ranking1.max()

# show the neighbourhood(s) with the highest score
manhattan_merged[Ranking1 == Ranking_max1]

  .format(op=op_str, alt_op=unsupported[op_str]))


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Manhattan,Little Italy,40.719324,-73.997305,1,Bakery,Café,Seafood Restaurant,Sandwich Place,Bubble Tea Shop,Chinese Restaurant,Clothing Store,Salon / Barbershop,Mediterranean Restaurant,Ice Cream Shop


In [31]:
# Toronto: score every neighbourhood by the bakery-like venues among its top venues.
# Select the ranked venue columns by name rather than by position, so the
# scoring keeps working if columns are added to or removed from the frame.
venue_columns2 = [c for c in toronto_merged.columns if c.endswith('Most Common Venue')]
top_venues2 = toronto_merged[venue_columns2]

IDX_Bakery2 = top_venues2 == 'Bakery'
IDX_Coffee2 = top_venues2 == 'Coffee Shop'
IDX_Cafe2 = top_venues2 == 'Café'
IDX_Bubble2 = top_venues2 == 'Bubble Tea Shop'
IDX_Juice2 = top_venues2 == 'Juice Bar'
IDX_Donut2 = top_venues2 == 'Donut Shop'  # was misspelled 'Dunut Shop', so donut shops never scored

# Rank weights: the 1st most common venue weighs most, the last one weighs 1.
RankWeightBase2 = np.arange(len(venue_columns2), 0, -1)
RankWeight2 = np.tile(RankWeightBase2, (toronto_merged.shape[0], 1))

# Category weights: bakery 4, coffee shop / café / bubble tea 3, juice bar 2, donut shop 1.
# The masks are combined with `|` and cast to integers instead of adding
# boolean frames with `+`.
CategoryWeight2 = (IDX_Bakery2.astype(int) * 4
                   + (IDX_Coffee2 | IDX_Cafe2 | IDX_Bubble2).astype(int) * 3
                   + IDX_Juice2.astype(int) * 2
                   + IDX_Donut2.astype(int))
Ranking2 = (CategoryWeight2 * RankWeight2).sum(axis=1)
Ranking_max2 = Ranking2.max()

# show the neighbourhood(s) with the highest score
toronto_merged[Ranking2 == Ranking_max2]

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636,0,Coffee Shop,Bakery,Park,Café,Pub,Theater,Breakfast Spot,Restaurant,Mexican Restaurant,Bank


In [32]:
# Chicago: score every neighbourhood by the bakery-like venues among its top venues.
# Select the ranked venue columns by name rather than by position, so the
# scoring keeps working if columns are added to or removed from the frame.
venue_columns3 = [c for c in chicago_merged.columns if c.endswith('Most Common Venue')]
top_venues3 = chicago_merged[venue_columns3]

IDX_Bakery3 = top_venues3 == 'Bakery'
IDX_Coffee3 = top_venues3 == 'Coffee Shop'
IDX_Cafe3 = top_venues3 == 'Café'
IDX_Bubble3 = top_venues3 == 'Bubble Tea Shop'
IDX_Juice3 = top_venues3 == 'Juice Bar'
IDX_Donut3 = top_venues3 == 'Donut Shop'  # was misspelled 'Dunut Shop', so donut shops never scored

# Rank weights: the 1st most common venue weighs most, the last one weighs 1.
RankWeightBase3 = np.arange(len(venue_columns3), 0, -1)
RankWeight3 = np.tile(RankWeightBase3, (chicago_merged.shape[0], 1))

# Category weights: bakery 4, coffee shop / café / bubble tea 3, juice bar 2, donut shop 1.
# The masks are combined with `|` and cast to integers instead of adding
# boolean frames with `+`.
CategoryWeight3 = (IDX_Bakery3.astype(int) * 4
                   + (IDX_Coffee3 | IDX_Cafe3 | IDX_Bubble3).astype(int) * 3
                   + IDX_Juice3.astype(int) * 2
                   + IDX_Donut3.astype(int))
Ranking3 = (CategoryWeight3 * RankWeight3).sum(axis=1)
Ranking_max3 = Ranking3.max()

# show the neighbourhood(s) with the highest score
chicago_merged[Ranking3 == Ranking_max3]

Unnamed: 0,Community Area,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
36,Hyde Park,"East Hyde Park, Hyde Park",41.794446,-87.593924,2,Sandwich Place,Coffee Shop,Yoga Studio,Bakery,Bookstore,Bubble Tea Shop,Shopping Mall,Shipping Store,Café,Rental Car Location


## **Examine Clusters** ##

Now we will examine each cluster to determine which neighbourhoods are good candidates for opening a bakery shop.

#### Cluster 1 - Manhattan ####

In [33]:
# Manhattan rows with cluster label 0: keep the neighbourhood name (column 1)
# and every column from position 5 onward (the ranked venue columns).
manhattan_merged.loc[
    manhattan_merged['Cluster Labels'].eq(0),
    manhattan_merged.columns[[1, *range(5, len(manhattan_merged.columns))]]
]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Washington Heights,Café,Bakery,Grocery Store,Mobile Phone Shop,Latin American Restaurant,Mexican Restaurant,Spanish Restaurant,Clothing Store,Gym,Supermarket
3,Inwood,Mexican Restaurant,Café,Lounge,Pizza Place,Park,Bakery,Frozen Yogurt Shop,Restaurant,Deli / Bodega,Chinese Restaurant
4,Hamilton Heights,Mexican Restaurant,Coffee Shop,Pizza Place,Café,Yoga Studio,Indian Restaurant,Sushi Restaurant,Sandwich Place,Deli / Bodega,Liquor Store
7,East Harlem,Mexican Restaurant,Bakery,Deli / Bodega,Latin American Restaurant,Pizza Place,Thai Restaurant,Restaurant,Clothing Store,Sandwich Place,Cocktail Bar
25,Manhattan Valley,Coffee Shop,Pizza Place,Mexican Restaurant,French Restaurant,Indian Restaurant,Thai Restaurant,Deli / Bodega,Yoga Studio,Café,Bar
26,Morningside Heights,Coffee Shop,American Restaurant,Park,Bookstore,Food Truck,Burger Joint,Pizza Place,Café,Tennis Court,Deli / Bodega
36,Tudor City,Mexican Restaurant,Park,Greek Restaurant,Café,Hotel,Asian Restaurant,Sushi Restaurant,Pizza Place,Dog Run,Deli / Bodega


#### Cluster 2 - Manhattan ####

In [34]:
# Manhattan rows with cluster label 1: keep the neighbourhood name (column 1)
# and every column from position 5 onward (the ranked venue columns).
manhattan_merged.loc[
    manhattan_merged['Cluster Labels'].eq(1),
    manhattan_merged.columns[[1, *range(5, len(manhattan_merged.columns))]]
]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Chinatown,Chinese Restaurant,Dim Sum Restaurant,Cocktail Bar,American Restaurant,Vietnamese Restaurant,Bar,Bubble Tea Shop,Noodle House,Hotpot Restaurant,Salon / Barbershop
5,Manhattanville,Coffee Shop,Park,Sushi Restaurant,Italian Restaurant,Mexican Restaurant,Seafood Restaurant,Indian Restaurant,Bus Stop,Lounge,Bike Trail
9,Yorkville,Bar,Gym,Coffee Shop,Italian Restaurant,Pizza Place,Japanese Restaurant,Wine Shop,Deli / Bodega,Diner,Sushi Restaurant
10,Lenox Hill,Italian Restaurant,Coffee Shop,Sushi Restaurant,Gym / Fitness Center,Pizza Place,Sporting Goods Shop,Burger Joint,Gym,Mexican Restaurant,Bakery
12,Upper West Side,Italian Restaurant,Bar,Coffee Shop,Indian Restaurant,Wine Bar,Bakery,Mediterranean Restaurant,Burger Joint,Vegetarian / Vegan Restaurant,Yoga Studio
17,Chelsea,Coffee Shop,Ice Cream Shop,Italian Restaurant,Nightclub,Bakery,Theater,Hotel,American Restaurant,Seafood Restaurant,French Restaurant
18,Greenwich Village,Italian Restaurant,Sushi Restaurant,French Restaurant,Clothing Store,Café,Seafood Restaurant,Bakery,Indian Restaurant,Boutique,Burger Joint
19,East Village,Bar,Wine Bar,Mexican Restaurant,Ice Cream Shop,Vegetarian / Vegan Restaurant,Cocktail Bar,Chinese Restaurant,Coffee Shop,Pizza Place,Ramen Restaurant
20,Lower East Side,Café,Art Gallery,Coffee Shop,Japanese Restaurant,Sandwich Place,Ramen Restaurant,Pizza Place,Cocktail Bar,Chinese Restaurant,Shoe Store
22,Little Italy,Bakery,Café,Seafood Restaurant,Sandwich Place,Bubble Tea Shop,Chinese Restaurant,Clothing Store,Salon / Barbershop,Mediterranean Restaurant,Ice Cream Shop


#### Cluster 3 - Manhattan ####

In [35]:
# Manhattan rows with cluster label 2: keep the neighbourhood name (column 1)
# and every column from position 5 onward (the ranked venue columns).
manhattan_merged.loc[
    manhattan_merged['Cluster Labels'].eq(2),
    manhattan_merged.columns[[1, *range(5, len(manhattan_merged.columns))]]
]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Marble Hill,Coffee Shop,Discount Store,Sandwich Place,Tennis Stadium,Gym,Big Box Store,Supplement Shop,Shoe Store,Seafood Restaurant,Yoga Studio
6,Central Harlem,African Restaurant,Cosmetics Shop,American Restaurant,French Restaurant,Gym / Fitness Center,Chinese Restaurant,Seafood Restaurant,Bookstore,Library,Beer Bar
8,Upper East Side,Italian Restaurant,Exhibit,Coffee Shop,Art Gallery,Juice Bar,Bakery,Gym / Fitness Center,French Restaurant,Spa,Hotel
13,Lincoln Square,Theater,Café,Italian Restaurant,Concert Hall,Gym / Fitness Center,Plaza,Opera House,French Restaurant,Performing Arts Venue,Indie Movie Theater
14,Clinton,Theater,American Restaurant,Hotel,Italian Restaurant,Gym / Fitness Center,Wine Shop,Gym,Spa,Indie Theater,Lounge
15,Midtown,Hotel,Clothing Store,Theater,Cocktail Bar,American Restaurant,Bakery,Spa,Bookstore,Sporting Goods Shop,Food Truck
16,Murray Hill,Hotel,Japanese Restaurant,Coffee Shop,Italian Restaurant,Sandwich Place,Bar,French Restaurant,Spa,Gym,American Restaurant
21,Tribeca,Italian Restaurant,Park,Café,American Restaurant,Boutique,Spa,Wine Shop,Wine Bar,Coffee Shop,Greek Restaurant
24,West Village,Italian Restaurant,Cosmetics Shop,New American Restaurant,Wine Bar,Gastropub,Jazz Club,Park,Bakery,American Restaurant,French Restaurant
28,Battery Park City,Coffee Shop,Park,Hotel,Gym,Italian Restaurant,Shopping Mall,Clothing Store,Wine Shop,Sandwich Place,Pizza Place


#### Cluster 4 - Manhattan ####

In [36]:
# Manhattan rows with cluster label 3: keep the neighbourhood name (column 1)
# and every column from position 5 onward (the ranked venue columns).
manhattan_merged.loc[
    manhattan_merged['Cluster Labels'].eq(3),
    manhattan_merged.columns[[1, *range(5, len(manhattan_merged.columns))]]
]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
37,Stuyvesant Town,Bar,Playground,Park,Gas Station,Pet Service,Baseball Field,Harbor / Marina,Cocktail Bar,Heliport,Coffee Shop


#### Cluster 5 - Manhattan ####

In [37]:
# Manhattan rows with cluster label 4: keep the neighbourhood name (column 1)
# and every column from position 5 onward (the ranked venue columns).
manhattan_merged.loc[
    manhattan_merged['Cluster Labels'].eq(4),
    manhattan_merged.columns[[1, *range(5, len(manhattan_merged.columns))]]
]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Roosevelt Island,Sandwich Place,Park,Japanese Restaurant,Pizza Place,Bus Line,School,Coffee Shop,Residential Building (Apartment / Condo),Rental Car Location,Playground


#### Cluster 1 - Toronto ####

In [38]:
# Toronto rows with cluster label 0: keep the neighbourhood name (column 2)
# and every column from position 5 onward.
# NOTE(review): position 5 is 'Cluster Labels' in toronto_merged, so the label
# column is shown alongside the ranked venue columns here.
toronto_merged.loc[
    toronto_merged['Cluster Labels'].eq(0),
    toronto_merged.columns[[2, *range(5, len(toronto_merged.columns))]]
]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,The Beaches,0,Coffee Shop,Pub,Neighborhood,Health Food Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dessert Shop,Donut Shop
1,"The Danforth West, Riverdale",0,Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Furniture / Home Store,Italian Restaurant,Cosmetics Shop,Brewery,Bubble Tea Shop,Restaurant
2,"The Beaches West, India Bazaar",0,Park,Sandwich Place,Pizza Place,Light Rail Station,Ice Cream Shop,Liquor Store,Burger Joint,Burrito Place,Fast Food Restaurant,Fish & Chips Shop
3,Studio District,0,Café,Coffee Shop,Italian Restaurant,American Restaurant,Bakery,Brewery,Coworking Space,Bookstore,Latin American Restaurant,Middle Eastern Restaurant
5,Davisville North,0,Gym,Clothing Store,Breakfast Spot,Burger Joint,Food & Drink Shop,Sandwich Place,Hotel,Asian Restaurant,Park,Creperie
6,North Toronto West,0,Clothing Store,Coffee Shop,Sporting Goods Shop,Yoga Studio,Bagel Shop,Grocery Store,Chinese Restaurant,Rental Car Location,Dessert Shop,Diner
7,Davisville,0,Sandwich Place,Dessert Shop,Pizza Place,Sushi Restaurant,Italian Restaurant,Café,Coffee Shop,Park,Seafood Restaurant,Burger Joint
9,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",0,Coffee Shop,Pub,Sushi Restaurant,Bagel Shop,Fried Chicken Joint,Sports Bar,American Restaurant,Convenience Store,Light Rail Station,Vietnamese Restaurant
11,"Cabbagetown, St. James Town",0,Coffee Shop,Restaurant,Pizza Place,Bakery,Pub,Italian Restaurant,Park,Café,Gym / Fitness Center,Playground
12,Church and Wellesley,0,Japanese Restaurant,Coffee Shop,Gay Bar,Sushi Restaurant,Burger Joint,Restaurant,Gym,Fast Food Restaurant,Pub,Men's Store


#### Cluster 2 - Toronto ####

In [39]:
# Toronto rows with cluster label 1: keep the neighbourhood name (column 2)
# and every column from position 5 onward.
# NOTE(review): position 5 is 'Cluster Labels' in toronto_merged, so the label
# column is shown alongside the ranked venue columns here.
toronto_merged.loc[
    toronto_merged['Cluster Labels'].eq(1),
    toronto_merged.columns[[2, *range(5, len(toronto_merged.columns))]]
]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,"Moore Park, Summerhill East",1,Playground,Gym,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant


#### Cluster 3 - Toronto ####

In [40]:
# Toronto rows with cluster label 2: keep the neighbourhood name (column 2)
# and every column from position 5 onward.
# NOTE(review): position 5 is 'Cluster Labels' in toronto_merged, so the label
# column is shown alongside the ranked venue columns here.
toronto_merged.loc[
    toronto_merged['Cluster Labels'].eq(2),
    toronto_merged.columns[[2, *range(5, len(toronto_merged.columns))]]
]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,Roselawn,2,Garden,Fast Food Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant


#### Cluster 4 - Toronto ####

In [41]:
# Toronto rows with cluster label 3: keep the neighbourhood name (column 2)
# and every column from position 5 onward.
# NOTE(review): position 5 is 'Cluster Labels' in toronto_merged, so the label
# column is shown alongside the ranked venue columns here.
toronto_merged.loc[
    toronto_merged['Cluster Labels'].eq(3),
    toronto_merged.columns[[2, *range(5, len(toronto_merged.columns))]]
]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Rosedale,3,Park,Playground,Trail,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant
23,"Forest Hill North, Forest Hill West",3,Trail,Park,Sushi Restaurant,Jewelry Store,Donut Shop,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Yoga Studio


#### Cluster 5 - Toronto ####

In [42]:
# Toronto rows with cluster label 4: keep the neighbourhood name (column 2)
# and every column from position 5 onward.
# NOTE(review): position 5 is 'Cluster Labels' in toronto_merged, so the label
# column is shown alongside the ranked venue columns here.
toronto_merged.loc[
    toronto_merged['Cluster Labels'].eq(4),
    toronto_merged.columns[[2, *range(5, len(toronto_merged.columns))]]
]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Lawrence Park,4,Park,Swim School,Bus Line,Yoga Studio,Diner,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


#### Cluster 1 - Chicago ####

In [43]:
# Chicago rows with cluster label 0: keep the neighbourhood name (column 1)
# and every column from position 5 onward (the ranked venue columns).
chicago_merged.loc[
    chicago_merged['Cluster Labels'].eq(0),
    chicago_merged.columns[[1, *range(5, len(chicago_merged.columns))]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Archer Heights,Mexican Restaurant,Bakery,Food,Bar,Seafood Restaurant,Floating Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant
10,"Belmont Central, Brickyard, Cragin, Hanson Park",Mexican Restaurant,Grocery Store,Pharmacy,Department Store,Nightclub,Chinese Restaurant,Thrift / Vintage Store,Restaurant,Discount Store,Bakery
13,Brighton Park,Mexican Restaurant,Grocery Store,Taco Place,Sandwich Place,Bus Station,Pizza Place,Café,Park,Burger Joint,Check Cashing Service
18,"Chicago Lawn, Lithuanian Plaza, Marquette Park",Pizza Place,Mexican Restaurant,American Restaurant,Fast Food Restaurant,Intersection,Electronics Store,Fish & Chips Shop,Farm,Ethiopian Restaurant,Falafel Restaurant
23,East Side,Mexican Restaurant,Convenience Store,Bakery,Flower Shop,Floating Market,Flea Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant
29,Gage Park,Mexican Restaurant,Bakery,Convenience Store,Dessert Shop,Asian Restaurant,Chinese Restaurant,Sandwich Place,Fast Food Restaurant,Filipino Restaurant,Eastern European Restaurant
34,"Belmont Gardens, Hermosa, Kelvyn Park",Mexican Restaurant,Brewery,Hobby Shop,Gay Bar,Pizza Place,Discount Store,Doctor's Office,Donut Shop,Café,Electronics Store
45,"East Pilsen, Heart of Chicago, Lower West Side...",Soccer Field,Music Venue,Donut Shop,Mexican Restaurant,Art Gallery,Diner,Yoga Studio,Farmers Market,Falafel Restaurant,Farm
47,Montclare,Convenience Store,Pizza Place,Mexican Restaurant,Financial or Legal Service,Dry Cleaner,Flea Market,Fish Market,Fish & Chips Shop,Filipino Restaurant,Fast Food Restaurant
63,"Loyola, Rogers Park",Mexican Restaurant,Pizza Place,Theater,Bakery,Asian Restaurant,Donut Shop,Chinese Restaurant,Bar,Train Station,Park


#### Cluster 2 - Chicago ####

In [44]:
# Chicago rows with cluster label 1: keep the neighbourhood name (column 1)
# and every column from position 5 onward (the ranked venue columns).
chicago_merged.loc[
    chicago_merged['Cluster Labels'].eq(1),
    chicago_merged.columns[[1, *range(5, len(chicago_merged.columns))]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,"Calumet Heights, Pill Hill",Gym / Fitness Center,Yoga Studio,Donut Shop,Flea Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Fast Food Restaurant,Farmers Market


#### Cluster 3 - Chicago ####

In [45]:
# Chicago rows with cluster label 2: keep the neighbourhood name (column 1)
# and every column from position 5 onward (the ranked venue columns).
chicago_merged.loc[
    chicago_merged['Cluster Labels'].eq(2),
    chicago_merged.columns[[1, *range(5, len(chicago_merged.columns))]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Albany Park, Mayfair, North Mayfair, Ravenswoo...",Sandwich Place,Shoe Store,Fried Chicken Joint,Chinese Restaurant,Café,Korean Restaurant,Cocktail Bar,Bakery,Bus Station,Grocery Store
2,"Armour Square, Chinatown, Wentworth Gardens",Chinese Restaurant,Italian Restaurant,Cosmetics Shop,Sports Bar,Sandwich Place,Asian Restaurant,Breakfast Spot,Hot Dog Joint,Department Store,Electronics Store
3,"Ashburn, Ashburn Estates, Beverly View, Crestl...",Cosmetics Shop,Electronics Store,Light Rail Station,Italian Restaurant,Pizza Place,Cupcake Shop,Currency Exchange,Flea Market,Fish Market,Fish & Chips Shop
4,"Auburn Gresham, Gresham",Fast Food Restaurant,Lounge,Discount Store,Greek Restaurant,Cosmetics Shop,Pharmacy,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Dry Cleaner
8,"Avondale, Jackowo, Wacławowo",Park,Food Truck,Chinese Restaurant,Donut Shop,Bar,Rental Car Location,Supermarket,Storage Facility,Road,Discount Store
9,Polish Village,Coffee Shop,Pizza Place,Bar,Gym,Sandwich Place,Hobby Shop,Chinese Restaurant,Brewery,Bus Line,Taco Place
12,Bridgeport,Park,Chinese Restaurant,Mobile Phone Shop,Art Gallery,Pharmacy,Coffee Shop,Bar,Pizza Place,Wings Joint,Grocery Store
17,West Chesterfield,Convenience Store,Intersection,Dry Cleaner,Floating Market,Flea Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Fast Food Restaurant
20,"Bronzeville, Dearborn Homes, Groveland Park, L...",Fast Food Restaurant,Sandwich Place,Historic Site,Fried Chicken Joint,Pharmacy,Spa,Shipping Store,Seafood Restaurant,Salon / Barbershop,Restaurant
22,"East Garfield Park, Fifth City",Burger Joint,Pet Service,Garden Center,Pharmacy,Warehouse Store,Liquor Store,Public Art,Bus Line,Train Station,Diner


#### Cluster 4 - Chicago ####

In [46]:
# Chicago rows with cluster label 3: keep the neighbourhood name (column 1)
# and every column from position 5 onward (the ranked venue columns).
chicago_merged.loc[
    chicago_merged['Cluster Labels'].eq(3),
    chicago_merged.columns[[1, *range(5, len(chicago_merged.columns))]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
49,"Mount Greenwood, Talley's Corner",Cosmetics Shop,Vineyard,Yoga Studio,Dry Cleaner,Flea Market,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant,Fast Food Restaurant


#### Cluster 5 - Chicago ####

In [47]:
# Chicago rows with cluster label 4: keep the neighbourhood name (column 1)
# and every column from position 5 onward (the ranked venue columns).
chicago_merged.loc[
    chicago_merged['Cluster Labels'].eq(4),
    chicago_merged.columns[[1, *range(5, len(chicago_merged.columns))]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,"Galewood, North Austin, South Austin, The Island",Bus Station,Park,Donut Shop,Shoe Repair,Wings Joint,Liquor Store,Gym,Grocery Store,Train Station,Eastern European Restaurant
6,West Humboldt Park,Park,Soccer Field,Food Truck,Beach,Museum,Latin American Restaurant,Farmers Market,Ethiopian Restaurant,Falafel Restaurant,Farm
11,"Beverly, East Beverly, West Beverly",Cosmetics Shop,Grocery Store,Pizza Place,Burger Joint,Bakery,Train Station,Park,Italian Restaurant,Deli / Bodega,Department Store
14,Burnside,Intersection,Home Service,Bus Station,Train Station,Yoga Studio,Farm,Electronics Store,Ethiopian Restaurant,Falafel Restaurant,Farmers Market
16,"Chatham, East Chatham, West Chatham",Convenience Store,Metro Station,Soccer Field,Liquor Store,Park,Eastern European Restaurant,Fish Market,Fish & Chips Shop,Financial or Legal Service,Filipino Restaurant
19,"Chrysler Village, Clearing East, Clearing West",Heliport,Bar,Currency Exchange,Deli / Bodega,Park,Discount Store,Nightclub,Bus Station,Sports Bar,Pizza Place
21,"Belmont Heights, Belmont Terrace, Dunning, Irv...",Candy Store,Park,Indoor Play Area,Deli / Bodega,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Falafel Restaurant,Farm,Yoga Studio
25,Edison Park,Park,Automotive Shop,Thai Restaurant,Fast Food Restaurant,Shipping Store,Music Store,Donut Shop,Sandwich Place,Salon / Barbershop,Nail Salon
28,Fuller Park,Fast Food Restaurant,Park,Construction & Landscaping,Sandwich Place,Yoga Studio,Eastern European Restaurant,Electronics Store,Ethiopian Restaurant,Falafel Restaurant,Farm
35,Humboldt Park,Park,Lake,Beach,Soccer Field,Food Truck,Café,Museum,Farmers Market,Ethiopian Restaurant,Falafel Restaurant
