In [80]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans 
from sklearn.datasets.samples_generator import make_blobs
import folium # map rendering library
import json

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

## Question 1

In [61]:
url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(url,'lxml')
table = soup.find('table',{'class':'wikitable sortable'})
print(table)

<table class="wikitable sortable">
<tbody><tr>
<th>Postal Code
</th>
<th>Borough
</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A
</td>
<td>North York
</td>
<td>Parkwoods
</td></tr>
<tr>
<td>M4A
</td>
<td>North York
</td>
<td>Victoria Village
</td></tr>
<tr>
<td>M5A
</td>
<td>Downtown Toronto
</td>
<td>Regent Park, Harbourfront
</td></tr>
<tr>
<td>M6A
</td>
<td>North York
</td>
<td>Lawrence Manor, Lawrence Heights
</td></tr>
<tr>
<td>M7A
</td>
<td>Downtown Toronto
</td>
<td>Queen's Park, Ontario Provincial Government
</td></tr>
<tr>
<td>M8A
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M9A
</td>
<td>Etobicoke
</td>
<td>Islington Avenue, Humber Valley Village
</td></tr>
<tr>
<td>M1B
</td>
<td>Scarborough
</td>
<td>Malvern, Rouge
</td></tr>
<tr>
<td>M2B
</td>
<td>Not assigned
</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3B
</td>
<td

In [62]:
# getting the table header
table_header = [th.text.replace('\n','') for th in table.find_all('th')]
# getting the table contents
table_content_pc =  [td.text.replace('\n','') for td in table.findAll('td')[::3]] # postal code
table_content_bor =  [td.text.replace('\n','') for td in table.findAll('td')[1::3]] # Borough
table_content_neigh =  [td.text.replace('\n','') for td in table.findAll('td')[2::3]] # Neighbourhood
#abc = [td.text.replace('\n','') for td in table.findAll('td')]
#print(abc)

In [63]:
# Creating DataFrame using lists of scraped data
df = pd.DataFrame(zip(table_content_pc,table_content_bor,table_content_neigh))
df.columns = table_header
df.head(20)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"


In [64]:
#Check if there is not assigned value
df_check_na_bor = df[df['Borough'] == "Not assigned"]
print(df_check_na_bor.count())
#Dropping the line with not assigned in column Borough
row_na = df[ df['Borough'] =='Not assigned'].index
df = df.drop(row_na)
df.head()

Postal Code      77
Borough          77
Neighbourhood    77
dtype: int64


Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [65]:
#Check if there is not assigned value in column Neighbourhood
df_check_na_neigh = df[df['Neighbourhood'] == "Not assigned"]
df_check_na_neigh.count()

Postal Code      0
Borough          0
Neighbourhood    0
dtype: int64

In [66]:
df.shape

(103, 3)

## Question 2

In [67]:
#Importing geospatial data
df_geo= pd.read_csv("http://cocl.us/Geospatial_data")
new_df = pd.merge(df,df_geo)
new_df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


## Question 3

In [68]:
import geocoder
from geopy.geocoders import Nominatim 

address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="can_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [69]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(new_df['Latitude'], new_df['Longitude'], new_df['Borough'], new_df['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [70]:
CLIENT_ID = 'NNNAAOPSKZGUQKTWZAF0BF32XLWEAYH3K0S20DF3P0AOB2FB'
CLIENT_SECRET = 'ZWIXSHAME0OW4WDKMA3WCJV0CM3EUKBWEN0DG3QH4KBEZNX2'
VERSION = '20191101'
LIMIT = 100

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: NNNAAOPSKZGUQKTWZAF0BF32XLWEAYH3K0S20DF3P0AOB2FB
CLIENT_SECRET:ZWIXSHAME0OW4WDKMA3WCJV0CM3EUKBWEN0DG3QH4KBEZNX2


In [71]:
new_df_2= new_df[new_df['Borough'] == 'North York'].reset_index(drop=True)
new_df_2.head()
#new_df_2.shape

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills,43.745906,-79.352188
4,M6B,North York,Glencairn,43.709577,-79.445073


In [72]:
address_2 = 'North York, Canada'

geolocator_2 = Nominatim(user_agent="can_explorer")
location_2 = geolocator.geocode(address_2)
latitude_2 = location.latitude
longitude_2 = location.longitude
print('The geograpical coordinate of North York are {}, {}.'.format(latitude_2, longitude_2))

The geograpical coordinate of North York are 43.6534817, -79.3839347.


In [73]:
# create map of North York using latitude and longitude values
map_toronto = folium.Map(location=[latitude_2, longitude_2], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(new_df_2['Latitude'], new_df_2['Longitude'], new_df_2['Borough'], new_df_2['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  


map_toronto

In [74]:
#Getting the neighbourhood name
new_df_2.loc[0, 'Neighbourhood']

'Parkwoods'

In [75]:
# Get the neighborhood's latitude and longitude values.

neighborhood_latitude = new_df_2.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = new_df_2.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = new_df_2.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


In [76]:
# limit of number of venues returned by Foursquare API
LIMIT = 100
# define radius
radius = 500 
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, neighborhood_latitude,   neighborhood_longitude, radius, LIMIT)
# display URL
url

'https://api.foursquare.com/v2/venues/explore?&client_id=NNNAAOPSKZGUQKTWZAF0BF32XLWEAYH3K0S20DF3P0AOB2FB&client_secret=ZWIXSHAME0OW4WDKMA3WCJV0CM3EUKBWEN0DG3QH4KBEZNX2&v=20191101&ll=43.7532586,-79.3296565&radius=500&limit=100'

In [77]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60287b3fab79dc20369f0e60'},
  'headerLocation': 'Parkwoods - Donalda',
  'headerFullLocation': 'Parkwoods - Donalda, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 2,
  'suggestedBounds': {'ne': {'lat': 43.757758604500005,
    'lng': -79.32343823984928},
   'sw': {'lat': 43.7487585955, 'lng': -79.33587476015072}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
       'name': 'Brookbanks Park',
       'location': {'address': 'Toronto',
        'lat': 43.751976046055574,
        'lng': -79.33214044722958,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.751976046055574,
          'lng': -79.33214044722958}],
        'distance': 245,
        'cc': 'CA',
        'c

In [78]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [82]:
# clean the json and structure into a dataframe
from pandas.io.json import json_normalize

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,Variety Store,Food & Drink Shop,43.751974,-79.333114


In [83]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

2 venues were returned by Foursquare.


## Exploring Neighborhood in North York

In [88]:
# a function to repeat the same process to all the neighborhoods in North York
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [89]:
northyork_venues = getNearbyVenues(names=new_df_2['Neighbourhood'],
                                   latitudes=new_df_2['Latitude'],
                                   longitudes=new_df_2['Longitude']
                                  )
print(northyork_venues.head())
print(northyork_venues.shape)

Parkwoods
Victoria Village
Lawrence Manor, Lawrence Heights
Don Mills
Glencairn
Don Mills
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Fairview, Henry Farm, Oriole
Northwood Park, York University
Bayview Village
Downsview
York Mills, Silver Hills
Downsview
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Willowdale, Newtonbrook
Downsview
Bedford Park, Lawrence Manor East
Humberlea, Emery
Willowdale, Willowdale East
Downsview
York Mills West
Willowdale, Willowdale West
      Neighbourhood  Neighbourhood Latitude  Neighbourhood Longitude  \
0         Parkwoods               43.753259               -79.329656   
1         Parkwoods               43.753259               -79.329656   
2  Victoria Village               43.725882               -79.315572   
3  Victoria Village               43.725882               -79.315572   
4  Victoria Village               43.725882               -79.315572   

                    Venue  Venue Latitude  Venue Longitude  \
0    

In [90]:
northyork_venues.groupby('Neighbourhood').count()
print('There are {} uniques categories.'.format(len(northyork_venues['Venue Category'].unique())))

There are 103 uniques categories.


## Analyzing Neighbourhood

In [92]:
# one hot encoding
northyork_onehot = pd.get_dummies(northyork_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
northyork_onehot['Neighbourhood'] = northyork_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [northyork_onehot.columns[-1]] + list(northyork_onehot.columns[:-1])
northyork_onehot = northyork_onehot[fixed_columns]

print(northyork_onehot.head())
print(northyork_venues.shape)

      Neighbourhood  Accessories Store  Airport  American Restaurant  \
0         Parkwoods                  0        0                    0   
1         Parkwoods                  0        0                    0   
2  Victoria Village                  0        0                    0   
3  Victoria Village                  0        0                    0   
4  Victoria Village                  0        0                    0   

   Art Gallery  Arts & Crafts Store  Asian Restaurant  Athletics & Sports  \
0            0                    0                 0                   0   
1            0                    0                 0                   0   
2            0                    0                 0                   0   
3            0                    0                 0                   0   
4            0                    0                 0                   0   

   Bakery  Bank  ...  Supplement Shop  Sushi Restaurant  Tea Room  \
0       0     0  ...               

In [93]:
northyork_grouped = northyork_onehot.groupby('Neighbourhood').mean().reset_index()
print(northyork_grouped)
print(northyork_grouped.shape)

                                      Neighbourhood  Accessories Store  \
0   Bathurst Manor, Wilson Heights, Downsview North           0.000000   
1                                   Bayview Village           0.000000   
2                 Bedford Park, Lawrence Manor East           0.000000   
3                                         Don Mills           0.000000   
4                                         Downsview           0.000000   
5                      Fairview, Henry Farm, Oriole           0.000000   
6                                         Glencairn           0.000000   
7                                 Hillcrest Village           0.000000   
8                                     Humber Summit           0.000000   
9                                  Humberlea, Emery           0.000000   
10                 Lawrence Manor, Lawrence Heights           0.090909   
11         North Park, Maple Leaf Park, Upwood Park           0.000000   
12                  Northwood Park, Yo

In [94]:
num_top_venues = 5

for hood in northyork_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = northyork_grouped[northyork_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor, Wilson Heights, Downsview North----
            venue  freq
0            Bank  0.09
1     Coffee Shop  0.09
2  Ice Cream Shop  0.04
3     Bridal Shop  0.04
4            Park  0.04


----Bayview Village----
                 venue  freq
0   Chinese Restaurant  0.25
1  Japanese Restaurant  0.25
2                 Bank  0.25
3                 Café  0.25
4    Accessories Store  0.00


----Bedford Park, Lawrence Manor East----
                     venue  freq
0       Italian Restaurant  0.08
1              Coffee Shop  0.08
2          Thai Restaurant  0.08
3           Sandwich Place  0.08
4  Comfort Food Restaurant  0.04


----Don Mills----
                 venue  freq
0                  Gym  0.12
1  Japanese Restaurant  0.08
2           Restaurant  0.08
3          Coffee Shop  0.08
4           Beer Store  0.08


----Downsview----
            venue  freq
0   Grocery Store  0.20
1            Park  0.13
2           Hotel  0.07
3  Baseball Field  0.07
4  Discount Store  0.07


In [95]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [199]:
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = northyork_grouped['Neighbourhood']

for ind in np.arange(northyork_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(northyork_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head(10)

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Shopping Mall,Middle Eastern Restaurant,Mobile Phone Shop
1,Bayview Village,Chinese Restaurant,Café,Bank,Japanese Restaurant,Women's Store
2,"Bedford Park, Lawrence Manor East",Thai Restaurant,Italian Restaurant,Sandwich Place,Coffee Shop,Café
3,Don Mills,Gym,Beer Store,Japanese Restaurant,Coffee Shop,Restaurant
4,Downsview,Grocery Store,Park,Shopping Mall,Hotel,Liquor Store
5,"Fairview, Henry Farm, Oriole",Clothing Store,Coffee Shop,Fast Food Restaurant,Restaurant,Cosmetics Shop
6,Glencairn,Pizza Place,Metro Station,Pub,Bakery,Japanese Restaurant
7,Hillcrest Village,Golf Course,Athletics & Sports,Pool,Mediterranean Restaurant,Fast Food Restaurant
8,Humber Summit,Pizza Place,Furniture / Home Store,Intersection,Distribution Center,Coffee Shop
9,"Humberlea, Emery",Furniture / Home Store,Baseball Field,Women's Store,Dog Run,Comfort Food Restaurant


## Clustering

In [198]:
# set number of clusters
kclusters = 5

northyork_cluster = northyork_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(northyork_cluster)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 0, 0, 2, 2], dtype=int32)

In [200]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

northyork_merged = new_df_2

# merge grouped data of northyork with northyork data to add latitude/longitude for each neighborhood
northyork_merged = northyork_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

northyork_merged = northyork_merged.fillna(0)
northyork_merged = northyork_merged.astype({"Cluster Labels": int})

northyork_merged.head(10) # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,4,Park,Food & Drink Shop,Women's Store,Distribution Center,Comfort Food Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,0,Coffee Shop,Hockey Arena,Portuguese Restaurant,Intersection,Distribution Center
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0,Clothing Store,Furniture / Home Store,Athletics & Sports,Event Space,Miscellaneous Shop
3,M3B,North York,Don Mills,43.745906,-79.352188,0,Gym,Beer Store,Japanese Restaurant,Coffee Shop,Restaurant
4,M6B,North York,Glencairn,43.709577,-79.445073,0,Pizza Place,Metro Station,Pub,Bakery,Japanese Restaurant
5,M3C,North York,Don Mills,43.7259,-79.340923,0,Gym,Beer Store,Japanese Restaurant,Coffee Shop,Restaurant
6,M2H,North York,Hillcrest Village,43.803762,-79.363452,0,Golf Course,Athletics & Sports,Pool,Mediterranean Restaurant,Fast Food Restaurant
7,M3H,North York,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,0,Coffee Shop,Bank,Shopping Mall,Middle Eastern Restaurant,Mobile Phone Shop
8,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,0,Clothing Store,Coffee Shop,Fast Food Restaurant,Restaurant,Cosmetics Shop
9,M3J,North York,"Northwood Park, York University",43.76798,-79.487262,2,Furniture / Home Store,Caribbean Restaurant,Bar,Massage Studio,Coffee Shop


In [201]:
# Visualize
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(northyork_merged['Latitude'], northyork_merged['Longitude'], northyork_merged['Neighbourhood'], northyork_merged['Cluster Labels'].astype(int)):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters

In [202]:
northyork_merged.loc[northyork_merged['Cluster Labels'] == 0, northyork_merged.columns[[1] + list(range(5, northyork_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,North York,0,Coffee Shop,Hockey Arena,Portuguese Restaurant,Intersection,Distribution Center
2,North York,0,Clothing Store,Furniture / Home Store,Athletics & Sports,Event Space,Miscellaneous Shop
3,North York,0,Gym,Beer Store,Japanese Restaurant,Coffee Shop,Restaurant
4,North York,0,Pizza Place,Metro Station,Pub,Bakery,Japanese Restaurant
5,North York,0,Gym,Beer Store,Japanese Restaurant,Coffee Shop,Restaurant
6,North York,0,Golf Course,Athletics & Sports,Pool,Mediterranean Restaurant,Fast Food Restaurant
7,North York,0,Coffee Shop,Bank,Shopping Mall,Middle Eastern Restaurant,Mobile Phone Shop
8,North York,0,Clothing Store,Coffee Shop,Fast Food Restaurant,Restaurant,Cosmetics Shop
10,North York,0,Chinese Restaurant,Café,Bank,Japanese Restaurant,Women's Store
11,North York,0,Grocery Store,Park,Shopping Mall,Hotel,Liquor Store


In [203]:
northyork_merged.loc[northyork_merged['Cluster Labels'] == 2, northyork_merged.columns[[1] + list(range(5, northyork_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
9,North York,2,Furniture / Home Store,Caribbean Restaurant,Bar,Massage Studio,Coffee Shop
15,North York,2,Pizza Place,Furniture / Home Store,Intersection,Distribution Center,Coffee Shop
19,North York,2,Furniture / Home Store,Baseball Field,Women's Store,Dog Run,Comfort Food Restaurant
