# Toronto Borough Scraping Lab

## Part One: Data Scraping
This is the portion where we'll pull some data from Wikipedia and then format it for a pandas DataFrame.

In [1]:
# Imports and installs
import requests
import pandas as pd
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors
import geocoder
import folium
from bs4 import BeautifulSoup
from sklearn.cluster import KMeans

In [2]:
# Get the raw data from Wikipedia
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
r = requests.get(url)
soup = BeautifulSoup(r.text, "html.parser")

In [3]:
# Process the Wikipedia data into Python lists
table = soup.select("#mw-content-text table")[0]
td_list = table.find_all("td")
postal_codes, boroughs, neighborhoods = [], [], []
for td in td_list: 
    if td.span.text == "Not assigned":
        continue
    postal_codes += [td.b.text]
    boroughs += [td.span.text.split("(")[0].strip()]
    neighborhoods += [[n.strip().replace(")", " ") for n in td.span.text.split("(")[1].strip(")").split("/")]]
neighborhoods = [", ".join(n) for n in neighborhoods]

In [4]:
# Now put the scraped data from lists into a pandas DataFrame
df = pd.DataFrame(list(zip(postal_codes, boroughs, neighborhoods)))
df.columns = ["PostalCode", "Borough", "Neighborhood"]
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


In [5]:
# Show the number of rows in the dataframe
df.shape

(103, 3)

## Part Two: Latitudes and Longitudes

Wherein we'll struggle to get the data organically and eventually gratefully accept the spoon-fed data from the IBM course.

In [6]:
# Now we need to find the latitude and longitude of each postal code
# This requires a paid Google service and therefore does not work! 
"""
latitudes, longitudes = [], []

for postal_code in df["PostalCode"]:
    
    # initialize latitude and longitude to None
    coords = None

    # loop until coords are obtained, geocoder library will sometimes wrongly return None so this is necessary
    while coords is None:
        g = geocoder.google(f"{postal_code}, Toronto, Ontario")
        if g.status == "REQUEST DENIED":
            print(g.status)
            break
            raise Exception("See? It doesn't work unless you pay up.")
        coords = g.latlng
    print(f"Coordinates retrieved for {postal_code}!      ")
    
    latitudes += [coords[0]]
    longitudes += [coords[1]]
    print(postal_code) 

# Now add the latitude and longitude columns to the dataframe
df["latitude"] = latitudes
df["longitude"] = longitudes
df.head()
"""

'\nlatitudes, longitudes = [], []\n\nfor postal_code in df["PostalCode"]:\n    \n    # initialize latitude and longitude to None\n    coords = None\n\n    # loop until coords are obtained, geocoder library will sometimes wrongly return None so this is necessary\n    while coords is None:\n        g = geocoder.google(f"{postal_code}, Toronto, Ontario")\n        if g.status == "REQUEST DENIED":\n            print(g.status)\n            break\n            raise Exception("See? It doesn\'t work unless you pay up.")\n        coords = g.latlng\n    print(f"Coordinates retrieved for {postal_code}!      ")\n    \n    latitudes += [coords[0]]\n    longitudes += [coords[1]]\n    print(postal_code) \n\n# Now add the latitude and longitude columns to the dataframe\ndf["latitude"] = latitudes\ndf["longitude"] = longitudes\ndf.head()\n'

In [7]:
# Now we need to find the latitude and longitude of each postal code
# This is attempting using another library/api, but it is not precise enough -- 
# it gives random addresses within the zip codes rather than just a general lat/long for the zip.
# And also returns no data at all for several searches. Try it out if you want, I'm not confident 
# in this data
"""
import urllib.parse

latitudes, longitudes = [], []

for postal_code in df["PostalCode"]:
    address = f"{postal_code}, Toronto, Ontario, Canada"
    url = "https://nominatim.openstreetmap.org/search/" + urllib.parse.quote(address) + "?format=json"
    print(url)
    response = requests.get(url).json()
    print(response)
    if response:
        latitudes += [response[0]["lat"]]
        longitudes += [response[0]["lon"]]
    break # it doesn't work, don't go through every loop
"""

'\nimport urllib.parse\n\nlatitudes, longitudes = [], []\n\nfor postal_code in df["PostalCode"]:\n    address = f"{postal_code}, Toronto, Ontario, Canada"\n    url = "https://nominatim.openstreetmap.org/search/" + urllib.parse.quote(address) + "?format=json"\n    print(url)\n    response = requests.get(url).json()\n    print(response)\n    if response:\n        latitudes += [response[0]["lat"]]\n        longitudes += [response[0]["lon"]]\n    break # it doesn\'t work, don\'t go through every loop\n'

In [8]:
# Given that this is beginning to be annoyingly frustrating and IBM has provided the data
# based on their anticipation of my frustration, I'm going to just get the data provided.

# Get and parse the data
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv"
dataset = requests.get(url)
dataset = dataset.text.split("\n")
dataset = [d.split(",") for d in dataset]

# Make it a dataframe
dataset = pd.DataFrame(dataset[1:])
dataset.columns = ["PostalCode", "Latitude", "Longitude"]

# Merge the two dataframes on the common postal code
if "Latitude" not in df:
    df = df.merge(dataset, on="PostalCode", how="inner")

df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7532586,-79.3296565
1,M4A,North York,Victoria Village,43.7258823,-79.3155716
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6542599,-79.3606359
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.4647633
4,M7A,Queen's Park,Ontario Provincial Government,43.6623015,-79.3894938


## Part Three: FourSquare API Fun

Define credentials for later use.

In [9]:
# @hidden_cell
CLIENT_ID = 'VWZMMMOLA50YBUWRTGPQQCHWL2VSYTWMW3JQ1WUPR42EBDPT' # your Foursquare ID
CLIENT_SECRET = 'YFP4MEWMOX2WHCISRKA0SRE1NY0PF4MAS0U0NPZJ5C0AHTB2' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: VWZMMMOLA50YBUWRTGPQQCHWL2VSYTWMW3JQ1WUPR42EBDPT
CLIENT_SECRET:YFP4MEWMOX2WHCISRKA0SRE1NY0PF4MAS0U0NPZJ5C0AHTB2


Adapt the getNearbyVenues code for the NY lab to this project:

In [10]:
def get_nearby_venues(names, latitudes, longitudes, radius=500):
    LIMIT = 100
    URL = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = URL.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [11]:
toronto_venues = get_nearby_venues(names=df["Neighborhood"], latitudes=df["Latitude"], longitudes=df["Longitude"])

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Ontario Provincial Government
Islington Avenue
Malvern, Rouge
Don Mills North
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
The Danforth  East
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview East
The Danforth

In [12]:
print(toronto_venues.shape)
toronto_venues.head()

(2128, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.7532586,-79.3296565,KFC,43.754387,-79.333021,Fast Food Restaurant
1,Parkwoods,43.7532586,-79.3296565,Brookbanks Park,43.751976,-79.33214,Park
2,Parkwoods,43.7532586,-79.3296565,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.7258823,-79.3155716,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.7258823,-79.3155716,Portugril,43.725819,-79.312785,Portuguese Restaurant


In [13]:
toronto_venues.groupby("Neighborhood").count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Alderwood, Long Branch",8,8,8,8,8,8
"Bathurst Manor, Wilson Heights, Downsview North",22,22,22,22,22,22
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",26,26,26,26,26,26
...,...,...,...,...,...,...
Willowdale West,5,5,5,5,5,5
"Willowdale, Newtonbrook",2,2,2,2,2,2
Woburn,4,4,4,4,4,4
Woodbine Heights,7,7,7,7,7,7


In [14]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 268 uniques categories.


Now that we have the basic list, perform one-hot encoding to analyze the venues in each neighborhood.

In [15]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood to dataframe as first column
toronto_onehot.drop("Neighborhood", axis=1, inplace=True)   # don't know why this is even here, but now it's not
toronto_onehot.insert(0, "Neighborhood", toronto_venues['Neighborhood'])

toronto_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
toronto_onehot.shape

(2128, 268)

The above values represent the (currently) 2128 neighborhoods and 268 unique venue types. 

In [17]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Willowdale West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
96,"Willowdale, Newtonbrook",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
97,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
98,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,...,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
toronto_grouped.shape

(100, 268)

### Now print each neighborhood with the top five most common venue types in each

In [19]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt ----
                       venue  freq
0             Clothing Store  0.25
1  Latin American Restaurant  0.25
2             Breakfast Spot  0.25
3                     Lounge  0.25
4        Moroccan Restaurant  0.00


----Alderwood, Long Branch----
                venue  freq
0         Pizza Place  0.25
1                 Gym  0.12
2  Athletics & Sports  0.12
3                 Pub  0.12
4        Skating Rink  0.12


----Bathurst Manor, Wilson Heights, Downsview North----
                       venue  freq
0                Coffee Shop  0.09
1                       Bank  0.09
2                   Pharmacy  0.05
3  Middle Eastern Restaurant  0.05
4              Shopping Mall  0.05


----Bayview Village----
                 venue  freq
0  Japanese Restaurant  0.25
1                 Café  0.25
2   Chinese Restaurant  0.25
3                 Bank  0.25
4        Movie Theater  0.00


----Bedford Park, Lawrence Manor East----
                 venue  freq
0       Sandwich Place  0.08


                             venue  freq
0                   Baseball Field   1.0
1                Accessories Store   0.0
2        Middle Eastern Restaurant   0.0
3              Monument / Landmark   0.0
4  Molecular Gastronomy Restaurant   0.0


----Ontario Provincial Government----
              venue  freq
0       Coffee Shop  0.21
1  Sushi Restaurant  0.07
2       Yoga Studio  0.03
3          Creperie  0.03
4          Beer Bar  0.03


----Parkdale, Roncesvalles----
                         venue  freq
0               Breakfast Spot  0.14
1                    Gift Shop  0.14
2                    Bookstore  0.07
3                  Coffee Shop  0.07
4  Eastern European Restaurant  0.07


----Parkview Hill, Woodbine Gardens----
         venue  freq
0  Pizza Place  0.18
1     Pharmacy  0.09
2    Pet Store  0.09
3  Flea Market  0.09
4    Gastropub  0.09


----Parkwoods----
                  venue  freq
0  Fast Food Restaurant  0.33
1                  Park  0.33
2     Food & Drink Shop  

In [20]:
# Function borrowed to sort venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

In [21]:
# More code borrowed to show the 10 most common venue types per neighborhood in a dataframe
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Latin American Restaurant,Breakfast Spot,Clothing Store,Yoga Studio,Eastern European Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
1,"Alderwood, Long Branch",Pizza Place,Athletics & Sports,Coffee Shop,Pub,Sandwich Place,Skating Rink,Gym,Airport Service,Airport Terminal,Falafel Restaurant
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Sandwich Place,Sushi Restaurant,Deli / Bodega,Middle Eastern Restaurant,Ice Cream Shop,Restaurant,Mobile Phone Shop,Fried Chicken Joint
3,Bayview Village,Café,Japanese Restaurant,Bank,Chinese Restaurant,Diner,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Sandwich Place,Italian Restaurant,Indian Restaurant,Japanese Restaurant,Juice Bar,Fast Food Restaurant,Liquor Store,Restaurant,Thai Restaurant


# Part Four: K-Means Clustering

In [22]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_ 

array([4, 1, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4,
       4, 1, 4, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       3, 4, 4, 4, 4, 1, 4, 0, 4, 4, 1, 0, 4, 0, 4, 4, 4, 4, 3, 4, 4, 1,
       0, 4, 4, 0, 4, 4, 4, 1, 4, 1, 4, 4, 1, 4, 4, 4, 4, 2, 4, 0, 1, 4,
       4, 1, 4, 1, 2, 4, 4, 1, 0, 4, 4, 2])

In [23]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.7532586,-79.3296565,0.0,Fast Food Restaurant,Park,Food & Drink Shop,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
1,M4A,North York,Victoria Village,43.7258823,-79.3155716,1.0,Pizza Place,Coffee Shop,Portuguese Restaurant,Hockey Arena,Intersection,Escape Room,Ethiopian Restaurant,Electronics Store,Event Space,Eastern European Restaurant
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.6542599,-79.3606359,4.0,Coffee Shop,Bakery,Park,Pub,Café,Theater,Breakfast Spot,Bank,Mexican Restaurant,French Restaurant
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.4647633,4.0,Clothing Store,Furniture / Home Store,Accessories Store,Coffee Shop,Boutique,Miscellaneous Shop,Event Space,Vietnamese Restaurant,Convenience Store,Discount Store
4,M7A,Queen's Park,Ontario Provincial Government,43.6623015,-79.3894938,4.0,Coffee Shop,Sushi Restaurant,Yoga Studio,Burrito Place,Bar,Italian Restaurant,Japanese Restaurant,Beer Bar,Smoothie Shop,Sandwich Place


# Step Five: Data Visualization

In [27]:
# Define latitude and longtiude of Toronto
latitude = "43.6532"
longitude = "-79.3832"

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# drop nan values
toronto_merged.dropna(inplace=True)


# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Step Six: Examining the Clusters

### Cluster 1

In [35]:
print(len(toronto_merged.loc[toronto_merged['Cluster Labels'] == 0]))
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

9


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,0.0,Fast Food Restaurant,Park,Food & Drink Shop,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop
21,York,0.0,Park,Women's Store,Pool,Yoga Studio,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
40,North York,0.0,Airport,Park,Yoga Studio,Eastern European Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
52,North York,0.0,Park,Yoga Studio,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
61,Central Toronto,0.0,Bus Line,Park,Swim School,Yoga Studio,Donut Shop,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore
83,Central Toronto,0.0,Park,Restaurant,Trail,Tennis Court,Yoga Studio,Distribution Center,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store
85,Scarborough,0.0,Intersection,Playground,Park,Yoga Studio,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
91,Downtown Toronto,0.0,Park,Playground,Trail,Yoga Studio,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
98,Etobicoke,0.0,Park,River,Pool,Yoga Studio,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant


### Cluster 2

In [34]:
print(len(toronto_merged.loc[toronto_merged['Cluster Labels'] == 1]))
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

13


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,1.0,Pizza Place,Coffee Shop,Portuguese Restaurant,Hockey Arena,Intersection,Escape Room,Ethiopian Restaurant,Electronics Store,Event Space,Eastern European Restaurant
6,Scarborough,1.0,Print Shop,Fast Food Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Escape Room,Electronics Store,Eastern European Restaurant,Dim Sum Restaurant,Drugstore
8,East York,1.0,Pizza Place,Gastropub,Pharmacy,Gym / Fitness Center,Café,Flea Market,Bank,Intersection,Athletics & Sports,Pet Store
29,East York,1.0,Indian Restaurant,Sandwich Place,Yoga Studio,Bank,Gym,Gas Station,Intersection,Fast Food Restaurant,Park,Pharmacy
63,York,1.0,Pizza Place,Convenience Store,Bus Line,Brewery,Ethiopian Restaurant,Escape Room,Event Space,Electronics Store,Diner,Eastern European Restaurant
65,Scarborough,1.0,Indian Restaurant,Pet Store,Vietnamese Restaurant,Chinese Restaurant,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
70,Etobicoke,1.0,Pizza Place,Coffee Shop,Sandwich Place,Discount Store,Chinese Restaurant,Middle Eastern Restaurant,Intersection,Drugstore,Donut Shop,Dim Sum Restaurant
72,North York,1.0,Pizza Place,Coffee Shop,Discount Store,Pharmacy,Grocery Store,Comfort Food Restaurant,Dim Sum Restaurant,Event Space,Ethiopian Restaurant,Escape Room
77,Etobicoke,1.0,Pizza Place,Sandwich Place,Mobile Phone Shop,Bus Line,Doner Restaurant,Discount Store,Distribution Center,Dog Run,Donut Shop,College Stadium
82,Scarborough,1.0,Pizza Place,Pharmacy,Gas Station,Noodle House,Chinese Restaurant,Italian Restaurant,Bank,Intersection,Fried Chicken Joint,Thai Restaurant


### Cluster 3

In [36]:
print(len(toronto_merged.loc[toronto_merged['Cluster Labels'] == 2]))
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

3


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
35,East YorkEast Toronto,2.0,Park,Convenience Store,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Electronics Store
64,York,2.0,Convenience Store,Yoga Studio,Eastern European Restaurant,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Electronics Store
66,North York,2.0,Park,Convenience Store,Yoga Studio,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Electronics Store


### Cluster 4

In [37]:
print(len(toronto_merged.loc[toronto_merged['Cluster Labels'] == 3]))
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

2


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
57,North York,3.0,Baseball Field,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,Field
101,Etobicoke,3.0,Baseball Field,Yoga Studio,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,Field


##### 

### Cluster 5

In [38]:
print(len(toronto_merged.loc[toronto_merged['Cluster Labels'] == 4]))
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

73


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,4.0,Coffee Shop,Bakery,Park,Pub,Café,Theater,Breakfast Spot,Bank,Mexican Restaurant,French Restaurant
3,North York,4.0,Clothing Store,Furniture / Home Store,Accessories Store,Coffee Shop,Boutique,Miscellaneous Shop,Event Space,Vietnamese Restaurant,Convenience Store,Discount Store
4,Queen's Park,4.0,Coffee Shop,Sushi Restaurant,Yoga Studio,Burrito Place,Bar,Italian Restaurant,Japanese Restaurant,Beer Bar,Smoothie Shop,Sandwich Place
7,North York,4.0,Café,Gym,Caribbean Restaurant,Japanese Restaurant,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant
9,Downtown Toronto,4.0,Coffee Shop,Clothing Store,Hotel,Café,Sandwich Place,Italian Restaurant,Japanese Restaurant,Middle Eastern Restaurant,Cosmetics Shop,Pizza Place
...,...,...,...,...,...,...,...,...,...,...,...,...
96,Downtown Toronto,4.0,Coffee Shop,Italian Restaurant,Restaurant,Café,Pub,Pizza Place,Bakery,Plaza,Pet Store,Pharmacy
97,Downtown Toronto,4.0,Coffee Shop,Café,Hotel,Gym,Deli / Bodega,Japanese Restaurant,Sushi Restaurant,Asian Restaurant,Restaurant,Pizza Place
99,Downtown Toronto,4.0,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Yoga Studio,Bubble Tea Shop,Pub,Café,Men's Store
100,East TorontoBusiness reply mail Processing Cen...,4.0,Light Rail Station,Yoga Studio,Garden Center,Comic Shop,Gym / Fitness Center,Pizza Place,Restaurant,Butcher,Burrito Place,Skate Park


The end!