# Part 1

#### Import pandas and read in table.

In [1]:
import pandas as pd

In [2]:
# Parse every HTML table on the Wikipedia page into a list of DataFrames,
# then keep the first one as the postal code table.
lst = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
postal = lst[0]

In [3]:
# Preview the first rows of the scraped table before any cleaning.
postal.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


#### Cleaning the postal code dataframe.

In [4]:
# Keep only the postal codes that have been assigned to a borough.
# Rebinding (instead of inplace=True) leaves the scraped table in `lst` untouched,
# so this cell gives the same result every time it is run.
postal = postal[postal['Borough'] != 'Not assigned'].copy()

# A neighborhood that is missing (NaN) or marked 'Not assigned' takes the name of
# its borough. An explicit mask is used because DataFrame.replace() with a
# DataFrame as `to_replace` does not perform this row-wise fill.
no_neighborhood = postal['Neighborhood'].isna() | postal['Neighborhood'].eq('Not assigned')
postal.loc[no_neighborhood, 'Neighborhood'] = postal.loc[no_neighborhood, 'Borough']

# Multiple neighborhoods are listed as "A / B"; turn the " /" separators into commas,
# then renumber the rows from 0 after the filtering above.
postal = postal.replace(regex=[r' /'], value=',').reset_index(drop=True)
postal.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
# (rows, columns) of the cleaned postal code table.
postal.shape

(103, 3)

# Part 2

#### Reading geospatial data.

In [6]:
# Download the CSV of coordinates (one latitude/longitude pair per postal code)
# and preview its first rows.
geo = pd.read_csv('http://cocl.us/Geospatial_data')
geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Cleaning up the data and combining the geospatial dataframe with the postal code dataframe.

In [7]:
# Give the key column the same spelling in both tables so the merge can
# match on it, then join and renumber the rows from 0.
geo = geo.rename(columns={"Postal Code": "Postal code"})
geo_postal = pd.merge(postal, geo, how='inner').reset_index(drop=True)

In [8]:
# Preview the merged table: postal code, borough, neighborhood and coordinates.
geo_postal.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


# Part 3

#### Import libraries.

In [9]:
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

In [10]:
#!conda install -c conda-forge folium=0.5.0 --yes
import folium

#### Pull data on Toronto from geo_postal dataframe and display on a map.

In [11]:
# Keep only the rows whose borough name mentions "Toronto", renumbered from 0.
is_toronto_borough = geo_postal['Borough'].str.contains("Toronto")
toronto_geo_postal = geo_postal.loc[is_toronto_borough].reset_index(drop=True)
toronto_geo_postal.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [12]:
# Base map on which the Toronto postal codes are drawn.
map_toronto = folium.Map(location=[43.6532, -79.3832], zoom_start=11)

# One blue circle per postal code; its popup reads "<neighborhood>, <borough>".
marker_fields = toronto_geo_postal[['Borough', 'Neighborhood', 'Latitude', 'Longitude']]
for borough, neighborhood, lat, lng in marker_fields.itertuples(index=False, name=None):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

# Last expression of the cell: renders the map inline.
map_toronto

#### Clustering data.

In [13]:
# Number of clusters to fit.
kclusters = 4

# Cluster on the coordinates only. The two feature columns are selected by name:
# passing the axis to drop() positionally is no longer accepted by pandas 2.x, and
# dropping by exclusion would silently add any extra column (for example a
# previously inserted 'Cluster Labels') to the features when the cell is re-run.
toronto_geo_postal_clustering = toronto_geo_postal[['Latitude', 'Longitude']]

# n_init is pinned to the historical default of 10 so the fit does not change with
# the scikit-learn version; random_state makes the labels reproducible.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(toronto_geo_postal_clustering)

# Cluster label assigned to each of the first ten postal codes.
kmeans.labels_[0:10]

array([1, 1, 1, 1, 3, 1, 1, 2, 1, 2], dtype=int32)

In [14]:
# Work on a copy so that toronto_geo_postal itself is never modified. With a plain
# alias the insert below would also add the column to toronto_geo_postal, and
# running this cell a second time would fail because the column already exists.
toronto_geo_postal_grouped = toronto_geo_postal.copy()

# Put the k-means label in the first column, one label per postal code row.
toronto_geo_postal_grouped.insert(0, 'Cluster Labels', kmeans.labels_)

#### Displaying clustered data.

In [15]:
# Base map on which the clustered postal codes are drawn.
map_clusters = folium.Map(location=[43.6532, -79.3832], zoom_start=11)

# One evenly spaced rainbow colour per cluster, as hex strings; rainbow[k] is the
# colour of cluster label k.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# One circle per postal code, coloured by its cluster label. The label is used
# directly as the colour index: k-means labels start at 0, so an offset of -1
# would only work for label 0 through negative-index wrap-around.
for latitude, longitude, neighborhood, cluster in zip(toronto_geo_postal_grouped['Latitude'], toronto_geo_postal_grouped['Longitude'], toronto_geo_postal_grouped['Neighborhood'], toronto_geo_postal_grouped['Cluster Labels']):
    label = folium.Popup(str(neighborhood) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

# Last expression of the cell: renders the map inline.
map_clusters

#### Observations:

##### Cluster 1:

In [16]:
# Postal codes that received k-means label 0, shown with their identifying columns.
in_cluster = toronto_geo_postal_grouped['Cluster Labels'] == 0
toronto_geo_postal_grouped.loc[in_cluster, ['Postal code', 'Borough', 'Neighborhood']]

Unnamed: 0,Postal code,Borough,Neighborhood
18,M4N,Central Toronto,Lawrence Park
19,M5N,Central Toronto,Roselawn
20,M4P,Central Toronto,Davisville North
21,M5P,Central Toronto,Forest Hill North & West
23,M4R,Central Toronto,North Toronto West
26,M4S,Central Toronto,Davisville
29,M4T,Central Toronto,"Moore Park, Summerhill East"
31,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest..."


##### Cluster 2:

In [17]:
# Postal codes that received k-means label 1, shown with their identifying columns.
in_cluster = toronto_geo_postal_grouped['Cluster Labels'] == 1
toronto_geo_postal_grouped.loc[in_cluster, ['Postal code', 'Borough', 'Neighborhood']]

Unnamed: 0,Postal code,Borough,Neighborhood
0,M5A,Downtown Toronto,"Regent Park, Harbourfront"
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
2,M5B,Downtown Toronto,"Garden District, Ryerson"
3,M5C,Downtown Toronto,St. James Town
5,M5E,Downtown Toronto,Berczy Park
6,M5G,Downtown Toronto,Central Bay Street
8,M5H,Downtown Toronto,"Richmond, Adelaide, King"
10,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands"
13,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange"
16,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel"


##### Cluster 3:

In [18]:
# Postal codes that received k-means label 2, shown with their identifying columns.
in_cluster = toronto_geo_postal_grouped['Cluster Labels'] == 2
toronto_geo_postal_grouped.loc[in_cluster, ['Postal code', 'Borough', 'Neighborhood']]

Unnamed: 0,Postal code,Borough,Neighborhood
7,M6G,Downtown Toronto,Christie
9,M6H,West Toronto,"Dufferin, Dovercourt Village"
11,M6J,West Toronto,"Little Portugal, Trinity"
14,M6K,West Toronto,"Brockton, Parkdale Village, Exhibition Place"
22,M6P,West Toronto,"High Park, The Junction South"
25,M6R,West Toronto,"Parkdale, Roncesvalles"
28,M6S,West Toronto,"Runnymede, Swansea"


##### Cluster 4:

In [19]:
# Postal codes that received k-means label 3, shown with their identifying columns.
in_cluster = toronto_geo_postal_grouped['Cluster Labels'] == 3
toronto_geo_postal_grouped.loc[in_cluster, ['Postal code', 'Borough', 'Neighborhood']]

Unnamed: 0,Postal code,Borough,Neighborhood
4,M4E,East Toronto,The Beaches
12,M4K,East Toronto,"The Danforth West, Riverdale"
15,M4L,East Toronto,"India Bazaar, The Beaches West"
17,M4M,East Toronto,Studio District
38,M7Y,East Toronto,Business reply mail Processing CentrE


The k-means clustering has separated the neighborhoods in Toronto into their respective boroughs, for the most part. Postal codes M5R and M6G were grouped into different clusters than their boroughs would suggest.