## Libraries

In [64]:
import folium
import geocoder
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler


## Define url and objects

In [2]:
# Download the Wikipedia list of Canadian postal codes beginning with "M"
# and parse the returned HTML.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
page = requests.get(url, timeout=30)
# Fail here on an HTTP error status instead of parsing an error page below.
page.raise_for_status()
soup = BeautifulSoup(page.text, 'html.parser')


## Find the table

In [13]:
# Locate the postal-code table. The attribute filter is a dict mapping the
# attribute name ('class') to the value it must have.
table = soup.find('table', {'class': 'wikitable sortable'})
if table is None:
    # Stop with a clear message rather than an AttributeError on None later on.
    raise ValueError("No table with class 'wikitable sortable' found on the page")

## Extract necessary info from the table

In [4]:
# Walk the table row by row and collect the three cell texts of every data row
# into the parallel lists Postal / Borough / Neighborhood.
Postal = []
Borough = []
Neighborhood = []
for row in table.find_all('tr'):
    # .text.strip() removes surrounding whitespace/newlines only; it never cuts
    # a real character the way an unconditional [:-1] slice can.
    cells = [cell.text.strip() for cell in row.find_all('td')]
    if len(cells) != 3:
        # Rows without exactly three <td> cells (e.g. a header row made of <th>)
        # are skipped so they cannot shift later values into the wrong column.
        continue
    postal_code, borough, neighborhood = cells
    Postal.append(postal_code)
    Borough.append(borough)
    Neighborhood.append(neighborhood)

## Create DataFrame from the extracted data

In [5]:
# Assemble the scraped lists into a three-column frame, then keep only the
# rows whose borough is assigned. The original row index is retained.
column_names = ['PostalCode', 'Borough', 'Neighborhood']
scraped_rows = list(zip(Postal, Borough, Neighborhood))
FSA = pd.DataFrame(scraped_rows, columns=column_names)
has_borough = FSA['Borough'].ne('Not assigned')
FSA = FSA.loc[has_borough]
FSA

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Not assigned
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


## Create new DataFrame To Meet the Criteria Given

In [6]:
# Collapse FSA to one entry per postal code:
#   postal_update  - each distinct postal code, in order of first appearance
#   borough_update - the first borough listed for that postal code
#   concateNeigh   - its neighbourhoods joined with ', ' (or the borough name
#                    when the only neighbourhood is 'Not assigned')
postal_update = []
concateNeigh = []
borough_update = []
# A single groupby pass replaces re-filtering the whole frame for every postal
# code; sort=False keeps the groups in first-appearance order.
for P, rows in FSA.groupby('PostalCode', sort=False):
    borough = rows['Borough'].iloc[0]
    joined = ', '.join(rows['Neighborhood'])

    postal_update.append(P)
    borough_update.append(borough)
    # A postal code with no named neighbourhood takes its borough's name.
    concateNeigh.append(borough if joined == 'Not assigned' else joined)
    

In [7]:
# Rebuild FSA from the per-postal-code lists (one row per postal code).
column_names = ['PostalCode', 'Borough', 'Neighborhood']
consolidated_rows = list(zip(postal_update, borough_update, concateNeigh))
FSA = pd.DataFrame(consolidated_rows, columns=column_names)
FSA

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park
5,M9A,Queen's Park,Queen's Park
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


## Dimensions

In [8]:
# (number of rows, number of columns) of FSA.
FSA.shape

(103, 3)

In [9]:
# Location of the CSV that is read with pd.read_csv below; the merge further
# down uses its 'Postal Code', 'Latitude' and 'Longitude' columns.
urlgeo='http://cocl.us/Geospatial_data'

In [10]:
# Load the CSV at urlgeo into a DataFrame.
geo=pd.read_csv(urlgeo)

In [11]:
# Left-join the coordinates onto FSA by postal code (every FSA row is kept),
# then keep only the five columns needed downstream.
wanted_columns = ['PostalCode','Borough','Neighborhood','Latitude','Longitude']
fsa_with_geo = pd.merge(
    FSA,
    geo,
    how='left',
    left_on='PostalCode',
    right_on='Postal Code',
)
newFSA = fsa_with_geo[wanted_columns]

In [16]:
# Display the merged frame (FSA columns plus Latitude / Longitude).
newFSA

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.654260,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
5,M9A,Queen's Park,Queen's Park,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


## Clustering

In [18]:
# Keep only the rows whose borough name contains 'Toronto'.
is_toronto_borough = newFSA['Borough'].str.contains('Toronto')
TorontoFSA = newFSA.loc[is_toronto_borough]

In [19]:
# Display the rows selected for clustering.
TorontoFSA

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568
31,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259


## Preprocessing for Clustering

In [39]:
# Standardize Latitude and Longitude (StandardScaler: zero mean, unit variance
# per column) so both coordinates are on the same scale for k-means.
# StandardScaler is imported with the other libraries at the top of the notebook.
coordinates = TorontoFSA[['Latitude', 'Longitude']]
TorontoFSA_norm = StandardScaler().fit_transform(coordinates)
TorontoFSA_norm

array([[-0.55554658,  0.79089146],
       [-0.20855993,  0.01026436],
       [-0.43033262,  0.29583074],
       [-0.67489659,  0.39102765],
       [ 0.39793749,  2.61964756],
       [-0.96499134,  0.44814526],
       [-0.39621906,  0.06737385],
       [ 0.10386009, -0.88430039],
       [-0.71471013,  0.14352435],
       [ 0.08069342, -1.41708063],
       [-1.13564978,  0.21967755],
       [-0.82881755, -0.80817965],
       [ 0.53600121,  1.01941327],
       [-0.86117495,  0.22443577],
       [-1.30688641, -1.03653375],
       [ 0.08040863,  2.00991342],
       [-0.81708966,  0.27203152],
       [-0.32834143,  1.32413968],
       [ 2.62714635,  0.02929996],
       [ 1.92270943, -0.7320562 ],
       [ 1.96828767, -0.00877123],
       [ 1.28638314, -0.57980391],
       [-0.23847079, -2.02583013],
       [ 2.08186867, -0.42754079],
       [ 0.24054304, -0.42754079],
       [-0.78424468, -1.79756801],
       [ 1.60468436,  0.02929996],
       [-0.19155492, -0.27526956],
       [-0.67158707,

In [40]:
# Fit k-means on the standardized coordinates.
kclusters = 5
# n_init is pinned because its default differs between scikit-learn releases;
# together with random_state=0 this keeps the labels reproducible and avoids
# the FutureWarning about the changing default.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(TorontoFSA_norm)

In [42]:
# Attach each row's cluster id as the first column. Any 'Cluster Labels' column
# left by a previous run is dropped first, because DataFrame.insert raises
# ValueError if the column already exists; this makes the cell re-runnable.
TorontoFSA = TorontoFSA.drop(columns='Cluster Labels', errors='ignore')
TorontoFSA.insert(0, 'Cluster Labels', kmeans.labels_)

In [43]:
# Display TorontoFSA, now with 'Cluster Labels' as its first column.
TorontoFSA

Unnamed: 0,Cluster Labels,PostalCode,Borough,Neighborhood,Latitude,Longitude
2,0,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
4,4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
9,0,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
15,0,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,2,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,0,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,0,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,4,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,0,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568
31,3,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259


## Create map

In [68]:
# Build an interactive folium map with one circle marker per row of
# TorontoFSA, coloured by its k-means cluster.
map_clusters = folium.Map(location=[43.6595255,-79.293031], zoom_start=11)

# One evenly spaced colour from the rainbow colormap per cluster, as hex strings.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(TorontoFSA['Latitude'], TorontoFSA['Longitude'], TorontoFSA['Neighborhood'], TorontoFSA['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Cluster labels start at 0, so they index the palette directly; using
    # cluster-1 would send cluster 0 to index -1 (the last colour).
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

# Last expression of the cell: renders the map inline.
map_clusters