# Part 1:

## 1- Importing the necessary libraries

In [30]:
import pandas as pd
import numpy as np

## 2- Reading the HTML table from the Wikipedia website to a pandas dataframe

In [31]:
# Wikipedia page listing Canadian postal codes that start with "M" (Toronto).
url_wiki = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# pd.read_html returns a list with one DataFrame per HTML table on the page;
# the cells below use the first entry. No empty placeholder frame is created
# here: the working frame is built directly from the scraped table, whose own
# headers ('Postcode', 'Borough', 'Neighbourhood') are the ones used later.
postal_data = pd.read_html(url_wiki)

## 3- Creating the dataframe

In [32]:
# Keep the raw scrape untouched so this cell can be re-run safely, and drop
# the rows whose borough is "Not assigned".
assigned_df = postal_data[0][postal_data[0]['Borough'] != 'Not assigned']

# One row per (Postcode, Borough); neighbourhoods sharing a postal code are
# collapsed into a single comma-separated string. A single groupby is enough:
# the former second pass only wrapped each already-joined string in a
# one-element list and unwrapped it again.
postal_code_df = (
    assigned_df
    .groupby(['Postcode', 'Borough'], sort=False)['Neighbourhood']
    .agg(','.join)
    # Shuffle the rows as before, but with a fixed seed so that
    # Restart & Run All reproduces the same order.
    .sample(frac=1, random_state=0)
    .reset_index()
)

# Preview only; the full frame has one row per postal code.
postal_code_df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M8W,Etobicoke,"Alderwood,Long Branch"
1,M5W,Downtown Toronto,Stn A PO Boxes 25 The Esplanade
2,M3J,North York,"Northwood Park,York University"
3,M4C,East York,Woodbine Heights
4,M3N,North York,Downsview Northwest
5,M4K,East Toronto,"The Danforth West,Riverdale"
6,M5X,Downtown Toronto,"First Canadian Place,Underground city"
7,M4A,North York,Victoria Village
8,M2P,North York,York Mills West
9,M5J,Downtown Toronto,"Harbourfront East,Toronto Islands,Union Station"


## 4- Printing the size of the dataframe

In [33]:
# (number of rows, number of columns) of the grouped postal-code frame
postal_code_df.shape

(103, 3)

# Part 2:

## 5- Reading the Geospatial data csv file:

In [34]:
# Load the CSV of postal codes with their latitude/longitude and preview
# the first rows.
geospatial_url = 'http://cocl.us/Geospatial_data'
postal_new_df = pd.read_csv(geospatial_url)
postal_new_df.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## 6- Creating the new dataframe

In [35]:
# Target schema of the combined frame: postal-code info plus coordinates.
column_names = [
    'PostalCode',
    'Borough',
    'Neighbourhood',
    'Latitude',
    'Longitude',
]

# Empty frame carrying only the column layout.
Toronto_df = pd.DataFrame(columns=column_names)

In [36]:
# Attach coordinates to each postal code. The two frames name the key
# differently ('Postcode' vs 'Postal Code'), so the right-hand key is dropped
# after the join and the remaining key is renamed to 'PostalCode'.
Toronto_df = (
    postal_code_df
    .merge(postal_new_df, left_on='Postcode', right_on='Postal Code')
    .drop(columns='Postal Code')
    .rename(columns={'Postcode': 'PostalCode'})
)

In [10]:
# Preview the merged frame instead of dumping every row into the output.
Toronto_df.head(10)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M6L,North York,"Downsview,North Park,Upwood Park",43.713756,-79.490074
1,M2N,North York,Willowdale South,43.770120,-79.408493
2,M5R,Central Toronto,"The Annex,North Midtown,Yorkville",43.672710,-79.405678
3,M4H,East York,Thorncliffe Park,43.705369,-79.349372
4,M6N,York,"The Junction North,Runnymede",43.673185,-79.487262
5,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
6,M4E,East Toronto,The Beaches,43.676357,-79.293031
7,M6H,West Toronto,"Dovercourt Village,Dufferin",43.669005,-79.442259
8,M3H,North York,"Bathurst Manor,Downsview North,Wilson Heights",43.754328,-79.442259
9,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242


# Part 3:

## 1- Generating the map:
### Installing folium

In [37]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

Solving environment: done

# All requested packages already installed.



## Plotting the map

In [38]:
# Base map centred on Toronto (latitude, longitude).
Toronto_map = folium.Map(location=[43.651070, -79.347015], zoom_start=10)

# One circle marker per postal code, labelled "<neighbourhoods>, <borough>".
marker_rows = zip(
    Toronto_df['Latitude'],
    Toronto_df['Longitude'],
    Toronto_df['Borough'],
    Toronto_df['Neighbourhood'],
)
for lat, lng, borough, neighbourhood in marker_rows:
    popup_text = '{}, {}'.format(neighbourhood, borough)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        # parse_html is an option of folium.Popup; it was previously also
        # passed to CircleMarker, where it is not a marker/path option.
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(Toronto_map)

Toronto_map

## Clustering neighborhoods using k-means:

In [39]:
# Kmeans:
from sklearn.cluster import KMeans
from matplotlib import pyplot as plt
# make_blobs is exposed by sklearn.datasets; the old
# sklearn.datasets.samples_generator module path no longer exists in
# current scikit-learn releases.
from sklearn.datasets import make_blobs
import matplotlib.cm as cm
import matplotlib.colors as colors

# Number of clusters to fit.
k = 5

# Make the cell re-runnable: discard labels left over from a previous run so
# they are neither fed into the fit nor cause insert() to raise on a
# duplicate column.
Toronto_df = Toronto_df.drop(columns='Cluster Labels', errors='ignore')

# Cluster on the coordinates only.
Toronto_clust = Toronto_df[['Latitude', 'Longitude']]

# n_init is pinned so the result does not depend on the installed
# scikit-learn version's default; random_state fixes the initialisation.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(Toronto_clust)

# Put the label of each row in the first column.
Toronto_df.insert(0, 'Cluster Labels', kmeans.labels_)

In [40]:
# Preview the labelled frame instead of dumping every row into the output.
Toronto_df.head(10)

Unnamed: 0,Cluster Labels,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,1,M8W,Etobicoke,"Alderwood,Long Branch",43.602414,-79.543484
1,0,M5W,Downtown Toronto,Stn A PO Boxes 25 The Esplanade,43.646435,-79.374846
2,4,M3J,North York,"Northwood Park,York University",43.767980,-79.487262
3,3,M4C,East York,Woodbine Heights,43.695344,-79.318389
4,1,M3N,North York,Downsview Northwest,43.761631,-79.520999
5,3,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188
6,0,M5X,Downtown Toronto,"First Canadian Place,Underground city",43.648429,-79.382280
7,3,M4A,North York,Victoria Village,43.725882,-79.315572
8,4,M2P,North York,York Mills West,43.752758,-79.400049
9,0,M5J,Downtown Toronto,"Harbourfront East,Toronto Islands,Union Station",43.640816,-79.381752


## Creating the map

In [41]:
# Base map centred on Toronto (latitude, longitude).
map_clust = folium.Map(location=[43.651070, -79.347015], zoom_start=10)

# One colour per cluster, evenly spaced along the rainbow colormap and
# converted to hex strings for folium.
rainbow = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, k))]

# add markers to the map
cluster_rows = zip(
    Toronto_df['Latitude'],
    Toronto_df['Longitude'],
    Toronto_df['Neighbourhood'],
    Toronto_df['Cluster Labels'],
)
for lat, lon, neighbourhood, cluster in cluster_rows:
    # KMeans labels run from 0 to k-1, so they index the palette directly;
    # the former `cluster-1` only worked for cluster 0 through negative-index
    # wraparound.
    cluster_color = rainbow[int(cluster)]
    # Show which neighbourhood(s) the marker stands for next to its cluster.
    label = folium.Popup(str(neighbourhood) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7,
    ).add_to(map_clust)

map_clust