### Import Libraries

In [2]:
import pandas as pd
from sklearn.cluster import KMeans

### Get Data From Wikipedia
After getting the data from wikipedia, the columns were consolidated into a single column.

In [3]:
raw_data = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
raw_data = raw_data.unstack().reset_index(drop=True)
raw_data

0                                        M1ANot assigned
1                        M1BScarborough(Malvern / Rouge)
2      M1CScarborough(Rouge Hill / Port Union / Highl...
3      M1EScarborough(Guildwood / Morningside / West ...
4                                 M1GScarborough(Woburn)
                             ...                        
175    M9VEtobicoke(South Steeles / Silverstone / Hum...
176    M9WEtobicokeNorthwest(Clairville / Humberwood ...
177                                      M9XNot assigned
178                                      M9YNot assigned
179                                      M9ZNot assigned
Length: 180, dtype: object

### Create Dataframe

In [4]:
# create dataframe
column_names = ["PostalCode", "Borough", "Neighborhood"]
df = pd.DataFrame(columns=column_names)

for i, row in raw_data.iteritems():
    postal_code = raw_data[i][:3]
    if "(" not in row:
        borough = raw_data[i][3:]
        neighborhood = ""
    else:
        places = raw_data[i][3:].split('(', 1)
        borough = places[0]
        neighborhood = places[1].replace(')', '').replace(' /', ',')
    df = df.append({'PostalCode': postal_code, 'Borough': borough, 'Neighborhood': neighborhood}, ignore_index=True)

# Remove rows with Not assigned Boroughs
df.drop(df[df['Borough']== 'Not assigned'].index , inplace=True)

df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
1,M1B,Scarborough,"Malvern, Rouge"
2,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
3,M1E,Scarborough,"Guildwood, Morningside, West Hill"
4,M1G,Scarborough,Woburn
5,M1H,Scarborough,Cedarbrae


### Get data shape

In [5]:
df.shape

(103, 3)

### Read neighborhood geographical coordinates

In [6]:
coords = pd.read_csv('Geospatial_Coordinates.csv', names=['PostalCode', 'Latitude', 'Longitude'])
coords

Unnamed: 0,PostalCode,Latitude,Longitude
0,Postal Code,Latitude,Longitude
1,M1B,43.8066863,-79.1943534
2,M1C,43.7845351,-79.1604971
3,M1E,43.7635726,-79.1887115
4,M1G,43.7709921,-79.2169174
...,...,...,...
99,M9N,43.706876,-79.5181884
100,M9P,43.696319,-79.5322424
101,M9R,43.6889054,-79.5547244
102,M9V,43.7394164,-79.5884369


### Adding the coordinates to the initial dataframe

In [7]:
final_df = pd.merge(df, coords, on='PostalCode')
final_df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.8066863,-79.1943534
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.7845351,-79.1604971
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.7635726,-79.1887115
3,M1G,Scarborough,Woburn,43.7709921,-79.2169174
4,M1H,Scarborough,Cedarbrae,43.773136,-79.2394761
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.5181884
99,M9P,Etobicoke,Westmount,43.696319,-79.5322424
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.6889054,-79.5547244
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.7394164,-79.5884369


In [8]:
final_df['Borough'].unique()

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'East YorkEast Toronto', 'Central Toronto', 'Downtown Toronto',
       'Downtown TorontoStn A PO Boxes25 The Esplanade', 'York',
       'West Toronto', "Queen's Park / Ontario Provincial Government",
       'MississaugaCanada Post Gateway Processing Centre',
       'East TorontoBusiness reply mail Processing Centre969 Eastern',
       'Etobicoke', 'EtobicokeNorthwest'], dtype=object)

In [9]:
import folium

In [10]:
toronto_map = folium.Map(location=[43.6532, -79.3832], zoom_start=12)
toronto_map