In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans

### Read the data from the Wikipidea page and store in a dataframe

In [2]:
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',header=0)[0] #read wikipidea entry for postal codes

In [3]:
df # view the data

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


### Updating the columns on the dataframe 

In [4]:
df.columns = ['PostalCode','Borough','Neighborhood'] # update columns of dataframe

In [5]:
df # check the dataframe

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


### Drop Borough's which have not been assigned

In [6]:
#dropping Borough's which have not been assigned
indexNames = df[ df['Borough'] == 'Not assigned' ].index
df.drop(indexNames , inplace=True)

In [7]:
df #view tha data

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


### Set the not assigned neighborhood to the borough and group neighborhoods with the same postal code

In [8]:
df['Neighborhood'] .loc[df['Neighborhood'] == 'Not assigned'] = df['Borough'] # Set the not assigned in neighborhood to borough
df_group = df.groupby(['PostalCode','Borough']).agg( ','.join) # group neighborhoods with the same postal code

In [9]:
df_group

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighborhood
PostalCode,Borough,Unnamed: 2_level_1
M1B,Scarborough,"Rouge,Malvern"
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
M1E,Scarborough,"Guildwood,Morningside,West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
M1J,Scarborough,Scarborough Village
M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
M1N,Scarborough,"Birch Cliff,Cliffside West"


In [10]:
df_group.shape

(103, 1)

### Import the Geospatial data from a CSV file

In [11]:
!wget -q -O 'Geospatial.csv' http://cocl.us/Geospatial_data

### Store the CSV in a pandas dataframe and rename the columns

In [15]:
geo_data = pd.read_csv('Geospatial.csv')
geo_data.columns = ['PostalCode','Latitude','Longitude']

In [16]:
geo_data

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


### Merge the two dataframes into one based on the postal code

In [27]:
pd.merge(geo_data, df_group, on = geo_data['PostalCode'],how='outer')

Unnamed: 0,PostalCode,Latitude,Longitude,Neighborhood
0,M1B,43.806686,-79.194353,"Rouge,Malvern"
1,M1C,43.784535,-79.160497,"Highland Creek,Rouge Hill,Port Union"
2,M1E,43.763573,-79.188711,"Guildwood,Morningside,West Hill"
3,M1G,43.770992,-79.216917,Woburn
4,M1H,43.773136,-79.239476,Cedarbrae
5,M1J,43.744734,-79.239476,Scarborough Village
6,M1K,43.727929,-79.262029,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,43.711112,-79.284577,"Clairlea,Golden Mile,Oakridge"
8,M1M,43.716316,-79.239476,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,43.692657,-79.264848,"Birch Cliff,Cliffside West"
