# Segmenting and clustering neighbourhoods in the city of Toronto, Canada

# PART I

In [41]:
import pandas as pd

### 1. Converting the data into a pandas DataFrame

In [42]:
# Wikipedia page "List of postal codes of Canada: M".
postal_codes_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# read_html returns a list with one DataFrame per HTML table on the page;
# the postal code table is taken from the first position.
page_tables = pd.read_html(postal_codes_url)
df = page_tables[0]

In [43]:
# Preview the first rows of the scraped table to check its columns
# (Postal Code, Borough, Neighbourhood).
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### 2. Dropping the rows where Borough is not assigned

In [44]:

# Collect the index labels of rows whose Borough is the "Not assigned"
# placeholder, then remove those rows from df in place.
unassigned_rows = df.index[df["Borough"] == "Not assigned"]
df.drop(index=unassigned_rows, inplace=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### 3. Combining the rows with the same postal code

In [45]:
# Build one row per postal code: all Neighbourhood values that share a code
# are concatenated into a single comma-separated string.
neighbourhoods_by_code = df.groupby('Postal Code')['Neighbourhood']
df_new = neighbourhoods_by_code.apply(lambda names: ','.join(names)).reset_index()
df_new.head()

Unnamed: 0,Postal Code,Neighbourhood
0,M1B,"Malvern, Rouge"
1,M1C,"Rouge Hill, Port Union, Highland Creek"
2,M1E,"Guildwood, Morningside, West Hill"
3,M1G,Woburn
4,M1H,Cedarbrae


In [46]:
# Inner-join the filtered table with the per-postal-code neighbourhood strings.
# Both frames have a 'Neighbourhood' column, so pandas suffixes them:
# Neighbourhood_x comes from df, Neighbourhood_y from df_new.
PostalCode_Canada = df.merge(df_new, how='inner', on='Postal Code')

PostalCode_Canada.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood_x,Neighbourhood_y
0,M3A,North York,Parkwoods,Parkwoods
1,M4A,North York,Victoria Village,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront","Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights","Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government","Queen's Park, Ontario Provincial Government"


In [47]:
# Neighbourhood_y is the combined per-postal-code string produced by the
# groupby; Neighbourhood_x is the original per-row value. Dropping
# Neighbourhood_y outright would throw the combination away, so copy the
# combined text into Neighbourhood_x first (the following cell renames that
# column to 'Neighbourhood').
PostalCode_Canada['Neighbourhood_x'] = PostalCode_Canada['Neighbourhood_y']
PostalCode_Canada.drop(['Neighbourhood_y'], axis=1, inplace=True)
# The merge repeats a postal code once per original row; now that every
# repeat carries the same combined text, keep a single row per code.
# This is a no-op when postal codes are already unique.
PostalCode_Canada.drop_duplicates(subset='Postal Code', inplace=True)
PostalCode_Canada.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood_x
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [48]:
# Give the surviving neighbourhood column its plain name back
# (the merge had suffixed it with "_x").
column_renames = {'Neighbourhood_x': 'Neighbourhood'}
PostalCode_Canada = PostalCode_Canada.rename(columns=column_renames)
PostalCode_Canada.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### 4. Replacing the 'Not assigned' neighbourhood with the corresponding borough

In [49]:
import numpy as np

In [50]:
# Where the neighbourhood is the "Not assigned" placeholder, fall back to the
# row's Borough; every other row keeps its neighbourhood unchanged.
neighbourhood = PostalCode_Canada['Neighbourhood']
is_unassigned = neighbourhood == 'Not assigned'
PostalCode_Canada['Neighbourhood'] = np.where(
    is_unassigned,
    PostalCode_Canada['Borough'],
    neighbourhood,
)
PostalCode_Canada.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### 5. Shape of the final dataset

In [51]:
# (rows, columns) of the cleaned table.
PostalCode_Canada.shape

(103, 3)

# PART II

In [52]:
# Order the rows by postal code. The original index labels are kept, so the
# index is no longer consecutive after this step.
PostalCode_Canada = PostalCode_Canada.sort_values(by='Postal Code')
PostalCode_Canada.head(20)

Unnamed: 0,Postal Code,Borough,Neighbourhood
6,M1B,Scarborough,"Malvern, Rouge"
12,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
18,M1E,Scarborough,"Guildwood, Morningside, West Hill"
22,M1G,Scarborough,Woburn
26,M1H,Scarborough,Cedarbrae
32,M1J,Scarborough,Scarborough Village
38,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
44,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
51,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
58,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [53]:
# Hard-coded latitude values, one per row of PostalCode_Canada (103 entries).
# They are attached to the frame positionally, so the order here must match
# the frame's row order after it has been sorted by 'Postal Code'
# (first entry -> M1B, second -> M1C, ...).
# NOTE(review): the source of these values is not shown in this notebook;
# presumably decimal degrees - confirm.
Latitude = [
43.8066863,
43.7845351,
43.7635726,
43.7709921,
43.773136,
43.7447342,
43.7279292,
43.7111117,
43.716316,
43.692657,
43.7574096,
43.7500715,
43.7942003,
43.7816375,
43.8152522,
43.7995252,
43.8361247,
43.8037622,
43.7785175,
43.7869473,
43.7574902,
43.789053,
43.7701199,
43.7527583,
43.7827364,
43.7532586,
43.7459058,
43.7258997,
43.7543283,
43.7679803,
43.7374732,
43.7390146,
43.7284964,
43.7616313,
43.7258823,
43.7063972,
43.6953439,
43.6763574,
43.7090604,
43.7053689,
43.685347,
43.6795571,
43.6689985,
43.6595255,
43.7280205,
43.7127511,
43.7153834,
43.7043244,
43.6895743,
43.6864123,
43.6795626,
43.667967,
43.6658599,
43.6542599,
43.6571618,
43.6514939,
43.6447708,
43.6579524,
43.6505712,
43.6408157,
43.6471768,
43.6481985,
43.7332825,
43.7116948,
43.6969476,
43.6727097,
43.6626956,
43.6532057,
43.6289467,
43.6464352,
43.6484292,
43.718518,
43.709577,
43.6937813,
43.6890256,
43.669542,
43.6690051,
43.6479267,
43.6368472,
43.7137562,
43.6911158,
43.6731853,
43.6616083,
43.6489597,
43.6515706,
43.6623015,
43.6369656,
43.6627439,
43.6056466,
43.6024137,
43.6536536,
43.6362579,
43.6288408,
43.6678556,
43.6509432,
43.6435152,
43.7563033,
43.7247659,
43.706876,
43.696319,
43.6889054,
43.7394164,
43.7067483,
]

In [54]:
# Append the latitude list as a new column; values are matched to rows by
# position, not by index label.
PostalCode_Canada = PostalCode_Canada.assign(Latitude=Latitude)
PostalCode_Canada.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude
6,M1B,Scarborough,"Malvern, Rouge",43.806686
12,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535
18,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573
22,M1G,Scarborough,Woburn,43.770992
26,M1H,Scarborough,Cedarbrae,43.773136


In [55]:
# Hard-coded longitude values, one per row of PostalCode_Canada (103 entries).
# They are attached to the frame positionally, so the order here must match
# the frame's row order after it has been sorted by 'Postal Code'
# (first entry -> M1B, second -> M1C, ...).
# NOTE(review): the source of these values is not shown in this notebook;
# presumably decimal degrees - confirm.
Longitude = [
-79.1943534,
-79.1604971,
-79.1887115,
-79.2169174,
-79.2394761,
-79.2394761,
-79.2620294,
-79.2845772,
-79.2394761,
-79.2648481,
-79.273304,
-79.2958491,
-79.2620294,
-79.3043021,
-79.2845772,
-79.3183887,
-79.2056361,
-79.3634517,
-79.3465557,
-79.385975,
-79.3747141,
-79.4084928,
-79.4084928,
-79.4000493,
-79.4422593,
-79.3296565,
-79.352188,
-79.340923,
-79.4422593,
-79.4872619,
-79.4647633,
-79.5069436,
-79.4956974,
-79.5209994,
-79.3155716,
-79.309937,
-79.3183887,
-79.2930312,
-79.3634517,
-79.3493719,
-79.3381065,
-79.352188,
-79.3155716,
-79.340923,
-79.3887901,
-79.3901975,
-79.4056784,
-79.3887901,
-79.3831599,
-79.4000493,
-79.3775294,
-79.3676753,
-79.3831599,
-79.3606359,
-79.3789371,
-79.3754179,
-79.3733064,
-79.3873826,
-79.3845675,
-79.3817523,
-79.3815764,
-79.3798169,
-79.4197497,
-79.4169356,
-79.4113072,
-79.4056784,
-79.4000493,
-79.4000493,
-79.3944199,
-79.374846,
-79.3822802,
-79.4647633,
-79.4450726,
-79.4281914,
-79.453512,
-79.4225637,
-79.4422593,
-79.4197497,
-79.4281914,
-79.4900738,
-79.4760133,
-79.4872619,
-79.4647633,
-79.456325,
-79.4844499,
-79.3894938,
-79.615819,
-79.321558,
-79.5013207,
-79.5434841,
-79.5069436,
-79.4985091,
-79.5209994,
-79.5322424,
-79.5547244,
-79.5772008,
-79.5659633,
-79.5322424,
-79.5181884,
-79.5322424,
-79.5547244,
-79.5884369,
-79.5940544
]

In [56]:
# Append the longitude list as a new column; values are matched to rows by
# position, not by index label.
PostalCode_Canada = PostalCode_Canada.assign(Longitude=Longitude)
PostalCode_Canada.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
12,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
18,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
22,M1G,Scarborough,Woburn,43.770992,-79.216917
26,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [59]:
# Replace the leftover, non-consecutive index labels with a fresh 0..n-1
# index; the old labels are discarded rather than kept as a column.
PostalCode_Canada = PostalCode_Canada.reset_index(drop=True)
PostalCode_Canada.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
