<h1>Segmenting and Clustering Neighborhoods in Toronto</h1>
<h4>By: Alexander Stetzer</h4>

In [16]:
# Libraries used throughout: HTTP client, HTML parser, and DataFrame handling
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Passing None removes pandas' display limit, so printed DataFrames are never truncated
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

<h2>Part One: Toronto Neighborhoods and Postal Codes DataFrame</h2>

In [17]:
#url of the postal codes for toronto (HTTPS, so the request is encrypted and skips the plain-HTTP redirect)
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

#Download the page. The timeout stops a stalled connection from hanging the kernel,
#and raise_for_status() fails here with a clear HTTPError on a 4xx/5xx response
#instead of letting an error page be parsed further down.
html = requests.get(url, timeout=30)
html.raise_for_status()
d = html.text

#Creation of the BeautifulSoup object from the page's HTML text
soup = BeautifulSoup(d, 'html5lib')


In [18]:
#Take the first <table> of the page; find() returns None when there is none,
#so fail with a clear message rather than an AttributeError on None.
postal_table = soup.find('table')
if postal_table is None:
    raise ValueError('No <table> element found in the downloaded page; the page layout may have changed')

#Empty list used to gather all of the Postal Data (one dict per table cell kept)
toronto_list = []

#Loop to go through the table data and extract the Postal Codes, Boroughs, and Neighborhoods of Toronto
for item in postal_table.find_all('td'):
    #A usable cell needs both a <p> (its text starts with the postal code) and a
    #<span> (borough and neighborhoods); skip any cell missing either one.
    if item.p is None or item.span is None:
        continue

    cell_text = item.span.text
    if cell_text == 'Not assigned': #used to remove all of the postal codes with no Borough assignment
        continue

    #The span text has the form "Borough(Neighborhood / Neighborhood)".
    #When there is no parenthesised part, the neighborhood is left empty instead of raising IndexError.
    parts = cell_text.split('(')
    if len(parts) > 1:
        neighborhood = parts[1].replace(' /', ',').replace(')','').strip(' ')
    else:
        neighborhood = ''

    toronto_list.append({
        'Postal Code': item.p.text[:3],
        'Borough': parts[0],
        'Neighborhood': neighborhood,
    })

#Creation of the DataFrame from the list created using the previous loop.
#The explicit columns keep the frame well-formed (and the next line working) even if no cell was kept.
toronto_df = pd.DataFrame(toronto_list, columns=['Postal Code', 'Borough', 'Neighborhood'])

#Some cells carry extra text fused onto the borough name; map those exact strings to readable names
toronto_df['Borough']=toronto_df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

print(toronto_df.head())

  Postal Code           Borough                      Neighborhood
0         M3A        North York                         Parkwoods
1         M4A        North York                  Victoria Village
2         M5A  Downtown Toronto         Regent Park, Harbourfront
3         M6A        North York  Lawrence Manor, Lawrence Heights
4         M7A      Queen's Park     Ontario Provincial Government


In [28]:
#Show the data type pandas assigned to each column of toronto_df
column_types = toronto_df.dtypes
print(column_types)

Postal Code     object
Borough         object
Neighborhood    object
dtype: object


<h2>Part Two: Latitude and Longitude of Neighborhoods</h2>

In [20]:
#!pip install geocoder
#import geocoder
import pandas as pd

In [21]:
#Name of the CSV file (relative to the working directory) holding the postal-code coordinates
file = "Geospatial_Coordinates.csv"

In [34]:
#Read the coordinates CSV into a DataFrame and preview its first five rows
geo_df = pd.read_csv(filepath_or_buffer=file)
geo_preview = geo_df.head(n=5)
print(geo_preview)

  Postal Code   Latitude  Longitude
0         M1B  43.806686 -79.194353
1         M1C  43.784535 -79.160497
2         M1E  43.763573 -79.188711
3         M1G  43.770992 -79.216917
4         M1H  43.773136 -79.239476


In [36]:
#Attach the coordinates to each postal code.
#how='left' keeps every row of toronto_df; a postal code absent from geo_df gets NaN Latitude/Longitude.
#validate='many_to_one' makes pandas raise a MergeError if geo_df lists a postal code more than once,
#instead of silently duplicating the matching toronto_df rows.
torontoGeo_df = toronto_df.merge(geo_df, on='Postal Code', how='left', validate='many_to_one')
print(torontoGeo_df.head())

  Postal Code           Borough                      Neighborhood   Latitude  \
0         M3A        North York                         Parkwoods  43.753259   
1         M4A        North York                  Victoria Village  43.725882   
2         M5A  Downtown Toronto         Regent Park, Harbourfront  43.654260   
3         M6A        North York  Lawrence Manor, Lawrence Heights  43.718518   
4         M7A      Queen's Park     Ontario Provincial Government  43.662301   

   Longitude  
0 -79.329656  
1 -79.315572  
2 -79.360636  
3 -79.464763  
4 -79.389494  


In [44]:
#Preview the leading rows of every borough: head() on the grouped frame
#returns up to five rows per group by default
by_borough = torontoGeo_df.groupby('Borough')
print(by_borough.head())

    Postal Code                 Borough  \
0           M3A              North York   
1           M4A              North York   
2           M5A        Downtown Toronto   
3           M6A              North York   
4           M7A            Queen's Park   
5           M9A               Etobicoke   
6           M1B             Scarborough   
7           M3B              North York   
8           M4B               East York   
9           M5B        Downtown Toronto   
10          M6B              North York   
11          M9B               Etobicoke   
12          M1C             Scarborough   
14          M4C               East York   
15          M5C        Downtown Toronto   
16          M6C                    York   
17          M9C               Etobicoke   
18          M1E             Scarborough   
19          M4E            East Toronto   
20          M5E        Downtown Toronto   
21          M6E                    York   
22          M1G             Scarborough   
23         

<h2>Part Three: Clustering of Neighborhoods</h2>