Exploring Neighborhoods in Toronto

In [1]:
import urllib.request

Scraping Wikipedia for the postal codes (those beginning with "M") and their neighborhoods

In [2]:
# Wikipedia page listing the Canadian postal codes that begin with "M".
wiki = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

Loading the webpage

In [3]:
# Download the page inside a context manager so the HTTP connection is closed
# as soon as the body has been read. `page` then holds the raw HTML bytes,
# which BeautifulSoup accepts just like an open response object.
with urllib.request.urlopen(wiki) as response:
    page = response.read()

The BeautifulSoup package is used to parse the downloaded page

In [4]:
from bs4 import BeautifulSoup

In [5]:
# Parse the downloaded HTML. The parser is named explicitly: when it is left
# out, BeautifulSoup picks whichever parser happens to be installed and emits
# a warning, so the resulting tree can differ from one environment to another.
soup = BeautifulSoup(page, 'html.parser')

Print the scraped page 


In [None]:
# prettify is a method; it has to be called to obtain the indented markup.
# Printing the attribute itself only shows the bound-method repr.
print(soup.prettify())

In [6]:
# Show the page's <title> element to confirm the right document was fetched.
soup.find('title')

<title>List of postal codes of Canada: M - Wikipedia</title>

In [7]:
# Gather every <table> element in the parsed page and display the result.
table = soup.find_all(name='table')
table

In [9]:
# Column accumulators filled from the page's table rows:
#   A -> first cell, B -> second cell, C -> third cell of each 3-cell row.
A, B, C = [], [], []

for row in soup.find_all('tr'):
    cells = row.find_all('td')
    # Only rows with exactly three <td> cells are kept; everything else
    # (header rows, rows from other tables) is skipped.
    if len(cells) != 3:
        continue
    # Keep the first text node of each cell, in column order.
    for column, cell in zip((A, B, C), cells):
        column.append(cell.find(text=True))
    

Making DataFrame out of the scraped page and data preprocessing

In [10]:
import pandas as pd
# Assemble the three scraped column lists into a single DataFrame.
df = pd.DataFrame(A, columns=['PostalCode']).assign(Borough=B, Neighborhood=C)

In [11]:
# Drop rows whose borough is unassigned, then trim surrounding whitespace from
# the neighborhood names. The filtered frame is copied explicitly so that the
# column assignment below writes to an independent frame rather than to a
# slice of the original (the pattern pandas flags with SettingWithCopyWarning).
df = df[df['Borough'] != 'Not assigned'].copy()
df['Neighborhood'] = df['Neighborhood'].str.strip()

Merging the neighborhoods that share the same postal code

In [12]:
# Collapse rows that share a (PostalCode, Borough) pair into a single row whose
# Neighborhood is the comma-joined text of the group's neighborhoods.
df = (
    df.groupby(['PostalCode', 'Borough'])['Neighborhood']
      .apply(','.join)
      .reset_index()
)

In [13]:
# Show the rows whose neighborhood text still contains the placeholder value.
has_placeholder = df['Neighborhood'].str.contains('Not assigned')
df[has_placeholder]

Unnamed: 0,PostalCode,Borough,Neighborhood
85,M7A,Queen's Park,Not assigned


Using the borough name as the neighborhood where the neighborhood is not assigned

In [14]:
# Where the neighborhood is exactly the placeholder, fall back to the row's
# own borough name.
unassigned = df['Neighborhood'] == 'Not assigned'
df.loc[unassigned, 'Neighborhood'] = df.loc[unassigned, 'Borough']

Print extracted structured data

In [15]:
# Display the cleaned DataFrame.
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


In [16]:
# (rows, columns) of the cleaned DataFrame.
df.shape

(103, 3)

Assignment 2: Getting Geo Coordinates of the Postal codes

In [34]:
# The PostalCode column of df; used below as the input to the geocoding lookup.
postal_code = df['PostalCode']

Get the coordinates of the postal codes

In [None]:
import geocoder # import geocoder

# Geocode each postal code separately. Formatting the whole `postal_code`
# Series into a single query string would send the Series' printed form to the
# geocoder instead of one code, so the lookup is done element by element.
# Retries are capped so that a lookup which never succeeds cannot loop forever.
# NOTE(review): geocoder.google may require an API key to return results — confirm.
MAX_GEOCODE_ATTEMPTS = 5

# postal code -> [latitude, longitude], or None when every attempt failed
postal_code_coords = {}

# stays None when postal_code is empty
lat_lng_coords = None

for code in postal_code:
    lat_lng_coords = None
    for _ in range(MAX_GEOCODE_ATTEMPTS):
        g = geocoder.google('{}, Toronto, Ontario'.format(code))
        lat_lng_coords = g.latlng
        if lat_lng_coords:
            break
    # Normalise any empty result to None so failures are easy to spot.
    postal_code_coords[code] = lat_lng_coords or None

# Keep the names this cell originally defined; they now describe the last
# postal code that was looked up (None when that lookup failed).
latitude, longitude = lat_lng_coords if lat_lng_coords else (None, None)

In [19]:
# Load the postal-code coordinate table from a hosted CSV file. The saved
# output further down shows 'Postal Code', 'Latitude' and 'Longitude' columns.
geo_coords = pd.read_csv('http://cocl.us/Geospatial_data')

In [23]:
# Preview the first rows of the coordinate table.
# NOTE(review): the saved output shows 'Postal Code' as both the index and a
# column, which suggests this cell was last run after the index assignment in
# the following cell; on a fresh top-to-bottom run the index here would still
# be the default one — confirm with Restart & Run All.
geo_coords.head()

Unnamed: 0_level_0,Postal Code,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
M1B,M1B,43.806686,-79.194353
M1C,M1C,43.784535,-79.160497
M1E,M1E,43.763573,-79.188711
M1G,M1G,43.770992,-79.216917
M1H,M1H,43.773136,-79.239476


In [21]:
# Index the coordinate table by postal code, keeping 'Postal Code' as a column too.
geo_coords = geo_coords.set_index('Postal Code', drop=False)

In [22]:
# Index the neighborhood table by postal code (the column itself is kept) so
# that it lines up with geo_coords on the same labels.
df = df.set_index('PostalCode', drop=False)

Assign coordinates to corresponding Postal codes in the prepared data frame

In [24]:
# Attach the coordinates; pandas aligns the two frames on their index labels,
# so each row receives the values stored under the same label in geo_coords.
df = df.assign(
    Latitude=geo_coords['Latitude'],
    Longitude=geo_coords['Longitude'],
)

Filter the records whose "Borough" contains "Toronto"

In [25]:
# Keep only the rows whose borough name contains the text 'Toronto'.
in_toronto = df['Borough'].str.contains('Toronto')
df_toronto = df[in_toronto]

In [27]:
# Replace the postal-code index with a fresh 0..n-1 index, then preview the
# first five rows (head's default).
df_toronto = df_toronto.reset_index(drop=True)
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West,Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West,India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


Assignment 3 : Visualizing the Neighborhoods

Latitude and longitude of Toronto

In [28]:
# Coordinates used as the map centre.
latitude, longitude = 43.70011, -79.4163

Importing Folium library for generating maps

In [30]:
import folium # plotting library

Visualizing the neighborhoods of Toronto

In [33]:
def add_circle_marker(target_map, location, label, radius, color):
    """Add a filled circle marker with a popup label to ``target_map``.

    Parameters
    ----------
    target_map : folium.Map
        Map the marker is attached to.
    location : sequence of two numbers
        ``[latitude, longitude]`` of the marker centre.
    label : str
        Text shown in the marker's popup.
    radius : int
        Marker radius passed through to folium.
    color : str
        Used for both the outline and the fill.
    """
    # The top-level folium.CircleMarker is used because
    # folium.features.CircleMarker is not exposed by every folium release.
    # parse_html=True makes the label render as literal text, so names that
    # contain quotes or HTML-special characters cannot break the popup.
    folium.CircleMarker(
        location,
        radius=radius,
        popup=folium.Popup(label, parse_html=True),
        fill=True,
        color=color,
        fill_color=color,
        fill_opacity=0.6,
    ).add_to(target_map)


# Base map centred on the Toronto coordinates.
venues_map = folium.Map(location=[latitude, longitude], zoom_start=12)

# Larger red marker for the city centre.
add_circle_marker(venues_map, [latitude, longitude], 'Toronto', radius=10, color='red')

# One smaller blue marker per row of df_toronto, labelled with its neighborhood(s).
for lat, lng, label in zip(df_toronto.Latitude, df_toronto.Longitude, df_toronto.Neighborhood):
    add_circle_marker(venues_map, [lat, lng], label, radius=5, color='blue')

# display map
venues_map