<h1> Scraping Postal Codes Of Toronto City </h1>

<h4>Importing dependencies</h4>

In [38]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
from geopy.geocoders import Nominatim
import folium

<h4>Requesting source page</h4>

In [2]:
# Download the Wikipedia page that lists the "M" (Toronto) postal codes.
# A timeout prevents the cell from hanging forever on a stalled connection,
# and raise_for_status() stops the notebook early on an HTTP error instead of
# letting an error page be parsed as if it were the postal code table.
response = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
response.raise_for_status()
source = response.text

<h4>Using BeautifulSoup to scrape the postal code table (coordinates are loaded from a CSV file in a later step)</h4>

In [27]:
# Parse the downloaded page and collect one (postal code, borough,
# neighborhood) triple per table row into three parallel lists.
soup = BeautifulSoup(source, 'lxml')
table = soup.find('tbody')  # first <tbody> in the document; assumed to be the postal code table
if table is None:
    raise ValueError('No <tbody> element found in the downloaded page')

postcode = []
borough = []
neighborhood = []
# Not populated in this cell (coordinates are merged in from a CSV later);
# the names are kept so any code that still expects them finds empty lists.
latitudes = []
longitudes = []

for row in table.find_all('tr'):
    # Strip surrounding whitespace/newlines from every cell so comparisons and
    # the later merge on the postal code are not thrown off by a trailing '\n'.
    cells = [cell.text.strip() for cell in row.find_all('td')]

    # Header rows contain no <td> cells; malformed rows may have fewer than
    # three. Skip both, as well as rows whose borough is Not assigned.
    if len(cells) < 3 or cells[1] == 'Not assigned':
        continue

    code, borough_name, neighborhood_name = cells[0], cells[1], cells[2]
    if neighborhood_name == 'Not assigned':
        # A row with a borough but no neighborhood uses the borough name as
        # its neighborhood.
        neighborhood_name = borough_name

    postcode.append(code)
    borough.append(borough_name)
    neighborhood.append(neighborhood_name)

# Column name -> list of values; consumed when building the dataframe.
data = {'Postal Code': postcode, 'Borough': borough, 'Neighborhood': neighborhood}

<h4>Storing the scraped data in pandas dataframe</h4>

In [28]:
# One row per scraped table row.
df = pd.DataFrame(data)
# Collapse to one row per postal code, joining the distinct values of every
# other column with commas. dict.fromkeys() removes duplicates while keeping
# first-seen order (guaranteed on Python 3.7+); set() would give a join order
# that can change from one kernel start to the next.
# After this step "Postal Code" is the index, not a column.
df = df.groupby("Postal Code").agg(lambda values: ','.join(dict.fromkeys(values)))

<h4>The number of rows of the dataframe</h4>

In [29]:
# (rows, columns) of the grouped frame; "Postal Code" is the index after the
# groupby, so it is not counted among the columns.
df.shape

(103, 2)

<h4>Displaying a random sample of 10 rows from the dataframe</h4>

In [30]:
# Show 10 randomly chosen rows. No random_state is passed, so a different
# selection is displayed each time the cell runs.
df.sample(10)

Unnamed: 0_level_0,Borough,Neighborhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
M2P,North York,York Mills West
M4R,Central Toronto,North Toronto West
M4M,East Toronto,Studio District
M8Z,Etobicoke,"The Queensway West,Mimico NW,Royal York South ..."
M1E,Scarborough,"Guildwood,Morningside,West Hill"
M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern
M9L,North York,Humber Summit
M4G,East York,Leaside
M5S,Downtown Toronto,"Harbord,University of Toronto"


<h4>Get the latitude and longitude coordinates of each postal code using geospatial data</h4>
The CSV file available at http://cocl.us/Geospatial_data contains the geographical coordinates of each postal code in Toronto.

In [31]:
# Read the coordinate table from the local CSV file, then attach its columns
# to the postal code frame by matching on "Postal Code" (default inner join).
# NOTE: this rebinds `df`, so the cell is meant to be run once per kernel session.
df2 = pd.read_csv('Geospatial_Coordinates.csv')
df = df.merge(df2, on='Postal Code')

<h4>Examining the resulting dataframe</h4>

In [32]:
# Show 10 randomly chosen rows of the merged frame (unseeded, so the rows
# differ between runs).
df.sample(10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
80,M6M,York,"Mount Dennis,Keelesdale,Silverthorn,Del Ray",43.691116,-79.476013
19,M2K,North York,Bayview Village,43.786947,-79.385975
60,M5K,Downtown Toronto,"Toronto Dominion Centre,Design Exchange",43.647177,-79.381576
59,M5J,Downtown Toronto,"Union Station,Toronto Islands,Harbourfront East",43.640816,-79.381752
43,M4M,East Toronto,Studio District,43.659526,-79.340923
51,M4X,Downtown Toronto,"Cabbagetown,St. James Town",43.667967,-79.367675
102,M9W,Etobicoke,Northwest,43.706748,-79.594054
62,M5M,North York,"Bedford Park,Lawrence Manor East",43.733283,-79.41975
77,M6J,West Toronto,"Little Portugal,Trinity",43.647927,-79.41975
78,M6K,West Toronto,"Parkdale Village,Exhibition Place,Brockton",43.636847,-79.428191


In [35]:
# Number of non-null Neighborhood entries per borough. Each row is one postal
# code whose Neighborhood cell may hold several comma-joined names, so this is
# a count of rows per borough rather than of individual neighborhoods.
df.groupby('Borough')['Neighborhood'].count()

Borough
Central Toronto      9
Downtown Toronto    18
East Toronto         5
East York            5
Etobicoke           12
Mississauga          1
North York          24
Queen's Park         1
Scarborough         17
West Toronto         6
York                 5
Name: Neighborhood, dtype: int64

In [25]:
# Summarise the frame: distinct boroughs and total number of rows.
# NOTE: the second figure is the row count (one row per postal code); it is
# reported under the label "neighborhoods" by the message below.
borough_count = len(df['Borough'].unique())
row_count = df.shape[0]
summary = 'The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, row_count)
print(summary)

The dataframe has 11 boroughs and 103 neighborhoods.


In [33]:
# Look up the coordinates of Toronto with the Nominatim geocoding service;
# they are used as the centre of the map.
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the service finds no match; fail with a
    # clear message instead of an AttributeError on the next line.
    raise RuntimeError('Nominatim returned no result for address {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


<h4>Create a map of Toronto with neighborhoods superimposed on top</h4>

In [49]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per dataframe row, with a popup showing the row's
# neighborhood(s) and borough. The loop variables are deliberately not named
# `borough` / `neighborhood`, so the lists built by the scraping cell are not
# overwritten with strings.
for lat, lng, borough_name, neighborhood_names in zip(
        df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    popup = folium.Popup(
        '{}, {}'.format(neighborhood_names, borough_name),
        parse_html=True,
    )
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        fill=True,
        fill_color='#3186cc',  # was '##3186cc', which is not a valid colour string
        fill_opacity=0.5,
    ).add_to(map_toronto)

# Last expression of the cell: renders the map in the notebook.
map_toronto