IMPORTING ALL THE LIBRARIES NECESSARY:

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import geocoder

In [2]:
#!conda install -c conda-forge geocoder --yes

Getting all content from the wikipidea page using requests package:

In [3]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

response=requests.get(url)

Using BeautifulSoup library to read the html content from the wikipedia page:

In [4]:
soup=BeautifulSoup(response.content,'lxml')


Extracting and Exploring the table content from webpage:

In [5]:
table=soup.find_all('table',class_='wikitable sortable')
table=table[0]
type(table)

bs4.element.Tag

Writing all content of the table in a file named stats.txt in a csv format:

In [6]:
with open ('stats.txt', 'w') as r:
    r.write('0,Postcode,Borough,Neighbourhood')
    r.write('\n')
    for row in table.find_all('tr'):
        for cell in row.find_all('td'):
            r.write(',')
            r.write(cell.text)
            
        

Reading the stats.txt file as a csv file import it as a dataframe using Pandas library:

In [7]:
df=pd.read_csv('stats.txt')

PRE-PROCESSING THE DATAFRAME:

In [8]:
df.drop('0',axis=1,inplace=True)
df=df[df['Borough']!='Not assigned']
df.reset_index(drop=True,inplace=True)

In [9]:
df.head(5)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


If more than one neighborhood existed in one postal code area, then merging the two rows:

In [18]:
for i in range(0,400):
    if i<(df.shape[0]-1):
        if df.loc[i,'Postcode']==df.loc[i+1,'Postcode']:
            df.loc[i,'Neighbourhood']=df.loc[i,'Neighbourhood']+', '+df.loc[i+1,'Neighbourhood']
            df.drop(i+1,axis=0,inplace=True)
            df.reset_index(drop=True,inplace=True)

If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough:

In [19]:
for i in range(0,df.shape[0]):
    if df.loc[i,'Neighbourhood']=='Not assigned':
        df.loc[i,'Neighbourhood']=df.loc[i,'Borough']

In [20]:
df

Unnamed: 0,Postcode,Borough,Neighbourhood,latitude,longitude
0,M3A,North York,Parkwoods,0,0
1,M4A,North York,Victoria Village,1,1
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",2,2
3,M6A,North York,"Lawrence Heights, Lawrence Manor",3,3
4,M7A,Queen's Park,Queen's Park,4,4
5,M9A,Etobicoke,Islington Avenue,5,5
6,M1B,Scarborough,"Rouge, Malvern",6,6
7,M3B,North York,Don Mills North,7,7
8,M4B,East York,"Woodbine Gardens, Parkview Hill",8,8
9,M5B,Downtown Toronto,"Ryerson, Garden District",9,9


In [21]:
df.shape

(103, 5)

Creating two more columns for Latitude and Longitude:

In [22]:
df['latitude']=df.index
df['longitude']=df.index
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,latitude,longitude
0,M3A,North York,Parkwoods,0,0
1,M4A,North York,Victoria Village,1,1
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",2,2
3,M6A,North York,"Lawrence Heights, Lawrence Manor",3,3
4,M7A,Queen's Park,Queen's Park,4,4


As the Geocoder package is not working, I used the csv file provided in the assignment:

In [31]:
coord=pd.read_csv('Geospatial_Coordinates.csv')

In [28]:
coord.columns=['Postcode', 'Latitude', 'Longitude']
coord.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Filling the correct values of latitude and longitude in main dataframe i.e. df:

In [29]:
for i in range(0,df.shape[0]):
    for j in range(0,coord.shape[0]):
        if df.loc[i,'Postcode']==coord.loc[j,'Postcode']:
            df.loc[i,'latitude']=coord.loc[j,'Latitude']
            df.loc[i,'longitude']=coord.loc[j,'Longitude']

In [32]:
df

Unnamed: 0,Postcode,Borough,Neighbourhood,latitude,longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.654260,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


In [33]:
import folium

In [34]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        df.shape[0]
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


Creating a map of Toronto:

In [42]:
latitude=43.741667
longitude=-79.373333
map = folium.Map(location=[latitude, longitude], zoom_start=10)
map

Adding markers of Neighbourhoods in the map of Toronto:

In [44]:
# add markers to map
for lat, lng, borough, neighborhood in zip(df['latitude'], df['longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map)  
    
map

Selecting a Borough=York in Toronto and exploring its neighbourhood:

In [47]:
york = df[df['Borough'] == 'York'].reset_index(drop=True)
york

Unnamed: 0,Postcode,Borough,Neighbourhood,latitude,longitude
0,M6C,York,Humewood-Cedarvale,43.693781,-79.428191
1,M6E,York,Caledonia-Fairbanks,43.689026,-79.453512
2,M6M,York,"Del Ray, Keelesdale, Mount Dennis, Silverthorn",43.691116,-79.476013
3,M6N,York,"The Junction North, Runnymede",43.673185,-79.487262
4,M9N,York,Weston,43.706876,-79.518188


Creating map of York along with markers marking the Neighbourhood of York in Toronto:

In [50]:
map_york = folium.Map(location=[43.6957,-79.4504], zoom_start=11)

# add markers to map
for lat, lng, label in zip(york['latitude'], york['longitude'], york['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_york)  
    
map_york