In [2]:
#installs
!pip install beautifulsoup4
!pip install requests
!pip install lxml



In [3]:
#imports
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

In [7]:
# Download the Wikipedia page and parse its first 'wikitable' into a DataFrame.
source = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,  # without a timeout, requests can wait on a stalled server forever
)
# Stop on an HTTP error instead of trying to parse an error page
source.raise_for_status()
soup = BeautifulSoup(source.text, "html.parser")

data = []
columns = []
table = soup.find(class_='wikitable')
if table is None:
    # soup.find returns None when nothing matches; fail with a clear message
    raise ValueError("No element with class 'wikitable' was found on the page")

for index, tr in enumerate(table.find_all('tr')):
    # Text of every header/data cell in this row, with surrounding whitespace
    # removed so later comparisons such as == "Not assigned" match exactly
    section = [cell.text.strip() for cell in tr.find_all(['th', 'td'])]

    # First row of the table is the header; every other row is data
    if index == 0:
        columns = section
    else:
        data.append(section)

# Convert the collected rows into a pandas DataFrame
df = pd.DataFrame(data=data, columns=columns)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [8]:
# Remove, in place, every row whose Borough is "Not assigned"
unassigned_rows = df.index[df['Borough'] == "Not assigned"]
df.drop(index=unassigned_rows, inplace=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [9]:
# More than one neighbourhood can exist in one postal code area: collapse each
# postal code to a single row, joining its distinct values with a comma.
# dict.fromkeys drops repeats while keeping first-seen order, so the joined
# string is identical on every run (iterating a set of strings is not ordered
# consistently between Python processes).
df1 = df.groupby("Postal Code").agg(lambda x: ','.join(dict.fromkeys(x)))

# Where the neighbourhood is "Not assigned", use the borough name instead
unassigned = df1['Neighbourhood'] == "Not assigned"
df1.loc[unassigned, 'Neighbourhood'] = df1.loc[unassigned, 'Borough']

df1.shape

(103, 2)

In [10]:
df1

Unnamed: 0_level_0,Borough,Neighbourhood
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern, Rouge"
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
...,...,...
M9N,York,Weston
M9P,Etobicoke,Westmount
M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


GEO SECTION

In [11]:
# Load the postal code -> latitude/longitude table from the hosted CSV
geo_data = pd.read_csv(filepath_or_buffer="https://cocl.us/Geospatial_data")
geo_data

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [12]:
# Attach coordinates by matching on postal code instead of by row position.
# df1 is indexed by 'Postal Code' (it comes from a groupby on that column), so
# assigning a Series that is indexed the same way aligns on the label; the two
# frames no longer need to have the same length or the same row order.
geo_by_code = geo_data.set_index('Postal Code')
df1['Latitude'] = geo_by_code['Latitude']
df1['Longitude'] = geo_by_code['Longitude']

# Postal codes absent from geo_data end up as NaN; surface that immediately
missing_coords = df1['Latitude'].isna() | df1['Longitude'].isna()
assert not missing_coords.any(), (
    f"No coordinates found for postal codes: {list(df1.index[missing_coords])}"
)

df1

Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
M1G,Scarborough,Woburn,43.770992,-79.216917
M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...
M9N,York,Weston,43.706876,-79.518188
M9P,Etobicoke,Westmount,43.696319,-79.532242
M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


In [14]:
!pip install folium

import folium


Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 2.9 MB/s eta 0:00:011
Collecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0


In [17]:
# Preview the first rows of df1 now that Latitude and Longitude have been added
df1.head()

Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
M1G,Scarborough,Woburn,43.770992,-79.216917
M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [21]:
# Keep only the rows whose borough name contains "Toronto" (missing values
# count as no match), then replace the postal-code index with a fresh 0..n-1
# index; the old index is discarded rather than kept as a column.
is_toronto_borough = df1['Borough'].str.contains('Toronto', na=False)
toronto_df = df1.loc[is_toronto_borough].reset_index(drop=True)
toronto_df.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,East Toronto,The Beaches,43.676357,-79.293031
1,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,East Toronto,Studio District,43.659526,-79.340923
4,Central Toronto,Lawrence Park,43.72802,-79.38879


In [23]:
# (rows, columns) of the Toronto-only frame
toronto_df.shape

(39, 4)

In [25]:
# Coordinates the map is centred on
latitude = 43.6532
longitude = -79.3832

# Base map of TORONTO centred on the coordinates above
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Arguments shared by every marker
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per row of toronto_df, with the neighbourhood name as popup
for lat, lng, label in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Neighbourhood']):
    folium.CircleMarker(
        [lat, lng],
        popup=folium.Popup(label, parse_html=True),
        **marker_style,
    ).add_to(map_toronto)

map_toronto
