In [3]:
import requests
import pandas as pd
from lxml import etree
import folium

### Get data from Wiki

In [6]:
remote = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
local = '//Users/xiatian/Downloads/List of postal codes of Canada_ M - Wikipedia.htm'
geocode = '//Users/xiatian/Downloads/Geospatial_Coordinates.csv'
# text = open(url,'r')

def for_remote():
    res = requests.get(remote)
    return res.text

def for_local():
    with open(local,'r') as f:
        text = f.read()
    return text


tree = etree.HTML(for_local())
trs = tree.xpath('//table[@class="wikitable sortable"]/tbody/tr')
arr = []
for tr in trs:
    info = tr.xpath('.//td')
    if len(info) == 0:
        continue
    else:
        arr.append(str(tr.xpath('string()')).strip('\n').split('\n'))
df = pd.DataFrame(arr,index=list(range(0,len(arr))),columns=list(['Postcode','Borough','Neighbourhood']))

### Create Dataframe

In [7]:
df = df.loc[df['Borough'] != 'Not assigned']
df.reset_index(drop=True)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights
...,...,...,...
206,M8Z,Etobicoke,Kingsway Park South West
207,M8Z,Etobicoke,Mimico NW
208,M8Z,Etobicoke,The Queensway West
209,M8Z,Etobicoke,Royal York South West


### Replace Neighbourhood which value is 'Not assigned' with borough's value in the same row

In [8]:
df.loc[df['Neighbourhood'] == 'Not assigned','Neighbourhood'] = df.loc[df['Neighbourhood'] == 'Not assigned','Borough']
df_g = df.groupby('Postcode')
df

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
...,...,...,...
282,M8Z,Etobicoke,Kingsway Park South West
283,M8Z,Etobicoke,Mimico NW
284,M8Z,Etobicoke,The Queensway West
285,M8Z,Etobicoke,Royal York South West


### Combine neighbourhood which has same postcode

In [9]:
for key,v in enumerate(df_g.groups):
    temp_str = ''
    for nei in df.loc[df['Postcode'] == v,'Neighbourhood'].values:
        temp_str += nei + ','
    temp_str = temp_str.strip(',')
    df.loc[df['Postcode'] == v, 'Neighbourhood'] = temp_str
df = df.drop_duplicates(keep = 'first')
df = df.reset_index(drop = True)
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
...,...,...,...
98,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern
101,M8Y,Etobicoke,"Humber Bay,King's Mill Park,Kingsway Park Sout..."


### Show Dataframe's shape

In [10]:
df.shape

(103, 3)

### Add latitude and longtitude

In [11]:
df_geoc = pd.read_csv(geocode)
df_geoc.rename(columns = {'Postal Code':'Postcode'},inplace = True)
df = pd.merge(df,df_geoc,'inner',on = 'Postcode')
df

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.654260,-79.360636
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558
101,M8Y,Etobicoke,"Humber Bay,King's Mill Park,Kingsway Park Sout...",43.636258,-79.498509


In [12]:
bool = df.Borough.str.contains('Toronto')
df = df[bool].reset_index(drop=True)
map_toronto = folium.Map([43.653963, -79.387207],zoom_start = 11)
for lat, long, borough,neighbourhood in zip(df['Latitude'],df['Longitude'], df['Borough'],df['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label,parse_html = True)
    folium.CircleMarker(
        [lat,long],
        radius = 5,
        popup=  label,
        color = 'blue',
        fill = True,
        fill_color = '#3186cc',
        fill_capacity = 0.7,
        parse_html = False).add_to(map_toronto)
map_toronto