In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import requests
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

# Show every column and row when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Fail fast on network/HTTP problems instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

data = soup.find('table', class_='wikitable sortable')
if data is None:
    raise ValueError("No 'wikitable sortable' table found at {}".format(url))

columns = ['PostalCode', 'Borough', 'Neighborhood']

# Collect one record per table row and build the frame once, rather than
# growing it row by row. Rows whose <td> count does not match the expected
# columns (such as a <th>-only header row) are skipped, so no placeholder
# values can leak into the data.
records = []
for table_row in data.find_all('tr'):
    cells = table_row.find_all('td')
    if len(cells) != len(columns):
        continue
    records.append([cell.text.strip() for cell in cells])

if not records:
    raise ValueError('The postal code table at {} contained no data rows'.format(url))

df = pd.DataFrame(records, columns=columns)

# Postal codes without a borough carry no usable information.
df = df[df['Borough'] != 'Not assigned'].copy()

# Keep only the first line of the neighbourhood cell.
df['Neighborhood'] = df['Neighborhood'].str.split('\n').str[0]

# A postal code with a borough but no neighbourhood takes the borough name.
# This is written through .loc because assigning to the row objects yielded
# by iterrows() is not guaranteed to modify the frame.
unnamed = df['Neighborhood'] == 'Not assigned'
df.loc[unnamed, 'Neighborhood'] = df.loc[unnamed, 'Borough']

# One row per postal code: keep its borough and join its neighbourhoods with
# ", ". groupby sorts by PostalCode and preserves the original row order
# inside each group, so the neighbourhoods stay in table order.
df = (
    df.groupby('PostalCode', as_index=False)
      .agg({'Borough': 'first', 'Neighborhood': ', '.join})
)


In [2]:
# Preview the first rows only; display.max_rows is unlimited, so a bare `df`
# would render the entire table.
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Port Union, Rouge Hill, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Golden Mile, Oakridge, Clairlea"
8,M1M,Scarborough,"Cliffcrest, Scarborough Village West, Cliffside"
9,M1N,Scarborough,"Cliffside West, Birch Cliff"


In [3]:
# Sanity check: (row count, column count) of the cleaned postal-code table.
df.shape

(103, 3)

In [4]:

# CSV of postal-code coordinates; the columns read below are 'Postal Code',
# 'Latitude' and 'Longitude'.
csv_url = 'http://cocl.us/Geospatial_data'
df_coor = pd.read_csv(csv_url)

# Index the coordinates by postal code once so every lookup is a direct index
# hit instead of a full scan per row. Only the first row per postal code is
# kept, i.e. the first match wins if the CSV contains duplicates.
coords_by_code = df_coor.drop_duplicates(subset='Postal Code').set_index('Postal Code')

# Report every postal code that has no coordinates, rather than failing on
# the first one with an uninformative IndexError.
missing_codes = sorted(set(df['PostalCode']) - set(coords_by_code.index))
if missing_codes:
    raise ValueError(
        'No coordinates found for postal codes: {}'.format(', '.join(missing_codes))
    )

df['Latitude'] = df['PostalCode'].map(coords_by_code['Latitude'])
df['Longitude'] = df['PostalCode'].map(coords_by_code['Longitude'])


In [5]:
# Preview the first rows only; display.max_rows is unlimited, so a bare `df`
# would render the entire table.
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Port Union, Rouge Hill, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Oakridge, Clairlea",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Scarborough Village West, Cliffside",43.716316,-79.239476
9,M1N,Scarborough,"Cliffside West, Birch Cliff",43.692657,-79.264848


In [8]:

address = 'Toronto, Ontario'

# Resolve the address to coordinates used as the map centre.
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the service finds no match; stop here with
    # a clear message instead of failing on `None.latitude` below.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, Ontario are {}, {}.'.format(latitude, longitude))

map_canada = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per postal-code row, with its neighbourhood names shown
# in a popup.
for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(neighborhood, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_canada)


The geograpical coordinate of Toronto, Ontario are 43.653963, -79.387207.


In [9]:
# Display the map built above as this cell's output.
map_canada