Let's import the necessary libraries

In [119]:
from bs4 import BeautifulSoup
from collections import deque
import requests
import csv
import pandas as pd
import numpy as np
!conda install -c conda-forge folium=0.5.0
import folium
from sklearn.cluster import KMeans

Solving environment: done

# All requested packages already installed.



Then, I loaded the data from the webpage and selected the table.

In [57]:
# Download the Wikipedia page named in the URL below.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() stops here on an HTTP error instead of silently
# parsing an error page.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, "lxml")

# Take every <tr> of the first element whose class is 'wikitable sortable',
# skipping the first row (presumably the header row; confirm against the page).
table = soup.find(class_='wikitable sortable').find_all('tr')[1:]

The next step is to create an empty DataFrame with the required column names.

In [58]:
# Column labels of the neighbourhood table.
column_names = [
    'PostalCode',
    'Borough',
    'Neighborhood',
]

# Start from an empty frame that already carries those labels.
neighborhoods = pd.DataFrame(columns=column_names)

The following step is a little more involved. First, I skip every row whose Borough is 'Not assigned'. Then, if a row's Neighborhood is 'Not assigned', I use its Borough as the Neighborhood instead. Finally, I collect the data in a dictionary keyed by postal code: each value is a list holding the Borough followed by every Neighborhood found for that postal code.

In [67]:
# Maps each postal code to a list of the form
# [borough, neighbourhood, neighbourhood, ...].
dictionary = {}

for table_row in table:
    cells = [cell.text for cell in table_row.find_all('td')]
    code, borough = cells[0], cells[1]

    # Rows without an assigned borough are dropped entirely.
    if borough == 'Not assigned':
        continue

    # Keep only the text before the first newline of the neighbourhood cell.
    name = cells[2].split("\n")[0]
    # An unassigned neighbourhood takes the name of its borough.
    if name == 'Not assigned':
        name = borough

    # The first sighting of a postal code stores its borough; every sighting
    # (including the first) appends the neighbourhood name.
    dictionary.setdefault(code, [borough]).append(name)
        

Let's move the data from the dictionary to a dataframe

In [60]:
# Build one record per postal code and create the DataFrame in a single step.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and
# appending row by row copies the whole frame on every iteration.
# Rebuilding `neighborhoods` from scratch also makes this cell safe to re-run:
# running it twice no longer duplicates every row.
records = []
for postal, values in dictionary.items():
    # Each value is [borough, neighbourhood, neighbourhood, ...].
    borough, names = values[0], values[1:]
    records.append({
        'PostalCode': postal,
        'Borough': borough,
        # Several neighbourhoods sharing a postal code become one comma-separated string.
        'Neighborhood': ", ".join(names),
    })

# Passing the columns explicitly keeps the layout even when `dictionary` is empty.
neighborhoods = pd.DataFrame(records, columns=['PostalCode', 'Borough', 'Neighborhood'])

What is the shape of our DataFrame?

In [64]:
# Display the (rows, columns) dimensions of the assembled frame.
neighborhoods.shape

(103, 3)

We will read the Latitude and Longitude of each postal code from a CSV file.

In [92]:
# Location of the CSV file with the coordinates of each postal code.
url = 'http://cocl.us/Geospatial_data'
# pandas downloads and parses the CSV directly from the URL.
data = pd.read_csv(url)
# Preview the first rows to check the column names.
data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Now let's add the Latitude and Longitude of each postal code to the neighborhoods DataFrame.

In [102]:
# Look the coordinates up by postal code instead of relying on `dictionary`
# and `neighborhoods` happening to share the same row order; the result stays
# correct even if the frame is re-sorted or filtered beforehand.
# drop_duplicates keeps the first entry per postal code and gives Series.map
# the unique index it requires.
coordinates = data.drop_duplicates(subset='Postal Code').set_index('Postal Code')

postal_codes = neighborhoods['PostalCode']
missing = sorted(set(postal_codes) - set(coordinates.index))
if missing:
    # Fail with the offending codes rather than leaving NaN coordinates behind.
    raise KeyError('No coordinates found for postal codes: {}'.format(missing))

neighborhoods['Latitude'] = postal_codes.map(coordinates['Latitude'])
neighborhoods['Longitude'] = postal_codes.map(coordinates['Longitude'])

# Plain-list copies, in case other cells use `lat` / `long` directly.
lat = neighborhoods['Latitude'].tolist()
long = neighborhoods['Longitude'].tolist()

Let's check the column types of the result.

In [115]:
# Display the dtype of every column, including the newly added coordinates.
neighborhoods.dtypes

PostalCode       object
Borough          object
Neighborhood     object
Latitude        float64
Longitude       float64
dtype: object

In [118]:
# Base map; the centre coordinates and the zoom level are hard-coded.
map_toronto = folium.Map(location=[43.7183012, -79.4716584], zoom_start=10)

# Appearance shared by every marker.
marker_style = dict(
    radius=5,
    color='yellow',
    fill=True,
    fill_color='#f5f12c',
    fill_opacity=0.7,
    parse_html=False,
)

# Walk the five columns in lock-step, one tuple per row of the frame.
marker_columns = ['PostalCode', 'Latitude', 'Longitude', 'Borough', 'Neighborhood']
for code, latitude, longitude, district, area in zip(*(neighborhoods[name] for name in marker_columns)):
    # Popup text has the form "<postal code> : <neighbourhood>, <borough>".
    caption = '{} : {}, {}'.format(code, area, district)
    marker = folium.CircleMarker(
        [latitude, longitude],
        popup=folium.Popup(caption, parse_html=True),
        **marker_style
    )
    marker.add_to(map_toronto)

# Leaving the map as the last expression renders it in the notebook.
map_toronto