In [140]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from geopy.geocoders import Nominatim
import folium
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors

In [2]:
# Scrape Wikipedia for the raw postal-code table.
# The timeout keeps this cell from hanging indefinitely on a stalled
# connection, and raise_for_status() prevents an HTTP error page from being
# parsed as if it were the real article.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
website_url = response.text  # NOTE: despite its name, this holds the page HTML

soup = BeautifulSoup(website_url, 'lxml')
table = soup.find('table', {'class': 'wikitable sortable'})

# soup.find() returns None when nothing matches; fail here with a clear
# message instead of with an AttributeError in a later cell.
if table is None:
    raise ValueError("No 'wikitable sortable' table found on the page")

In [3]:
# Extract the table headers.
# get_text() lets BeautifulSoup produce the cell text, so the result no longer
# depends on the tag being written exactly as '<th>' (no attributes) or on the
# cell ending with a newline, as the manual string slicing did.
headers = table.find_all('th')
headers_result = [th.get_text(' ', strip=True) for th in headers]

# Extract the data cells as one flat list, in document order.
data = table.find_all('td')
data_result = [td.get_text(' ', strip=True) for td in data]

In [4]:
# Create the DataFrame from the flat list of scraped cell values.
num_cols = len(headers_result)
num_rows = len(data_result) // num_cols

# Re-group the flat cell list into one list of num_cols values per table row.
rows = [
    data_result[start:start + num_cols]
    for start in range(0, num_rows * num_cols, num_cols)
]

# Drop rows whose borough (second cell of each row) is 'Not assigned'.
records = [row for row in rows if row[1] != 'Not assigned']

# Build the frame in one go; appending row by row with df.loc re-allocates
# on every insert. The default index is already 0..n-1, so no reset is needed.
df = pd.DataFrame(records, columns=headers_result)

In [5]:
# Replace every '/' in the neighbourhood names with ','.
# The result is assigned back to the column: an in-place replace on
# df['Neighborhood'] acts on a selected Series, which is not guaranteed to
# write through to df (and does not under pandas Copy-on-Write).
df['Neighborhood'] = df['Neighborhood'].replace('/', ',', regex=True)

In [6]:
# Where the neighbourhood is 'Not assigned', fall back to the borough name.
# A boolean mask with .loc writes into df itself; chained indexing such as
# df['Neighborhood'][i] = ... may write to a temporary copy instead.
unassigned = df['Neighborhood'] == 'Not assigned'
df.loc[unassigned, 'Neighborhood'] = df.loc[unassigned, 'Borough']

In [7]:
# (rows, columns) of the cleaned postal-code table
df.shape

(103, 3)

In [141]:
# Load the per-postal-code latitude/longitude table from a remote CSV.
# NOTE(review): this is a plain-http short link; the cell needs network access
# and the file can change without notice. Consider pinning a local copy.
geodata_df = pd.read_csv('http://cocl.us/Geospatial_data')
#geodata_df.head()

In [9]:
# Align the key column's spelling with the scraped table ('Postal code'),
# then attach the coordinates to each postal code via the default inner join.
geodata_df = geodata_df.rename(columns={'Postal Code': 'Postal code'})
df_merged = df.merge(geodata_df, on='Postal code')

In [10]:
# Inspect the coordinate table (key column already renamed to 'Postal code')
geodata_df

Unnamed: 0,Postal code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [11]:
# Inspect the merged table: postal code, borough, neighbourhood and coordinates
df_merged

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,Business reply mail Processing CentrE,43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,...",43.636258,-79.498509


# Working only with boroughs whose name contains "Toronto"

In [73]:
# Step 1: flag the rows whose borough name contains the word "Toronto".
# regex=False matches 'Toronto' as a literal substring; na=False maps a
# missing borough to False so the column is a clean boolean mask.
df_merged['Toronto?'] = df_merged['Borough'].str.contains('Toronto', regex=False, na=False)

# Step 2: keep only the flagged rows and renumber them from 0.
# reset_index() returns a new frame, so nothing is modified in place on a
# filtered slice of df_merged.
neighborhoods = df_merged[df_merged['Toronto?']].reset_index(drop=True)

## Locating the coordinates of Toronto City

In [103]:
# Look up the coordinates of the city centre with the Nominatim geocoder.
address = 'Toronto'

geolocator = Nominatim(user_agent="tt_explorer")
location = geolocator.geocode(address)

# geocode() can come back empty (None) when the service finds no match;
# fail with a clear message instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Geocoder returned no result for {!r}'.format(address))

latitude = location.latitude
# This name must be spelled 'longitude': the print below and the map cell
# both read it under that name.
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


## Creating the map of Toronto with an overlay of the 39 Toronto locations from the Wikipedia table of Canadian postal codes

In [138]:
# Base map centred on the coordinates looked up for Toronto
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Walk the four relevant columns in lockstep: one circle marker per row,
# with a popup reading "<neighborhood>, <borough>".
marker_columns = zip(
    neighborhoods['Latitude'],
    neighborhoods['Longitude'],
    neighborhoods['Borough'],
    neighborhoods['Neighborhood'],
)
for lat, lng, borough, neighborhood in marker_columns:
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

# Last expression of the cell: renders the interactive map
map_toronto