In [3]:

!pip install folium
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import json
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
from bs4 import BeautifulSoup
import folium

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 3.8 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0


In [4]:

# Download the Wikipedia page listing the Canadian postal codes that start
# with "M". The timeout keeps the cell from hanging indefinitely, and
# raise_for_status() surfaces an HTTP error here instead of letting an error
# page be parsed as if it were the data table.
response = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
response.raise_for_status()
main_source = response.text
soup = BeautifulSoup(main_source, 'lxml')

# The first <table> element on the page is taken to be the postal-code table.
Table = soup.find("table")
if Table is None:
    raise ValueError("No <table> element found in the downloaded page")
# Fall back to the table itself when the markup carries no <tbody> element
# (Table.tbody is None in that case).
Table_row = (Table.tbody or Table).find_all('tr')

res = []
for tr in Table_row:
    # Rows without <td> cells (e.g. a header row made of <th>) give an empty list.
    row = [cell.text for cell in tr.find_all("td")]

    # Keep only complete (PostalCode, Borough, Neighborhood) rows; anything
    # else cannot be indexed below or loaded into the three-column frame.
    # Comparing the list `row` against the string "Not assigned" can never
    # filter a row, so unassigned boroughs are still present after this loop.
    if len(row) != 3:
        continue

    # A neighborhood marked "Not assigned" is replaced by the postal code.
    # NOTE(review): if the borough name was the intended fallback, this
    # should read row[1] -- confirm.
    if "Not assigned" in row[2]:
        row[2] = row[0]
    res.append(row)

web_df = pd.DataFrame(res, columns = ["PostalCode", "Borough", "Neighborhood"])
# The scraped cell text still contains newline characters; remove them everywhere.
web_df = web_df.replace('\n','',regex=True)
web_df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,M1A
1,M2A,Not assigned,M2A
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,M5Z
176,M6Z,Not assigned,M6Z
177,M7Z,Not assigned,M7Z
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [5]:
# Keep only the postal codes whose borough is something other than 'Not assigned'.
has_borough = web_df['Borough'] != 'Not assigned'
web_df = web_df[has_borough]
web_df

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [6]:

# Renumber the remaining rows from 0; drop=True discards the old index labels
# instead of keeping them as an extra column.
web_df1 = web_df.reset_index(drop=True)
web_df1

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [7]:
# Report the (rows, columns) size of the cleaned frame.
frame_shape = web_df1.shape
print("shape: ", frame_shape)


shape:  (103, 3)


# Since our data frame is ready, we will merge it with the latitude and longitude columns, which we will fetch from the geospatial CSV file


In [9]:

# Download the CSV file of postal-code coordinates.
# The request goes through `requests` rather than a shell `wget -q`, so the
# cell does not depend on an external program and a failed download raises
# here instead of silently printing the success message below.
geo_response = requests.get('http://cocl.us/Geospatial_data', timeout=30)
geo_response.raise_for_status()
with open('Geospatial_Coordinates.csv', 'wb') as geo_file:
    geo_file.write(geo_response.content)
print('Data downloaded!')

Data downloaded!


In [10]:
# Load the downloaded coordinates file into a DataFrame and preview the first rows.
df_data_1 = pd.read_csv(filepath_or_buffer='Geospatial_Coordinates.csv')
df_data_1.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:

# Attach the coordinates to each scraped postal code. The left join keeps every
# row of web_df1; the right-hand key column 'Postal Code' duplicates
# 'PostalCode' after the join and is removed.
toronto_df = (
    web_df1
    .merge(df_data_1, how='left', left_on='PostalCode', right_on='Postal Code')
    .drop(columns='Postal Code')
)
toronto_df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


# Exploring and clustering the Toronto neighborhoods.

In [12]:
# Look up the coordinates used to centre the maps.
address = "Toronto, ON"

geolocator = Nominatim(user_agent = 'toronto_explorer')
# An explicit timeout gives the geocoding service more room to answer than
# the library default.
location = geolocator.geocode(address, timeout=10)
# geocode() returns None when nothing matches; fail with a clear message
# rather than an AttributeError on the attribute access below.
if location is None:
    raise ValueError("Could not geocode address: {!r}".format(address))
latitude = location.latitude
longitude = location.longitude
print("Coordinates for Totonto city {}, {}".format(latitude,longitude))

Coordinates for Totonto city 43.6534817, -79.3839347


In [13]:

# Map centred on the geocoded city coordinates, one circle marker per postal code.
toronto_map = folium.Map(location=[latitude, longitude], zoom_start = 15)

# toronto_df comes from a left merge, so a postal code missing from the
# coordinates file has NaN latitude/longitude; skip those rows because a
# marker cannot be placed without a location.
toronto_located = toronto_df.dropna(subset=['Latitude', 'Longitude'])
for Lati, Longi, borough, neigh_hood in zip(toronto_located['Latitude'],
                                            toronto_located['Longitude'],
                                            toronto_located['Borough'],
                                            toronto_located['Neighborhood']):
    # Popup text shown when a marker is clicked: "<neighborhood>,<borough>".
    label = '{},{}'.format(neigh_hood, borough)
    # parse_html=True makes folium treat the label as text, not raw HTML.
    label = folium.Popup(label, parse_html = True)
    # parse_html belongs to Popup only, so it is not passed to CircleMarker.
    folium.CircleMarker([Lati,Longi], radius = 5, popup=label,
                       color = 'blue',fill=True, fill_color = '#3186cc',
                       fill_opacity = 0.5).add_to(toronto_map)

toronto_map

In [14]:
# Keep the rows whose borough name contains "Toronto" and renumber them from 0.
borough_mentions_toronto = toronto_df['Borough'].str.contains("Toronto")
toronto_df_boro = toronto_df.loc[borough_mentions_toronto].reset_index(drop=True)
toronto_df_boro

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [16]:

# Map centred on the geocoded city coordinates, showing only the rows kept in
# toronto_df_boro.
toronto_boro_map = folium.Map(location=[latitude,longitude], zoom_start = 15)

# Skip rows without coordinates: a marker cannot be placed without a location.
boro_located = toronto_df_boro.dropna(subset=['Latitude', 'Longitude'])
for Lati, Longi, borough, neigh_hood in zip(boro_located['Latitude'],
                                            boro_located['Longitude'],
                                            boro_located['Borough'],
                                            boro_located['Neighborhood']):
    # Popup text shown when a marker is clicked: "<neighborhood>,<borough>".
    label = '{},{}'.format(neigh_hood, borough)
    # parse_html=True makes folium treat the label as text, not raw HTML.
    label = folium.Popup(label, parse_html = True)
    # parse_html belongs to Popup only, so it is not passed to CircleMarker.
    folium.CircleMarker([Lati,Longi], radius = 5, popup=label,
                       color = 'blue',fill=True, fill_color = '#3186cc',
                       fill_opacity = 0.5).add_to(toronto_boro_map)
toronto_boro_map