# Toronto Project Segmentation

In [1]:
##### import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# Show every column and every row when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
# `pandas.io.json.json_normalize` was deprecated in pandas 1.0 in favour of the
# top-level `pandas.json_normalize`; import the supported name first and fall
# back to the legacy location only on older pandas versions.
try:
    from pandas import json_normalize # transform JSON file into a pandas dataframe
except ImportError:
    from pandas.io.json import json_normalize

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

#!conda install -c anaconda beautifulsoup4 --yes
from bs4 import BeautifulSoup

print('Libraries imported.')

Libraries imported.


# Import the table from the Wikipedia website

In [2]:
import urllib.request

# Wikipedia page listing the Canadian postal codes that start with "M".
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Download inside a context manager so the HTTP response is always closed,
# and parse while the response is still open.
with urllib.request.urlopen(url) as html_file:
    # Name the parser explicitly: without it BeautifulSoup picks whichever parser
    # happens to be installed (and warns), so results could differ between machines.
    soup = BeautifulSoup(html_file, "html.parser")

tables = soup.find_all('table')  # every <table> on the page (not used below in this cell)
first_table = soup.find("table", class_ = "wikitable sortable")
if first_table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError('No "wikitable sortable" table found at {}'.format(url))

A=[]  # text of the first cell of each row (later used as "PostalCode")
B=[]  # text of the second cell of each row (later used as "Borough")
C=[]  # text of the third cell of each row (later used as "Neighberhood")
#Load the table from website
for row in first_table.find_all("tr"):
    cells = row.find_all("td")
    if len(cells)==3: #Only extract table body not heading
        # find(text=True) returns the first text node inside the cell.
        A.append(cells[0].find(text=True))
        B.append(cells[1].find(text=True))
        C.append(cells[2].find(text=True))

# Build the pandas DataFrame

In [3]:
# Assemble the three scraped lists into a DataFrame, one column per list.
# NOTE: the column name "Neighberhood" (sic) is kept as-is because the map cell
# further down reads the column by exactly this name.
df=pd.DataFrame(A,columns=["PostalCode"])
df["Borough"]=B
df["Neighberhood"]=C
# Drop the rows whose borough is "Not assigned". The comparison is made on the
# stripped text so it works whether or not the cell text carries a trailing "\n".
# .copy() makes the filtered frame independent of the unfiltered one, so the
# column assignments that follow do not trigger SettingWithCopyWarning.
df=df[df["Borough"].str.strip()!="Not assigned"].copy()
def supprimer(test):
    """Return the part of ``test`` that comes before its first newline.

    A string that contains no newline is returned as-is.
    """
    # Only the text before the first "\n" is needed, so one split is enough.
    before_newline = test.split("\n", 1)[0]
    return before_newline
# Drop the "\n" character (and anything after it) from every text column.
for column in ("PostalCode", "Borough", "Neighberhood"):
    df[column] = df[column].apply(supprimer)
# Renumber the rows from 0 after the earlier filtering; drop=True discards the
# old index instead of turning it into an extra column.
df = df.reset_index(drop=True)[["PostalCode","Borough","Neighberhood"]]
df.head()

Unnamed: 0,PostalCode,Borough,Neighberhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
# Dimensions of the cleaned table as a (rows, columns) tuple.
df.shape

(103, 3)

# Merge the postal-code table with the geographic coordinates

In [6]:
# Load the CSV of postal codes with their latitude / longitude.
geocode = pd.read_csv("http://cocl.us/Geospatial_data")
# Duplicate 'Postal Code' under the space-free name "PostalCode" so it matches
# the key column of `df`; the original column is kept.
geocode = geocode.assign(PostalCode=geocode['Postal Code'])
geocode.head()

Unnamed: 0,Postal Code,Latitude,Longitude,PostalCode
0,M1B,43.806686,-79.194353,M1B
1,M1C,43.784535,-79.160497,M1C
2,M1E,43.763573,-79.188711,M1E
3,M1G,43.770992,-79.216917,M1G
4,M1H,43.773136,-79.239476,M1H


In [7]:
# Attach latitude / longitude to every postal code. This is an inner join on
# "PostalCode", so only codes present in both tables are kept.
df_toronto = df.merge(
    geocode[["PostalCode","Latitude","Longitude"]],
    how="inner",
    on="PostalCode",
)
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighberhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


# Select all Toronto boroughs

In [8]:

def toronto(test):
    """Return the last space-separated word of ``test``.

    For example "Downtown Toronto" becomes "Toronto"; a string without any
    space is returned unchanged.
    """
    # Splitting once from the right is enough to isolate the final word.
    return test.rsplit(" ", 1)[-1]
# Overwrite each borough name with its last word (e.g. "Downtown Toronto" ->
# "Toronto", "North York" -> "York"), then count the rows per shortened name.
# NOTE(review): this replaces the original borough names in place and merges
# boroughs that share a last word - confirm that this is intended.
df_toronto["Borough"] = df_toronto["Borough"].map(toronto)
df_toronto["Borough"].value_counts()


Toronto        39
York           34
Scarborough    17
Etobicoke      12
Mississauga     1
Name: Borough, dtype: int64

In [9]:
# Look up the coordinates of Toronto; they are used to centre the map.
address = 'Toronto'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on `None.latitude`.
if location is None:
    raise ValueError('Could not geocode address {!r}.'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


# Build the map of Toronto

In [11]:
# Base map centred on the coordinates geocoded for Toronto.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map: one circle per row of df_toronto
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighberhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True makes folium escape the label text, so names containing
    # characters such as apostrophes render safely inside the popup.
    popup = folium.Popup(label, parse_html=True)
    # parse_html belongs to Popup only; it is not a CircleMarker or Leaflet path
    # option, so it is no longer passed to the marker.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

# Last expression of the cell: renders the map inline.
map_toronto