## setup environment

In [1]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import json # library to handle JSON files
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans
import folium # map rendering library
from bs4 import BeautifulSoup
import requests
from geopy.geocoders import Nominatim


# PART 1
## Fatch data

In [3]:
path='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
source=requests.get(path).text
soup=BeautifulSoup(source,'html.parser')

table=soup.find('table')
#print(table.prettify())
rows=table.find_all('tr')

data=[]
for row in rows:
    #print(row.prettify())
    fields=row.find_all('td')
    #print(fields)
    #print('-------------')
    if fields:
        data.append(fields)
#print(data)
postal_code=[]

df_list=[]
for item in data:
    #print(item)
    postal_code=item[0].text.strip('\n')
    borough=item[1].text.strip('\n')
    neighbourhood=item[2].text.strip('\n')
    if borough != 'Not assigned':
        if neighbourhood == 'Not assigned':
            neighborhood = borough
        df_list.append([postal_code, borough, neighbourhood])
df=pd.DataFrame(df_list)
df.columns=['Postal_Code','Borough','Neighbourhood']
df

Unnamed: 0,Postal_Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor
...,...,...,...
205,M8Z,Etobicoke,Kingsway Park South West
206,M8Z,Etobicoke,Mimico NW
207,M8Z,Etobicoke,The Queensway West
208,M8Z,Etobicoke,Royal York South West


## group and clean data

In [8]:

df = df.groupby('Postal_Code').agg(
    {
        'Borough':'first', 
        'Neighbourhood': ', '.join,}
    ).reset_index()
df

Unnamed: 0,Postal_Code,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


## print the number of rows of dataframe

In [7]:
df.shape

(103, 3)

# Part 2
## Geocoder 

In [18]:
url="http://cocl.us/Geospatial_data"
geo_data=pd.read_csv(url)
geo_data.head(10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [17]:
# generate data by Postalcode
table = df.set_index('Postal_Code').join(geo_data.set_index('Postal Code'))
table = table.sample(frac=1).reset_index(drop=True)
table.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,West Toronto,"Runnymede, Swansea",43.651571,-79.48445
1,Downtown Toronto,"Harbord, University of Toronto",43.662696,-79.400049
2,West Toronto,"High Park, The Junction South",43.661608,-79.464763
3,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
4,North York,"Northwood Park, York University",43.76798,-79.487262


# Part 3

In [19]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(source, 'lxml')

## locate Toronto

In [23]:
address = 'Toronto'

geolocat = Nominatim()
location = geolocat.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Toronto are 43.653963, -79.387207.


## Map

In [31]:
geomap = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(full_table['Latitude'], full_table['Longitude'], full_table['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186ac',
        fill_opacity=0.7,
        parse_html=False).add_to(geomap)  
    
geomap