In [1]:
# import all necessary libraries
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

#!conda install -c conda-forge beautifulsoup4 --yes
from bs4 import BeautifulSoup

print('Libraries imported.')

Libraries imported.


In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wikipage = requests.get(url)
wikipage.text[:100]

'<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title'

In [3]:
#Create a new pd DataFrame
toronto = pd.DataFrame()

#use beautifulsoup to read the wikipage
soup = BeautifulSoup(wikipage.text, 'lxml')
wikitable = soup.find_all('table')[0] 

row_marker = 0

for row in wikitable.find_all('tr'):
    column_marker = 0
    columns = row.find_all('td')
    for column in columns:
        toronto.loc[row_marker,column_marker] = column.get_text()
        column_marker += 1
    row_marker += 1

#rename column names
toronto.rename(columns={0:'PostalCode',1:'Borough',2:'Neighborhood'}, inplace=True)
#drop all rows with unassigned borough
toronto = toronto[toronto.Borough!='Not assigned']
toronto.reset_index(drop=True,inplace=True)

#create a new dataframe toronto_neighbor to combine all neighborhoods with the same postal code
toronto_neighbor= pd.DataFrame(columns = ['PostalCode','Borough','Neighborhood'])

for ii in range(toronto.shape[0]):
    borough = toronto.loc[ii,'Borough']
    postalcode = toronto.loc[ii,'PostalCode']
    neighborhood = toronto.loc[ii,'Neighborhood'][:-1]
    if neighborhood == 'Not assigned':
        neighborhood = borough
    
    if postalcode in toronto_neighbor.PostalCode.values:
        old_neighborhood = toronto_neighbor[toronto_neighbor.PostalCode == postalcode].Neighborhood
        new_value = (old_neighborhood + ',' + neighborhood)
        toronto_neighbor.loc[toronto_neighbor.PostalCode == postalcode,'Neighborhood'] = new_value 
    
    else:
        toronto_neighbor = toronto_neighbor.append({'PostalCode': postalcode,
                                                'Borough': borough,
                                                'Neighborhood': neighborhood,
                                               }, ignore_index=True)

toronto_neighbor

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge,Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens,Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson,Garden District"


In [5]:
toronto_neighbor.shape

(103, 3)

In [39]:
# use geocoder to get coordinates
#!conda install -c conda-forge geocoder --yes

#import geocoder # import geocoder
#!wget -o geospatial.csv http://cocl.us/Geospatial_data
geospatial = pd.read_csv('Geospatial_Coordinates.csv')

for postalcode in toronto_neighbor['PostalCode']:
    ######################################
    # initialize your variable to None
    #lat_lng_coords = None
    #
    # loop until you get the coordinates   
    #while(lat_lng_coords is None):
    #    g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
    #    lat_lng_coords = g.latlng
    ####################################
    # geocoder doesn't work use the csv file
    latitude = geospatial.loc[toronto_neighbor.PostalCode == postalcode,'Latitude']
    longitude = geospatial.loc[toronto_neighbor.PostalCode == postalcode,'Longitude']
    
    toronto_neighbor.loc[toronto_neighbor.PostalCode == postalcode,'Latitude'] =  latitude 
    toronto_neighbor.loc[toronto_neighbor.PostalCode == postalcode,'Longitude'] =  longitude

toronto_neighbor

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.806686,-79.194353
1,M4A,North York,Victoria Village,43.784535,-79.160497
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.763573,-79.188711
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.770992,-79.216917
4,M7A,Queen's Park,Queen's Park,43.773136,-79.239476
5,M9A,Etobicoke,Islington Avenue,43.744734,-79.239476
6,M1B,Scarborough,"Rouge,Malvern",43.727929,-79.262029
7,M3B,North York,Don Mills North,43.711112,-79.284577
8,M4B,East York,"Woodbine Gardens,Parkview Hill",43.716316,-79.239476
9,M5B,Downtown Toronto,"Ryerson,Garden District",43.692657,-79.264848
