## Setting Up to Identify the Neighbourhoods in Each Canadian Postal Code

In [1]:
import requests
import pandas as pd
import numpy as np
pd.set_option('display.max_rows', 250)
pd.set_option('display.width', 200)
pd.options.display.max_colwidth = 200
import urllib.request
from pprint import pprint
#from html_table_parser import HTMLTableParser
from html.parser import HTMLParser

# Scrape the Wikipedia page of Toronto ("M") postal codes. read_html returns a
# list with one DataFrame per table found; the postal-code table is the first.
df = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    header=0,
)[0]
display(df.head())

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [2]:
# Keep only the postal codes whose Borough has been assigned.
has_borough = df['Borough'] != "Not assigned"
df1 = df[has_borough].reset_index(drop=True)
display(df1.head())

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


In [3]:
# A neighbourhood listed as "Not assigned" takes the name of its borough.
is_unassigned = df1['Neighbourhood'] == "Not assigned"
df1['Neighbourhood'] = np.where(is_unassigned, df1['Borough'], df1['Neighbourhood'])
display(df1.head())

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


In [4]:
# Collapse the rows that share a Postcode/Borough pair into a single row whose
# remaining text columns are joined with commas.
df2 = (
    df1
    .groupby(['Postcode', 'Borough'])
    .agg(','.join)
    .reset_index()
)
df2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [5]:
# Append Latitude and Longitude columns, filled with 0.0 as placeholders.
pd.set_option('display.max_colwidth', 200)
geo_columns = list(df2.columns) + ["Latitude", "Longitude"]
df3 = df2.reindex(columns=geo_columns, fill_value=0.000000)

print(df3.shape)
display(df3.head())

(103, 5)


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",0.0,0.0
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",0.0,0.0
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",0.0,0.0
3,M1G,Scarborough,Woburn,0.0,0.0
4,M1H,Scarborough,Cedarbrae,0.0,0.0


In [6]:
"""
create Geocode object to generate geocodes, Latitude and Longitude, for each neighborhood,
from postal codes using arcGIS
"""
!pip install geocoder
import geocoder # import geocoder
pd.options.display.max_colwidth = 200

Lat_list=[]
Lng_list=[]
lat_lng_coords = None
for i in range(df3.Postcode.shape[0]):
    postcode='{}, Toronto, Ontario'.format(df3.Postcode.at[i])

    g=geocoder.arcgis(postcode) 
    
    lat_lng_coords = g.latlng
     
    df3.Latitude[i] =  lat_lng_coords[0]
    df3.Longitude[i] = lat_lng_coords[1]

df3.head()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.811525,-79.195517
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.785665,-79.158725
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.765815,-79.175193
3,M1G,Scarborough,Woburn,43.768369,-79.21759
4,M1H,Scarborough,Cedarbrae,43.769688,-79.23944


## Setting up to Identify Historic Sites in the City of Toronto 

### First to plot Toronto Pearson International Airport on a Folium Map

In [7]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import ArcGIS  # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
#from IPython.core.display import HTML 
from IPython.display import HTML
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

#!conda install -c conda-forge folium=0.5.0 --yes
!pip install folium
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Folium installed
Libraries imported.


In [8]:
pd.set_option('display.max_rows', 250)
pd.set_option('display.width', 200)
pd.options.display.max_colwidth = 200
import urllib.request
from pprint import pprint

In [9]:
# Toronto Pearson International Airport is the "known site" for our map.

import geocoder
Toronto = geocoder.arcgis('Toronto, Ontario')
g = geocoder.arcgis('Toronto Pearson International Airport')

# Show the airport's (latitude, longitude) pair as a sanity check.
latitude, longitude = g.latlng[0], g.latlng[1]
print((latitude, longitude))

(43.680660000000046, -79.61285999999996)


In [10]:
# Build a map centred on Toronto Pearson International Airport.

#!pip install folium==0.5.0
import folium
from IPython.display import display

LL_coordinates = [g.latlng[0], g.latlng[1]]
HS_site_map = folium.Map(location=LL_coordinates, zoom_start=10.00)

# Mark the airport with a red circle.
airport_marker = folium.CircleMarker(
    location=LL_coordinates,
    radius=5,
    color='red',
    popup='Toronto Pearson International Airport',
    fill=True,
    fill_color='red',
    fill_opacity=0.7,
)
airport_marker.add_to(HS_site_map)

display(HS_site_map)

## Setting up to Identify Historic Sites in the City of Toronto 

### First let's set a landmark: Toronto Pearson International Airport on a Folium Map

In [11]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import ArcGIS  # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
#from IPython.core.display import HTML 
from IPython.display import HTML
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

#!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Folium installed
Libraries imported.


In [12]:
pd.set_option('display.max_rows', 250)
pd.set_option('display.width', 200)
pd.options.display.max_colwidth = 200
import urllib.request
from pprint import pprint
!pip install html-table-parser-python3
from html_table_parser import HTMLTableParser



In [13]:
# Toronto Pearson International Airport is the "known site" for our map.

import geocoder
Toronto = geocoder.arcgis('Toronto, Ontario')
g = geocoder.arcgis('Toronto Pearson International Airport')

# Show the airport's (latitude, longitude) pair as a sanity check.
latitude, longitude = g.latlng[0], g.latlng[1]
print((latitude, longitude))

(43.680660000000046, -79.61285999999996)


In [24]:
# Build a map centred on Toronto Pearson International Airport.
airport_location = [g.latlng[0], g.latlng[1]]
HS_site_map = folium.Map(location=airport_location, zoom_start=10.00)

# Mark the airport with a red circle.
airport_marker = folium.CircleMarker(
    [g.latlng[0], g.latlng[1]],
    radius=5,
    color='red',
    popup='Toronto Pearson International Airport',
    fill=True,
    fill_color='red',
    fill_opacity=0.7,
)
airport_marker.add_to(HS_site_map)

HS_site_map

### Now let's test two Canadian historic sites with the Geocoder arcgis using specific names of historic sites:
* Annesley Hall
* Balmoral Fire Hall

In [15]:
# Geocode Annesley Hall by name. The result gets its own variable because the
# map cells read the airport geocode from `g`; reusing `g` here would move the
# marker labelled "Toronto Pearson International Airport" to this site.
import geocoder
annesley_hall = geocoder.arcgis("Annesley Hall, Toronto, Ontario")
print(annesley_hall.latlng)

[43.715239501384794, -79.37496455017423]


In [16]:
# Geocode Balmoral Fire Hall by name. The result gets its own variable because
# the map cells read the airport geocode from `g`; reusing `g` here would move
# the marker labelled "Toronto Pearson International Airport" to this site.
import geocoder
balmoral_fire_hall = geocoder.arcgis("Balmoral Fire Hall, Toronto, Ontario")
print(balmoral_fire_hall.latlng)

[43.68445580251654, -79.39994301478438]


### Let's get the table of historic sites from wikipedia
- https://en.wikipedia.org/wiki/List_of_National_Historic_Sites_of_Canada_in_Toronto

In [17]:
# Wikipedia page listing the National Historic Sites of Canada in Toronto.
url = 'https://en.wikipedia.org/wiki/List_of_National_Historic_Sites_of_Canada_in_Toronto'

In [18]:
# Parse every table on the page into a list of DataFrames and keep the one at
# index 1, which is used below as the list of historic sites.
tables = pd.read_html(url)
df_TorHS0 = tables[1]
display(df_TorHS0.head())
df_TorHS0.shape

Unnamed: 0,Site,Date(s),Designated,Location,Description,Image
0,Annesley Hall [6][7],1903 (completed),1990,Toronto43°40′04″N 79°23′35″W﻿ / ﻿43.66778°N 79.39306°W,"The first purpose-built women's’ residence on a Canadian university campus, and a good example of the Queen Anne Revival style in institutional architecture",
1,Balmoral Fire Hall [8][9],1911 (completed),1990,Toronto43°41′09″N 79°23′38″W﻿ / ﻿43.685833°N 79.393870°W,A rare example of the Queen Anne Revival style used for a fire hall,
2,Bank of Upper Canada Building [10][11],1825 (completed),1977,Toronto43°39′06.54″N 79°22′15.5″W﻿ / ﻿43.6518167°N 79.370972°W,"An early 19th-century bank building, representative of the rise of Toronto as a commercial centre and the role played by the Bank of Upper Canada in the development of Upper Canada",
3,Bead Hill[12][13],1600s (village established),"1991 (designated);June 15, 2019 (added to national park system)",Toronto43°48′14.77″N 79°8′24.4″W﻿ / ﻿43.8041028°N 79.140111°W,An archaeological site in Rouge National Urban Park with the only known intact remains of a 17th-century Seneca village in the country,
4,Birkbeck Building [14][15],1908 (completed),1986,Toronto43°39′2.54″N 79°22′40.49″W﻿ / ﻿43.6507056°N 79.3779139°W,A four-storey office building typical of the premises of many financial institutions prevalent in central business districts of Canadian cities before the First World War; representative of a tran...,


(37, 6)

In [19]:
# The Image column is not needed for the analysis, so leave it out.
df_TorHS1 = df_TorHS0.drop(columns=['Image'])
display(df_TorHS1.head(8))

Unnamed: 0,Site,Date(s),Designated,Location,Description
0,Annesley Hall [6][7],1903 (completed),1990,Toronto43°40′04″N 79°23′35″W﻿ / ﻿43.66778°N 79.39306°W,"The first purpose-built women's’ residence on a Canadian university campus, and a good example of the Queen Anne Revival style in institutional architecture"
1,Balmoral Fire Hall [8][9],1911 (completed),1990,Toronto43°41′09″N 79°23′38″W﻿ / ﻿43.685833°N 79.393870°W,A rare example of the Queen Anne Revival style used for a fire hall
2,Bank of Upper Canada Building [10][11],1825 (completed),1977,Toronto43°39′06.54″N 79°22′15.5″W﻿ / ﻿43.6518167°N 79.370972°W,"An early 19th-century bank building, representative of the rise of Toronto as a commercial centre and the role played by the Bank of Upper Canada in the development of Upper Canada"
3,Bead Hill[12][13],1600s (village established),"1991 (designated);June 15, 2019 (added to national park system)",Toronto43°48′14.77″N 79°8′24.4″W﻿ / ﻿43.8041028°N 79.140111°W,An archaeological site in Rouge National Urban Park with the only known intact remains of a 17th-century Seneca village in the country
4,Birkbeck Building [14][15],1908 (completed),1986,Toronto43°39′2.54″N 79°22′40.49″W﻿ / ﻿43.6507056°N 79.3779139°W,A four-storey office building typical of the premises of many financial institutions prevalent in central business districts of Canadian cities before the First World War; representative of a tran...
5,Eaton's 7th Floor Auditorium and Round Room [16][17],1930 (completed),1983,Toronto43°39′39″N 79°23′00″W﻿ / ﻿43.660929°N 79.383302°W,"A foyer, restaurant and auditorium, designed by French architect Jacques Carlu and muralist Natacha Carlu, located within the former Eaton's College Street department store; remarkable examples of..."
6,Chapel of St. James-the-Less Anglican Church [18][19],1861 (completed),1990,Toronto43°40′10.4″N 79°22′8.32″W﻿ / ﻿43.669556°N 79.3689778°W,The Chapel is a noted example of High Victorian Gothic Revival architecture and exemplifies the small chapels built in this style in Canada
7,Eglinton Theatre [20][21],1936 (completed),1993,Toronto43°42′15.96″N 79°24′38.66″W﻿ / ﻿43.7044333°N 79.4107389°W,A cinema representing one of the best examples of the Art Deco-style in Canadian theatre design


### Convert non-geocode format in the Location Column to Geocodes
This is a rather tedious process. Any suggestions would be appreciated 

In [20]:
def listToString(s):
    """Concatenate the string elements of ``s`` into a single string.

    Parameters
    ----------
    s : iterable of str
        Pieces to concatenate, in order (for example a list of characters).

    Returns
    -------
    str
        All elements joined with no separator; ``""`` when ``s`` is empty.
    """
    # str.join builds the result in one pass instead of re-allocating the
    # string on every "+=".
    return "".join(s)

In [21]:
def geosplit(Wikitable):
    """Fill ``Latitude``/``Longitude`` from the text in the ``Location`` column.

    Only the last two coordinate tokens found in each ``Location`` string are
    used (for example ``43.66778°N`` and ``79.39306°W``). A latitude token
    ending in ``S`` and a longitude token ending in ``W`` are stored as
    negative numbers.

    Parameters
    ----------
    Wikitable : pandas.DataFrame
        Frame with a ``Location`` column of strings and existing ``Latitude``
        and ``Longitude`` columns. It is modified in place.

    Returns
    -------
    list of float or None
        The ``[latitude, longitude]`` magnitudes (hemisphere sign not applied)
        parsed from the last row, or ``None`` when the frame has no rows.
    """
    import re

    # A run of digits/dots/hyphens, then one non-digit and one word character,
    # e.g. "43.66778°N".
    coordinate = re.compile(r'([\d.-]+\D\w)')

    matcha12 = None
    for row, location in Wikitable['Location'].items():
        tokens = coordinate.findall(location)
        lat_token, lon_token = tokens[-2], tokens[-1]

        # Decide the signs per row; carrying them over from a previous row
        # would negate every coordinate after the first S or W entry.
        signlat = -1.0 if lat_token[-1] == "S" else 1.0
        signlon = -1.0 if lon_token[-1] == "W" else 1.0

        # Drop the two trailing characters (unit symbol and hemisphere letter).
        matcha12 = [float(lat_token[:-2]), float(lon_token[:-2])]

        # .at writes into the frame itself; chained indexing such as
        # Wikitable.Latitude[i] = ... raises SettingWithCopyWarning.
        Wikitable.at[row, 'Latitude'] = matcha12[0] * signlat
        Wikitable.at[row, 'Longitude'] = matcha12[1] * signlon
    return matcha12

In [22]:
# Give the historic-sites frame Latitude and Longitude columns (0.0 for now),
# then let geosplit fill them in from the Location text.
hs_columns = list(df_TorHS1.columns) + ["Latitude", "Longitude"]
df_TorHS2 = df_TorHS1.reindex(columns=hs_columns, fill_value=0.000000)
geosplit(df_TorHS2)

# Location has been converted, so it is no longer needed.
df_TorHS2 = df_TorHS2.drop(columns="Location")
display(df_TorHS2.head())


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Site,Date(s),Designated,Description,Latitude,Longitude
0,Annesley Hall [6][7],1903 (completed),1990,"The first purpose-built women's’ residence on a Canadian university campus, and a good example of the Queen Anne Revival style in institutional architecture",43.66778,-79.39306
1,Balmoral Fire Hall [8][9],1911 (completed),1990,A rare example of the Queen Anne Revival style used for a fire hall,43.685833,-79.39387
2,Bank of Upper Canada Building [10][11],1825 (completed),1977,"An early 19th-century bank building, representative of the rise of Toronto as a commercial centre and the role played by the Bank of Upper Canada in the development of Upper Canada",43.651817,-79.370972
3,Bead Hill[12][13],1600s (village established),"1991 (designated);June 15, 2019 (added to national park system)",An archaeological site in Rouge National Urban Park with the only known intact remains of a 17th-century Seneca village in the country,43.804103,-79.140111
4,Birkbeck Building [14][15],1908 (completed),1986,A four-storey office building typical of the premises of many financial institutions prevalent in central business districts of Canadian cities before the First World War; representative of a tran...,43.650706,-79.377914


In [25]:
# Look the airport up here instead of relying on the shared name `g`, which
# other cells also assign; otherwise the airport marker can end up at the
# coordinates of whatever was geocoded last.
airport = geocoder.arcgis('Toronto Pearson International Airport')
airport_latlng = [airport.latlng[0], airport.latlng[1]]

HS_site_map2 = folium.Map(location=airport_latlng, zoom_start=10.50)

# add a red circle marker to represent the Toronto Pearson International Airport
folium.CircleMarker(
    airport_latlng,
    radius=6,
    color='red',
    popup='Toronto Pearson International Airport',
    fill=True,
    fill_color='red',
    fill_opacity=0.3
).add_to(HS_site_map2)

# add the historic sites to the map as blue circle markers, each with its
# description as the popup text
site_rows = zip(df_TorHS2.Latitude, df_TorHS2.Longitude, df_TorHS2.Description)
for site_lat, site_lng, description in site_rows:
    folium.CircleMarker(
        [site_lat, site_lng],
        radius=3,
        color='blue',
        fill=True,
        popup=folium.Popup(str(description), parse_html=True),
        fill_color='green',
        fill_opacity=0.5
    ).add_to(HS_site_map2)

HS_site_map2

#### Be sure to view the Seneca village site (Bead Hill) in the northeast part of the city