Import the Python libraries used in this notebook

In [41]:
# General libraries
import numpy as np
import pandas as pd
# Special libraries that may need to install (pip install) before the project
import geopy as gp
import requests
import json
import matplotlib as mpl
import folium
from bs4 import BeautifulSoup

Set up the dataframe

In [42]:
# Part 1: scrape the postal-code table from Wikipedia into a list of rows,
# one list of cell texts per table row.
link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# A timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces an HTTP error instead of parsing an error page.
response = requests.get(link, timeout = 30)
response.raise_for_status()
BS = BeautifulSoup(response.text, 'html.parser')
Data = []
for tr in BS.tbody.find_all('tr'):
    cells = [td.get_text().strip() for td in tr.find_all('td')]
    # A row without any <td> cell contributes no data, so it is skipped.
    if cells:
        Data.append(cells)

In [43]:
# Load the scraped rows into a table; 'Neighborhood2' holds the raw scraped
# neighborhood text.
DF = pd.DataFrame(Data, columns=['PostalCode', 'Borough', 'Neighborhood2'])
# Row labels whose borough is the placeholder 'Not assigned'
Index = DF.index[DF['Borough'] == 'Not assigned']
# Remove those rows, then any row that still has a missing cell
DF = DF.drop(index=Index).dropna()
# One row per (PostalCode, Borough) pair: join that pair's neighborhood
# strings with commas
DF = (
    DF.groupby(['PostalCode', 'Borough'])['Neighborhood2']
    .apply(','.join)
    .reset_index()
)

In [44]:
# Set Neighborhood column
def Neighborhood(Data):
    """Return the neighborhood label for one row.

    Data is looked up by the keys 'Neighborhood2' and 'Borough'. When the
    'Neighborhood2' entry equals the placeholder 'Not assigned', the row's
    'Borough' entry is returned; otherwise the 'Neighborhood2' entry is
    returned unchanged.
    """
    scraped = Data['Neighborhood2']
    return Data['Borough'] if scraped == 'Not assigned' else scraped
# Resolve the final Neighborhood label row by row, then retire the raw
# scraped column it was derived from.
DF['Neighborhood'] = DF.apply(Neighborhood, axis=1)
DF = DF.drop(columns=['Neighborhood2'])
DF
# next: Part 2

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,Malvern / Rouge
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,Scarborough,Guildwood / Morningside / West Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,Kingsview Village / St. Phillips / Martin Grov...
101,M9V,Etobicoke,South Steeles / Silverstone / Humbergate / Jam...


Define a function to return nearby venues borrowed from the lab

Part 2 - Merge the data

In [45]:
# Part 2: read the geospatial coordinates CSV and rename its 'Postal Code'
# column to 'PostalCode' so it matches the key column of DF.
Part2_Data = (
    pd.read_csv('http://cocl.us/Geospatial_data')
    .rename(columns={'Postal Code': 'PostalCode'})
)
# Left join on PostalCode: every row of DF is kept, whether or not a
# coordinate row matches it.
MergeData = pd.merge(DF, Part2_Data, on='PostalCode', how='left')
MergeData

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,Kingsview Village / St. Phillips / Martin Grov...,43.688905,-79.554724
101,M9V,Etobicoke,South Steeles / Silverstone / Humbergate / Jam...,43.739416,-79.588437


Part 3 - Draw the map

In [46]:
# Look up the coordinates of Toronto with the Nominatim geocoder.
Index = gp.geocoders.Nominatim(user_agent = 'trt_explorer').geocode('Toronto')
# geocode() gives None when the service finds no match; fail here with a clear
# message rather than with an AttributeError on the attribute reads below.
if Index is None:
    raise ValueError("Nominatim returned no result for 'Toronto'")
Long = Index.longitude
Lat = Index.latitude

In [47]:
# Build the map with folium and add one CircleMarker per row of MergeData
Trt_map = folium.Map(location = [Lat, Long], zoom_start = 10)
# Rows with a missing latitude or longitude cannot be placed on the map, so
# they are left out before any marker is created.
located = MergeData.dropna(subset = ['Latitude', 'Longitude'])
for lat, lng, name in zip(located['Latitude'], located['Longitude'], located['Neighborhood']):
    # The popup shows the neighborhood label when the marker is clicked.
    label = folium.Popup(name, parse_html = True)
    folium.CircleMarker(
        [lat, lng],
        radius = 3,
        popup = label,
        color = 'red',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7,
    ).add_to(Trt_map)
Trt_map