In [61]:
import os

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [62]:
# Scrape the postal-code table from a pinned (older) revision of the Wikipedia page,
# so the result does not change when the live article is edited.
wiki_url = 'https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=889899728'
# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of parsing an error page.
response = requests.get(wiki_url, timeout=30)
response.raise_for_status()
website_text = response.text
# NOTE(review): the page is HTML but is parsed with the 'xml' feature (requires lxml);
# kept unchanged so the parsed output stays the same - confirm this is intended.
soup = BeautifulSoup(website_text, 'xml')
table = soup.find('table',{'class':'wikitable sortable'})
if table is None:
    # find() returns None when nothing matches; fail here with a clear message
    # rather than with an AttributeError in a later cell.
    raise ValueError('No "wikitable sortable" table found at {}'.format(wiki_url))

In [63]:
# Put the table data into a DataFrame: one record per <tr>, one field per <td>.
table_rows = table.find_all('tr')
data = []
for row in table_rows:
    td = [t.text.strip() for t in row.find_all('td')]
    # Append once per row (not once per cell), so records are not duplicated.
    # Rows without any <td> (e.g. a header row made of <th> cells) give an
    # empty list and are skipped.
    if td:
        data.append(td)
df = pd.DataFrame(data, columns = ['PostalCode', 'Borough', 'Neighborhood'])

In [69]:
# Keep only rows whose Borough is assigned, then remove exact duplicate rows.
df = df.loc[df['Borough'].ne('Not assigned')].drop_duplicates()

In [70]:
# Several neighborhoods can share one postal code area: collapse them into a
# single comma-separated string per (PostalCode, Borough) pair.
df = (
    df.groupby(['PostalCode', 'Borough'])['Neighborhood']
    .agg(', '.join)
    .reset_index()
)

In [71]:
# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
# Write through .loc on the frame itself: the rows yielded by iterrows() may be
# copies, so assigning to them does not reliably change df.
unassigned = df['Neighborhood'] == 'Not assigned'
df.loc[unassigned, 'Neighborhood'] = df.loc[unassigned, 'Borough']

In [72]:
## load the CSV of geographical coordinates into its own frame
# NOTE(review): the path points into one user's home directory; adjust it when
# running the notebook on another machine.
df_coor = pd.read_csv(
    filepath_or_buffer='~/dev_practice/coursera_ibm/Coursera_Capstone/Geospatial_Coordinates.csv'
)

In [73]:
## align the key column name with df, then join the two frames on it
df_coor.rename({"Postal Code": "PostalCode"}, axis='columns', inplace=True)
df_combined = df.merge(df_coor, on='PostalCode')

In [74]:
# preview the first five rows of the merged frame
df_combined.head(n=5)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# # Q3 Clustering

In [75]:
## foursquare credentials and version
# Credentials are read from the environment so that secrets are never stored in
# the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel. Any key that was ever committed in a notebook should be
# treated as leaked and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

if not (CLIENT_ID and CLIENT_SECRET):
    # Warn instead of raising so the cells that do not need the API still run.
    print('Warning: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare API requests will fail.')

In [76]:
## import needed libraries
import folium
from geopy.geocoders import Nominatim

In [77]:
## restrict the data to boroughs whose name contains "Toronto"
df_f = df_combined.loc[df_combined['Borough'].str.contains('Toronto')]
df_f

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
42,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
45,M4P,Central Toronto,Davisville North,43.712751,-79.390197
46,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
47,M4S,Central Toronto,Davisville,43.704324,-79.38879
48,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
49,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",43.686412,-79.400049


In [78]:
## get longitude and latitude of Toronto
address = 'Toronto, Ontario, Canada'

geolocator = Nominatim(user_agent="TO_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [80]:
## build the base map centred on the geocoded Toronto coordinates
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

## add one circle marker per row of df_f, with a "Borough: Neighborhood" popup
for lat, lng, borough, neighborhood in df_f[
        ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
].itertuples(index=False, name=None):
    label = folium.Popup(f'{borough}: {neighborhood}', parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    ).add_to(map_toronto)

# the last expression renders the map as the cell output
map_toronto