In [1]:
import os

import pandas as pd

### Reading the tables and transforming the data into DataFrames using pandas

In [2]:
# Parse the HTML tables found on the Wikipedia page; read_html returns them
# as a list of DataFrames.
dfs = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
)

### Dropping rows with NaN

In [3]:
# Take the first parsed table, discard every row that contains a NaN, and
# renumber the remaining rows from 0.
df = dfs[0].dropna().reset_index(drop=True)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


### Replacing the delimiter in the Neighborhood column

In [4]:
# Turn the ' / ' separators between neighborhood names into ', '.
# regex=False forces a literal match, so the result does not depend on the
# installed pandas version's default for `regex` (True before 2.0, False
# since) and matches the literal str.contains() filter used later on.
df['Neighborhood'] = df['Neighborhood'].str.replace(' / ', ', ', regex=False)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
# Number of rows left after cleaning.
len(df.index)

103

### Reading the coordinates for each postal code

In [9]:
# Load the coordinates file; its first column is used as the index of df_geo.
df_geo = pd.read_csv(
    'Geospatial_Coordinates.csv',
    index_col=0,
)
df_geo.head()

Unnamed: 0_level_0,Latitude,Longitude
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,43.806686,-79.194353
M1C,43.784535,-79.160497
M1E,43.763573,-79.188711
M1G,43.770992,-79.216917
M1H,43.773136,-79.239476


### Renaming the column for postal codes

In [22]:
# Rename the 'Postal code' header to 'Postal Code' so it can be used as the
# join key in the next step.
df = df.rename({'Postal code': 'Postal Code'}, axis='columns')
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Combining the two dataframes

In [26]:
# Left-join the coordinates onto the neighborhoods: df's 'Postal Code' column
# is matched against the index of df_geo, and every row of df is kept.
df_neighborhood = df.join(df_geo, on='Postal Code', how='left')
df_neighborhood.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### Selecting boroughs that contain 'Toronto'

In [40]:
# Keep only the rows whose borough name contains the literal text 'Toronto',
# then renumber the surviving rows from 0.
df_borough_T = (
    df_neighborhood
    .loc[lambda frame: frame['Borough'].str.contains('Toronto', regex=False)]
    .reset_index(drop=True)
)
df_borough_T

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [41]:
import requests

In [49]:
# Foursquare API settings.
#
# Credentials are read from the environment instead of being written into the
# notebook, so they never end up in version control or in shared copies.
# Export FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the
# kernel. NOTE(review): the keys that used to be hardcoded here have been
# exposed and should be rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not CLIENT_ID or not CLIENT_SECRET:
    print('Warning: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests are expected to fail authentication.')

VERSION = 20180605  # value sent as the `v` query parameter
RADIUS = 500        # value sent as the `radius` query parameter
LIMIT = 100         # value sent as the `limit` query parameter

In [46]:
# Query the Foursquare "explore" endpoint once per selected postal code and
# flatten the returned venues into one table.
#
# Each record is [postal code, neighborhood, venue name, venue category,
# venue latitude, venue longitude]; the resulting DataFrame keeps the default
# integer column labels 0-5.
explore_url = 'https://api.foursquare.com/v2/venues/explore'

venue_list = []
for pcode, neighborhood, lat, lng in zip(df_borough_T['Postal Code'], df_borough_T['Neighborhood'],
                                         df_borough_T['Latitude'], df_borough_T['Longitude']):

    # Let requests build and escape the query string. The timeout keeps a
    # stalled connection from hanging the cell, and raise_for_status() turns
    # an HTTP error into an exception instead of a confusing KeyError below.
    # TLS verification is deliberately left enabled.
    response = requests.get(
        explore_url,
        params={
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': RADIUS,
            'limit': LIMIT,
        },
        timeout=10,
    )
    response.raise_for_status()

    # A location with no recommendations may come back without any group.
    groups = response.json().get('response', {}).get('groups', [])
    results = groups[0]['items'] if groups else []

    for v in results:
        venue = v['venue']
        # Some venues carry no category; record None rather than failing.
        categories = venue.get('categories') or []
        category = categories[0]['name'] if categories else None
        venue_list.append([pcode, neighborhood, venue['name'], category,
                           venue['location']['lat'], venue['location']['lng']])

df_venues = pd.DataFrame(venue_list)
df_venues.head()

SSLError: HTTPSConnectionPool(host='api.foursquare.com', port=443): Max retries exceeded with url: /v2/venues/explore?client_id=<REDACTED>&client_secret=<REDACTED>&v=20180605&ll=43.6542599,-79.3606359&radius=500&limit=100 (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')])")))