#### *Imports Needed*

In [1]:
from pandas.io.html import read_html
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

* *html to float markdown tables to left*

In [2]:
%%html
<style>
table {float:left}
</style>

## **Part 1 - Get and Prepare Dataset**

* Get data from Wikipedia -  [link here](https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M)
* Create a dataframe with the collected data
* Remove rows where borough is *Not assigned*
* Join Neibhbourhood with the same Postal Code
* Set *Not assigned* neighbourhood with the borough name
***

#### *1. get html data and convert to a pandas dataframe*

In [63]:
page = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wikitables = read_html(page, index_col=0,  attrs={"class":"wikitable"})
toronto_postal = wikitables[0]
print(toronto_postal.shape)

(287, 2)


#### *2. cleaning and prepare data*

In [64]:
toronto_postal.Borough.replace('Not assigned',np.nan, inplace=True) #set NaN to not assigned borough
toronto_postal.dropna(inplace=True) #drop nan values
toronto_postal['Neighbourhood'] = toronto_postal.groupby(['Postcode'])['Neighbourhood'].apply(lambda x: ', '.join(x)) #join Neibhbourhood with the same Postal Code
toronto_postal = toronto_postal.loc[~toronto_postal.index.duplicated(keep='first')] #remove duplicated postal codes
toronto_postal.Neighbourhood.replace('Not assigned',toronto_postal.Borough, inplace=True) #replace not assigned neighbourhoods with borough name
print('Shape of dataset: %s'%(str(toronto_postal.shape)))

Shape of dataset: (103, 2)


#### *3. test data*
* **must match:**

| PostalCode | Borough          | Neighbourhood                          |
|------------|------------------|----------------------------------------|
| M7A        | Queen's Park     | Queen's Park                           |
| M5X        | Downtown Toronto | First Canadian Place, Underground city |
| M1C        | Scarborough      | Highland Creek, Rouge Hill, Port Union |



In [19]:
toronto_postal.loc[['M7A','M5X','M1C']] #they match ;)

Unnamed: 0_level_0,Borough,Neighbourhood
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1
M7A,Queen's Park,Queen's Park
M5X,Downtown Toronto,"First Canadian Place, Underground city"
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"


In [35]:
#reset index to put PostalCode as a column and save on a variable called toronto
toronto = toronto_postal.reset_index()
print('Shape of dataset: %s'%(str(toronto_postal.shape)))

Shape of dataset: (103, 2)


## **Part 2 - Get location *(lat,lon)* for each neighborhood of Toronto**

* Get *(lat,lon)* for each postcode on dataset
* Save the data on a .csv file to prevent unnecessary Google API calls

***

In [3]:
import os
import requests

In [37]:
def get_location(row):
    GOOGLE_MAPS_API_URL = 'https://maps.googleapis.com/maps/api/geocode/json'
    params = {
        'address': '%s, Toronto, Ontario'%row.Postcode,
        'sensor': 'false',
        'region': 'CA',
        'key': os.environ['googleMaps_apiKey']
    }
    req = requests.get(GOOGLE_MAPS_API_URL, params=params)
    res = req.json()
    
    # Use the first result
    result = res['results'][0]
    
    row['lat'] = result['geometry']['location']['lat']
    row['lng'] = result['geometry']['location']['lng']
    return row
   

In [49]:
#toronto = toronto.apply(get_location,axis=1)
toronto.to_csv('toronto.csv',index=False)
toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,lat,lng
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


## **Part 3 - Neighborhoods Clustering and Data Analysis**

* Get *(lat,lon)* for each postcode on dataset
* 

***

In [4]:
import folium # map rendering library

In [5]:
ds = pd.read_csv('toronto.csv')
ds.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,lat,lng
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


In [26]:
#toronto (lat,lon)
latitude, longitude = 43.7, -79.4
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10.5)

# add markers to map
for lat, lng, borough, neighborhood in zip(ds['lat'], ds['lng'], ds['Borough'], ds['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [28]:
CLIENT_ID = os.environ['foursquare_ClientId'] # your Foursquare ID
CLIENT_SECRET = os.environ['foursquare_ClientSecret'] # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
