## LOAD LIBRARIES

In [156]:
import numpy as np
import pandas as pd
import json

# !conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim

import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

# !conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

import requests
from bs4 import BeautifulSoup
from tabulate import tabulate

print('Libraries Imported')

Libraries Imported


## 1. WEBSCRAPING

In [157]:
# Download the Wikipedia page that lists the Toronto ("M") postal codes.
weblink = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
res = requests.get(weblink, timeout=30)
res.raise_for_status()  # stop on an HTTP error instead of parsing an error page

# Pick the first table carrying the "sortable" CSS class and keep its HTML as text.
soup = BeautifulSoup(res.content, 'lxml')
sortable_tables = soup.find_all("table", class_="sortable")
if not sortable_tables:
    raise ValueError("No sortable table found at {}".format(weblink))
table = str(sortable_tables[0])

table_list = pd.read_html(table)

# Select the columns explicitly: a renamed header on the page then raises a
# KeyError here instead of silently producing all-NaN columns.
df = table_list[0][['Postcode', 'Borough', 'Neighbourhood']].copy()
df.columns = ["Postal Code", "Borough", "Neighborhood"]

In [158]:
# Keep only the postal codes that have a borough assigned. Rebinding `df`
# (rather than dropping in place) keeps this cell safe to re-run.
df = df[df["Borough"] != "Not assigned"].copy()

# A neighbourhood marked "Not assigned" takes the name of its borough.
mask = df["Neighborhood"] == "Not assigned"
df.loc[mask, "Neighborhood"] = df.loc[mask, "Borough"]

# One row per (postal code, borough); its neighbourhoods are joined with ", ".
df_toronto = (
    df.groupby(["Postal Code", "Borough"])["Neighborhood"]
    .agg(", ".join)
    .reset_index()
)
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [159]:
# Report the shape (rows, columns) of the grouped postal-code table.
dimensions = df_toronto.shape
print("\n\nFinal Dataframe Dimensions: " + str(dimensions))



Final Dataframe Dimensions: (103, 3)


#### END OF QUESTION 1

## 2. MAPPING COORDINATES TO DATAFRAME

In [160]:
# Load the postal-code -> latitude/longitude lookup table.
link = 'http://cocl.us/Geospatial_data'
df_coord = pd.read_csv(link)

# Attach coordinates to every postal code. `validate` makes pandas raise if
# either side has duplicate postal codes (which would silently multiply rows).
df_study = pd.merge(df_toronto, df_coord, on='Postal Code', validate='one_to_one')

# The merge is an inner join, so a postal code without coordinates would be
# dropped silently; check that none were lost.
missing_codes = sorted(set(df_toronto['Postal Code']) - set(df_study['Postal Code']))
assert not missing_codes, 'No coordinates found for: {}'.format(missing_codes)

df_study.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


#### END OF QUESTION 2

## 3. CLUSTERING NEIGHBOURHOODS

In [161]:
# Look up the coordinates of Toronto; they are used to centre the maps below.
address = 'Toronto, ON'

# Nominatim's usage policy asks for an application-specific user agent, and
# recent geopy versions refuse to run without one. The default scheme (HTTPS)
# is used instead of forcing plain HTTP.
geolocator = Nominatim(user_agent='toronto_neighbourhood_explorer')
location = geolocator.geocode(address)

# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

  from ipykernel import kernelapp as app


The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [162]:
# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Styling shared by every marker.
# NOTE(review): parse_html looks like a Popup option; CircleMarker presumably
# ignores it -- confirm before removing.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle per postal code, labelled "<neighbourhoods>, <borough>".
marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in df_study[marker_columns].itertuples(index=False, name=None):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_toronto)

map_toronto

In [163]:
# Restrict the study frame to the Downtown Toronto borough.
# (The former mid-cell `downtown_data.head()` was removed: only the last
# expression of a cell is displayed, so it had no effect.)
downtown_data = df_study[df_study['Borough'] == 'Downtown Toronto'].reset_index(drop=True)

# Same centre as the city-wide map, zoomed in further.
map_downtown = folium.Map(location=[latitude, longitude], zoom_start=14)

# One circle per downtown postal code, labelled with its neighbourhood names.
for lat, lng, label in zip(downtown_data['Latitude'], downtown_data['Longitude'], downtown_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    # parse_html belongs to Popup only, so it is no longer passed to CircleMarker.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_downtown)

map_downtown

The map above shows Downtown Toronto, the borough selected for further exploration.

## Foursquare API to cluster