### Segmenting and Clustering Neighborhoods in Toronto

### Part One

In [1]:
!pip install lxml html5lib beautifulsoup4
import pandas as pd
import numpy as np



In [72]:
# Load the postal-code table: pd.read_html returns every table found on the page,
# and the first one is the table used throughout this notebook.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
df = pd.read_html(url)[0]

# Keep only rows whose borough (the second column) is assigned.
# A single boolean mask replaces the row-by-row in-place drop: it yields the same
# rows, avoids mutating the frame while walking over it, and leaves a fresh
# 0..n-1 index behind.
df = df[df.iloc[:, 1] != 'Not assigned'].reset_index(drop=True)

In [81]:
# Preview the first five rows of the filtered postal-code table.
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [83]:
# (rows, columns) of the table once the 'Not assigned' boroughs are gone.
df.shape

(103, 3)

### Part Two

In [12]:
!pip install geocoder
import geocoder



In [84]:
# Placeholders kept from the geocoder experiment summarised below.
lat_lng_coords = None
lat, lng = [], []

# The first approach geocoded each postal code with
# geocoder.google('<postal code>, Toronto, Ontario'), retrying until a result came
# back. Google denied the requests (an API key is required), so that loop was
# abandoned and the coordinates come from a prepared CSV instead.
geourl = 'http://cocl.us/Geospatial_data'

# Index the coordinates by postal code so rows can be looked up by label.
geodf = pd.read_csv(geourl, index_col='Postal Code')

In [87]:
# Postal codes live in the first column of df; geodf is indexed by postal code.
postal_codes = df.iloc[:, 0].tolist()

# One label-based lookup for every code, in df's row order. As with the previous
# per-row lookups, .loc raises KeyError if a code is absent from geodf.
coords = geodf.loc[postal_codes]

# Take latitude and longitude by explicit position (first and second column of
# geodf). The old `row[0]` / `row[1]` form depended on pandas treating an integer
# key as a position on a string-labelled Series, which is deprecated.
lat = coords.iloc[:, 0].tolist()
lng = coords.iloc[:, 1].tolist()

# Create the Latitude and Longitude columns from the looked-up values.
df['Latitude'] = lat
df['Longitude'] = lng

In [103]:
# Show up to the first 50 rows to spot-check the new Latitude/Longitude columns.
df.head(50)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


### Part Three

In [90]:
#import k-means from clustering stage
from sklearn.cluster import KMeans

#import folium map library
!pip install folium
import folium

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
Collecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0


In [106]:
# Coordinates the map is initially centred on (Toronto), named instead of inlined.
TORONTO_CENTER = [43.651070, -79.347015]

# Create the base map of Toronto.
map_toronto = folium.Map(location=TORONTO_CENTER, zoom_start=10)

# Add one circle marker per row of df, with a "<neighbourhood>, <borough>" popup.
# The loop variables are deliberately not called `lat` / `lng`, so the coordinate
# lists that carry those names elsewhere in the notebook are not overwritten.
for row_lat, row_lng, borough, neighborhood in zip(
        df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker(
        [row_lat, row_lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

# Last expression of the cell: render the map.
map_toronto

In [113]:
# Subset of df restricted to the 'Downtown Toronto' borough, re-indexed from 0.
downtoronto = df.loc[df['Borough'].eq('Downtown Toronto')].reset_index(drop=True)

# Show up to the first 30 rows of the subset.
downtoronto.head(30)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
5,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
6,M6G,Downtown Toronto,Christie,43.669542,-79.422564
7,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
8,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752
9,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576


#### Foursquare API

In [94]:
import os

# Foursquare credentials are read from the environment so that no secret is stored
# in the notebook. Export FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel. Any key that was ever committed in a notebook should be
# treated as compromised and rotated.
try:
    CLIENT_ID = os.environ['FOURSQUARE_CLIENT_ID'] #Foursquare ID
    CLIENT_SECRET = os.environ['FOURSQUARE_CLIENT_SECRET'] #Foursquare Secret
except KeyError as missing:
    # Fail fast with an actionable message instead of a bare KeyError.
    raise RuntimeError(
        'Missing environment variable {}; set it before running this cell.'.format(missing.args[0])
    ) from None

VERSION = '20180605' # Foursquare API version

In [98]:
# Re-display the first rows of the full df (all boroughs, not the downtoronto subset).
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
