## Assignment - Segmenting and Clustering Neighbourhoods in the city of Toronto, Canada

### Part 1 - Scrape the wikipedia page, Data Wrangling and Cleaning, Reading into Pandas Dataframe

`Import Library and Modules`

In [1]:
from bs4 import BeautifulSoup
import pandas as pd 
import requests
from sklearn.cluster import KMeans
!conda install -c conda-forge folium=0.5.0 --yes
import numpy as np
import folium
from geopy.geocoders import Nominatim
import json
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.1.0               |             py_1         614 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    certifi-2020.6.20          |   py36h9f0ad1d_0         151 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    openssl-1.1.1g             |       h516909a_1         2.1 MB  conda-forge
    ------------------------------------------------------------
                       

**[a] Scrape wikipedia page to extract the required information**

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url)

# parse the wikipedia HTML page using beautiful soup library
soup = BeautifulSoup(response.text, 'html.parser')

# extract the required table
table = soup.find_all('table')[0]
size = len(table.find_all('td'))

In [3]:
# Loop through the 'td' tab of the 'table' tab and extract the text in a list 
rowDataVal = []
for i in range(size):
    rowVal = table.find_all('td')[i].get_text()
    rowDataVal.append(rowVal)
length = len(rowDataVal)

In [4]:
# Rearrange the data into their respective categories
PostalCode =[]
for i in range(0,length,3):
    PostalCode.append(rowDataVal[i])

Borough = []
for i in range(1, length, 3):
    Borough.append(rowDataVal[i])

Neighborhood = []
for i in range(2, length, 3):
    Neighborhood.append(rowDataVal[i])

**[b] Creating dataframe from extracted data**

In [5]:
df = pd.DataFrame()
df['PostalCode'] = PostalCode
df['Borough'] = Borough
df['Neighborhood'] = Neighborhood

# drop the trailing new line after each text from the dataframe
df = df.replace('\n','', regex=True) 
df.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


**[c] Cleaning dataframe by dropping rows having unavailable data**

In [6]:
# drop cells with borough 'not assigned' and reset the index of new dataframe
df_clean = df[~df.Borough.str.contains('Not assigned')].reset_index(drop=True) 
df_clean

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [7]:
# check if a cell has a borough but 'Not Assigned' Neighborhood
check = df_clean.loc[df_clean.Neighborhood.str.contains('Not assigned')]
print ("There are {} cells with 'Not Assigned' Neighborhood".format(len(check)))

There are 0 cells with 'Not Assigned' Neighborhood


In [8]:
# display number of rows in the clean dataframe
rows = df_clean.shape
size_df = len(df_clean)
print ("DataFrame Size : {} ".format(rows))
print ("Total Rows in Dataframe : {} ".format(size_df))

DataFrame Size : (103, 3) 
Total Rows in Dataframe : 103 


### Part 2 - Getting Latitude and Longitude Coordinates of Neigborhoods

In [9]:
import types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.
client_8dbf0725bdee4042881dc53e79d63a02 = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='98fDWNT-aajkRYnooCDQp_H-1WRL2inWZCholIVvmOke',
    ibm_auth_endpoint="https://iam.cloud.ibm.com/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3-api.us-geo.objectstorage.service.networklayer.com')

body = client_8dbf0725bdee4042881dc53e79d63a02.get_object(Bucket='courseraassignment-donotdelete-pr-pobx7wk7wfmv1x',Key='Geospatial_Coordinates.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_coordinates = pd.read_csv(body)
df_coordinates.rename(columns={"Postal Code": "PostalCode"}, inplace = True)
df_coordinates.head()


Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:
# Merge Dataframes 
df_merged = pd.merge(df_clean, df_coordinates, on='PostalCode')
df_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### Part 3 - Explore and Cluster Neighborhoods around Toronto

`Getting Latitudes and Longitudes of Toronto`

In [11]:
address = 'Toronto, TO'
geolocator = Nominatim(user_agent = 'A')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

`Create map of Canada using Latitudes and Longitudes focused around Toronto and add all Borough and corresponding Neigbhborhood to the map`

In [12]:
map_canada = folium.Map(location=[latitude, longitude], zoom_start =10)

for lat, lng, borough, neighborhood in zip(df_merged['Latitude'], df_merged['Longitude'], df_merged['Borough'], df_merged['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html = True)
    folium.CircleMarker(
    [lat, lng],
    radius = 2,
    popup = label,
    color = 'blue',
    fill = True,
    fill_color = '#3186cc',
    fill_opacity = 0.7
    ).add_to(map_canada)

map_canada

`Explore and cluster neighborhoods in Downtown Toronto`

In [13]:
downtown_canada = df_merged[df_merged['Borough']=='Downtown Toronto'].reset_index(drop=True)
downtown_canada

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
5,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
6,M6G,Downtown Toronto,Christie,43.669542,-79.422564
7,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
8,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752
9,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576


`Getting Latitudes and Longitudes of Downtown Toronto`

In [14]:
address= 'Downtown Toronto, Canada'
geolocator = Nominatim(user_agent = 'downtown_explore')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

`Create map of Canada using Latitudes and Longitudes focused around Downtown Toronto and add the corresponding Neigbhborhoods to the map`

In [39]:
downtown_map = folium.Map(location = [latitude, longitude], zoom_start=14)

for lat, lng, borough, neighborhood in zip (downtown_canada['Latitude'], downtown_canada['Longitude'], downtown_canada['Borough'], downtown_canada['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat, lng],
    radius = 5,
    popup=label,
    color = 'red',
    fill=True,
    fill_color = 'red',
    fill_opacity = 0.7).add_to(downtown_map)
    
downtown_map

`Define Foursquare Credentials and Version`


In [16]:
CLIENT_ID = 'IP4U3IYZX5D1XCYCIS4KSXTX0P53VWS2GDCZN3VJOZBHVM2Y'
CLIENT_SECRET = 'JUZQ44HI0A3SDH5N5XLYTUFHDV0RIYG0IBGSA5KLARABSY5T'
VERSION = '20180604'

`Get the latitude and longitude location for the first neighborhood in Downtown Toronto`

In [17]:
test_neighborhood = downtown_canada.loc[0, 'Neighborhood']
test_latitude = downtown_canada.loc[0,'Latitude']
test_longitude = downtown_canada.loc[0,'Longitude']

print(test_neighborhood)
print(test_latitude)
print(test_longitude)

Regent Park, Harbourfront
43.6542599
-79.3606359


`Create the GET request URL and get the top 100 venues that are in Regent Park`

In [18]:
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
CLIENT_ID,
CLIENT_SECRET,
test_latitude,
test_longitude,
VERSION,
radius,
LIMIT)

`Send the GET request and analyze the results`

In [19]:
results = requests.get(url).json()

`The required info is in the items key and create a pandas dataframe out of json`

In [20]:
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues)
nearby_venues

Unnamed: 0,reasons.count,reasons.items,referralId,venue.categories,venue.id,venue.location.address,venue.location.cc,venue.location.city,venue.location.country,venue.location.crossStreet,...,venue.location.labeledLatLngs,venue.location.lat,venue.location.lng,venue.location.neighborhood,venue.location.postalCode,venue.location.state,venue.name,venue.photos.count,venue.photos.groups,venue.venuePage.id
0,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-54ea41ad498e9a11e9e13308-0,"[{'id': '4bf58dd8d48988d16a941735', 'name': 'B...",54ea41ad498e9a11e9e13308,362 King St E,CA,Toronto,Canada,Trinity St,...,"[{'label': 'display', 'lat': 43.65344672305267...",43.653447,-79.362017,,M5A 1K9,ON,Roselle Desserts,0,[],
1,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-53b8466a498e83df908c3f21-1,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",53b8466a498e83df908c3f21,368 King St E,CA,Toronto,Canada,at Trinity St,...,"[{'label': 'display', 'lat': 43.65355870959944...",43.653559,-79.361809,,,ON,Tandem Coffee,0,[],
2,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-574c229e498ebb5c6b257902-2,"[{'id': '52e81612bcbc57f1066b7a37', 'name': 'D...",574c229e498ebb5c6b257902,461 Cherry St,CA,Toronto,Canada,,...,"[{'label': 'display', 'lat': 43.65324910177244...",43.653249,-79.358008,,M5A 0H7,ON,Cooper Koo Family YMCA,0,[],
3,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-50760559e4b0e8c7babe2497-3,"[{'id': '4bf58dd8d48988d1ed941735', 'name': 'S...",50760559e4b0e8c7babe2497,497 King Street East,CA,Toronto,Canada,btwn Sackville St and Sumach St,...,"[{'label': 'display', 'lat': 43.65473505045365...",43.654735,-79.359874,,M5A 1L9,ON,Body Blitz Spa East,0,[],
4,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-5612b1cc498e3dd742af0dc8-4,"[{'id': '4bf58dd8d48988d1c4941735', 'name': 'R...",5612b1cc498e3dd742af0dc8,573 King St E,CA,Toronto,Canada,at St Lawrence St,...,"[{'label': 'display', 'lat': 43.65636850543279...",43.656369,-79.35698,,M5A 4L3,ON,Impact Kitchen,0,[],
5,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-51ccc048498ec7792efc955e-5,"[{'id': '4bf58dd8d48988d163941735', 'name': 'P...",51ccc048498ec7792efc955e,,CA,,Canada,,...,"[{'label': 'display', 'lat': 43.65561779974973...",43.655618,-79.356211,,,,Corktown Common,0,[],
6,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-566e1294498e3f6629006bc3-6,"[{'id': '4bf58dd8d48988d11b941735', 'name': 'P...",566e1294498e3f6629006bc3,500 Queen Street East,CA,Toronto,Canada,,...,"[{'label': 'display', 'lat': 43.65691857501867...",43.656919,-79.358967,,M5A 1T9,ON,Dominion Pub and Kitchen,0,[],
7,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4e8b7fa1cc2112f67517660a-7,"[{'id': '4bf58dd8d48988d175941735', 'name': 'G...",4e8b7fa1cc2112f67517660a,30 Eastern Ave,CA,Toronto,Canada,Sackville St.,...,"[{'label': 'display', 'lat': 43.65331304337331...",43.653313,-79.359725,,,ON,The Extension Room,0,[],
8,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4ad4c05ef964a520bff620e3-8,"[{'id': '4deefb944765f83613cdba6e', 'name': 'H...",4ad4c05ef964a520bff620e3,"btwn Front, Cherry, Gardiner & Parliament",CA,Toronto,Canada,,...,"[{'label': 'display', 'lat': 43.65024435658077...",43.650244,-79.359323,,M5A 3C4,ON,The Distillery Historic District,0,[],
9,0,"[{'summary': 'This spot is popular', 'type': '...",e-0-4ae5b91ff964a520a6a121e3-9,"[{'id': '4bf58dd8d48988d143941735', 'name': 'B...",4ae5b91ff964a520a6a121e3,457 King St. E,CA,Toronto,Canada,Gilead Place,...,"[{'label': 'display', 'lat': 43.65394694263529...",43.653947,-79.361149,,M5A 1L6,ON,Morning Glory Cafe,0,[],39686393.0


In [21]:
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:,filtered_columns]
nearby_venues.head()

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng
0,Roselle Desserts,"[{'id': '4bf58dd8d48988d16a941735', 'name': 'B...",43.653447,-79.362017
1,Tandem Coffee,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",43.653559,-79.361809
2,Cooper Koo Family YMCA,"[{'id': '52e81612bcbc57f1066b7a37', 'name': 'D...",43.653249,-79.358008
3,Body Blitz Spa East,"[{'id': '4bf58dd8d48988d1ed941735', 'name': 'S...",43.654735,-79.359874
4,Impact Kitchen,"[{'id': '4bf58dd8d48988d1c4941735', 'name': 'R...",43.656369,-79.35698


` Function to extract the categories of the venue`

In [22]:
def get_category_type(rows):
    try:
        categories_list = rows['categories']
    except:
        categories_list = rows['venue.categories']
    if len(categories_list)==0:
        return None
    else:
        return categories_list[0]['name']

nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,Body Blitz Spa East,Spa,43.654735,-79.359874
4,Impact Kitchen,Restaurant,43.656369,-79.35698
5,Corktown Common,Park,43.655618,-79.356211
6,Dominion Pub and Kitchen,Pub,43.656919,-79.358967
7,The Extension Room,Gym / Fitness Center,43.653313,-79.359725
8,The Distillery Historic District,Historic Site,43.650244,-79.359323
9,Morning Glory Cafe,Breakfast Spot,43.653947,-79.361149


`Function to get the venues for all the neighborhoods in Downtown Toronto`

In [23]:
def get_info(latitude,longitude,neighbor):
    venues_list=[]
    for lat,lng,neigh in zip(latitude, longitude, neighbor):
        print(neigh)
        LIMIT = 100
        radius = 500
        url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(
        CLIENT_ID,
        CLIENT_SECRET,
        lat,
        lng,
        VERSION,
        radius,
        LIMIT)
        
        results = requests.get(url).json()['response']['groups'][0]['items']
        for t in results:
            venues_list.append([(neigh,lat,lng,t['venue']['name'], t['venue']['location']['lat'], t['venue']['location']['lng'], t['venue']['categories'][0]['name'])])
            nearby_venues_1 = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
            nearby_venues_1.columns = ['Neighbourhood', 'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category']
    return(nearby_venues_1)


In [24]:
d = get_info(latitude=downtown_canada['Latitude'], longitude=downtown_canada['Longitude'], neighbor=downtown_canada['Neighborhood'])

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Rosedale
Stn A PO Boxes
St. James Town, Cabbagetown
First Canadian Place, Underground city
Church and Wellesley


In [44]:
d.head(70)

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.654260,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.654260,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.654260,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.654260,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park, Harbourfront",43.654260,-79.360636,Impact Kitchen,43.656369,-79.356980,Restaurant
5,"Regent Park, Harbourfront",43.654260,-79.360636,Corktown Common,43.655618,-79.356211,Park
6,"Regent Park, Harbourfront",43.654260,-79.360636,Dominion Pub and Kitchen,43.656919,-79.358967,Pub
7,"Regent Park, Harbourfront",43.654260,-79.360636,The Extension Room,43.653313,-79.359725,Gym / Fitness Center
8,"Regent Park, Harbourfront",43.654260,-79.360636,The Distillery Historic District,43.650244,-79.359323,Historic Site
9,"Regent Park, Harbourfront",43.654260,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


`Get the total unique categories of the venue`

In [26]:
print(len(d['Venue Category'].unique()))

209


` Analyze each neighborhood using one hot encoding`

In [27]:
onehot = pd.get_dummies(d[['Venue Category']], prefix="",prefix_sep="")

In [28]:
onehot['Neighborhood'] = d['Neighbourhood']
onehot.head()

Unnamed: 0,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [29]:
fixed_columns = [onehot.columns[-1]]+list(onehot.columns[:-1])
onehot = onehot[fixed_columns]
onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


`Group rows by neighborhood and by taking the mean of the frequency of occurence of each category`

In [30]:
d_grouped = onehot.groupby('Neighborhood').mean().reset_index()
d_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0
1,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.0,0.0625,0.0625,0.0625,0.125,0.1875,0.125,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.015625,0.0
3,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Church and Wellesley,0.025974,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,...,0.012987,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


`Print each neighborhood along with top 5 most common venues`

In [31]:
for hood in d_grouped['Neighborhood']:
    temp = d_grouped[d_grouped['Neighborhood']==hood].T.reset_index()
    temp.columns = ['venue', 'freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq':2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(5))

            venue  freq
0     Coffee Shop  0.10
1        Beer Bar  0.03
2    Cocktail Bar  0.03
3  Farmers Market  0.03
4      Restaurant  0.03
              venue  freq
0   Airport Service  0.19
1    Airport Lounge  0.12
2  Airport Terminal  0.12
3     Boat or Ferry  0.06
4  Sculpture Garden  0.06
                venue  freq
0         Coffee Shop  0.17
1      Sandwich Place  0.06
2                Café  0.05
3  Italian Restaurant  0.05
4         Salad Place  0.03
           venue  freq
0  Grocery Store  0.25
1           Café  0.19
2           Park  0.12
3    Candy Store  0.06
4     Baby Store  0.06
                 venue  freq
0          Coffee Shop  0.10
1  Japanese Restaurant  0.06
2     Sushi Restaurant  0.05
3              Gay Bar  0.04
4           Restaurant  0.04
         venue  freq
0  Coffee Shop  0.13
1         Café  0.07
2   Restaurant  0.07
3        Hotel  0.06
4          Gym  0.04
         venue  freq
0  Coffee Shop  0.12
1         Café  0.07
2        Hotel  0.05
3         

`Create a dataframe and display the top 10 venues for each neighborhood`

In [32]:
indicators = ['st','nd','rd']
columns = ['Neighborhood']

for ind in np.arange(10):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))
columns

['Neighborhood',
 '1st Most Common Venue',
 '2nd Most Common Venue',
 '3rd Most Common Venue',
 '4th Most Common Venue',
 '5th Most Common Venue',
 '6th Most Common Venue',
 '7th Most Common Venue',
 '8th Most Common Venue',
 '9th Most Common Venue',
 '10th Most Common Venue']

In [33]:
neigh_sorted = pd.DataFrame(columns=columns)
neigh_sorted['Neighborhood'] = d_grouped['Neighborhood']
neigh_sorted.head()

def return_most_common_venues(row,t):
    row_cat = row.iloc[1:]
    row_cat_sorted = row_cat.sort_values(ascending=False)
    return row_cat_sorted.index.values[0:t]

for ind in np.arange(d_grouped.shape[0]):
    neigh_sorted.iloc[ind,1:] = return_most_common_venues(d_grouped.iloc[ind, :],10)
    
neigh_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cheese Shop,Café,Cocktail Bar,Bakery,Farmers Market,Restaurant,Seafood Restaurant,Beer Bar,Pharmacy
1,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Terminal,Airport Lounge,Sculpture Garden,Rental Car Location,Boutique,Coffee Shop,Harbor / Marina,Airport Gate,Airport Food Court
2,Central Bay Street,Coffee Shop,Sandwich Place,Café,Italian Restaurant,Salad Place,Bubble Tea Shop,Japanese Restaurant,Burger Joint,Department Store,Yoga Studio
3,Christie,Grocery Store,Café,Park,Coffee Shop,Nightclub,Diner,Italian Restaurant,Restaurant,Candy Store,Baby Store
4,Church and Wellesley,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Restaurant,Gay Bar,Hotel,Pub,Dance Studio,Bubble Tea Shop,Café


`Cluster Neighborhoods using k-means`

In [34]:
kcluster=5;
cluster_group = d_grouped.drop('Neighborhood',1)

kmeans = KMeans(n_clusters=kcluster, random_state=0).fit(cluster_group)
kmeans.labels_[0:10]

array([4, 3, 0, 2, 4, 4, 4, 4, 4, 4], dtype=int32)

In [35]:
neigh_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
neigh_sorted.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,4,Berczy Park,Coffee Shop,Cheese Shop,Café,Cocktail Bar,Bakery,Farmers Market,Restaurant,Seafood Restaurant,Beer Bar,Pharmacy
1,3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Terminal,Airport Lounge,Sculpture Garden,Rental Car Location,Boutique,Coffee Shop,Harbor / Marina,Airport Gate,Airport Food Court
2,0,Central Bay Street,Coffee Shop,Sandwich Place,Café,Italian Restaurant,Salad Place,Bubble Tea Shop,Japanese Restaurant,Burger Joint,Department Store,Yoga Studio
3,2,Christie,Grocery Store,Café,Park,Coffee Shop,Nightclub,Diner,Italian Restaurant,Restaurant,Candy Store,Baby Store
4,4,Church and Wellesley,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Restaurant,Gay Bar,Hotel,Pub,Dance Studio,Bubble Tea Shop,Café


In [36]:
downtown_merged = downtown_canada
downtown_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [37]:
downtown_merged = downtown_merged.join(neigh_sorted.set_index('Neighborhood'), on='Neighborhood')
downtown_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,4,Coffee Shop,Pub,Bakery,Park,Breakfast Spot,Café,Theater,Yoga Studio,Dessert Shop,Restaurant
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Diner,Yoga Studio,Bank,College Auditorium,Park,Mexican Restaurant,Creperie,Café,Portuguese Restaurant
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,4,Clothing Store,Coffee Shop,Café,Cosmetics Shop,Bubble Tea Shop,Japanese Restaurant,Italian Restaurant,Plaza,Pizza Place,Bookstore
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,4,Café,Coffee Shop,Restaurant,Clothing Store,Cocktail Bar,Cosmetics Shop,American Restaurant,Department Store,Art Gallery,Park
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,4,Coffee Shop,Cheese Shop,Café,Cocktail Bar,Bakery,Farmers Market,Restaurant,Seafood Restaurant,Beer Bar,Pharmacy
5,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Sandwich Place,Café,Italian Restaurant,Salad Place,Bubble Tea Shop,Japanese Restaurant,Burger Joint,Department Store,Yoga Studio
6,M6G,Downtown Toronto,Christie,43.669542,-79.422564,2,Grocery Store,Café,Park,Coffee Shop,Nightclub,Diner,Italian Restaurant,Restaurant,Candy Store,Baby Store
7,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,4,Coffee Shop,Café,Clothing Store,Hotel,Restaurant,Gym,Bar,Steakhouse,Thai Restaurant,Concert Hall
8,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752,4,Coffee Shop,Aquarium,Hotel,Café,Brewery,Restaurant,Scenic Lookout,Fried Chicken Joint,Plaza,Pizza Place
9,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576,4,Coffee Shop,Hotel,Café,Restaurant,American Restaurant,Italian Restaurant,Salad Place,Seafood Restaurant,Japanese Restaurant,Deli / Bodega


`Visualize the resulting clusters`

In [38]:
map_clusters = folium.Map(location = [latitude, longitude], zoom_start=11)
x = np.arange(kcluster)
ys = [i + x + (i*x)**2 for i in range(kcluster)]
colors_array = cm.rainbow(np.linspace(0,1,len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
for lat,lon,n,cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighborhood'], downtown_merged['Cluster Labels']):
    label = folium.Popup(str(n) + 'Cluster' + str(cluster), parse_html=True)
    folium.CircleMarker(
    [lat,lon],
    radius=5,
    popup=label,
    color=rainbow[cluster-1],
    fill=True,
    fill_color=rainbow[cluster-1],
    fill_opacity=0.7).add_to(map_clusters)
map_clusters