# Assignment PART 1: Segmenting and Clustering Neighborhoods in Toronto

## Scrape Data from Wikipedia page 
https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

In [2]:
# IMPORT ALL REQUIRED LIBRARIES
# NOTE(review): this cell interleaves imports, pandas display options and
# `!conda install` shell commands; both installs run on every execution of the cell.

import numpy as np
import pandas as pd
# Show every column and every row when a DataFrame is displayed (no truncation).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json 
# NOTE(review): the geopy install below is not version-pinned.
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim 
import requests 
# NOTE(review): newer pandas releases expose this as `pd.json_normalize` -- confirm
# against the installed pandas version before changing the import.
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
# NOTE(review): the trailing text on the next line says "uncomment", but the
# install command is active (not commented out).
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
print('All of the libraries required have now been imported')

Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.

All of the libraries required have now been imported


#### Scrape Data off the internet

In [7]:
# SCRAPE WIKI PAGE FOR TORONTO DATA
# `requests` and `pd` come from the imports cell at the top of the notebook.
from io import StringIO

# A timeout stops the notebook from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
link = response.text  # raw HTML of the page (name kept for compatibility)

# Wrap the HTML in StringIO: passing literal HTML to read_html is deprecated in
# newer pandas, while a file-like object is accepted by every version.
df = pd.read_html(StringIO(link), header=0, attrs={"class":"wikitable sortable"})[0]

# Preview only; max_rows is disabled in the imports cell, so a bare `df` would
# render the whole table.
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Queen's Park,Not assigned
8,M8A,Not assigned,Not assigned
9,M9A,Queen's Park,Queen's Park


In [9]:
# DROP RECORDS FROM TORONTO DATA WHERE
# BOROUGH DATA VALUE IS "NOT ASSIGNED"
# Filter and renumber in one expression: reset_index(drop=True) discards the old
# index directly, so no in-place mutation of a filtered slice is needed.
df = df[df.Borough != 'Not assigned'].reset_index(drop=True)

# CHECK BY SHOWING FIRST 5 RECORDS
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,Rouge
1,M1B,Scarborough,Malvern
2,M1C,Scarborough,Highland Creek
3,M1C,Scarborough,Rouge Hill
4,M1C,Scarborough,Port Union


#### Combine Multiple Neighbourhoods that have the same Postal Code 

In [10]:
# COMBINE NEIGHBOURHOODS WHERE THERE ARE MULTIPLE NEIGHBOURHOODS
# LISTED PER POSTAL CODE.
# DELINEATE THE NEIGHBOURHOODS BY A COMMA
SHUFFLE_SEED = 0  # fixed seed so the shuffled row order is the same on every run

# One list of neighbourhood names per (Postcode, Borough) pair.
neighbourhood_lists = df.groupby(['Postcode','Borough'])['Neighbourhood'].apply(list)

# Shuffle the rows reproducibly; later cells read row 0 of the North York subset,
# so an unseeded shuffle would change which neighbourhood they explore.
df = neighbourhood_lists.sample(frac=1, random_state=SHUFFLE_SEED).reset_index()
df['Neighbourhood'] = df['Neighbourhood'].str.join(',')

# CHECK BY SHOWING FIRST 5 RECORDS
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M4C,East York,Woodbine Heights
1,M2N,North York,Willowdale South
2,M1T,Scarborough,"Clarks Corners,Sullivan,Tam O'Shanter"
3,M1V,Scarborough,"Agincourt North,L'Amoreaux East,Milliken,Steel..."
4,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North"


#### Neighbourhoods that are "Not assigned" are set to equal the Borough name

In [23]:
# Where a row has a Borough but its Neighbourhood is "Not assigned",
# copy the Borough name into the Neighbourhood column.
unassigned = df['Neighbourhood'] == "Not assigned"
df.loc[unassigned, 'Neighbourhood'] = df.loc[unassigned, 'Borough']
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M4C,East York,Woodbine Heights
1,M2N,North York,Willowdale South
2,M1T,Scarborough,"Clarks Corners,Sullivan,Tam O'Shanter"
3,M1V,Scarborough,"Agincourt North,L'Amoreaux East,Milliken,Steel..."
4,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North"
5,M5X,Downtown Toronto,"First Canadian Place,Underground city"
6,M1E,Scarborough,"Guildwood,Morningside,West Hill"
7,M8Z,Etobicoke,"Kingsway Park South West,Mimico NW,The Queensw..."
8,M2L,North York,"Silver Hills,York Mills"
9,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"


#### Print the shape of the dataframe

In [36]:
# Display a (caption, (rows, columns)) tuple as the cell result.
('# of Rows & Columns in the dataframe ', df.shape)

('# of Rows & Columns in the dataframe ', (103, 3))

# Assignment PART 2: Segmenting and Clustering Neighborhoods in Toronto

## Get Geospatial Data from csv file on the internet 
http://cocl.us/Geospatial_data

In [58]:
# Read the latitude/longitude CSV and rename its 'Postal Code' column to
# 'Postcode' so it shares a key name with the main dataframe.
geo_df = (
    pd.read_csv('http://cocl.us/Geospatial_data')
      .rename(columns={'Postal Code': 'Postcode'})
)
# Join the coordinates onto the neighbourhood table by postcode.
df2 = df.merge(geo_df, on='Postcode')
df2

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4C,East York,Woodbine Heights,43.695344,-79.318389
1,M2N,North York,Willowdale South,43.77012,-79.408493
2,M1T,Scarborough,"Clarks Corners,Sullivan,Tam O'Shanter",43.781638,-79.304302
3,M1V,Scarborough,"Agincourt North,L'Amoreaux East,Milliken,Steel...",43.815252,-79.284577
4,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North",43.653654,-79.506944
5,M5X,Downtown Toronto,"First Canadian Place,Underground city",43.648429,-79.38228
6,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
7,M8Z,Etobicoke,"Kingsway Park South West,Mimico NW,The Queensw...",43.628841,-79.520999
8,M2L,North York,"Silver Hills,York Mills",43.75749,-79.374714
9,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497


# Assignment PART 3: Segmenting and Clustering Neighborhoods in Toronto

## Explore and Cluster Toronto neighbourhoods

In [68]:
# Base map centred on the hard-coded Toronto coordinates.
map_to = folium.Map(location=[43.6532, -79.3832], zoom_start=9.5)

# One circle marker per row of df2, with a "<neighbourhood>, <borough>" popup.
marker_fields = zip(df2['Latitude'], df2['Longitude'], df2['Borough'], df2['Neighbourhood'])
for lat, lng, borough, neighbourhood in marker_fields:
    popup = folium.Popup(f'{neighbourhood}, {borough}', parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_to)

map_to

#### Segment & Cluster North York data

In [70]:
# Keep only the North York rows and renumber them from 0.
is_north_york = df2['Borough'] == 'North York'
northyork_data = df2.loc[is_north_york].reset_index(drop=True)
northyork_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M2N,North York,Willowdale South,43.77012,-79.408493
1,M2L,North York,"Silver Hills,York Mills",43.75749,-79.374714
2,M3A,North York,Parkwoods,43.753259,-79.329656
3,M2R,North York,Willowdale West,43.782736,-79.442259
4,M3K,North York,"CFB Toronto,Downsview East",43.737473,-79.464763


#### Visualize Neighbourhoods in North York


In [76]:
# Base map centred on the hard-coded North York coordinates.
map_northyork= folium.Map(location=[43.7615, -79.4111], zoom_start=12)

# One circle marker per North York row, with the neighbourhood name as popup text.
northyork_points = zip(northyork_data['Latitude'], northyork_data['Longitude'], northyork_data['Neighbourhood'])
for lat, lng, neighbourhood in northyork_points:
    popup = folium.Popup(neighbourhood, parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_northyork)

map_northyork

#### Use Foursquare API to explore neighbourhoods and segment them

In [77]:
# Foursquare API credentials.
# Never hardcode secrets in a notebook: they end up in version control and in
# saved cell outputs. Read them from the environment instead, e.g.
#   export FOURSQUARE_CLIENT_ID=...  FOURSQUARE_CLIENT_SECRET=...
# before starting Jupyter.
import os
import warnings

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20181121' # Foursquare API version

# Warn early so a missing credential is not first discovered as a failed API call.
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare API requests will fail.'
    )


In [79]:
# Name of the neighbourhood stored at index label 0 of the North York table.
northyork_data.at[0, 'Neighbourhood']

'Willowdale South'

In [81]:
# Name and coordinates of the neighbourhood at index label 0; the following
# cells use these three variables to build the Foursquare request.
neighbourhood_name = northyork_data.at[0, 'Neighbourhood']
neighbourhood_latitude = northyork_data.at[0, 'Latitude']
neighbourhood_longitude = northyork_data.at[0, 'Longitude']

print(f'The Latitude and longitude values of {neighbourhood_name} are {neighbourhood_latitude}, {neighbourhood_longitude}.')

The Latitude and longitude values of Willowdale South are 43.7701199, -79.40849279999999.


In [150]:
# Build the Foursquare "explore" request for up to LIMIT venues within `radius`
# metres of the neighbourhood selected in the previous cell.
LIMIT = 5 # limit of number of venues returned by Foursquare API

radius = 1000 # define radius

EXPLORE_URL_TEMPLATE = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
request_fields = (VERSION, neighbourhood_latitude, neighbourhood_longitude, radius, LIMIT)

# Real URL, used by the request cell below.
url = EXPLORE_URL_TEMPLATE.format(CLIENT_ID, CLIENT_SECRET, *request_fields)

# Display the URL with the credentials replaced by placeholders so the client
# secret is never written into the notebook's saved output.
EXPLORE_URL_TEMPLATE.format('<CLIENT_ID>', '<CLIENT_SECRET>', *request_fields)

'https://api.foursquare.com/v2/venues/explore?&client_id=<REDACTED>&client_secret=<REDACTED>&v=20181121&ll=43.7701199,-79.40849279999999&radius=1000&limit=5'

In [151]:
# Send the GET request and examine the results.
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() reports HTTP failures (bad credentials, quota exceeded, ...)
# here rather than as a confusing KeyError in a later cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5ddb252371c428001bd64c4a'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Willowdale',
  'headerFullLocation': 'Willowdale, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 128,
  'suggestedBounds': {'ne': {'lat': 43.77911990900001,
    'lng': -79.39605277419672},
   'sw': {'lat': 43.761119890999986, 'lng': -79.42093282580326}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5a35b4443abcaf37eb1a0d88',
       'name': 'The Keg',
       'location': {'lat': 43.7665789176648,
        'lng': -79.41213141222555,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.7665789176648,
          'lng': -79.412131412

In [152]:
def get_category_type(row):
    """Return the name of the first category of a venue row, or None.

    Parameters
    ----------
    row : mapping-like (dict or pandas Series)
        Must provide the category list under the key 'categories' or, failing
        that, 'venue.categories'. Each list entry is a mapping with a 'name' key.

    Returns
    -------
    The 'name' of the first category, or None when the category list is empty.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates
        # instead of being silently swallowed.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [153]:
# Flatten the venue items of the first result group into a table and keep
# only the name, category and coordinate columns.
venues = results['response']['groups'][0]['items']
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = json_normalize(venues).loc[:, filtered_columns]

# Replace each row's category list with the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Strip the dotted prefixes: 'venue.location.lat' -> 'lat', and so on.
nearby_venues.columns = [column.rsplit('.', 1)[-1] for column in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,The Keg,Steakhouse,43.766579,-79.412131
1,Konjiki Ramen,Ramen Restaurant,43.766998,-79.412222
2,The Captain's Boil,Seafood Restaurant,43.773255,-79.413805
3,Maryam Hotel,Hotel,43.766961,-79.401199
4,Loblaws,Grocery Store,43.768722,-79.412101


### Cluster the neighbourhoods of North York

In [155]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Collect Foursquare "explore" venues around each neighbourhood.

    For every (name, latitude, longitude) triple a request is sent to the
    Foursquare venues/explore endpoint and one output row is produced per
    returned venue. The module-level CLIENT_ID, CLIENT_SECRET, VERSION and
    LIMIT are used to build each request. Each neighbourhood name is printed
    as it is processed.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Neighbourhood names and their coordinates, consumed in parallel.
    radius : int, default 500
        Value sent as the request's `radius` parameter.

    Returns
    -------
    pandas.DataFrame
        Columns: 'Neighborhood', 'Neighbourhood Latitude',
        'Neighbourhood Longitude', 'Venue', 'Venue Latitude',
        'Venue Longitude', 'Venue Category'. The frame is empty (but has these
        columns) when no venues are found. 'Venue Category' is None for a venue
        that has no categories.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    # NOTE: the first column keeps the 'Neighborhood' spelling because the
    # one-hot encoding cell reads it under exactly that name.
    columns = ['Neighborhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; fail fast on a stalled connection or HTTP error
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue without categories must not abort the whole crawl
                categories[0]['name'] if categories else None))

    # Passing `columns=` to the constructor also works when `rows` is empty;
    # assigning `.columns` afterwards raised ValueError in that case.
    return pd.DataFrame(rows, columns=columns)


In [183]:
# Fetch the venues around every North York neighbourhood.
northyork_venues = getNearbyVenues(
    names=northyork_data['Neighbourhood'],
    latitudes=northyork_data['Latitude'],
    longitudes=northyork_data['Longitude'],
)

# One indicator column per venue category (no column-name prefix).
northyork_onehot = pd.get_dummies(northyork_venues[['Venue Category']], prefix="", prefix_sep="")

# Attach the neighbourhood name; it is appended as the last column ...
northyork_onehot['Neighbourhood'] = northyork_venues['Neighborhood']

# ... then rotate the column order so that last column becomes the first.
onehot_columns = list(northyork_onehot.columns)
fixed_columns = onehot_columns[-1:] + onehot_columns[:-1]
northyork_onehot = northyork_onehot[fixed_columns]

northyork_onehot.head()

Willowdale South
Silver Hills,York Mills
Parkwoods
Willowdale West
CFB Toronto,Downsview East
Downsview Northwest
Humber Summit
Flemingdon Park,Don Mills South
Downsview,North Park,Upwood Park
Emery,Humberlea
Fairview,Henry Farm,Oriole
Hillcrest Village
Northwood Park,York University
Downsview West
Bayview Village
York Mills West
Lawrence Heights,Lawrence Manor
Don Mills North
Newtonbrook,Willowdale
Bathurst Manor,Downsview North,Wilson Heights
Downsview Central
Glencairn
Victoria Village
Bedford Park,Lawrence Manor East


Unnamed: 0,Neighbourhood,Airport,Athletics & Sports,Bakery,Bank,Bar,Baseball Field,Basketball Court,Boutique,Bridal Shop,Burger Joint,Cafeteria,Café,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,Construction & Landscaping,Convenience Store,Deli / Bodega,Discount Store,Dog Run,Food & Drink Shop,Food Truck,French Restaurant,Furniture / Home Store,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Hockey Arena,Hotel,Intersection,Italian Restaurant,Japanese Restaurant,Liquor Store,Massage Studio,Mediterranean Restaurant,Movie Theater,Park,Pharmacy,Pizza Place,Playground,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Restaurant,Shopping Mall,Steakhouse,Toy / Game Store,Vietnamese Restaurant
0,Willowdale South,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Willowdale South,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
2,Willowdale South,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0
3,Willowdale South,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Willowdale South,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [184]:
# Mean of each one-hot category column per neighbourhood, i.e. the share of
# that neighbourhood's venues falling in each category.
neighbourhood_groups = northyork_onehot.groupby('Neighbourhood')
northyork_grouped = neighbourhood_groups.mean().reset_index()
northyork_grouped

Unnamed: 0,Neighbourhood,Airport,Athletics & Sports,Bakery,Bank,Bar,Baseball Field,Basketball Court,Boutique,Bridal Shop,Burger Joint,Cafeteria,Café,Caribbean Restaurant,Chinese Restaurant,Clothing Store,Coffee Shop,Construction & Landscaping,Convenience Store,Deli / Bodega,Discount Store,Dog Run,Food & Drink Shop,Food Truck,French Restaurant,Furniture / Home Store,Golf Course,Grocery Store,Gym,Gym / Fitness Center,Hockey Arena,Hotel,Intersection,Italian Restaurant,Japanese Restaurant,Liquor Store,Massage Studio,Mediterranean Restaurant,Movie Theater,Park,Pharmacy,Pizza Place,Playground,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Restaurant,Shopping Mall,Steakhouse,Toy / Game Store,Vietnamese Restaurant
0,"Bathurst Manor,Downsview North,Wilson Heights",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park,Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0
3,"CFB Toronto,Downsview East",0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Don Mills North,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Downsview Central,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Downsview Northwest,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Downsview West,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0
8,"Downsview,North Park,Upwood Park",0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Emery,Humberlea",0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Cluster North York Neighbourhoods into 5 clusters

In [186]:
# set number of clusters
kclusters = 5

# Feature matrix: the per-neighbourhood category frequencies without the name
# column. `columns=` is used because the positional axis argument
# (`drop('Neighbourhood', 1)`) was removed in pandas 2.0.
northyork_grouped_clustering = northyork_grouped.drop(columns='Neighbourhood')

# run k-means clustering (fixed random_state for reproducible labels)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(northyork_grouped_clustering)

# check cluster labels generated for the first 10 rows of the dataframe
kmeans.labels_[0:10]

array([4, 0, 4, 2, 0, 3, 0, 2, 2, 3], dtype=int32)

#### Print each neighbourhood along with its top 5 most common venue categories

In [191]:
num_top_venues = 5

# For each neighbourhood, print its venue categories ranked by frequency.
for hood in northyork_grouped['Neighbourhood']:
    print(f"----{hood}----")
    is_hood = northyork_grouped['Neighbourhood'] == hood
    # Transpose the single matching row into a two-column (venue, freq) table.
    frequencies = northyork_grouped[is_hood].T.reset_index()
    frequencies.columns = ['venue', 'freq']
    # The first row holds the neighbourhood name, not a frequency.
    frequencies = frequencies.iloc[1:]
    frequencies['freq'] = frequencies['freq'].astype(float)
    frequencies = frequencies.round({'freq': 2})
    ranked = frequencies.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Bathurst Manor,Downsview North,Wilson Heights----
           venue  freq
0    Coffee Shop   0.4
1     Restaurant   0.2
2    Bridal Shop   0.2
3  Deli / Bodega   0.2
4        Airport   0.0


----Bayview Village----
                 venue  freq
0   Chinese Restaurant  0.25
1                 Bank  0.25
2  Japanese Restaurant  0.25
3                 Café  0.25
4             Pharmacy  0.00


----Bedford Park,Lawrence Manor East----
                venue  freq
0         Coffee Shop   0.4
1          Restaurant   0.2
2  Italian Restaurant   0.2
3                Café   0.2
4             Airport   0.0


----CFB Toronto,Downsview East----
                  venue  freq
0               Airport   0.5
1                  Park   0.5
2  Gym / Fitness Center   0.0
3          Hockey Arena   0.0
4                 Hotel   0.0


----Don Mills North----
                  venue  freq
0  Gym / Fitness Center   0.2
1        Baseball Field   0.2
2   Japanese Restaurant   0.2
3                  Café   0.2
4  C

#### Create a dataframe of the 10 most common venue categories for each neighbourhood

In [192]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of a row.

    The first entry of `row` is skipped, the remaining entries are sorted by
    value in descending order, and the index labels of the first
    `num_top_venues` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues:
# '1st Most Common Venue', '2nd ...', '3rd ...', then '4th', '5th', ...
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    # Explicit bounds check instead of a bare `except:` around the list lookup,
    # so unrelated errors are never swallowed.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per neighbourhood
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = northyork_grouped['Neighbourhood']

# fill every row with that neighbourhood's most frequent venue categories
for ind in np.arange(northyork_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(northyork_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor,Downsview North,Wilson Heights",Coffee Shop,Restaurant,Deli / Bodega,Bridal Shop,Vietnamese Restaurant,Caribbean Restaurant,Food Truck,Food & Drink Shop,Dog Run,Discount Store
1,Bayview Village,Chinese Restaurant,Bank,Japanese Restaurant,Café,Vietnamese Restaurant,Food Truck,Food & Drink Shop,Dog Run,Discount Store,Deli / Bodega
2,"Bedford Park,Lawrence Manor East",Coffee Shop,Café,Restaurant,Italian Restaurant,Caribbean Restaurant,Food & Drink Shop,Dog Run,Discount Store,Deli / Bodega,Convenience Store
3,"CFB Toronto,Downsview East",Airport,Park,Caribbean Restaurant,Food Truck,Food & Drink Shop,Dog Run,Discount Store,Deli / Bodega,Convenience Store,Construction & Landscaping
4,Don Mills North,Caribbean Restaurant,Gym / Fitness Center,Baseball Field,Japanese Restaurant,Café,Chinese Restaurant,Food & Drink Shop,Dog Run,Discount Store,Deli / Bodega


#### Visualize created clusters

In [202]:
# add clustering labels
# The insert used to be commented out, so on a fresh kernel the
# 'Cluster Labels' column never existed and this cell raised KeyError.
# Dropping any previous copy first makes the cell safe to re-run.
neighbourhoods_venues_sorted = neighbourhoods_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# merge the labelled top-venue table with northyork_data to add
# postcode/borough/latitude/longitude for each neighbourhood
northyork_merged = northyork_data.join(
    neighbourhoods_venues_sorted.set_index('Neighbourhood'),
    on='Neighbourhood',
    how='right')

# create map
map_clusters = folium.Map(location=[43.7615, -79.4111], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(northyork_merged['Latitude'], northyork_merged['Longitude'], northyork_merged['Neighbourhood'], northyork_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # The -1 offset rotates the palette (label 0 uses the last colour);
    # kept so previously rendered maps keep their colours.
    marker_color = rainbow[int(cluster)-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

#### Examine Cluster 1 (cluster label 0)

In [204]:
# Rows assigned cluster label 0, showing column 1 plus every column from
# position 5 onwards.
in_cluster = northyork_merged['Cluster Labels'] == 0
shown_positions = [1] + list(range(5, northyork_merged.shape[1]))
northyork_merged.loc[in_cluster, northyork_merged.columns[shown_positions]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,0,Movie Theater,Café,Steakhouse,Grocery Store,Ramen Restaurant,Dog Run,Discount Store,Deli / Bodega,Convenience Store,Construction & Landscaping
3,North York,0,Grocery Store,Discount Store,Coffee Shop,Pizza Place,Pharmacy,Vietnamese Restaurant,Caribbean Restaurant,Food & Drink Shop,Dog Run,Deli / Bodega
5,North York,0,Athletics & Sports,Grocery Store,Gym / Fitness Center,Discount Store,Liquor Store,Vietnamese Restaurant,Chinese Restaurant,Food & Drink Shop,Dog Run,Deli / Bodega
6,North York,0,Construction & Landscaping,Pizza Place,Vietnamese Restaurant,Caribbean Restaurant,Food Truck,Food & Drink Shop,Dog Run,Discount Store,Deli / Bodega,Convenience Store
7,North York,0,Restaurant,Gym,Discount Store,Italian Restaurant,Clothing Store,Vietnamese Restaurant,Caribbean Restaurant,Food & Drink Shop,Dog Run,Deli / Bodega
10,North York,0,Movie Theater,Toy / Game Store,Shopping Mall,Bakery,Burger Joint,Caribbean Restaurant,Food & Drink Shop,Dog Run,Discount Store,Deli / Bodega
11,North York,0,Golf Course,Mediterranean Restaurant,Dog Run,Pool,Food & Drink Shop,Discount Store,Deli / Bodega,Convenience Store,Construction & Landscaping,Coffee Shop
14,North York,0,Chinese Restaurant,Bank,Japanese Restaurant,Café,Vietnamese Restaurant,Food Truck,Food & Drink Shop,Dog Run,Discount Store,Deli / Bodega
17,North York,0,Caribbean Restaurant,Gym / Fitness Center,Baseball Field,Japanese Restaurant,Café,Chinese Restaurant,Food & Drink Shop,Dog Run,Discount Store,Deli / Bodega
21,North York,0,Bakery,Pub,Playground,Pizza Place,Japanese Restaurant,Vietnamese Restaurant,Caribbean Restaurant,Dog Run,Discount Store,Deli / Bodega
