## Capstone Assignment Module 3 ##
#### Applied Data Science Capstone ####
###### IBM Data Science Professional Certificate Specialization #####

In [6]:
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans 
import seaborn as sns 
import json 
from geopy.geocoders import Nominatim 
from pandas.io.json import json_normalize 
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium 

### Implement Web-Scraping ##

In [7]:
url=requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text #URL
soup=BeautifulSoup(url,'lxml') 
table = soup.find("table",{"class":"wikitable sortable"}) #table Class
text_data = [] #declare List for storage text from table  
text_sys = table.findAll('tr') #detect all <tr> syntax include text

for text_loop in text_sys:
    ex_text = text_loop.findChildren(recursive=False) #explode list from text_sys 
    text_store = [] #declare list for store string text as list
    for read in ex_text:   #2nd loop in ex_text
        text_index = read.text.strip() #arrange text and strip
        text_store.append(text_index) #combine 3-text string as list  
            
    text_data.append(text_store) #out of 2nd loop save whole list text to one variable list 
text_data #try print out data

[['Postcode', 'Borough', 'Neighbourhood'],
 ['M1A', 'Not assigned', 'Not assigned'],
 ['M2A', 'Not assigned', 'Not assigned'],
 ['M3A', 'North York', 'Parkwoods'],
 ['M4A', 'North York', 'Victoria Village'],
 ['M5A', 'Downtown Toronto', 'Harbourfront'],
 ['M5A', 'Downtown Toronto', 'Regent Park'],
 ['M6A', 'North York', 'Lawrence Heights'],
 ['M6A', 'North York', 'Lawrence Manor'],
 ['M7A', "Queen's Park", 'Not assigned'],
 ['M8A', 'Not assigned', 'Not assigned'],
 ['M9A', 'Etobicoke', 'Islington Avenue'],
 ['M1B', 'Scarborough', 'Rouge'],
 ['M1B', 'Scarborough', 'Malvern'],
 ['M2B', 'Not assigned', 'Not assigned'],
 ['M3B', 'North York', 'Don Mills North'],
 ['M4B', 'East York', 'Woodbine Gardens'],
 ['M4B', 'East York', 'Parkview Hill'],
 ['M5B', 'Downtown Toronto', 'Ryerson'],
 ['M5B', 'Downtown Toronto', 'Garden District'],
 ['M6B', 'North York', 'Glencairn'],
 ['M7B', 'Not assigned', 'Not assigned'],
 ['M8B', 'Not assigned', 'Not assigned'],
 ['M9B', 'Etobicoke', 'Cloverdale'],
 [

### Implement Data-to-DataFrame

In [8]:
df=pd.DataFrame(text_data,columns=["Postcode","Borough","Neighbourhood"]) #Assign data to dataframe
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


In [9]:
#If Neighbour index have "Not assigned" it will instead a index of Borough.
df.loc[df.Neighbourhood=="Not assigned","Neighbourhood"] = df.Borough 
df.drop(df[df.Borough == 'Borough'].index, inplace=True) #remove name column in index
df.drop(df[df.Borough == 'Not assigned'].index, inplace=True) #remove Borough rows if index =="Not assigned"
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights


In [10]:
#sort dataframe 
df_ = df.sort_values(by=['Postcode'])
df_.reset_index(inplace=True)
df_.drop('index',axis=1,inplace=True)
df_.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,Rouge
1,M1B,Scarborough,Malvern
2,M1C,Scarborough,Port Union
3,M1C,Scarborough,Rouge Hill
4,M1C,Scarborough,Highland Creek


In [11]:
df_.shape

(212, 3)

In [12]:
#sort dataframe by postcode
df = (df_.groupby(['Postcode','Borough'])['Neighbourhood'].apply(lambda x: ','.join(set(x.dropna()))).reset_index())
df.columns=["Postcode","Borough","Neighbourhood"]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Port Union,Rouge Hill"
2,M1E,Scarborough,"West Hill,Guildwood,Morningside"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [13]:
df.shape

(103, 3)

### Build Location for mapping ###

In [14]:
#load geo_data
geo_df = pd.read_csv("http://cocl.us/Geospatial_data")
geo_df.columns=["Postcode","Latitude","Longitude"]
geo_df.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [15]:
#merge_data
df=pd.merge(df,geo_df,on="Postcode")
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Port Union,Rouge Hill",43.784535,-79.160497
2,M1E,Scarborough,"West Hill,Guildwood,Morningside",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [16]:
#arrage to mapping
df_geo=df[["Borough","Neighbourhood","Latitude","Longitude"]]
df_geo.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,Scarborough,"Highland Creek,Port Union,Rouge Hill",43.784535,-79.160497
2,Scarborough,"West Hill,Guildwood,Morningside",43.763573,-79.188711
3,Scarborough,Woburn,43.770992,-79.216917
4,Scarborough,Cedarbrae,43.773136,-79.239476


In [17]:
address = 'Toronto, ON'
geolocator = Nominatim(user_agent='eiei')
location = geolocator.geocode(address)
Latitude = location.latitude
Longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(Latitude, Longitude))

The geograpical coordinates of Toronto are 43.653963, -79.387207.


## Map Toronto

In [18]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[Latitude, Longitude], zoom_start=10)
# add markers to map
for lat, lng, borough, neighbourhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(df_geo, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto

## Let's Analysis

In [19]:
df['Borough'].value_counts()

North York          24
Downtown Toronto    18
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East Toronto         5
York                 5
East York            5
Queen's Park         1
Mississauga          1
Name: Borough, dtype: int64

#### Downtown Toronto. Is my choice###

In [20]:
df_down=df[df['Borough']=='Downtown Toronto'].reset_index(drop=True)
df_down.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
1,M4X,Downtown Toronto,"Cabbagetown,St. James Town",43.667967,-79.367675
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
3,M5A,Downtown Toronto,"Regent Park,Harbourfront",43.65426,-79.360636
4,M5B,Downtown Toronto,"Garden District,Ryerson",43.657162,-79.378937


### Downtown Toronto Map###

In [21]:
address = 'Downtown Toronto, ON'
geolocator = Nominatim(user_agent='eiei')
location = geolocator.geocode(address)
Latitude = location.latitude
Longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(Latitude, Longitude))
# create map of Toronto using latitude and longitude values
map_Downtown_Toronto = folium.Map(location=[Latitude, Longitude], zoom_start=13)

# add markers to map
for lat, lng, borough, neighbourhood in zip(df_down['Latitude'], df_down['Longitude'], df_down['Borough'], df_down['Neighbourhood']):
    label = '{}, {}'.format(df_down, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_Downtown_Toronto)  
    
map_Downtown_Toronto

The geograpical coordinates of Toronto are 43.655115, -79.380219.


### Foursquare API to explore Downtown Toronto ###

In [22]:
CLIENT_ID = 'PQHF3XY3005LN3ZFE5RXCAEEYUKGHM3RZW2NHYTRWV1U325A' #Foursquare ID
CLIENT_SECRET = 'OSTVDXLO41ZEQER32DK3QCRYW1LF3D4IOO4OTBYICHLQYUUD' # Foursquare Secret
VERSION = '20180602' 

In [23]:
df_down['Neighbourhood'][0]

'Rosedale'

In [24]:
neighborhood_latitude = df_down['Latitude'][0] 
neighborhood_longitude = df_down['Longitude'][0] 

In [39]:
LIMIT = 100   
radius = 5000
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=PQHF3XY3005LN3ZFE5RXCAEEYUKGHM3RZW2NHYTRWV1U325A&client_secret=OSTVDXLO41ZEQER32DK3QCRYW1LF3D4IOO4OTBYICHLQYUUD&v=20180602&ll=43.6795626,-79.37752940000001&radius=5000&limit=100'

In [43]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5bcb411b4434b9406ea78ce8'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Toronto',
  'headerFullLocation': 'Toronto',
  'headerLocationGranularity': 'city',
  'totalResults': 242,
  'suggestedBounds': {'ne': {'lat': 43.72456264500004,
    'lng': -79.31542322743701},
   'sw': {'lat': 43.63456255499995, 'lng': -79.43963557256302}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4adcb343f964a520e32e21e3',
       'name': 'Summerhill Market',
       'location': {'address': '446 Summerhill Ave',
        'crossStreet': 'btwn. MacLennan Ave. and Glen Rd.',
        'lat': 43.68626482142425,
        'lng': -79.37545823237794,
        'labeledLatLngs':

**get_category_type from Foursqure API** 

In [44]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

**Cleansing JSON** 

In [45]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Summerhill Market,Grocery Store,43.686265,-79.375458
1,Black Camel,BBQ Joint,43.677016,-79.389367
2,Greenhouse Juice Co,Juice Bar,43.679101,-79.390686
3,LCBO,Liquor Store,43.681497,-79.391261
4,Evergreen Brick Works,Historic Site,43.684362,-79.365445


**And how many venues were returned by Foursquare?**

In [47]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


In [48]:
nearby_venues.shape

(100, 4)

In [51]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [50]:
downtown_venues = getNearbyVenues(names=df_down['Neighbourhood'],
                                   latitudes=df_down['Latitude'],
                                   longitudes=df_down['Longitude']
                                  )

Rosedale
Cabbagetown,St. James Town
Church and Wellesley
Regent Park,Harbourfront
Garden District,Ryerson
St. James Town
Berczy Park
Central Bay Street
King,Richmond,Adelaide
Union Station,Toronto Islands,Harbourfront East
Toronto Dominion Centre,Design Exchange
Commerce Court,Victoria Hotel
Harbord,University of Toronto
Kensington Market,Chinatown,Grange Park
CN Tower,Bathurst Quay,Harbourfront West,King and Spadina,Railway Lands,Island airport,South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place,Underground city
Christie


#### Let's check the size of the resulting dataframe

In [52]:
print(downtown_venues.shape)
downtown_venues.head()

(1283, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rosedale,43.679563,-79.377529,Rosedale Park,43.682328,-79.378934,Playground
1,Rosedale,43.679563,-79.377529,Whitney Park,43.682036,-79.373788,Park
2,Rosedale,43.679563,-79.377529,Alex Murray Parkette,43.6783,-79.382773,Park
3,Rosedale,43.679563,-79.377529,Milkman's Lane,43.676352,-79.373842,Trail
4,"Cabbagetown,St. James Town",43.667967,-79.367675,Cranberries,43.667843,-79.369407,Diner


****Let's find out how many unique categories can be curated from all the returned venues****

In [53]:
print('There are {} uniques categories.'.format(len(downtown_venues['Venue Category'].unique())))

There are 204 uniques categories.


## Analyze Each Neighborhood

In [60]:
# one hot encoding
downtown_onehot = pd.get_dummies(downtown_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
downtown_onehot['Neighborhood'] = downtown_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [downtown_onehot.columns[-1]] + list(downtown_onehot.columns[:-1])
downtown_onehot = downtown_onehot[fixed_columns]

print(downtown_onehot.shape)
downtown_onehot.head()

(1283, 204)


Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Theater,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [63]:
downtown_grouped = downtown_onehot.groupby('Neighborhood').mean().reset_index()
print(downtown_grouped.shape)
downtown_grouped.head()

(18, 204)


Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,...,Theater,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"CN Tower,Bathurst Quay,Harbourfront West,King ...",0.0,0.0,0.0,0.0,0.071429,0.071429,0.071429,0.142857,0.142857,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Cabbagetown,St. James Town",0.020833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Central Bay Street,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.012195,0.0,0.0,0.012195,0.0
4,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues

In [66]:
num_top_venues = 5

for hood in downtown_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = downtown_grouped[downtown_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1        Cocktail Bar  0.06
2  Seafood Restaurant  0.04
3            Beer Bar  0.04
4      Farmers Market  0.04


----CN Tower,Bathurst Quay,Harbourfront West,King and Spadina,Railway Lands,Island airport,South Niagara----
              venue  freq
0  Airport Terminal  0.14
1   Airport Service  0.14
2    Airport Lounge  0.14
3     Boat or Ferry  0.07
4      Airport Gate  0.07


----Cabbagetown,St. James Town----
                venue  freq
0         Coffee Shop  0.10
1          Restaurant  0.08
2   Indian Restaurant  0.04
3  Italian Restaurant  0.04
4                Café  0.04


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.15
1                Café  0.06
2  Italian Restaurant  0.05
3                 Bar  0.04
4      Sandwich Place  0.04


----Christie----
           venue  freq
0           Café  0.19
1  Grocery Store  0.19
2           Park  0.12
3      Nightclub  0.06
4     Bab

#### Let's put that into a *pandas* dataframe

In [67]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [69]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = downtown_grouped['Neighborhood']

for ind in np.arange(downtown_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Seafood Restaurant,Beer Bar,Farmers Market,Cheese Shop,Café,Steakhouse,Restaurant
1,"CN Tower,Bathurst Quay,Harbourfront West,King ...",Airport Terminal,Airport Lounge,Airport Service,Boat or Ferry,Harbor / Marina,Airport,Airport Food Court,Airport Gate,Plane,Boutique
2,"Cabbagetown,St. James Town",Coffee Shop,Restaurant,Café,Pub,Bakery,Pizza Place,Park,Chinese Restaurant,Indian Restaurant,Italian Restaurant
3,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Bar,Bubble Tea Shop,Burger Joint,Sandwich Place,Japanese Restaurant,Ice Cream Shop,Salad Place
4,Christie,Grocery Store,Café,Park,Athletics & Sports,Nightclub,Convenience Store,Restaurant,Diner,Baby Store,Italian Restaurant


## 4. Cluster Neighborhoods
Run *k*-means to cluster the neighborhood into 5 clusters.

In [76]:
# set number of clusters
kclusters = 5

downtown_grouped_clustering = downtown_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(downtown_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:18] 

array([4, 3, 2, 2, 0, 2, 4, 4, 2, 2, 2, 4, 4, 1, 4, 4, 4, 4])

In [77]:
df_down.shape

(18, 5)

In [81]:
downtown_merged = df_down
# add clustering labels
downtown_merged['Cluster Labels'] = kmeans.labels_
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
downtown_merged = downtown_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')
downtown_merged.head(10) # check the last columns!

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,4,Park,Trail,Playground,Deli / Bodega,Electronics Store,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store
1,M4X,Downtown Toronto,"Cabbagetown,St. James Town",43.667967,-79.367675,3,Coffee Shop,Restaurant,Café,Pub,Bakery,Pizza Place,Park,Chinese Restaurant,Indian Restaurant,Italian Restaurant
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,2,Japanese Restaurant,Coffee Shop,Sushi Restaurant,Burger Joint,Gay Bar,Restaurant,Bubble Tea Shop,Café,Men's Store,Mediterranean Restaurant
3,M5A,Downtown Toronto,"Regent Park,Harbourfront",43.65426,-79.360636,2,Coffee Shop,Bakery,Park,Café,Restaurant,Pub,Mexican Restaurant,Breakfast Spot,Theater,Health Food Store
4,M5B,Downtown Toronto,"Garden District,Ryerson",43.657162,-79.378937,0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Bar,Japanese Restaurant,Ramen Restaurant,Restaurant,Movie Theater,Sandwich Place
5,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,2,Coffee Shop,Café,Restaurant,Hotel,Clothing Store,Cocktail Bar,Gastropub,Bakery,Italian Restaurant,Cosmetics Shop
6,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,4,Coffee Shop,Cocktail Bar,Bakery,Seafood Restaurant,Beer Bar,Farmers Market,Cheese Shop,Café,Steakhouse,Restaurant
7,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,4,Coffee Shop,Café,Italian Restaurant,Bar,Bubble Tea Shop,Burger Joint,Sandwich Place,Japanese Restaurant,Ice Cream Shop,Salad Place
8,M5H,Downtown Toronto,"King,Richmond,Adelaide",43.650571,-79.384568,2,Coffee Shop,Café,American Restaurant,Steakhouse,Thai Restaurant,Cosmetics Shop,Restaurant,Hotel,Bar,Gym
9,M5J,Downtown Toronto,"Union Station,Toronto Islands,Harbourfront East",43.640816,-79.381752,2,Coffee Shop,Hotel,Pizza Place,Aquarium,Café,Sports Bar,Italian Restaurant,Scenic Lookout,Brewery,Steakhouse


#### let's visualize the resulting clusters

In [87]:
# create map
map_clusters = folium.Map(location=[Latitude, Longitude], zoom_start=14)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighbourhood'], downtown_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Clusters

### Clusters 1

In [89]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 0, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Downtown Toronto,0,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Bar,Japanese Restaurant,Ramen Restaurant,Restaurant,Movie Theater,Sandwich Place


### Clusters 2

In [90]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 1, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Downtown Toronto,1,Café,Vegetarian / Vegan Restaurant,Chinese Restaurant,Bar,Vietnamese Restaurant,Mexican Restaurant,Bakery,Coffee Shop,Dumpling Restaurant,Noodle House


### Clusters 3

In [91]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 2, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,2,Japanese Restaurant,Coffee Shop,Sushi Restaurant,Burger Joint,Gay Bar,Restaurant,Bubble Tea Shop,Café,Men's Store,Mediterranean Restaurant
3,Downtown Toronto,2,Coffee Shop,Bakery,Park,Café,Restaurant,Pub,Mexican Restaurant,Breakfast Spot,Theater,Health Food Store
5,Downtown Toronto,2,Coffee Shop,Café,Restaurant,Hotel,Clothing Store,Cocktail Bar,Gastropub,Bakery,Italian Restaurant,Cosmetics Shop
8,Downtown Toronto,2,Coffee Shop,Café,American Restaurant,Steakhouse,Thai Restaurant,Cosmetics Shop,Restaurant,Hotel,Bar,Gym
9,Downtown Toronto,2,Coffee Shop,Hotel,Pizza Place,Aquarium,Café,Sports Bar,Italian Restaurant,Scenic Lookout,Brewery,Steakhouse
10,Downtown Toronto,2,Coffee Shop,Hotel,Café,American Restaurant,Restaurant,Gastropub,Gym,Sports Bar,Deli / Bodega,Italian Restaurant


### Clusters 4

In [92]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 3, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Downtown Toronto,3,Coffee Shop,Restaurant,Café,Pub,Bakery,Pizza Place,Park,Chinese Restaurant,Indian Restaurant,Italian Restaurant


### Clusters 5

In [93]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 4, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,4,Park,Trail,Playground,Deli / Bodega,Electronics Store,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Discount Store
6,Downtown Toronto,4,Coffee Shop,Cocktail Bar,Bakery,Seafood Restaurant,Beer Bar,Farmers Market,Cheese Shop,Café,Steakhouse,Restaurant
7,Downtown Toronto,4,Coffee Shop,Café,Italian Restaurant,Bar,Bubble Tea Shop,Burger Joint,Sandwich Place,Japanese Restaurant,Ice Cream Shop,Salad Place
11,Downtown Toronto,4,Coffee Shop,Café,Hotel,Restaurant,American Restaurant,Deli / Bodega,Gastropub,Steakhouse,Italian Restaurant,Seafood Restaurant
12,Downtown Toronto,4,Café,Coffee Shop,Theater,Bar,Japanese Restaurant,Restaurant,Bakery,Bookstore,Pub,Poutine Place
14,Downtown Toronto,4,Airport Terminal,Airport Lounge,Airport Service,Boat or Ferry,Harbor / Marina,Airport,Airport Food Court,Airport Gate,Plane,Boutique
15,Downtown Toronto,4,Coffee Shop,Café,Restaurant,Hotel,Seafood Restaurant,Cocktail Bar,Beer Bar,Creperie,Farmers Market,Pub
16,Downtown Toronto,4,Coffee Shop,Café,Hotel,Restaurant,Steakhouse,Gym,Deli / Bodega,American Restaurant,Gastropub,Bar
17,Downtown Toronto,4,Grocery Store,Café,Park,Athletics & Sports,Nightclub,Convenience Store,Restaurant,Diner,Baby Store,Italian Restaurant
