# Question 1 - Creating DataFrame from HTML table

In [21]:
import requests
import lxml.html as lh
import pandas as pd

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
page = requests.get(url)
doc = lh.fromstring(page.content)
tr_elements = doc.xpath('//tr')

In [22]:
tr_elements = doc.xpath('//tr')

col=[]
i=0
for t in tr_elements[0]:
    i+=1
    name=t.text_content()
    col.append((name,[]))

In [23]:
ln=len(tr_elements[0])

for j in range(1,len(tr_elements)):
    #T is our j'th row
    T=tr_elements[j]
    
    if len(T)!=ln:
        break
    i=0
    
    for t in T.iterchildren():
        data=t.text_content() 
        if i>0:
            try:
                data=int(data)
            except:
                pass
        col[i][1].append(data)
        i+=1

In [24]:
Dict={title:column for (title,column) in col}
df=pd.DataFrame(Dict)
df.rename(columns={df.columns[0]:df.columns[0][:-1],df.columns[1]:df.columns[1][:-1],df.columns[2]:df.columns[2][:-1]},inplace=True)
df.rename(columns={'Neighbourhood':'Neighborhood'},inplace=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A\n,Not assigned\n,Not assigned\n
1,M2A\n,Not assigned\n,Not assigned\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"


## Data wrangling

In [25]:
df.replace('\n','',regex=True,inplace=True)

#Select only assigned borough
df=df[df['Borough']!='Not assigned']
df=df[:-1]

### Finding "Not assigned" Neighbourhood with "Not assigned" Borough

In [26]:
df[(df['Neighborhood']=='Not assigned')&(df['Borough']!='Not assigned')]

Unnamed: 0,Postal Code,Borough,Neighborhood


No values in dataframe with this characteristics are present
 
  

Creating lines for multiple Neighborhoods in Boroughs

In [27]:
df_adj=pd.DataFrame(columns=df.columns)
d=df.columns

for a,ind in zip(df['Neighborhood'],df.index):
    if a.find(',')>0:
        ls=[]
        ls=a.split(',')
        for l in ls:
            d=df_adj.columns
            df_adj=df_adj.append({d[0]:df['Postal Code'][ind],d[1]:df['Borough'][ind],d[2]:l},ignore_index=True)
        #df_tor=df_tor.drop([ind])
    else:
        df_adj=df_adj.append({d[0]:df['Postal Code'][ind],d[1]:df['Borough'][ind],d[2]:df['Neighborhood'][ind]},ignore_index=True)
        
df_adj

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park
3,M5A,Downtown Toronto,Harbourfront
4,M6A,North York,Lawrence Manor
...,...,...,...
212,M8Z,Etobicoke,Mimico NW
213,M8Z,Etobicoke,The Queensway West
214,M8Z,Etobicoke,South of Bloor
215,M8Z,Etobicoke,Kingsway Park South West


In [28]:
df_adj.shape

(217, 3)

# Question 2 - Retrieving Latitude and Longitude

Setting function that retrieves Latitude and Longitude

In [29]:
!pip install geocoder
import geocoder

def latlon(postal_code):
    lat_lng_coords = None

    while(lat_lng_coords is None):
      g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
      lat_lng_coords = g.latlng

    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]

    
    return latitude,longitude



In [30]:
df_adj['Latitude'],df_adj['Longitude']='',''
pos_cod=pd.DataFrame(df_adj['Postal Code'].unique())
pos_cod.rename(columns={0:'Postal Code'},inplace=True)
pos_cod

Unnamed: 0,Postal Code
0,M3A
1,M4A
2,M5A
3,M6A
4,M7A
...,...
98,M8X
99,M4Y
100,M7Y
101,M8Y


In [31]:
import sys
for i in df_adj.index:
    df_adj['Latitude'][i]=latlon(df_adj['Postal Code'][i])[0]
    df_adj['Longitude'][i]=latlon(df_adj['Postal Code'][i])[1]
        
    sys.stdout.write('\r'+'Done '+str(i)+' of '+str(df_adj.index.stop-1))
    sys.stdout.flush()
    
df_adj.head()

Done 216 of 217

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7525,-79.3299
1,M4A,North York,Victoria Village,43.7306,-79.3131
2,M5A,Downtown Toronto,Regent Park,43.6551,-79.3626
3,M5A,Downtown Toronto,Harbourfront,43.6551,-79.3626
4,M6A,North York,Lawrence Manor,43.7233,-79.4504


# Question 3 - Clusternig

## Filtering data for Toronto area

In [12]:
from sklearn.cluster import KMeans
df_tor=df_adj[df_adj['Borough'].str.contains('Toronto')].reset_index(drop=True)
df_tor=df_tor.iloc[1:,:]
df_tor

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
1,M5A,Downtown Toronto,Harbourfront,43.6551,-79.3626
2,M7A,Downtown Toronto,Queen's Park,43.6625,-79.3919
3,M7A,Downtown Toronto,Ontario Provincial Government,43.6625,-79.3919
4,M5B,Downtown Toronto,Garden District,43.6574,-79.378
5,M5B,Downtown Toronto,Ryerson,43.6574,-79.378
...,...,...,...,...,...
73,M5X,Downtown Toronto,First Canadian Place,43.6483,-79.3815
74,M5X,Downtown Toronto,Underground city,43.6483,-79.3815
75,M4Y,Downtown Toronto,Church and Wellesley,43.6666,-79.3813
76,M7Y,East Toronto,Business reply mail Processing Centre,43.6487,-79.3854


In [46]:
!pip install folium
import folium
map_toronto = folium.Map(location=[43.648690000000045, -79.38543999999996], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_adj['Latitude'], df_adj['Longitude'], df_adj['Borough'], df_adj['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto



### Setting Foursquare credentials and defining function for getting nearby venues

In [43]:
CLIENT_ID = 'AMQOC0PAD201KRERHODMBXYHTVPMF1UMLLO5UDZVUB4NWOPH' # your Foursquare ID
CLIENT_SECRET = 'HDTMQKXWJD3ISPTMZPT0KL1GUYYFT1ABL4GVVIXBWL0BDDX4' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 100


def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng, i in zip(names, latitudes, longitudes, range(1,len(names))):
        
        sys.stdout.write('\r'+'Getting nearby values for '+str(i)+' of '+str(len(names)-1))
        sys.stdout.flush()
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [44]:
toronto_venues = getNearbyVenues(names=df_tor['Neighborhood'],
                                   latitudes=df_tor['Latitude'],
                                   longitudes=df_tor['Longitude']
                                  )

Getting nearby values for 76 of 76

In [45]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Lawrence Park,3,3,3,3,3,3
Adelaide,100,100,100,100,100,100
Bathurst Quay,77,77,77,77,77,77
Cabbagetown,48,48,48,48,48,48
Chinatown,57,57,57,57,57,57
...,...,...,...,...,...,...
The Annex,21,21,21,21,21,21
The Beaches,4,4,4,4,4,4
The Danforth West,8,8,8,8,8,8
Toronto Dominion Centre,100,100,100,100,100,100


In [47]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))
toronto_venues.head()

There are 229 uniques categories.


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Harbourfront,43.65512,-79.36264,Roselle Desserts,43.653447,-79.362017,Bakery
1,Harbourfront,43.65512,-79.36264,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,Harbourfront,43.65512,-79.36264,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast Spot
3,Harbourfront,43.65512,-79.36264,The Yoga Lounge,43.655515,-79.364955,Yoga Studio
4,Harbourfront,43.65512,-79.36264,Body Blitz Spa East,43.654735,-79.359874,Spa


## Analyzing each Neighborhood

In [54]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] =toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [55]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Lawrence Park,0.000000,0.0,0.0,0.00,0.0,0.0,0.000000,0.0,0.000000,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00
1,Adelaide,0.000000,0.0,0.0,0.03,0.0,0.0,0.010000,0.0,0.000000,...,0.00,0.000000,0.010000,0.000000,0.000000,0.000000,0.010000,0.0,0.000000,0.01
2,Bathurst Quay,0.012987,0.0,0.0,0.00,0.0,0.0,0.000000,0.0,0.000000,...,0.00,0.012987,0.000000,0.012987,0.000000,0.000000,0.000000,0.0,0.000000,0.00
3,Cabbagetown,0.000000,0.0,0.0,0.00,0.0,0.0,0.000000,0.0,0.000000,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00
4,Chinatown,0.000000,0.0,0.0,0.00,0.0,0.0,0.017544,0.0,0.017544,...,0.00,0.000000,0.052632,0.000000,0.000000,0.035088,0.017544,0.0,0.000000,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69,The Annex,0.000000,0.0,0.0,0.00,0.0,0.0,0.000000,0.0,0.000000,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00
70,The Beaches,0.000000,0.0,0.0,0.00,0.0,0.0,0.000000,0.0,0.000000,...,0.25,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00
71,The Danforth West,0.000000,0.0,0.0,0.00,0.0,0.0,0.000000,0.0,0.000000,...,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.00
72,Toronto Dominion Centre,0.000000,0.0,0.0,0.03,0.0,0.0,0.010000,0.0,0.000000,...,0.00,0.010000,0.010000,0.000000,0.000000,0.000000,0.010000,0.0,0.000000,0.00


In [56]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----  Lawrence Park----
                venue  freq
0          Playground  0.33
1            Gym Pool  0.33
2                Park  0.33
3        Noodle House  0.00
4  Miscellaneous Shop  0.00


---- Adelaide----
                 venue  freq
0                Hotel  0.07
1          Coffee Shop  0.06
2                 Café  0.06
3                  Gym  0.05
4  Japanese Restaurant  0.04


---- Bathurst Quay----
                venue  freq
0  Italian Restaurant  0.08
1         Coffee Shop  0.06
2                Café  0.06
3   French Restaurant  0.04
4                 Bar  0.04


---- Cabbagetown----
                venue  freq
0         Coffee Shop  0.08
1         Pizza Place  0.06
2          Restaurant  0.06
3                Café  0.06
4  Italian Restaurant  0.04


---- Chinatown----
                           venue  freq
0                           Café  0.07
1  Vegetarian / Vegan Restaurant  0.05
2                    Coffee Shop  0.05
3                            Bar  0.05
4             

                 venue  freq
0          Coffee Shop  0.12
1                Hotel  0.07
2                 Café  0.06
3           Restaurant  0.05
4  American Restaurant  0.04


----Forest Hill North & West----
                venue  freq
0   French Restaurant   0.5
1                Park   0.5
2         Yoga Studio   0.0
3        Noodle House   0.0
4  Miscellaneous Shop   0.0


----Garden District----
                       venue  freq
0                Coffee Shop  0.12
1             Clothing Store  0.07
2                       Café  0.04
3  Middle Eastern Restaurant  0.03
4        Japanese Restaurant  0.03


----Harbourfront East----
                 venue  freq
0          Coffee Shop  0.12
1                Hotel  0.07
2  Japanese Restaurant  0.05
3           Restaurant  0.05
4                Plaza  0.05


----High Park----
                                      venue  freq
0                            Sandwich Place  0.25
1                         Convenience Store  0.25
2  Residential 

In [57]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [58]:
import numpy as np
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Lawrence Park,Playground,Gym Pool,Park,Dumpling Restaurant,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
1,Adelaide,Hotel,Coffee Shop,Café,Gym,Japanese Restaurant,Asian Restaurant,Salad Place,Steakhouse,American Restaurant,Restaurant
2,Bathurst Quay,Italian Restaurant,Café,Coffee Shop,French Restaurant,Park,Bar,Lounge,Restaurant,Speakeasy,Bakery
3,Cabbagetown,Coffee Shop,Café,Restaurant,Pizza Place,Park,Chinese Restaurant,Italian Restaurant,Pub,Market,Bakery
4,Chinatown,Café,Mexican Restaurant,Coffee Shop,Vegetarian / Vegan Restaurant,Bar,Grocery Store,Thai Restaurant,Park,Dumpling Restaurant,Pizza Place


## Cluster Neighborhood

In [59]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 3, 3, 3, 3, 2, 3, 3, 3, 0], dtype=int32)

In [70]:
# add clustering labels
#neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_tor

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head()

0     1
1     3
2     3
3     3
4     3
     ..
69    3
70    3
71    3
72    3
73    3
Name: Cluster Labels, Length: 74, dtype: int32

In [63]:
from geopy.geocoders import Nominatim
address = 'Toronto, CN'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6425012, -79.3871153.


In [81]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]
toronto_merged['Cluster Labels']=toronto_merged['Cluster Labels'].fillna(0).astype(int)

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters



## Examine clusters

In [82]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
34,Central Toronto,0,,,,,,,,,,
36,Central Toronto,0,Park,French Restaurant,Women's Store,Dumpling Restaurant,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
37,Central Toronto,0,Park,French Restaurant,Women's Store,Dumpling Restaurant,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
77,East Toronto,0,,,,,,,,,,


In [83]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
40,Central Toronto,1,Playground,Gym Pool,Park,Dumpling Restaurant,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
41,Central Toronto,1,Playground,Gym Pool,Park,Dumpling Restaurant,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
52,Central Toronto,1,Playground,Gym,Trail,Donut Shop,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
53,Central Toronto,1,Playground,Gym,Trail,Donut Shop,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
69,Downtown Toronto,1,Tennis Court,Playground,Park,Bike Trail,Shop & Service,Dumpling Restaurant,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant


In [84]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
57,Central Toronto,2,Light Rail Station,Coffee Shop,Liquor Store,Supermarket,Women's Store,Electronics Store,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
58,Central Toronto,2,Light Rail Station,Coffee Shop,Liquor Store,Supermarket,Women's Store,Electronics Store,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
59,Central Toronto,2,Light Rail Station,Coffee Shop,Liquor Store,Supermarket,Women's Store,Electronics Store,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
60,Central Toronto,2,Light Rail Station,Coffee Shop,Liquor Store,Supermarket,Women's Store,Electronics Store,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm
61,Central Toronto,2,Light Rail Station,Coffee Shop,Liquor Store,Supermarket,Women's Store,Electronics Store,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm


In [85]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Downtown Toronto,3,Coffee Shop,Breakfast Spot,Theater,Pub,Distribution Center,Restaurant,Electronics Store,Event Space,Spa,Food Truck
2,Downtown Toronto,3,Coffee Shop,Sandwich Place,Bank,Theater,Café,Falafel Restaurant,Fried Chicken Joint,Burrito Place,Italian Restaurant,Gastropub
3,Downtown Toronto,3,Coffee Shop,Sandwich Place,Bank,Theater,Café,Falafel Restaurant,Fried Chicken Joint,Burrito Place,Italian Restaurant,Gastropub
4,Downtown Toronto,3,Coffee Shop,Clothing Store,Café,Japanese Restaurant,Cosmetics Shop,Middle Eastern Restaurant,Ramen Restaurant,Theater,Italian Restaurant,Movie Theater
5,Downtown Toronto,3,Coffee Shop,Clothing Store,Café,Japanese Restaurant,Cosmetics Shop,Middle Eastern Restaurant,Ramen Restaurant,Theater,Italian Restaurant,Movie Theater
...,...,...,...,...,...,...,...,...,...,...,...,...
72,Downtown Toronto,3,Coffee Shop,Café,Restaurant,Pizza Place,Park,Chinese Restaurant,Italian Restaurant,Pub,Market,Bakery
73,Downtown Toronto,3,Coffee Shop,Hotel,Café,Restaurant,American Restaurant,Gym,Seafood Restaurant,Asian Restaurant,Japanese Restaurant,Deli / Bodega
74,Downtown Toronto,3,Coffee Shop,Hotel,Café,Restaurant,American Restaurant,Gym,Seafood Restaurant,Asian Restaurant,Japanese Restaurant,Deli / Bodega
75,Downtown Toronto,3,Coffee Shop,Gay Bar,Japanese Restaurant,Restaurant,Café,Sushi Restaurant,Mediterranean Restaurant,Dance Studio,Pub,Men's Store


In [86]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
38,West Toronto,4,Convenience Store,Park,Residential Building (Apartment / Condo),Sandwich Place,Donut Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
39,West Toronto,4,Convenience Store,Park,Residential Building (Apartment / Condo),Sandwich Place,Donut Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Event Space
