In [1]:
import pandas as pd
import numpy as np
import requests
import json
from bs4 import BeautifulSoup
import urllib.request
import itertools
 
from geopy.geocoders import Nominatim
print("Packages ready!")

Packages ready!


# 1. Get the data from Wikipedia and format it to a Dataframe

The Postalcode data on Wikipedia has changed during making the course, so I used the old table which contains the old formatting of the course. But the content in the table changed a bit to the example of the course.

In [2]:
# Get the wikipedia page with Toronto postal codes
url = "https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=945633050"
response = requests.get(url)
response

<Response [200]>

In [3]:
soup = BeautifulSoup(response.text, "lxml")

## Get the data out of the table and convert the data in the right format to a Dataframe. 

Initialize variables

In [14]:
df = {}
df["Postcode"] = []
df["Borough"] = []
df["Neighborhood"] = []

Get the right tags from the Wikipedia page

In [7]:
tr = soup.find("tbody")
data = tr.find_all("tr")

Create Dataframe with Postcode, Borough and Neighborhood

In [8]:
for row in data:
    cols=row.find_all('td')
    cols=[x.text.strip() for x in cols]
    if not cols :
        continue 
    df["Postcode"].extend([cols[0]])
    df["Borough"].append(cols[1])
    df["Neighborhood"].append(cols[2])
p = pd.DataFrame({"Postcode": df["Postcode"], "Borough":df["Borough"], "Neighborhood": df["Neighborhood"]})
p.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


Clean the resulting Dataframe. Remove rows with "not assigned"

In [16]:
p["Borough"].replace("Not assigned", np.nan, inplace=True)
p.dropna(subset=["Borough"], axis=0, inplace=True)
p.reset_index(drop=True)
p.head()

Unnamed: 0,Postcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


Get rows with duplicate postcodes. Create Dataframe where duplicates are eliminated.

In [10]:
duplicateRowsDF = p[p.duplicated(['Postcode'], keep=False)]
elim_duplicate = p.drop_duplicates(subset="Postcode", keep=False, inplace=False)

Merge the duplicate rows. The rows will be combined into one row with the neighborhoods separated with a comma

In [11]:
merge_duplicate = duplicateRowsDF.groupby(["Postcode", "Borough"])["Neighborhood"].apply(','.join).reset_index()

Combine the Dataframe without the duplicates and the Dataframe with merged duplicates

In [17]:
mergedStuff = [elim_duplicate, merge_duplicate]
result = pd.concat(mergedStuff)
result.sort_values("Borough", ascending=True, inplace=True)
result.reset_index(drop=True, inplace=True)
print("The resulting dataframe for Question 1")
result.head()

The resulting dataframe for Question 1


Unnamed: 0,Postcode,Borough,Neighborhood
0,M4N,Central Toronto,Lawrence Park
1,M4R,Central Toronto,North Toronto West
2,M4P,Central Toronto,Davisville North
3,M5P,Central Toronto,"Forest Hill North,Forest Hill West"
4,M5N,Central Toronto,Roselawn


In [13]:
result.shape

(103, 3)

# 2. Get Latitude and Longitude, combine it with created Dataframe

In [18]:
import geocoder

### The geocoder package couldn't return valid data

In [None]:
lat_lng_coords = None
# loop until you get the coordinates
while(lat_lng_coords is None):
    print("next try")
    g = geocoder.google('{}, Toronto, Ontario'.format("M6A"))
    lat_lng_coords = g.latlng
    print(lat_lng_coords)

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]
print(latitude, longitude)

### Using of the provided csv file to get the latitude, longitude values

In [20]:
geo = pd.read_csv("Geospatial_Coordinates.csv")
geo.rename(columns={"Postal Code": "Postcode"}, inplace=True)
geo.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merge the Dataframe with postalcodes with the Dataframe of latitude, longitude

In [21]:
mergelatlon = pd.merge(result, geo, on=["Postcode"], how="inner")
mergelatlon.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
2,M4P,Central Toronto,Davisville North,43.712751,-79.390197
3,M5P,Central Toronto,"Forest Hill North,Forest Hill West",43.696948,-79.411307
4,M5N,Central Toronto,Roselawn,43.711695,-79.416936


# 3. Cluster and explore the neighborhoods in Toronto. Only the Boroughs with "Toronto" within will be examined

In [23]:
from sklearn.cluster import KMeans
import folium

### Get the Boroughs that contain "Toronto" in a new Dataframe

In [25]:
toronto_data = mergelatlon[mergelatlon['Borough'].str.contains("Toronto")].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
1,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
2,M4P,Central Toronto,Davisville North,43.712751,-79.390197
3,M5P,Central Toronto,"Forest Hill North,Forest Hill West",43.696948,-79.411307
4,M5N,Central Toronto,Roselawn,43.711695,-79.416936


### Get latitude, longitude of Toronto

In [26]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 43.6534817, -79.3839347.


In [27]:
# add markers to map
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## Set up Foursquare and retrieve data

### Get near venues for all latitude, longitude values in Toronto with Foursquare

In [29]:
def get_near_venues(names, latitude, longitude, radius=500, limit=100):
    
    venues_list = []
    for name, lat, lon in zip(names, latitude, longitude):
        print(name)
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION, 
        lat, 
        lon, 
        radius, 
        limit)
    
        results = requests.get(url).json()["response"]["groups"][0]["items"]
  
        venues_list.append([(name, 
                       lat,
                       lon,
                       v["venue"]["name"],
                       v["venue"]["location"]["lat"],
                       v["venue"]["location"]["lng"],
                       v["venue"]["categories"][0]["name"]) for v in results])
    
    data = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    data.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']

    return(data)

In [30]:
toronto_venue = get_near_venues(names=toronto_data["Neighborhood"], latitude=toronto_data["Latitude"], longitude=toronto_data["Longitude"])

Lawrence Park
North Toronto West
Davisville North
Forest Hill North,Forest Hill West
Roselawn
The Annex,North Midtown,Yorkville
Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West
Moore Park,Summerhill East
Davisville
CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara
Queen's Park
Rosedale
Harbord,University of Toronto
Chinatown,Grange Park,Kensington Market
St. James Town
First Canadian Place,Underground city
Adelaide,King,Richmond
Stn A PO Boxes 25 The Esplanade
Church and Wellesley
Commerce Court,Victoria Hotel
Ryerson,Garden District
Christie
Central Bay Street
Cabbagetown,St. James Town
Harbourfront East,Toronto Islands,Union Station
Berczy Park
Design Exchange,Toronto Dominion Centre
Harbourfront
Business Reply Mail Processing Centre 969 Eastern
Studio District
The Beaches West,India Bazaar
The Danforth West,Riverdale
The Beaches
Dovercourt Village,Dufferin
Little Portugal,Trinity
Parkdale,Roncesvalles
High Park,The Junc

In [31]:
toronto_venue.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Lawrence Park,43.72802,-79.38879,Lawrence Park Ravine,43.726963,-79.394382,Park
1,Lawrence Park,43.72802,-79.38879,Averax Group,43.727406,-79.383103,Construction & Landscaping
2,Lawrence Park,43.72802,-79.38879,Zodiac Swim School,43.728532,-79.38286,Swim School
3,Lawrence Park,43.72802,-79.38879,TTC Bus #162 - Lawrence-Donway,43.728026,-79.382805,Bus Line
4,North Toronto West,43.715383,-79.405678,Barreworks,43.71407,-79.400109,Yoga Studio


### One hot encode venues for Toronto, such that KMeans Clustering can be done

In [32]:
toronto_onehot = pd.get_dummies(toronto_venue['Venue Category'], prefix="", prefix_sep="")
toronto_onehot['Neighborhood2'] = toronto_venue['Neighborhood']
fixed = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot =  toronto_onehot[fixed]
toronto_onehot.head()

Unnamed: 0,Neighborhood2,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,North Toronto West,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


### Get the mean of the venues for all the Neighborhoods

In [33]:
grouped =toronto_onehot.groupby('Neighborhood2').mean().reset_index()
grouped.head()

Unnamed: 0,Neighborhood2,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,"Adelaide,King,Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,...,0.0,0.0,0.0,0.010638,0.0,0.0,0.0,0.0,0.010638,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton,Exhibition Place,Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556
4,"CN Tower,Bathurst Quay,Island airport,Harbourf...",0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Drop Neighborhood axis with categorical values

In [34]:
kmeans_data = grouped.drop('Neighborhood2', axis=1)
kmeans_data.head()

Unnamed: 0,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Women's Store,Yoga Studio
0,0.0,0.0,0.0,0.0,0.0,0.0,0.021277,0.0,0.0,0.010638,...,0.0,0.0,0.0,0.010638,0.0,0.0,0.0,0.0,0.010638,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017544,...,0.0,0.0,0.0,0.017544,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556
4,0.058824,0.058824,0.058824,0.117647,0.176471,0.117647,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Cluster with KMeans and 5 different cluster

In [35]:
kclusters = 5
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kmeans_data)
kmeans.labels_[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [36]:
grouped["Neighborhood2"]=grouped["Neighborhood2"].astype(str)

### Get the top5 venues for all Neighborhoods

In [None]:
num_top_venues = 5

for hood in grouped['Neighborhood2']:
    print("----"+hood+"----")
    temp = grouped[grouped['Neighborhood2'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

### Create Dataframe with top 10 venues for all Neighborhoods

In [38]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [39]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood2']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood2'] = grouped['Neighborhood2']

for ind in np.arange(grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood2,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Coffee Shop,Café,Restaurant,Gym,Deli / Bodega,Clothing Store,Thai Restaurant,Hotel,Concert Hall,Bakery
1,Berczy Park,Coffee Shop,Cocktail Bar,Café,Cheese Shop,Bakery,Beer Bar,Restaurant,Seafood Restaurant,Clothing Store,Beach
2,"Brockton,Exhibition Place,Parkdale Village",Café,Breakfast Spot,Coffee Shop,Nightclub,Burrito Place,Restaurant,Italian Restaurant,Stadium,Intersection,Bar
3,Business Reply Mail Processing Centre 969 Eastern,Yoga Studio,Pizza Place,Spa,Fast Food Restaurant,Auto Workshop,Farmers Market,Burrito Place,Smoke Shop,Recording Studio,Garden
4,"CN Tower,Bathurst Quay,Island airport,Harbourf...",Airport Service,Airport Lounge,Airport Terminal,Airport,Bar,Plane,Rental Car Location,Sculpture Garden,Boat or Ferry,Boutique


### Insert Cluster Labels

In [40]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood2'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,3,Park,Construction & Landscaping,Bus Line,Swim School,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
1,M4R,Central Toronto,North Toronto West,43.715383,-79.405678,0,Clothing Store,Coffee Shop,Yoga Studio,Rental Car Location,Restaurant,Sporting Goods Shop,Chinese Restaurant,Diner,Spa,Mexican Restaurant
2,M4P,Central Toronto,Davisville North,43.712751,-79.390197,0,Gym,Hotel,Breakfast Spot,Food & Drink Shop,Sandwich Place,Dog Run,Department Store,Park,Cosmetics Shop,Distribution Center
3,M5P,Central Toronto,"Forest Hill North,Forest Hill West",43.696948,-79.411307,3,Park,Jewelry Store,Trail,Bus Line,Sushi Restaurant,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
4,M5N,Central Toronto,Roselawn,43.711695,-79.416936,2,Garden,Yoga Studio,Department Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant


### Visualize the clusters with folium

In [41]:
import matplotlib.cm as cm
import matplotlib.colors as colors


In [42]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Analyze the resulting cluster

In [43]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Central Toronto,0,Clothing Store,Coffee Shop,Yoga Studio,Rental Car Location,Restaurant,Sporting Goods Shop,Chinese Restaurant,Diner,Spa,Mexican Restaurant
2,Central Toronto,0,Gym,Hotel,Breakfast Spot,Food & Drink Shop,Sandwich Place,Dog Run,Department Store,Park,Cosmetics Shop,Distribution Center
5,Central Toronto,0,Sandwich Place,Café,Coffee Shop,BBQ Joint,Donut Shop,Burger Joint,Middle Eastern Restaurant,Indian Restaurant,Pub,History Museum
6,Central Toronto,0,Coffee Shop,Pub,Health & Beauty Service,Liquor Store,Restaurant,Sports Bar,Bank,Bagel Shop,Supermarket,Fried Chicken Joint
8,Central Toronto,0,Dessert Shop,Pizza Place,Sandwich Place,Gym,Sushi Restaurant,Italian Restaurant,Café,Coffee Shop,Japanese Restaurant,American Restaurant
9,Downtown Toronto,0,Airport Service,Airport Lounge,Airport Terminal,Airport,Bar,Plane,Rental Car Location,Sculpture Garden,Boat or Ferry,Boutique
10,Downtown Toronto,0,Coffee Shop,Sushi Restaurant,Diner,Yoga Studio,Mexican Restaurant,Juice Bar,Japanese Restaurant,Italian Restaurant,Hobby Shop,Fried Chicken Joint
12,Downtown Toronto,0,Café,Bar,Italian Restaurant,Japanese Restaurant,Bookstore,Restaurant,Bakery,Yoga Studio,Pub,Beer Bar
13,Downtown Toronto,0,Café,Coffee Shop,Vietnamese Restaurant,Mexican Restaurant,Bakery,Dessert Shop,Vegetarian / Vegan Restaurant,Gaming Cafe,Bar,Juice Bar
14,Downtown Toronto,0,Coffee Shop,Café,Gastropub,American Restaurant,Cocktail Bar,Seafood Restaurant,Hotel,Italian Restaurant,Lingerie Store,Department Store


In [44]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Downtown Toronto,1,Park,Playground,Trail,Yoga Studio,Department Store,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop


In [45]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,2,Garden,Yoga Studio,Department Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant


In [46]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,3,Park,Construction & Landscaping,Bus Line,Swim School,Yoga Studio,Dessert Shop,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
3,Central Toronto,3,Park,Jewelry Store,Trail,Bus Line,Sushi Restaurant,Yoga Studio,Dessert Shop,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant


In [47]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Central Toronto,4,Park,Yoga Studio,Department Store,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Donut Shop,Doner Restaurant
