<h3> Cluster Neighborhoods in Toronto City </h3>

**First, the code below scrapes Wiki page to obtain data**

In [2]:
###scraping data 
##https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M
### Three columns: PostalCode, Borough, Neighborhood

#ignore cells that Borough is Not Assigned
#cell with Borough, but not assigned Neighborhood -- Neigh==Borough
# import k-means from clustering stage

from sklearn.cluster import KMeans
from bs4 import BeautifulSoup
import requests
import pandas as pd

import numpy as np
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium # map rendering library

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [1]:


page_link="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

page_response = requests.get(page_link, timeout=5)
soup = BeautifulSoup(page_response.content, "html.parser")

My_table = soup.find("table",{"class":"wikitable sortable"})


links=My_table.find_all('tr')

PostalCode=[]
Borough=[]
Neighborhood=[]

index=1;
for row in links:
    if index!=1:
        PostalCode.append(row.find('td').text)
        Borough.append(row.find('td').find_next('td').text)
        Neighborhood.append(row.find('td').find_next('td').find_next("td").text.replace('\n',' '))
    else:
        index+=1

**Build Dataframe**

In [17]:


df=pd.DataFrame()
df["PostalCode"]=PostalCode
df["Borough"]=Borough
df["Neighborhood"]=Neighborhood

### drop rows with Borough N.A.
df=df[df["Borough"] != "Not assigned"]

### Neighborhood == Borough if unasigned
index=df[df["Neighborhood"] == "Not assigned "].index
df["Neighborhood"][index]=df["Borough"][index]

### Combine rows with duplicate Postal Codes

output_series =df.groupby(["PostalCode", "Borough"])["Neighborhood"].apply(', '.join).reset_index()
df_o=pd.DataFrame(output_series)
df_o

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge , Malvern"
1,M1C,Scarborough,"Highland Creek , Rouge Hill , Port Union"
2,M1E,Scarborough,"Guildwood , Morningside , West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park , Ionview , Kennedy Park"
7,M1L,Scarborough,"Clairlea , Golden Mile , Oakridge"
8,M1M,Scarborough,"Cliffcrest , Cliffside , Scarborough Village W..."
9,M1N,Scarborough,"Birch Cliff , Cliffside West"


In [40]:
###NUMBER OF ROWS IN THE DATAFRAME

df_o.shape

(103, 3)

**read coordinate CSV and update dataframe **

In [18]:
geodata = pd.read_csv('Geospatial_Coordinates.csv')
#display(geodata)
df_o["Latitude"]= geodata.iloc[:,1].values
df_o["Longitude"]= geodata.iloc[:,2].values
df_o

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge , Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek , Rouge Hill , Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park , Ionview , Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea , Golden Mile , Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest , Cliffside , Scarborough Village W...",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff , Cliffside West",43.692657,-79.264848


**Last part: Data visualization: Explore and Cluster Neighborhoods**

In [19]:
### CREATE FOLIUM MAP OF TORONTO WITH MARKERS FOR ALL THE BOROUGHS 

address = 'Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

#add markers to map
for lat, lng, label in zip(df_o['Latitude'], df_o['Longitude'], df_o['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

*Pull in Foursquare credentials*

In [47]:
CLIENT_ID = 'ORY0V4OVKEXCJE5VVHVKGNDUUC5IDRP2SWJS3025PIUCQHLW' # your Foursquare ID
CLIENT_SECRET = 'V10OEE24NR5QGJWYG3U3LXV5LUOGNA4ZQGTZSXIHHHKFRV0Y' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

### DEFINE FIRST BOROUGH TO SEARCH THROUGH

#scarborough_data = df_o[df_o['Borough'] == 'Scarborough'].reset_index(drop=True)

scarborough_data = df_o.reset_index(drop=True)

display(scarborough_data)

Your credentails:
CLIENT_ID: ORY0V4OVKEXCJE5VVHVKGNDUUC5IDRP2SWJS3025PIUCQHLW
CLIENT_SECRET:V10OEE24NR5QGJWYG3U3LXV5LUOGNA4ZQGTZSXIHHHKFRV0Y


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge , Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek , Rouge Hill , Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park , Ionview , Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea , Golden Mile , Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest , Cliffside , Scarborough Village W...",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff , Cliffside West",43.692657,-79.264848


In [7]:
### Define function to search for venues

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [48]:
LIMIT = 100
radius = 500

scarborough_venues = getNearbyVenues(names=scarborough_data['Neighborhood'],
                                   latitudes=scarborough_data['Latitude'],
                                   longitudes=scarborough_data['Longitude']
                                  )

Rouge , Malvern 
Highland Creek , Rouge Hill , Port Union 
Guildwood , Morningside , West Hill 
Woburn 
Cedarbrae 
Scarborough Village 
East Birchmount Park , Ionview , Kennedy Park 
Clairlea , Golden Mile , Oakridge 
Cliffcrest , Cliffside , Scarborough Village West 
Birch Cliff , Cliffside West 
Dorset Park , Scarborough Town Centre , Wexford Heights 
Maryvale , Wexford 
Agincourt 
Clarks Corners , Sullivan , Tam O'Shanter 
Agincourt North , L'Amoreaux East , Milliken , Steeles East 
L'Amoreaux West 
Upper Rouge 
Hillcrest Village 
Fairview , Henry Farm , Oriole 
Bayview Village 
Silver Hills , York Mills 
Newtonbrook , Willowdale 
Willowdale South 
York Mills West 
Willowdale West 
Parkwoods 
Don Mills North 
Flemingdon Park , Don Mills South 
Bathurst Manor , Downsview North , Wilson Heights 
Northwood Park , York University 
CFB Toronto , Downsview East 
Downsview West 
Downsview Central 
Downsview Northwest 
Victoria Village 
Woodbine Gardens , Parkview Hill 
Woodbine Heights 
Th

**Observations**

In [49]:
print(scarborough_venues.shape)
scarborough_venues.head()

### HOW MANY VENUES PER BOROUGH

scarborough_venues.groupby("Neighborhood").count()


(2254, 7)


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide , King , Richmond",100,100,100,100,100,100
Agincourt,4,4,4,4,4,4
"Agincourt North , L'Amoreaux East , Milliken , Steeles East",3,3,3,3,3,3
"Albion Gardens , Beaumond Heights , Humbergate , Jamestown , Mount Olive , Silverstone , South Steeles , Thistletown",9,9,9,9,9,9
"Alderwood , Long Branch",10,10,10,10,10,10
"Bathurst Manor , Downsview North , Wilson Heights",19,19,19,19,19,19
Bayview Village,4,4,4,4,4,4
"Bedford Park , Lawrence Manor East",22,22,22,22,22,22
Berczy Park,55,55,55,55,55,55
"Birch Cliff , Cliffside West",4,4,4,4,4,4


In [50]:
## How many unique catagories can be curated from all returned values
print('There are {} uniques categories.'.format(len(scarborough_venues['Venue Category'].unique())))

There are 277 uniques categories.


**Analyze Each Neighborhood**

In [51]:
# one hot encoding
scarborough_onehot = pd.get_dummies(scarborough_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
scarborough_onehot['Neighborhood'] = scarborough_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [scarborough_onehot.columns[-1]] + list(scarborough_onehot.columns[:-1])
scarborough_onehot = scarborough_onehot[fixed_columns]

#display(scarborough_onehot)

scarborough_grouped = scarborough_onehot.groupby('Neighborhood').mean().reset_index()
#scarborough_grouped

In [52]:
### PRINT EACH NEIGHBORHOOD WITH TOP 5 MOST COMMON VENUES

num_top_venues = 5

for hood in scarborough_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = scarborough_grouped[scarborough_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide , King , Richmond ----
             venue  freq
0      Coffee Shop  0.08
1             Café  0.05
2              Bar  0.04
3       Steakhouse  0.04
4  Thai Restaurant  0.03


----Agincourt ----
            venue  freq
0  Clothing Store  0.25
1  Breakfast Spot  0.25
2          Lounge  0.25
3    Skating Rink  0.25
4     Yoga Studio  0.00


----Agincourt North , L'Amoreaux East , Milliken , Steeles East ----
                 venue  freq
0                 Park  0.67
1           Playground  0.33
2          Yoga Studio  0.00
3   Mexican Restaurant  0.00
4  Monument / Landmark  0.00


----Albion Gardens , Beaumond Heights , Humbergate , Jamestown , Mount Olive , Silverstone , South Steeles , Thistletown ----
                  venue  freq
0         Grocery Store  0.22
1           Pizza Place  0.11
2              Pharmacy  0.11
3  Fast Food Restaurant  0.11
4        Sandwich Place  0.11


----Alderwood , Long Branch ----
            venue  freq
0     Pizza Place   0.2
1        Phar

           venue  freq
0  Grocery Store  0.33
1  Shopping Mall  0.17
2           Bank  0.17
3           Park  0.17
4          Hotel  0.17


----East Birchmount Park , Ionview , Kennedy Park ----
              venue  freq
0    Discount Store  0.33
1       Coffee Shop  0.17
2        Hobby Shop  0.17
3  Department Store  0.17
4       Bus Station  0.17


----East Toronto ----
                       venue  freq
0                       Park  0.67
1          Convenience Store  0.33
2                Yoga Studio  0.00
3  Middle Eastern Restaurant  0.00
4                      Motel  0.00


----Emery , Humberlea ----
                             venue  freq
0                   Baseball Field   1.0
1                      Yoga Studio   0.0
2        Middle Eastern Restaurant   0.0
3              Monument / Landmark   0.0
4  Molecular Gastronomy Restaurant   0.0


----Fairview , Henry Farm , Oriole ----
                  venue  freq
0        Clothing Store  0.15
1  Fast Food Restaurant  0.08
2       

                venue  freq
0                Café  0.09
1    Sushi Restaurant  0.09
2         Coffee Shop  0.06
3  Italian Restaurant  0.06
4                 Bar  0.03


----Ryerson , Garden District ----
                       venue  freq
0             Clothing Store  0.08
1                Coffee Shop  0.08
2  Middle Eastern Restaurant  0.03
3                       Café  0.03
4             Cosmetics Shop  0.03


----Scarborough Village ----
                       venue  freq
0                 Playground   0.5
1          Convenience Store   0.5
2                Yoga Studio   0.0
3  Middle Eastern Restaurant   0.0
4                      Motel   0.0


----St. James Town ----
                venue  freq
0         Coffee Shop  0.07
1                Café  0.06
2          Restaurant  0.05
3               Hotel  0.05
4  Italian Restaurant  0.04


----Stn A PO Boxes 25 The Esplanade ----
                venue  freq
0         Coffee Shop  0.10
1                Café  0.04
2          Restaurant  

**LETS MAKE A PANDAS DATAFRAME**

First, let's write a function to sort the venues in descending order.

In [53]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

And, cluster neighborhoods: Run k-means to cluster neighborhoods into five sorts

In [115]:

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scarborough_grouped['Neighborhood']

for ind in np.arange(scarborough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarborough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()


# set number of clusters
kclusters = 6

### drop the neighborhood column which cannot be clustered
scarborough_grouped_clustering = scarborough_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scarborough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

scarborough_merged = scarborough_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
scarborough_merged = scarborough_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
scarborough_merged.dropna(inplace=True)

display(scarborough_merged)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge , Malvern",43.806686,-79.194353,2.0,Fast Food Restaurant,Women's Store,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store
1,M1C,Scarborough,"Highland Creek , Rouge Hill , Port Union",43.784535,-79.160497,0.0,Bar,Women's Store,Electronics Store,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Empanada Restaurant,Discount Store
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.763573,-79.188711,2.0,Rental Car Location,Intersection,Pizza Place,Spa,Breakfast Spot,Electronics Store,Mexican Restaurant,Medical Center,Women's Store,Drugstore
3,M1G,Scarborough,Woburn,43.770992,-79.216917,2.0,Coffee Shop,Korean Restaurant,Indian Restaurant,Electronics Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,2.0,Hakka Restaurant,Thai Restaurant,Athletics & Sports,Caribbean Restaurant,Bakery,Bank,Fried Chicken Joint,Empanada Restaurant,Electronics Store,Eastern European Restaurant
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,1.0,Playground,Convenience Store,Women's Store,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
6,M1K,Scarborough,"East Birchmount Park , Ionview , Kennedy Park",43.727929,-79.262029,2.0,Discount Store,Department Store,Hobby Shop,Bus Station,Coffee Shop,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore
7,M1L,Scarborough,"Clairlea , Golden Mile , Oakridge",43.711112,-79.284577,2.0,Bus Line,Bakery,Soccer Field,Intersection,Bus Station,Metro Station,Fast Food Restaurant,Park,Gift Shop,General Travel
8,M1M,Scarborough,"Cliffcrest , Cliffside , Scarborough Village W...",43.716316,-79.239476,2.0,Motel,American Restaurant,Women's Store,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant
9,M1N,Scarborough,"Birch Cliff , Cliffside West",43.692657,-79.264848,2.0,College Stadium,Skating Rink,General Entertainment,Café,Event Space,Falafel Restaurant,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Diner


**Let's visualize the clusters~**

In [116]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(scarborough_merged['Latitude'], scarborough_merged['Longitude'], scarborough_merged['Neighborhood'], scarborough_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Now that we have clustered the neighborhoods, let's determine the discriminating venue categories that distinguish each cluster. This way, we can define some name to each cluster.

***Cluster 0***  Bars first!

In [117]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 0, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,0.0,Bar,Women's Store,Electronics Store,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Empanada Restaurant,Discount Store


***Cluster 1***  NOT SURE HOW TO TITLE IT

In [118]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 1, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Scarborough,1.0,Playground,Convenience Store,Women's Store,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
80,York,1.0,Restaurant,Discount Store,Check Cashing Service,Sandwich Place,Convenience Store,Drugstore,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant


***Cluster 2***  Lots of different kinds of restaurants!

In [119]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 2, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,2.0,Fast Food Restaurant,Women's Store,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store
2,Scarborough,2.0,Rental Car Location,Intersection,Pizza Place,Spa,Breakfast Spot,Electronics Store,Mexican Restaurant,Medical Center,Women's Store,Drugstore
3,Scarborough,2.0,Coffee Shop,Korean Restaurant,Indian Restaurant,Electronics Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant
4,Scarborough,2.0,Hakka Restaurant,Thai Restaurant,Athletics & Sports,Caribbean Restaurant,Bakery,Bank,Fried Chicken Joint,Empanada Restaurant,Electronics Store,Eastern European Restaurant
6,Scarborough,2.0,Discount Store,Department Store,Hobby Shop,Bus Station,Coffee Shop,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore
7,Scarborough,2.0,Bus Line,Bakery,Soccer Field,Intersection,Bus Station,Metro Station,Fast Food Restaurant,Park,Gift Shop,General Travel
8,Scarborough,2.0,Motel,American Restaurant,Women's Store,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant
9,Scarborough,2.0,College Stadium,Skating Rink,General Entertainment,Café,Event Space,Falafel Restaurant,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Diner
10,Scarborough,2.0,Indian Restaurant,Chinese Restaurant,Light Rail Station,Latin American Restaurant,Pet Store,Vietnamese Restaurant,Concert Hall,Discount Store,Falafel Restaurant,Event Space
11,Scarborough,2.0,Smoke Shop,Breakfast Spot,Bakery,Middle Eastern Restaurant,Women's Store,Eastern European Restaurant,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant


***Cluster 3***  Isolated... 

In [120]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 3, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
96,North York,3.0,Empanada Restaurant,Women's Store,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store


***Cluster 4***  PARK FIRST!

In [121]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 4, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Scarborough,4.0,Park,Playground,Women's Store,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
23,North York,4.0,Park,Bank,Convenience Store,Women's Store,Electronics Store,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant
25,North York,4.0,Fast Food Restaurant,Food & Drink Shop,Park,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant
30,North York,4.0,Park,Airport,Women's Store,Electronics Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant
40,East York,4.0,Park,Convenience Store,Women's Store,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
44,Central Toronto,4.0,Park,Swim School,Bus Line,Women's Store,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
48,Central Toronto,4.0,Park,Tennis Court,Women's Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
50,Downtown Toronto,4.0,Park,Trail,Playground,Building,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore
74,York,4.0,Park,Women's Store,Market,Fast Food Restaurant,Concert Hall,Construction & Landscaping,Farmers Market,Comic Shop,Falafel Restaurant,Event Space
90,Etobicoke,4.0,River,Park,Pool,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore


***Cluster 5***  BASEBALL FIELD FIRST!

In [122]:
scarborough_merged.loc[scarborough_merged['Cluster Labels'] == 5, scarborough_merged.columns[[1] + list(range(5, scarborough_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
91,Etobicoke,5.0,Baseball Field,Pool,Women's Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant
97,North York,5.0,Baseball Field,Women's Store,Electronics Store,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Eastern European Restaurant,Empanada Restaurant,Discount Store


**Super interesting findings**
Two communities (Etobicoke, North York) belong in Cluster 5 (orange) which have lots of baseball fields! 

Lots of communities have mostly parks!