In [43]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

In [55]:
conda install -c conda-forge geopy --y

Collecting package metadata (current_repodata.json): done
Solving environment: | 
  - anaconda/osx-64::conda-4.8.1-py37_0
  - defaults/osx-64::conda-4.8.1-py37done

## Package Plan ##

  environment location: /Users/draganastajic/anaconda3

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.11.28         |           py37_0         148 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         240 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-1.20.0-py_0

The following packages will be UPDATED:

Let's use BeautifulSoup to scrape the table from Wikipedia and turn it into a dataframe with three columns named PostalCode, Borough, and Neighborhood.

In [44]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
r = requests.get(url)
html = r.text
soup = BeautifulSoup(html)
table = soup.find('table', {"class": "wikitable sortable"})
rows = table.find_all('tr')
data = []
for row in rows[1:]:
    cols = row.find_all('td')
    cols = [ele.text.strip() for ele in cols]
    data.append([ele for ele in cols if ele])

df = pd.DataFrame(data, columns=['PostalCode', 'Borough', 'Neighborhood'])
df.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
8,M8A,Not assigned,Not assigned
9,M9A,Queen's Park,Not assigned


Let's filter out all rows with a borough that is not assigned

In [45]:
df.drop(df.index[df['Borough'] == "Not assigned"], inplace = True)

Let's merge rows with the same postal code by using the groupby function. To avoid having duplicate values for borough (i.e. ScarboroughScarborough), we aggregate this column by 'first'. We want to join and separate neighborhoods by commas, so we use .join with ', '. We also need to reset row indices and reindex everything.


In [46]:
newdf=(df.groupby('PostalCode')
   .agg({'Borough' : 'first', 'Neighborhood' : ', '.join})
   .reset_index()
   .reindex(columns=df.columns))
newdf.tail(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
93,M9A,Queen's Park,Not assigned
94,M9B,Etobicoke,"Cloverdale, Islington, Martin Grove, Princess ..."
95,M9C,Etobicoke,"Bloordale Gardens, Eringate, Markland Wood, Ol..."
96,M9L,North York,Humber Summit
97,M9M,North York,"Emery, Humberlea"
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."
102,M9W,Etobicoke,Northwest


Now, let's change the values of 'not assigned' neighborhoods to the name of their borough

In [47]:
newdf['Neighborhood'] = np.where(newdf['Neighborhood'] == "Not assigned", newdf['Borough'], newdf['Neighborhood'])
newdf.tail(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
93,M9A,Queen's Park,Queen's Park
94,M9B,Etobicoke,"Cloverdale, Islington, Martin Grove, Princess ..."
95,M9C,Etobicoke,"Bloordale Gardens, Eringate, Markland Wood, Ol..."
96,M9L,North York,Humber Summit
97,M9M,North York,"Emery, Humberlea"
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."
102,M9W,Etobicoke,Northwest


Finally, let's print the number of rows/columns of the dataframe:

In [48]:
newdf.shape

(103, 3)

---

# *Part 2*

I downloaded the Geospatial_Coordinates.csv and read it to a dataframe I called geodf

In [49]:
geodf = pd.read_csv ("/Users/draganastajic/Downloads/Geospatial_Coordinates.csv")

To make merging of the dataframes easier, we can rename the Postal Code column in geodf

In [50]:
geodf.rename(columns={'Postal Code':'PostalCode'}, inplace=True)
geodf.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


Now that we have a column with the same name in both datasets, we can merge them, which will add latitude and longitude columns to newdf rows that have matching PostalCode values with geodf

In [51]:
merged = pd.merge(left=newdf,right=geodf)
merged.head(10)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [52]:
merged.shape

(103, 5)

---

# *Part 3*

Let's create a map of Toronto with markers based on our data

In [54]:
import folium
latitude = 43.6532
longitude = -79.3832
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(merged['Latitude'], merged['Longitude'], merged['Borough'], merged['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

For part 3, I am going to observe only the Central Toronto borough and run the analysis we did to the New York City data

In [13]:
toronto_data = merged[merged['Borough'] == 'Central Toronto'].reset_index(drop=True)
toronto_data.head()
# create map of Manhattan using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Foursquare credentials

In [58]:
CLIENT_ID = 'NELQTOMDEJOL2U0H201N2PHYOCHOGEEJCYK4HPLO40IU0OQ2' # your Foursquare ID
CLIENT_SECRET = 'GEBUF2CMEDMZTXJWWPQA442Y5LPBXQDPAMQ2PLZ0HAHVEJKJ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100
radius = 500

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: NELQTOMDEJOL2U0H201N2PHYOCHOGEEJCYK4HPLO40IU0OQ2
CLIENT_SECRET:GEBUF2CMEDMZTXJWWPQA442Y5LPBXQDPAMQ2PLZ0HAHVEJKJ


In [59]:
toronto_data.loc[0, 'Neighborhood']
merged_latitude = toronto_data.loc[0, 'Latitude'] # neighborhood latitude value
merged_longitude = toronto_data.loc[0, 'Longitude'] # neighborhood longitude value

merged_name = toronto_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(merged_name, 
                                                               merged_latitude, 
                                                               merged_longitude))

Latitude and longitude values of Lawrence Park are 43.7280205, -79.3887901.


In [60]:
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    merged_latitude, 
    merged_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=NELQTOMDEJOL2U0H201N2PHYOCHOGEEJCYK4HPLO40IU0OQ2&client_secret=GEBUF2CMEDMZTXJWWPQA442Y5LPBXQDPAMQ2PLZ0HAHVEJKJ&v=20180605&ll=43.7280205,-79.3887901&radius=500&limit=100'

In [67]:
import io
from pandas.io.json import json_normalize

In [69]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [70]:

results = requests.get(url).json()
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Lawrence Park Ravine,Park,43.726963,-79.394382
1,Booty Camp Fitness,Gym / Fitness Center,43.728051,-79.387853
2,Zodiac Swim School,Swim School,43.728532,-79.38286
3,TTC Bus #162 - Lawrence-Donway,Bus Line,43.728026,-79.382805


In [22]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [71]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville


In [72]:
print(toronto_venues.shape)
toronto_venues.head()

(118, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Lawrence Park,43.72802,-79.38879,Lawrence Park Ravine,43.726963,-79.394382,Park
1,Lawrence Park,43.72802,-79.38879,Booty Camp Fitness,43.728051,-79.387853,Gym / Fitness Center
2,Lawrence Park,43.72802,-79.38879,Zodiac Swim School,43.728532,-79.38286,Swim School
3,Lawrence Park,43.72802,-79.38879,TTC Bus #162 - Lawrence-Donway,43.728026,-79.382805,Bus Line
4,Davisville North,43.712751,-79.390197,Summerhill Market North,43.715499,-79.392881,Food & Drink Shop


In [73]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Davisville,34,34,34,34,34,34
Davisville North,8,8,8,8,8,8
"Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West",15,15,15,15,15,15
"Forest Hill North, Forest Hill West",5,5,5,5,5,5
Lawrence Park,4,4,4,4,4,4
"Moore Park, Summerhill East",3,3,3,3,3,3
North Toronto West,24,24,24,24,24,24
Roselawn,3,3,3,3,3,3
"The Annex, North Midtown, Yorkville",22,22,22,22,22,22


In [74]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 64 uniques categories.


Analyze each neighborhood:

In [75]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Bookstore,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,...,Sports Bar,Supermarket,Sushi Restaurant,Swim School,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Lawrence Park,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,Lawrence Park,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,Davisville North,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [76]:
toronto_onehot.shape

(118, 65)

In [77]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,American Restaurant,BBQ Joint,Bagel Shop,Bookstore,Breakfast Spot,Brewery,Burger Joint,Bus Line,Café,...,Sports Bar,Supermarket,Sushi Restaurant,Swim School,Thai Restaurant,Toy / Game Store,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoga Studio
0,Davisville,0.0,0.0,0.0,0.029412,0.0,0.029412,0.0,0.0,0.058824,...,0.0,0.0,0.058824,0.0,0.029412,0.029412,0.0,0.0,0.0,0.0
1,Davisville North,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",0.066667,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,...,0.066667,0.066667,0.066667,0.0,0.0,0.0,0.0,0.0,0.066667,0.0
3,"Forest Hill North, Forest Hill West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,...,0.0,0.0,0.2,0.0,0.0,0.0,0.2,0.0,0.0,0.0
4,Lawrence Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
5,"Moore Park, Summerhill East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,North Toronto West,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.041667,...,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.041667
7,Roselawn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"The Annex, North Midtown, Yorkville",0.045455,0.045455,0.0,0.0,0.0,0.0,0.045455,0.0,0.136364,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0


In [78]:
toronto_grouped.shape

(9, 65)

In [79]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Davisville----
            venue  freq
0  Sandwich Place  0.09
1     Pizza Place  0.09
2    Dessert Shop  0.09
3             Gym  0.06
4            Café  0.06


----Davisville North----
            venue  freq
0  Sandwich Place  0.12
1           Hotel  0.12
2    Dance Studio  0.12
3             Gym  0.12
4  Clothing Store  0.12


----Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West----
                 venue  freq
0                  Pub  0.13
1          Coffee Shop  0.13
2  American Restaurant  0.07
3          Supermarket  0.07
4         Liquor Store  0.07


----Forest Hill North, Forest Hill West----
              venue  freq
0     Jewelry Store   0.2
1              Park   0.2
2             Trail   0.2
3          Bus Line   0.2
4  Sushi Restaurant   0.2


----Lawrence Park----
                   venue  freq
0            Swim School  0.25
1   Gym / Fitness Center  0.25
2               Bus Line  0.25
3                   Park  0.25
4  Portuguese Restaurant  0.00


--

In [80]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [81]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
merged_venues_sorted = pd.DataFrame(columns=columns)
merged_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    merged_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

merged_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Davisville,Dessert Shop,Pizza Place,Sandwich Place,Coffee Shop,Italian Restaurant,Sushi Restaurant,Café,Gym,Portuguese Restaurant,Greek Restaurant
1,Davisville North,Food & Drink Shop,Clothing Store,Hotel,Dance Studio,Breakfast Spot,Gym,Sandwich Place,Park,Fried Chicken Joint,Furniture / Home Store
2,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",Pub,Coffee Shop,American Restaurant,Sports Bar,Restaurant,Fried Chicken Joint,Liquor Store,Light Rail Station,Pizza Place,Supermarket
3,"Forest Hill North, Forest Hill West",Trail,Park,Jewelry Store,Sushi Restaurant,Bus Line,Yoga Studio,Gourmet Shop,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store
4,Lawrence Park,Gym / Fitness Center,Swim School,Park,Bus Line,Yoga Studio,Gourmet Shop,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store,Garden


Let's cluster neighborhoods into 5 clusters and run k-means clustering

In [83]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 2, 3, 4, 0, 2, 1, 2], dtype=int32)

In [84]:
# add clustering labels
merged_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(merged_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() 

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,4,Gym / Fitness Center,Swim School,Park,Bus Line,Yoga Studio,Gourmet Shop,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store,Garden
1,M4P,Central Toronto,Davisville North,43.712751,-79.390197,2,Food & Drink Shop,Clothing Store,Hotel,Dance Studio,Breakfast Spot,Gym,Sandwich Place,Park,Fried Chicken Joint,Furniture / Home Store
2,M4R,Central Toronto,North Toronto West,43.715383,-79.405678,2,Coffee Shop,Sporting Goods Shop,Clothing Store,Salon / Barbershop,Grocery Store,Furniture / Home Store,Metro Station,Mexican Restaurant,Park,Diner
3,M4S,Central Toronto,Davisville,43.704324,-79.38879,2,Dessert Shop,Pizza Place,Sandwich Place,Coffee Shop,Italian Restaurant,Sushi Restaurant,Café,Gym,Portuguese Restaurant,Greek Restaurant
4,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316,0,Restaurant,Playground,Gym,Yoga Studio,Garden,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store,Gourmet Shop


In [88]:
# create map
import matplotlib.cm as cm
import matplotlib.colors as colors
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Examine clusters

In [89]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,0,Restaurant,Playground,Gym,Yoga Studio,Garden,Flower Shop,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store,Gourmet Shop


In [90]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Central Toronto,1,Home Service,Garden,Pool,Hotel,History Museum,Health & Beauty Service,Gym / Fitness Center,Gym,Grocery Store,Greek Restaurant


In [91]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Central Toronto,2,Food & Drink Shop,Clothing Store,Hotel,Dance Studio,Breakfast Spot,Gym,Sandwich Place,Park,Fried Chicken Joint,Furniture / Home Store
2,Central Toronto,2,Coffee Shop,Sporting Goods Shop,Clothing Store,Salon / Barbershop,Grocery Store,Furniture / Home Store,Metro Station,Mexican Restaurant,Park,Diner
3,Central Toronto,2,Dessert Shop,Pizza Place,Sandwich Place,Coffee Shop,Italian Restaurant,Sushi Restaurant,Café,Gym,Portuguese Restaurant,Greek Restaurant
5,Central Toronto,2,Pub,Coffee Shop,American Restaurant,Sports Bar,Restaurant,Fried Chicken Joint,Liquor Store,Light Rail Station,Pizza Place,Supermarket
8,Central Toronto,2,Café,Sandwich Place,Coffee Shop,Indian Restaurant,Pub,BBQ Joint,Burger Joint,Cosmetics Shop,Flower Shop,History Museum


In [92]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,Central Toronto,3,Trail,Park,Jewelry Store,Sushi Restaurant,Bus Line,Yoga Studio,Gourmet Shop,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store


In [93]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,4,Gym / Fitness Center,Swim School,Park,Bus Line,Yoga Studio,Gourmet Shop,Food & Drink Shop,Fried Chicken Joint,Furniture / Home Store,Garden
