### PART ONE

In [1]:
import pandas as pd 
import numpy as np 
import requests 
from bs4 import BeautifulSoup
import re

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
g=requests.get(url)

In [3]:
soup = BeautifulSoup(g.text, "html5lib")
table = soup.find("tbody")
cell = table.find_all("p")
all_cell = []

for i in range(0,len(cell)): 
    try: 
        all_cell.append(cell[i].text)
    except (RuntimeError, TypeError, NameError):
        pass

In [4]:
all_cell[0:5]

['M1ANot assigned\n',
 'M2ANot assigned\n',
 'M3ANorth York(Parkwoods)\n',
 'M4ANorth York(Victoria Village)\n',
 'M5ADowntown Toronto(Regent Park / Harbourfront)\n']

In [5]:
df=pd.DataFrame(all_cell)

In [6]:
df=df.rename(columns={0:"overall"})
df["postalcode"]=df.overall.apply(lambda x: x[0:3])
df["neighborhood"]=df.overall.apply(lambda x:    str(re.findall(r"\(.*?\)" , x))  )
df["neighborhood"] =df.neighborhood.apply (lambda x: x.replace("/",","))
df["neighborhood"]=df.neighborhood.apply(lambda x:    re.sub(r"[^\w\s\,]", "", x))
df["borough"]=df.overall.apply(lambda x: x[3:])
df["borough"]=df.borough.apply(lambda x: x.split("(")[0])
df["borough"] =df.borough.apply(lambda x: x.replace("\n",""))
df=df.drop(columns=("overall"))

In [10]:
df=df[df.borough !="Not assigned"]
df.reset_index(drop=True, inplace = True)

In [11]:
df

Unnamed: 0,postalcode,neighborhood,borough
0,M3A,Parkwoods,North York
1,M4A,Victoria Village,North York
2,M5A,"Regent Park , Harbourfront",Downtown Toronto
3,M6A,"Lawrence Manor , Lawrence Heights",North York
4,M7A,Ontario Provincial Government,Queen's Park
...,...,...,...
98,M8X,"The Kingsway , Montgomery Road , Old Mill North",Etobicoke
99,M4Y,Church and Wellesley,Downtown Toronto
100,M7Y,Enclave of M4L,East TorontoBusiness reply mail Processing Cen...
101,M8Y,"Old Mill South , Kings Mill Park , Sunnylea , ...",Etobicoke


In [16]:
df[df.neighborhood=="Not assigned"]

Unnamed: 0,postalcode,neighborhood,borough


In [17]:
df.shape

(103, 3)

### PART TWO

In [54]:
geo= pd.read_csv("Geospatial_Coordinates.csv")
geo.rename(columns={"Postal Code":"postalcode"},inplace = True)
geo.head()

Unnamed: 0,postalcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [56]:
combine = pd.merge(df,geo,how="left",on = "postalcode")
combine.shape

(103, 5)

In [57]:
combine.head()

Unnamed: 0,postalcode,neighborhood,borough,Latitude,Longitude
0,M3A,Parkwoods,North York,43.753259,-79.329656
1,M4A,Victoria Village,North York,43.725882,-79.315572
2,M5A,"Regent Park , Harbourfront",Downtown Toronto,43.65426,-79.360636
3,M6A,"Lawrence Manor , Lawrence Heights",North York,43.718518,-79.464763
4,M7A,Ontario Provincial Government,Queen's Park,43.662301,-79.389494


### PART THREE

In [62]:
import matplotlib.cm as cm 
import matplotlib.colors as colors 
import matplotlib.pyplot as plt 
from sklearn.cluster import KMeans 
import folium
from geopy.geocoders import Nominatim

In [58]:
combine.borough.unique()

array(['North York', 'Downtown Toronto', "Queen's Park", 'Etobicoke',
       'Scarborough', 'East York', 'York', 'East Toronto', 'West Toronto',
       'East YorkEast Toronto', 'Central Toronto',
       'MississaugaCanada Post Gateway Processing Centre',
       'Downtown TorontoStn A PO Boxes25 The Esplanade',
       'EtobicokeNorthwest',
       'East TorontoBusiness reply mail Processing Centre969 Eastern'],
      dtype=object)

In [72]:
address = "Toronto, Ontario" 
geolocator = Nominatim(user_agent="gta_explorer")
location = geolocator.geocode(address)
latitude =location.latitude
longitude = location.longitude

In [73]:
# create map of New York using latitude and longitude values
map_GTA = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(combine['Latitude'], combine['Longitude'], combine['borough'], combine['neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_GTA)  
    
map_GTA

In [127]:
York= combine[combine["borough"].str.contains("York")]
York.reset_index(drop=True, inplace=True)
York.shape
York.head()

Unnamed: 0,postalcode,neighborhood,borough,Latitude,Longitude
0,M3A,Parkwoods,North York,43.753259,-79.329656
1,M4A,Victoria Village,North York,43.725882,-79.315572
2,M6A,"Lawrence Manor , Lawrence Heights",North York,43.718518,-79.464763
3,M3B,Don Mills,North York,43.745906,-79.352188
4,M4B,"Parkview Hill , Woodbine Gardens",East York,43.706397,-79.309937


In [95]:
CLIENT_ID = 'T3LYSVVFWCXCPBIHPLDIXEPPS2JNTX2LACLOQU0MXTR5VYU5' # your Foursquare ID
CLIENT_SECRET = 'PBUOZ1LKV1D14RQKXICJZT2FXNXDZ43Q3VD1L4VO0HRLX2N2' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: T3LYSVVFWCXCPBIHPLDIXEPPS2JNTX2LACLOQU0MXTR5VYU5
CLIENT_SECRET:PBUOZ1LKV1D14RQKXICJZT2FXNXDZ43Q3VD1L4VO0HRLX2N2


In [97]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [98]:
# type your answer here
york_venues = getNearbyVenues(names=York['neighborhood'],
                                   latitudes=York['Latitude'],
                                   longitudes=York['Longitude']
                                  )

Parkwoods
Victoria Village
Lawrence Manor , Lawrence Heights
Don Mills
Parkview Hill , Woodbine Gardens
Glencairn
Don Mills, Flemingdon Park
Woodbine Heights
HumewoodCedarvale
CaledoniaFairbanks
Leaside
Hillcrest Village
Bathurst Manor , Wilson Heights , Downsview North
Thorncliffe Park
Fairview , Henry Farm , Oriole
Northwood Park , York University
The Danforth  East
Bayview Village
Downsview, CFB Toronto
York Mills , Silver Hills
Downsview
North Park , Maple Leaf Park , Upwood Park
Humber Summit
Willowdale , Newtonbrook
Downsview
Bedford Park , Lawrence Manor East
Del Ray , Mount Dennis , Keelsdale and Silverthorn
Humberlea , Emery
Willowdale
Downsview
Runnymede , The Junction North
Weston
York Mills West
Willowdale


In [99]:
print(york_venues.shape)

(336, 7)


In [100]:
york_venues.head(7)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
5,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
6,Victoria Village,43.725882,-79.315572,Pizza Nova,43.725824,-79.31286,Pizza Place


In [102]:
york_venues.groupby("Neighborhood").count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor , Wilson Heights , Downsview North",21,21,21,21,21,21
Bayview Village,4,4,4,4,4,4
"Bedford Park , Lawrence Manor East",25,25,25,25,25,25
CaledoniaFairbanks,4,4,4,4,4,4
"Del Ray , Mount Dennis , Keelsdale and Silverthorn",4,4,4,4,4,4
Don Mills,5,5,5,5,5,5
"Don Mills, Flemingdon Park",20,20,20,20,20,20
Downsview,12,12,12,12,12,12
"Downsview, CFB Toronto",3,3,3,3,3,3
"Fairview , Henry Farm , Oriole",67,67,67,67,67,67


In [103]:
print('There are {} uniques categories.'.format(len(york_venues['Venue Category'].unique())))

There are 121 uniques categories.


In [104]:
# one hot encoding
york_onehot = pd.get_dummies(york_venues[['Venue Category']], prefix="", prefix_sep="")

In [105]:
york_onehot

Unnamed: 0,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,Bank,...,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
332,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
333,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
334,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [106]:
york_onehot["Neighborhood"] = york_venues["Neighborhood"]

In [107]:
york_onehot.head()

Unnamed: 0,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,Bank,...,Theater,Toy / Game Store,Trail,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Women's Store,Yoga Studio,Neighborhood
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Parkwoods
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Parkwoods
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Parkwoods
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Victoria Village
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Victoria Village


In [109]:
fixed_columns = [york_onehot.columns[-1]]+list(york_onehot.columns[:-1])
york_onehot = york_onehot[fixed_columns]
york_onehot.head()

Unnamed: 0,Yoga Studio,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,...,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Women's Store
0,0,Parkwoods,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,Parkwoods,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,Parkwoods,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,Victoria Village,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,Victoria Village,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [110]:
york_onehot.shape

(336, 122)

In [116]:
york_group = york_onehot.groupby("Neighborhood").mean().reset_index()
york_group.shape

(30, 122)

In [117]:
york_group.head()

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,...,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Women's Store
0,"Bathurst Manor , Wilson Heights , Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park , Lawrence Manor East",0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,...,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0
3,CaledoniaFairbanks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25
4,"Del Ray , Mount Dennis , Keelsdale and Silvert...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [118]:
num_top_venues = 5

for hood in york_group['Neighborhood']:
    print("----"+hood+"----")
    temp = york_group[york_group['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor , Wilson Heights , Downsview North----
              venue  freq
0              Bank  0.10
1       Coffee Shop  0.10
2  Sushi Restaurant  0.05
3        Restaurant  0.05
4     Shopping Mall  0.05


----Bayview Village----
                 venue  freq
0                 Bank  0.25
1   Chinese Restaurant  0.25
2                 Café  0.25
3  Japanese Restaurant  0.25
4          Yoga Studio  0.00


----Bedford Park , Lawrence Manor East----
                     venue  freq
0              Coffee Shop  0.08
1           Sandwich Place  0.08
2       Italian Restaurant  0.08
3  Comfort Food Restaurant  0.04
4                Juice Bar  0.04


----CaledoniaFairbanks----
           venue  freq
0           Park  0.50
1  Women's Store  0.25
2           Pool  0.25
3    Pizza Place  0.00
4      Pet Store  0.00


----Del Ray , Mount Dennis , Keelsdale and Silverthorn----
               venue  freq
0        Coffee Shop  0.25
1  Convenience Store  0.25
2     Sandwich Place  0.25
3     D

In [119]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [122]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = york_group['Neighborhood']

for ind in np.arange(york_group.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(york_group.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor , Wilson Heights , Downsview North",Coffee Shop,Bank,Park,Diner,Middle Eastern Restaurant,Mobile Phone Shop,Pharmacy,Pizza Place,Bridal Shop,Intersection
1,Bayview Village,Japanese Restaurant,Chinese Restaurant,Café,Bank,Women's Store,Deli / Bodega,Dim Sum Restaurant,Dessert Shop,Department Store,Curling Ice
2,"Bedford Park , Lawrence Manor East",Coffee Shop,Italian Restaurant,Sandwich Place,Hobby Shop,Comfort Food Restaurant,Café,Liquor Store,Juice Bar,Pharmacy,Pizza Place
3,CaledoniaFairbanks,Park,Women's Store,Pool,Dim Sum Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop
4,"Del Ray , Mount Dennis , Keelsdale and Silvert...",Discount Store,Coffee Shop,Convenience Store,Sandwich Place,Diner,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Cupcake Shop,Curling Ice


In [123]:
# set number of clusters
kclusters = 5

york_grouped_clustering = york_group.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(york_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 4, 4, 0, 4, 4, 4, 4, 2, 4])

In [125]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,4,"Bathurst Manor , Wilson Heights , Downsview North",Coffee Shop,Bank,Park,Diner,Middle Eastern Restaurant,Mobile Phone Shop,Pharmacy,Pizza Place,Bridal Shop,Intersection
1,4,Bayview Village,Japanese Restaurant,Chinese Restaurant,Café,Bank,Women's Store,Deli / Bodega,Dim Sum Restaurant,Dessert Shop,Department Store,Curling Ice
2,4,"Bedford Park , Lawrence Manor East",Coffee Shop,Italian Restaurant,Sandwich Place,Hobby Shop,Comfort Food Restaurant,Café,Liquor Store,Juice Bar,Pharmacy,Pizza Place
3,0,CaledoniaFairbanks,Park,Women's Store,Pool,Dim Sum Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop
4,4,"Del Ray , Mount Dennis , Keelsdale and Silvert...",Discount Store,Coffee Shop,Convenience Store,Sandwich Place,Diner,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Cupcake Shop,Curling Ice
5,4,Don Mills,Caribbean Restaurant,Gym,Café,Dessert Shop,Japanese Restaurant,Women's Store,Diner,Construction & Landscaping,Convenience Store,Cosmetics Shop
6,4,"Don Mills, Flemingdon Park",Sporting Goods Shop,Restaurant,Gym,Coffee Shop,Beer Store,Chinese Restaurant,Italian Restaurant,Dim Sum Restaurant,Sandwich Place,Bike Shop
7,4,Downsview,Grocery Store,Park,Athletics & Sports,Shopping Mall,Baseball Field,Home Service,Bank,Discount Store,Gym / Fitness Center,Food Truck
8,2,"Downsview, CFB Toronto",Park,Airport,Business Service,Discount Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop,Curling Ice
9,4,"Fairview , Henry Farm , Oriole",Clothing Store,Coffee Shop,Fast Food Restaurant,Restaurant,Shoe Store,Cosmetics Shop,Food Court,Chinese Restaurant,Japanese Restaurant,Bakery


In [131]:
York2 = York.rename(columns ={"neighborhood":"Neighborhood"})
York2.set_index("Neighborhood",inplace=True)
York2.head()

Unnamed: 0_level_0,postalcode,borough,Latitude,Longitude
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Parkwoods,M3A,North York,43.753259,-79.329656
Victoria Village,M4A,North York,43.725882,-79.315572
"Lawrence Manor , Lawrence Heights",M6A,North York,43.718518,-79.464763
Don Mills,M3B,North York,43.745906,-79.352188
"Parkview Hill , Woodbine Gardens",M4B,East York,43.706397,-79.309937


In [132]:
york_merged = York2
york_merged = york_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
york_merged.head()

Unnamed: 0_level_0,postalcode,borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Parkwoods,M3A,North York,43.753259,-79.329656,2.0,Park,Food & Drink Shop,Fast Food Restaurant,Women's Store,Diner,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop
Victoria Village,M4A,North York,43.725882,-79.315572,4.0,Pizza Place,Coffee Shop,Portuguese Restaurant,Hockey Arena,Women's Store,Dim Sum Restaurant,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
"Lawrence Manor , Lawrence Heights",M6A,North York,43.718518,-79.464763,4.0,Clothing Store,Coffee Shop,Furniture / Home Store,Event Space,Boutique,Accessories Store,Vietnamese Restaurant,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
Don Mills,M3B,North York,43.745906,-79.352188,4.0,Caribbean Restaurant,Gym,Café,Dessert Shop,Japanese Restaurant,Women's Store,Diner,Construction & Landscaping,Convenience Store,Cosmetics Shop
"Parkview Hill , Woodbine Gardens",M4B,East York,43.706397,-79.309937,4.0,Pizza Place,Pharmacy,Gastropub,Pet Store,Breakfast Spot,Flea Market,Gym / Fitness Center,Athletics & Sports,Intersection,Bank


In [136]:
york_merged.reset_index(inplace = True)
york_merged.head()

Unnamed: 0,index,Neighborhood,postalcode,borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,Parkwoods,M3A,North York,43.753259,-79.329656,2.0,Park,Food & Drink Shop,Fast Food Restaurant,Women's Store,Diner,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop
1,1,Victoria Village,M4A,North York,43.725882,-79.315572,4.0,Pizza Place,Coffee Shop,Portuguese Restaurant,Hockey Arena,Women's Store,Dim Sum Restaurant,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
2,2,"Lawrence Manor , Lawrence Heights",M6A,North York,43.718518,-79.464763,4.0,Clothing Store,Coffee Shop,Furniture / Home Store,Event Space,Boutique,Accessories Store,Vietnamese Restaurant,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
3,3,Don Mills,M3B,North York,43.745906,-79.352188,4.0,Caribbean Restaurant,Gym,Café,Dessert Shop,Japanese Restaurant,Women's Store,Diner,Construction & Landscaping,Convenience Store,Cosmetics Shop
4,4,"Parkview Hill , Woodbine Gardens",M4B,East York,43.706397,-79.309937,4.0,Pizza Place,Pharmacy,Gastropub,Pet Store,Breakfast Spot,Flea Market,Gym / Fitness Center,Athletics & Sports,Intersection,Bank


In [145]:
york_merged["Cluster Labels"].isnull().sum()

1

In [146]:
york_merged.dropna(subset = ["Cluster Labels"], inplace=True)
york_merged["Cluster Labels"].isnull().sum()

0

In [147]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(york_merged['Latitude'], york_merged['Longitude'], york_merged['Neighborhood'], york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [148]:
york_merged.loc[york_merged['Cluster Labels'] == 0, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,CaledoniaFairbanks,-79.453512,0.0,Park,Women's Store,Pool,Dim Sum Restaurant,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop
23,"Willowdale , Newtonbrook",-79.408493,0.0,Park,Women's Store,Clothing Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop,Curling Ice,Dance Studio
31,Weston,-79.518188,0.0,Park,Women's Store,Clothing Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop,Curling Ice,Dance Studio


## Label 0: with parks, women's store

In [149]:
york_merged.loc[york_merged['Cluster Labels'] == 1, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
30,"Runnymede , The Junction North",-79.487262,1.0,Caribbean Restaurant,Convenience Store,Women's Store,Clothing Store,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Cupcake Shop,Curling Ice,Dance Studio


## Label 1: lots of caribbean restaurant

In [150]:
york_merged.loc[york_merged['Cluster Labels'] == 2, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Parkwoods,-79.329656,2.0,Park,Food & Drink Shop,Fast Food Restaurant,Women's Store,Diner,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop
7,Woodbine Heights,-79.318389,2.0,Beer Store,Curling Ice,Skating Rink,Park,Intersection,Dance Studio,Athletics & Sports,Video Store,Cosmetics Shop,Cupcake Shop
16,The Danforth East,-79.338106,2.0,Park,Convenience Store,Rental Car Location,Intersection,Diner,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Cupcake Shop
18,"Downsview, CFB Toronto",-79.464763,2.0,Park,Airport,Business Service,Discount Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop,Curling Ice
21,"North Park , Maple Leaf Park , Upwood Park",-79.490074,2.0,Park,Construction & Landscaping,Bakery,Basketball Court,Discount Store,Comfort Food Restaurant,Convenience Store,Cosmetics Shop,Cupcake Shop,Curling Ice
32,York Mills West,-79.400049,2.0,Park,Construction & Landscaping,Convenience Store,Women's Store,Clothing Store,Comfort Food Restaurant,Cosmetics Shop,Cupcake Shop,Curling Ice,Dance Studio


## Label 2: Park

In [151]:
york_merged.loc[york_merged['Cluster Labels'] == 3, york_merged.columns[[1] + list(range(5,york_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
27,"Humberlea , Emery",-79.532242,3.0,Baseball Field,Women's Store,Discount Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Cupcake Shop,Curling Ice,Dance Studio


## Label 3: Baseball

In [152]:
york_merged.loc[york_merged['Cluster Labels'] == 4, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Victoria Village,-79.315572,4.0,Pizza Place,Coffee Shop,Portuguese Restaurant,Hockey Arena,Women's Store,Dim Sum Restaurant,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
2,"Lawrence Manor , Lawrence Heights",-79.464763,4.0,Clothing Store,Coffee Shop,Furniture / Home Store,Event Space,Boutique,Accessories Store,Vietnamese Restaurant,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
3,Don Mills,-79.352188,4.0,Caribbean Restaurant,Gym,Café,Dessert Shop,Japanese Restaurant,Women's Store,Diner,Construction & Landscaping,Convenience Store,Cosmetics Shop
4,"Parkview Hill , Woodbine Gardens",-79.309937,4.0,Pizza Place,Pharmacy,Gastropub,Pet Store,Breakfast Spot,Flea Market,Gym / Fitness Center,Athletics & Sports,Intersection,Bank
5,Glencairn,-79.445073,4.0,Metro Station,Asian Restaurant,Bakery,Japanese Restaurant,Cosmetics Shop,Cupcake Shop,Curling Ice,Convenience Store,Discount Store,Dance Studio
6,"Don Mills, Flemingdon Park",-79.340923,4.0,Sporting Goods Shop,Restaurant,Gym,Coffee Shop,Beer Store,Chinese Restaurant,Italian Restaurant,Dim Sum Restaurant,Sandwich Place,Bike Shop
8,HumewoodCedarvale,-79.428191,4.0,Tennis Court,Playground,Trail,Field,Hockey Arena,Cosmetics Shop,Department Store,Deli / Bodega,Dance Studio,Curling Ice
10,Leaside,-79.363452,4.0,Coffee Shop,Sporting Goods Shop,Bank,Burger Joint,Sushi Restaurant,Grocery Store,Beer Store,Restaurant,Sandwich Place,Brewery
11,Hillcrest Village,-79.363452,4.0,Golf Course,Fast Food Restaurant,Dog Run,Mediterranean Restaurant,Athletics & Sports,Pool,Convenience Store,Cosmetics Shop,Cupcake Shop,Dim Sum Restaurant
12,"Bathurst Manor , Wilson Heights , Downsview North",-79.442259,4.0,Coffee Shop,Bank,Park,Diner,Middle Eastern Restaurant,Mobile Phone Shop,Pharmacy,Pizza Place,Bridal Shop,Intersection


## Label 4: food, Coffee shop