### PART ONE

In [1]:
import pandas as pd 
import numpy as np 
import requests 
from bs4 import BeautifulSoup
import re

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
g=requests.get(url)

In [3]:
soup = BeautifulSoup(g.text, "html5lib")
table = soup.find("tbody")
cell = table.find_all("p")
all_cell = []

for i in range(0,len(cell)): 
    try: 
        all_cell.append(cell[i].text)
    except (RuntimeError, TypeError, NameError):
        pass

In [4]:
all_cell[0:5]

['M1ANot assigned\n',
 'M2ANot assigned\n',
 'M3ANorth York(Parkwoods)\n',
 'M4ANorth York(Victoria Village)\n',
 'M5ADowntown Toronto(Regent Park / Harbourfront)\n']

In [5]:
df=pd.DataFrame(all_cell)

In [6]:
df=df.rename(columns={0:"overall"})
df["postalcode"]=df.overall.apply(lambda x: x[0:3])
df["neighborhood"]=df.overall.apply(lambda x:    str(re.findall(r"\(.*?\)" , x))  )
df["neighborhood"] =df.neighborhood.apply (lambda x: x.replace("/",","))
df["neighborhood"]=df.neighborhood.apply(lambda x:    re.sub(r"[^\w\s\,]", "", x))
df["borough"]=df.overall.apply(lambda x: x[3:])
df["borough"]=df.borough.apply(lambda x: x.split("(")[0])
df["borough"] =df.borough.apply(lambda x: x.replace("\n",""))
df=df.drop(columns=("overall"))

In [7]:
df=df[df.borough !="Not assigned"]
df.reset_index(drop=True, inplace = True)

In [8]:
df

Unnamed: 0,postalcode,neighborhood,borough
0,M3A,Parkwoods,North York
1,M4A,Victoria Village,North York
2,M5A,"Regent Park , Harbourfront",Downtown Toronto
3,M6A,"Lawrence Manor , Lawrence Heights",North York
4,M7A,Ontario Provincial Government,Queen's Park
...,...,...,...
98,M8X,"The Kingsway , Montgomery Road , Old Mill North",Etobicoke
99,M4Y,Church and Wellesley,Downtown Toronto
100,M7Y,Enclave of M4L,East TorontoBusiness reply mail Processing Cen...
101,M8Y,"Old Mill South , Kings Mill Park , Sunnylea , ...",Etobicoke


In [9]:
df[df.neighborhood=="Not assigned"]

Unnamed: 0,postalcode,neighborhood,borough


In [10]:
df.shape

(103, 3)

### PART TWO

In [11]:
geo= pd.read_csv("Geospatial_Coordinates.csv")
geo.rename(columns={"Postal Code":"postalcode"},inplace = True)
geo.head()

Unnamed: 0,postalcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
combine = pd.merge(df,geo,how="left",on = "postalcode")
combine.shape

(103, 5)

In [13]:
combine.head()

Unnamed: 0,postalcode,neighborhood,borough,Latitude,Longitude
0,M3A,Parkwoods,North York,43.753259,-79.329656
1,M4A,Victoria Village,North York,43.725882,-79.315572
2,M5A,"Regent Park , Harbourfront",Downtown Toronto,43.65426,-79.360636
3,M6A,"Lawrence Manor , Lawrence Heights",North York,43.718518,-79.464763
4,M7A,Ontario Provincial Government,Queen's Park,43.662301,-79.389494


### PART THREE

In [14]:
import matplotlib.cm as cm 
import matplotlib.colors as colors 
import matplotlib.pyplot as plt 
from sklearn.cluster import KMeans 
import folium
from geopy.geocoders import Nominatim

In [15]:
combine.borough.unique()

array(['North York', 'Downtown Toronto', "Queen's Park", 'Etobicoke',
       'Scarborough', 'East York', 'York', 'East Toronto', 'West Toronto',
       'East YorkEast Toronto', 'Central Toronto',
       'MississaugaCanada Post Gateway Processing Centre',
       'Downtown TorontoStn A PO Boxes25 The Esplanade',
       'EtobicokeNorthwest',
       'East TorontoBusiness reply mail Processing Centre969 Eastern'],
      dtype=object)

In [16]:
address = "Toronto, Ontario" 
geolocator = Nominatim(user_agent="gta_explorer")
location = geolocator.geocode(address)
latitude =location.latitude
longitude = location.longitude

In [17]:
# create map of New York using latitude and longitude values
map_GTA = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(combine['Latitude'], combine['Longitude'], combine['borough'], combine['neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_GTA)  
    
map_GTA

In [18]:
York= combine[combine["borough"].str.contains("York")]
York.reset_index(drop=True, inplace=True)
York.shape
York.head()

Unnamed: 0,postalcode,neighborhood,borough,Latitude,Longitude
0,M3A,Parkwoods,North York,43.753259,-79.329656
1,M4A,Victoria Village,North York,43.725882,-79.315572
2,M6A,"Lawrence Manor , Lawrence Heights",North York,43.718518,-79.464763
3,M3B,Don Mills,North York,43.745906,-79.352188
4,M4B,"Parkview Hill , Woodbine Gardens",East York,43.706397,-79.309937


In [19]:
CLIENT_ID = 'T3LYSVVFWCXCPBIHPLDIXEPPS2JNTX2LACLOQU0MXTR5VYU5' # your Foursquare ID
CLIENT_SECRET = 'PBUOZ1LKV1D14RQKXICJZT2FXNXDZ43Q3VD1L4VO0HRLX2N2' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: T3LYSVVFWCXCPBIHPLDIXEPPS2JNTX2LACLOQU0MXTR5VYU5
CLIENT_SECRET:PBUOZ1LKV1D14RQKXICJZT2FXNXDZ43Q3VD1L4VO0HRLX2N2


In [20]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [21]:
# type your answer here
york_venues = getNearbyVenues(names=York['neighborhood'],
                                   latitudes=York['Latitude'],
                                   longitudes=York['Longitude']
                                  )

Parkwoods
Victoria Village
Lawrence Manor , Lawrence Heights
Don Mills
Parkview Hill , Woodbine Gardens
Glencairn
Don Mills, Flemingdon Park
Woodbine Heights
HumewoodCedarvale
CaledoniaFairbanks
Leaside
Hillcrest Village
Bathurst Manor , Wilson Heights , Downsview North
Thorncliffe Park
Fairview , Henry Farm , Oriole
Northwood Park , York University
The Danforth  East
Bayview Village
Downsview, CFB Toronto
York Mills , Silver Hills
Downsview
North Park , Maple Leaf Park , Upwood Park
Humber Summit
Willowdale , Newtonbrook
Downsview
Bedford Park , Lawrence Manor East
Del Ray , Mount Dennis , Keelsdale and Silverthorn
Humberlea , Emery
Willowdale
Downsview
Runnymede , The Junction North
Weston
York Mills West
Willowdale


In [22]:
print(york_venues.shape)

(344, 7)


In [23]:
york_venues.head(7)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Parkwoods,43.753259,-79.329656,TTC stop - 44 Valley Woods,43.755402,-79.333741,Bus Stop
4,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
5,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
6,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant


In [24]:
york_venues.groupby("Neighborhood").count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor , Wilson Heights , Downsview North",23,23,23,23,23,23
Bayview Village,4,4,4,4,4,4
"Bedford Park , Lawrence Manor East",24,24,24,24,24,24
CaledoniaFairbanks,4,4,4,4,4,4
"Del Ray , Mount Dennis , Keelsdale and Silverthorn",4,4,4,4,4,4
Don Mills,6,6,6,6,6,6
"Don Mills, Flemingdon Park",19,19,19,19,19,19
Downsview,11,11,11,11,11,11
"Downsview, CFB Toronto",3,3,3,3,3,3
"Fairview , Henry Farm , Oriole",71,71,71,71,71,71


In [25]:
print('There are {} uniques categories.'.format(len(york_venues['Venue Category'].unique())))

There are 118 uniques categories.


In [26]:
# one hot encoding
york_onehot = pd.get_dummies(york_venues[['Venue Category']], prefix="", prefix_sep="")

In [27]:
york_onehot

Unnamed: 0,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,Bank,...,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Vietnamese Restaurant,Warehouse Store,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
339,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
340,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
341,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
342,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
york_onehot["Neighborhood"] = york_venues["Neighborhood"]

In [29]:
york_onehot.head()

Unnamed: 0,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,Bank,...,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Vietnamese Restaurant,Warehouse Store,Women's Store,Yoga Studio,Neighborhood
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Parkwoods
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Parkwoods
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Parkwoods
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Parkwoods
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Victoria Village


In [30]:
fixed_columns = [york_onehot.columns[-1]]+list(york_onehot.columns[:-1])
york_onehot = york_onehot[fixed_columns]
york_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,...,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Vietnamese Restaurant,Warehouse Store,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
york_onehot.shape

(344, 119)

In [32]:
york_group = york_onehot.groupby("Neighborhood").mean().reset_index()
york_group.shape

(29, 119)

In [33]:
york_group.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,...,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Video Game Store,Vietnamese Restaurant,Warehouse Store,Women's Store,Yoga Studio
0,"Bathurst Manor , Wilson Heights , Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park , Lawrence Manor East",0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,CaledoniaFairbanks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0
4,"Del Ray , Mount Dennis , Keelsdale and Silvert...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [34]:
num_top_venues = 5

for hood in york_group['Neighborhood']:
    print("----"+hood+"----")
    temp = york_group[york_group['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor , Wilson Heights , Downsview North----
            venue  freq
0     Coffee Shop  0.09
1            Bank  0.09
2  Ice Cream Shop  0.04
3     Supermarket  0.04
4        Pharmacy  0.04


----Bayview Village----
                 venue  freq
0   Chinese Restaurant  0.25
1                 Café  0.25
2  Japanese Restaurant  0.25
3                 Bank  0.25
4    Accessories Store  0.00


----Bedford Park , Lawrence Manor East----
                venue  freq
0      Sandwich Place  0.08
1  Italian Restaurant  0.08
2         Coffee Shop  0.08
3    Greek Restaurant  0.04
4       Grocery Store  0.04


----CaledoniaFairbanks----
               venue  freq
0               Park  0.50
1      Women's Store  0.25
2               Pool  0.25
3  Accessories Store  0.00
4          Locksmith  0.00


----Del Ray , Mount Dennis , Keelsdale and Silverthorn----
               venue  freq
0  Convenience Store  0.25
1     Sandwich Place  0.25
2     Discount Store  0.25
3       Skating Rink  0.2

In [35]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [36]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = york_group['Neighborhood']

for ind in np.arange(york_group.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(york_group.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor , Wilson Heights , Downsview North",Coffee Shop,Bank,Chinese Restaurant,Deli / Bodega,Bridal Shop,Restaurant,Intersection,Sandwich Place,Pizza Place,Shopping Mall
1,Bayview Village,Chinese Restaurant,Café,Bank,Japanese Restaurant,Yoga Studio,Dog Run,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice
2,"Bedford Park , Lawrence Manor East",Italian Restaurant,Sandwich Place,Coffee Shop,Café,Indian Restaurant,Locksmith,Butcher,Liquor Store,Pharmacy,Pizza Place
3,CaledoniaFairbanks,Park,Pool,Women's Store,Frozen Yogurt Shop,Diner,Coffee Shop,Comfort Food Restaurant,Furniture / Home Store,Construction & Landscaping,Convenience Store
4,"Del Ray , Mount Dennis , Keelsdale and Silvert...",Discount Store,Convenience Store,Sandwich Place,Skating Rink,Yoga Studio,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Curling Ice


In [37]:
# set number of clusters
kclusters = 5

york_grouped_clustering = york_group.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(york_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 2, 0, 0, 0, 0, 2, 0])

In [38]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [39]:
York2 = York.rename(columns ={"neighborhood":"Neighborhood"})
York2.set_index("Neighborhood",inplace=True)
York2.head()

Unnamed: 0_level_0,postalcode,borough,Latitude,Longitude
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Parkwoods,M3A,North York,43.753259,-79.329656
Victoria Village,M4A,North York,43.725882,-79.315572
"Lawrence Manor , Lawrence Heights",M6A,North York,43.718518,-79.464763
Don Mills,M3B,North York,43.745906,-79.352188
"Parkview Hill , Woodbine Gardens",M4B,East York,43.706397,-79.309937


In [40]:
york_merged = York2
york_merged = york_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
york_merged.head()

Unnamed: 0_level_0,postalcode,borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Parkwoods,M3A,North York,43.753259,-79.329656,0.0,Park,Bus Stop,Food & Drink Shop,Fast Food Restaurant,Yoga Studio,Discount Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
Victoria Village,M4A,North York,43.725882,-79.315572,0.0,Pizza Place,Coffee Shop,French Restaurant,Hockey Arena,Portuguese Restaurant,Yoga Studio,Diner,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
"Lawrence Manor , Lawrence Heights",M6A,North York,43.718518,-79.464763,0.0,Clothing Store,Accessories Store,Boutique,Furniture / Home Store,Event Space,Coffee Shop,Women's Store,Arts & Crafts Store,Vietnamese Restaurant,Asian Restaurant
Don Mills,M3B,North York,43.745906,-79.352188,0.0,Dessert Shop,Gym,Caribbean Restaurant,Café,Athletics & Sports,Japanese Restaurant,Yoga Studio,Discount Store,Diner,Dim Sum Restaurant
"Parkview Hill , Woodbine Gardens",M4B,East York,43.706397,-79.309937,0.0,Pizza Place,Pet Store,Athletics & Sports,Breakfast Spot,Intersection,Flea Market,Gym / Fitness Center,Bank,Pharmacy,Gastropub


In [41]:
york_merged.reset_index(inplace = True)
york_merged.head()

Unnamed: 0,Neighborhood,postalcode,borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Parkwoods,M3A,North York,43.753259,-79.329656,0.0,Park,Bus Stop,Food & Drink Shop,Fast Food Restaurant,Yoga Studio,Discount Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
1,Victoria Village,M4A,North York,43.725882,-79.315572,0.0,Pizza Place,Coffee Shop,French Restaurant,Hockey Arena,Portuguese Restaurant,Yoga Studio,Diner,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
2,"Lawrence Manor , Lawrence Heights",M6A,North York,43.718518,-79.464763,0.0,Clothing Store,Accessories Store,Boutique,Furniture / Home Store,Event Space,Coffee Shop,Women's Store,Arts & Crafts Store,Vietnamese Restaurant,Asian Restaurant
3,Don Mills,M3B,North York,43.745906,-79.352188,0.0,Dessert Shop,Gym,Caribbean Restaurant,Café,Athletics & Sports,Japanese Restaurant,Yoga Studio,Discount Store,Diner,Dim Sum Restaurant
4,"Parkview Hill , Woodbine Gardens",M4B,East York,43.706397,-79.309937,0.0,Pizza Place,Pet Store,Athletics & Sports,Breakfast Spot,Intersection,Flea Market,Gym / Fitness Center,Bank,Pharmacy,Gastropub


In [42]:
york_merged["Cluster Labels"].isnull().sum()

2

In [43]:
york_merged.dropna(subset = ["Cluster Labels"], inplace=True)
york_merged["Cluster Labels"].isnull().sum()

0

In [44]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(york_merged['Latitude'], york_merged['Longitude'], york_merged['Neighborhood'], york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [45]:
york_merged.loc[york_merged['Cluster Labels'] == 0, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,postalcode,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,0.0,Park,Bus Stop,Food & Drink Shop,Fast Food Restaurant,Yoga Studio,Discount Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
1,M4A,0.0,Pizza Place,Coffee Shop,French Restaurant,Hockey Arena,Portuguese Restaurant,Yoga Studio,Diner,Comfort Food Restaurant,Construction & Landscaping,Convenience Store
2,M6A,0.0,Clothing Store,Accessories Store,Boutique,Furniture / Home Store,Event Space,Coffee Shop,Women's Store,Arts & Crafts Store,Vietnamese Restaurant,Asian Restaurant
3,M3B,0.0,Dessert Shop,Gym,Caribbean Restaurant,Café,Athletics & Sports,Japanese Restaurant,Yoga Studio,Discount Store,Diner,Dim Sum Restaurant
4,M4B,0.0,Pizza Place,Pet Store,Athletics & Sports,Breakfast Spot,Intersection,Flea Market,Gym / Fitness Center,Bank,Pharmacy,Gastropub
5,M6B,0.0,Park,Pizza Place,Bakery,Japanese Restaurant,Discount Store,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop
6,M3C,0.0,Coffee Shop,Gym,Restaurant,Sporting Goods Shop,Clothing Store,Chinese Restaurant,Café,Dim Sum Restaurant,Discount Store,Sandwich Place
7,M4C,0.0,Park,Skating Rink,Spa,Beer Store,Athletics & Sports,Curling Ice,Intersection,Dessert Shop,Diner,Dim Sum Restaurant
8,M6C,0.0,Tennis Court,Playground,Trail,Field,Hockey Arena,Curling Ice,Dim Sum Restaurant,Dessert Shop,Department Store,Deli / Bodega
10,M4G,0.0,Coffee Shop,Sporting Goods Shop,Burger Joint,Sushi Restaurant,Bank,Smoothie Shop,Pet Store,Department Store,Brewery,Breakfast Spot


## Label 0: with parks, women's store

In [46]:
york_merged.loc[york_merged['Cluster Labels'] == 1, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,postalcode,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
31,M9N,1.0,Jewelry Store,Yoga Studio,Electronics Store,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Deli / Bodega,Department Store


## Label 1: lots of caribbean restaurant

In [47]:
york_merged.loc[york_merged['Cluster Labels'] == 2, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,postalcode,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,M6E,2.0,Park,Pool,Women's Store,Frozen Yogurt Shop,Diner,Coffee Shop,Comfort Food Restaurant,Furniture / Home Store,Construction & Landscaping,Convenience Store
16,M4J,2.0,Park,Convenience Store,Intersection,Yoga Studio,Dog Run,Comfort Food Restaurant,Construction & Landscaping,Cosmetics Shop,Curling Ice,Deli / Bodega
18,M3K,2.0,Park,Airport,Playground,Dog Run,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice
32,M2P,2.0,Park,Construction & Landscaping,Convenience Store,Yoga Studio,Dog Run,Comfort Food Restaurant,Cosmetics Shop,Curling Ice,Deli / Bodega,Department Store


## Label 2: Park

In [48]:
york_merged.loc[york_merged['Cluster Labels'] == 3, york_merged.columns[[1] + list(range(5,york_merged.shape[1]))]]

Unnamed: 0,postalcode,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,M9L,3.0,Pizza Place,Gym,Dog Run,Coffee Shop,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Deli / Bodega


## Label 3: Baseball

In [49]:
york_merged.loc[york_merged['Cluster Labels'] == 4, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,postalcode,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,M2M,4.0,Park,Yoga Studio,Dog Run,Comfort Food Restaurant,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Deli / Bodega,Department Store


## Label 4: food, Coffee shop