# Capstone Project - The Battle of the Neighborhoods -
## Finding a Better Place in North York, Toronto
### Applied Data Science Capstone by IBM/Coursera

In [1]:
# import required library
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

In [2]:
# assign the data source for beautifulsoup Data scrap
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(source, 'html5lib')

In [3]:
postal_codes_dict = {} # initialize an empty dictionary to save the data in

In [4]:
for table_cell in soup.find_all('td'):
    try:
        postal_code = table_cell.p.b.text # get the postal code
        postal_code_investigate = table_cell.span.text
        neighborhoods_data = table_cell.span.text # get the rest of the data in the cell
        borough = neighborhoods_data.split('(')[0] # get the borough in the cell
        
        # if the cell is not assigned then ignore it
        if neighborhoods_data == 'Not assigned':
            neighborhoods = []
        # else process the data and add it to the dictionary
        else:
            postal_codes_dict[postal_code] = {}
            
            try:
                neighborhoods = neighborhoods_data.split('(')[1]
            
                # remove parantheses from neighborhoods string
                neighborhoods = neighborhoods.replace('(', ' ')
                neighborhoods = neighborhoods.replace(')', ' ')

                neighborhoods_names = neighborhoods.split('/')
                neighborhoods_clean = ', '.join([name.strip() for name in neighborhoods_names])
            except:
                borough = borough.strip('\n')
                neighborhoods_clean = borough
 
            # add borough and neighborhood to dictionary
            postal_codes_dict[postal_code]['borough'] = borough
            postal_codes_dict[postal_code]['neighborhoods'] = neighborhoods_clean
    except:
        pass

In [5]:
# create an empty dataframe
columns = ['Postal Code', 'Borough', 'Neighborhood']
toronto_data = pd.DataFrame(columns=columns)
toronto_data

Unnamed: 0,Postal Code,Borough,Neighborhood


In [6]:
# populate dataframe with data from dictionary
for ind, postal_code in enumerate(postal_codes_dict):
    borough = postal_codes_dict[postal_code]['borough']
    neighborhood = postal_codes_dict[postal_code]['neighborhoods']
    toronto_data = toronto_data.append({"Postal Code": postal_code, 
                                        "Borough": borough, 
                                        "Neighborhood": neighborhood},
                                        ignore_index=True)

In [7]:
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


In [8]:
toronto_data.describe()

Unnamed: 0,Postal Code,Borough,Neighborhood
count,103,103,103
unique,103,15,103
top,M6M,North York,"Alderwood, Long Branch"
freq,1,24,1


In [9]:
#Installing geocoder
!pip install geocoder



In [10]:
import geocoder # import geocoder

In [11]:
data = pd.read_csv("https://cocl.us/Geospatial_data")
data

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [12]:
# add latitude and longitude in the data 
combined_data = toronto_data.join(data.set_index('Postal Code'), on='Postal Code', how='inner')
northyork_data=combined_data[combined_data['Borough'].str.contains("North York")]
northyork_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
7,M3B,North York,Don Mills North,43.745906,-79.352188
10,M6B,North York,Glencairn,43.709577,-79.445073


In [13]:
!pip install geopy
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="geoapiExercises")
address = 'North York,Toronto'

location = geolocator.geocode(address)
latitude_x = location.latitude
longitude_y = location.longitude
print('The Geograpical Co-ordinate of Seattle,Washington are {}, {}.'.format(latitude_x, longitude_y))

The Geograpical Co-ordinate of Seattle,Washington are 43.7543263, -79.44911696639593.


In [14]:
!pip install folium
import folium

map_North_York = folium.Map(location=[latitude_x, longitude_y], zoom_start=10)
map_North_York



In [15]:
for lat, lng, nei in zip(northyork_data['Latitude'], northyork_data['Longitude'], northyork_data['Neighborhood']):
    
    label = '{}'.format(nei)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_North_York)  
    
map_North_York

In [16]:
map_North_York.save('noryork.html')

In [17]:
# setup Foursquare credentails:
CLIENT_ID = 'TMQ01RI1Q5OEA3Q4FXLGW0XKVW2GJSBWPN34RMA4YGEABFBQ' # my Foursquare ID
CLIENT_SECRET = '2E143V54OKLXVZGBGHP02EBCMS4KF50PES4FAC0AQ5K5AMFG' # my Foursquare Secret
VERSION = '20180604'
LIMIT = 100

In [18]:
radius = 500 
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude_x, 
   longitude_y, 
    radius, 
   LIMIT)
results = requests.get(url).json()

In [19]:
venues=results['response']['groups'][0]['items']
nearby_venues = pd.json_normalize(venues)
nearby_venues

Unnamed: 0,referralId,reasons.count,reasons.items,venue.id,venue.name,venue.location.address,venue.location.crossStreet,venue.location.lat,venue.location.lng,venue.location.labeledLatLngs,venue.location.distance,venue.location.postalCode,venue.location.cc,venue.location.city,venue.location.state,venue.location.country,venue.location.formattedAddress,venue.categories,venue.photos.count,venue.photos.groups
0,e-0-5011603fe4b07c3cf1967fba-0,0,"[{'summary': 'This spot is popular', 'type': '...",5011603fe4b07c3cf1967fba,Tim Hortons,680 Sheppard Ave West,Bryant,43.754767,-79.44325,"[{'label': 'display', 'lat': 43.75476686520758...",474,M3H 2S5,CA,North York,ON,Canada,"[680 Sheppard Ave West (Bryant), North York ON...","[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",0,[]
1,e-0-4bc3acbdabf495210a22c493-1,0,"[{'summary': 'This spot is popular', 'type': '...",4bc3acbdabf495210a22c493,Orly Restaurant & Grill,660 Sheppard Avenue West,Bathurst,43.754493,-79.443507,"[{'label': 'display', 'lat': 43.75449324600633...",451,,CA,Toronto,ON,Canada,"[660 Sheppard Avenue West (Bathurst), Toronto ...","[{'id': '4bf58dd8d48988d115941735', 'name': 'M...",0,[]
2,e-0-50f9bbcc5d24acebc25936af-2,0,"[{'summary': 'This spot is popular', 'type': '...",50f9bbcc5d24acebc25936af,Domino's Pizza,820 Sheppard Ave W,,43.753127,-79.450926,"[{'label': 'display', 'lat': 43.75312660212406...",197,M3H 2T1,CA,Toronto,ON,Canada,"[820 Sheppard Ave W, Toronto ON M3H 2T1, Canada]","[{'id': '4bf58dd8d48988d1ca941735', 'name': 'P...",0,[]
3,e-0-5ad39ffb2aff31705da3c70e-3,0,"[{'summary': 'This spot is popular', 'type': '...",5ad39ffb2aff31705da3c70e,lori@itsyourtreasure.com,216 Reiner Rd,,43.750265,-79.447868,"[{'label': 'display', 'lat': 43.75026520087931...",463,M3H 2M1,CA,Toronto,ON,Canada,"[216 Reiner Rd, Toronto ON M3H 2M1, Canada]","[{'id': '4bf58dd8d48988d108951735', 'name': 'W...",0,[]


## Nearby Venues/Locations

In [20]:
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]
nearby_venues.head()

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng
0,Tim Hortons,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",43.754767,-79.44325
1,Orly Restaurant & Grill,"[{'id': '4bf58dd8d48988d115941735', 'name': 'M...",43.754493,-79.443507
2,Domino's Pizza,"[{'id': '4bf58dd8d48988d1ca941735', 'name': 'P...",43.753127,-79.450926
3,lori@itsyourtreasure.com,"[{'id': '4bf58dd8d48988d108951735', 'name': 'W...",43.750265,-79.447868


## Categories of Nearby Venues/Locations

In [21]:
# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Tim Hortons,"[{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...",43.754767,-79.44325
1,Orly Restaurant & Grill,"[{'id': '4bf58dd8d48988d115941735', 'name': 'M...",43.754493,-79.443507
2,Domino's Pizza,"[{'id': '4bf58dd8d48988d1ca941735', 'name': 'P...",43.753127,-79.450926
3,lori@itsyourtreasure.com,"[{'id': '4bf58dd8d48988d108951735', 'name': 'W...",43.750265,-79.447868


In [22]:
# Top 10 Categories
a=pd.Series(nearby_venues.categories)
a.head()

0    [{'id': '4bf58dd8d48988d1e0931735', 'name': 'C...
1    [{'id': '4bf58dd8d48988d115941735', 'name': 'M...
2    [{'id': '4bf58dd8d48988d1ca941735', 'name': 'P...
3    [{'id': '4bf58dd8d48988d108951735', 'name': 'W...
Name: categories, dtype: object

In [23]:

def getNearbyVenues(names, latitudes, longitudes, radius=700):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # making GET request
        venue_results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in venue_results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [25]:
# Nearby Venues
Northyork_venues = getNearbyVenues(names=combined_data['Neighborhood'],
                                   latitudes=combined_data['Latitude'],
                                   longitudes=combined_data['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Ontario Provincial Government
Islington Avenue
Malvern, Rouge
Don Mills North
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
The Danforth  East
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview East
The Danforth

In [26]:
print('There are {} Uniques Categories.'.format(len(Northyork_venues['Venue Category'].unique())))
Northyork_venues.groupby('Neighborhood').count().head()

There are 311 Uniques Categories.


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,8,8,8,8,8,8
"Alderwood, Long Branch",11,11,11,11,11,11
"Bathurst Manor, Wilson Heights, Downsview North",18,18,18,18,18,18
Bayview Village,9,9,9,9,9,9
"Bedford Park, Lawrence Manor East",32,32,32,32,32,32


## One Hot Encoding of Features

In [27]:
# one hot encoding
Northyork_onehot = pd.get_dummies(Northyork_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Northyork_onehot['Neighborhood'] = Northyork_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Northyork_onehot.columns[-1]] + list(Northyork_onehot.columns[:-1])
Northyork_onehot = Northyork_onehot[fixed_columns]
Northyork_grouped = Northyork_onehot.groupby('Neighborhood').mean().reset_index()
Northyork_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
num_top_venues = 5
for hood in Northyork_grouped['Neighborhood']:
    print("---- "+hood+" ----")
    temp =Northyork_grouped[Northyork_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Agincourt ----
                       venue  freq
0        Shanghai Restaurant  0.12
1                     Lounge  0.12
2  Latin American Restaurant  0.12
3                  Pool Hall  0.12
4            Badminton Court  0.12


---- Alderwood, Long Branch ----
               venue  freq
0        Pizza Place  0.18
1  Convenience Store  0.18
2           Pharmacy  0.09
3     Sandwich Place  0.09
4       Dance Studio  0.09


---- Bathurst Manor, Wilson Heights, Downsview North ----
              venue  freq
0              Bank  0.11
1       Coffee Shop  0.11
2              Park  0.11
3  Community Center  0.06
4       Bridal Shop  0.06


---- Bayview Village ----
                 venue  freq
0                 Bank  0.22
1                 Café  0.11
2         Skating Rink  0.11
3  Japanese Restaurant  0.11
4         Intersection  0.11


---- Bedford Park, Lawrence Manor East ----
                venue  freq
0         Coffee Shop  0.09
1  Italian Restaurant  0.06
2          Restaurant  0.

            venue  freq
0     Coffee Shop  0.11
1   Boat or Ferry  0.05
2           Hotel  0.04
3  Sandwich Place  0.04
4            Café  0.03


---- High Park, The Junction South ----
           venue  freq
0            Bar  0.08
1           Café  0.08
2         Bakery  0.06
3           Park  0.04
4  Grocery Store  0.04


---- Hillcrest Village ----
                  venue  freq
0              Pharmacy  0.25
1  Fast Food Restaurant  0.12
2        Sandwich Place  0.12
3                Bakery  0.12
4            Restaurant  0.12


---- Humber Summit ----
                 venue  freq
0               Bakery  0.50
1          Pizza Place  0.25
2  Arts & Crafts Store  0.25
3          Yoga Studio  0.00
4        Moving Target  0.00


---- Humberlea, Emery ----
               venue  freq
0         Shoe Store   0.2
1     Baseball Field   0.2
2     Discount Store   0.2
3        Gas Station   0.2
4  Convenience Store   0.2


---- Humewood-Cedarvale ----
            venue  freq
0    Hockey Arena   

---- Steeles West, L'Amoreaux West ----
                  venue  freq
0  Fast Food Restaurant  0.14
1           Auto Garage  0.07
2           Pizza Place  0.07
3                  Bank  0.07
4              Pharmacy  0.07


---- Studio District ----
            venue  freq
0     Coffee Shop  0.09
1  Sandwich Place  0.05
2            Café  0.05
3          Bakery  0.05
4             Bar  0.05


---- Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park ----
                venue  freq
0         Coffee Shop  0.12
1  Italian Restaurant  0.06
2                Café  0.06
3         Pizza Place  0.04
4                Bank  0.04


---- The Annex, North Midtown, Yorkville ----
            venue  freq
0     Pizza Place  0.05
1  Sandwich Place  0.05
2     Coffee Shop  0.05
3            Café  0.05
4        Pharmacy  0.04


---- The Beaches ----
            venue  freq
0             Pub  0.10
1            Café  0.08
2  Sandwich Place  0.05
3       Gastropub  0.05
4  Breakfast Spot  0.05




In [29]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

## Most Common venues near neighborhood

In [30]:
import numpy as np
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Northyork_grouped['Neighborhood']

for ind in np.arange(Northyork_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Northyork_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Sandwich Place,Pool Hall,Shanghai Restaurant,Badminton Court,Latin American Restaurant,Breakfast Spot,Lounge,Convenience Store,Dry Cleaner,Dumpling Restaurant
1,"Alderwood, Long Branch",Pizza Place,Convenience Store,Pub,Dance Studio,Sandwich Place,Gas Station,Coffee Shop,Pharmacy,Pool,College Stadium
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Park,Bank,Ice Cream Shop,Community Center,Sandwich Place,Diner,Shopping Mall,Bridal Shop,Pharmacy
3,Bayview Village,Bank,Japanese Restaurant,Grocery Store,Intersection,Café,Skating Rink,Chinese Restaurant,Playground,Dry Cleaner,Dumpling Restaurant
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Restaurant,Sandwich Place,Italian Restaurant,Women's Store,Pub,Cupcake Shop,Frozen Yogurt Shop,Thai Restaurant,Indian Restaurant
...,...,...,...,...,...,...,...,...,...,...,...
97,"Willowdale, Newtonbrook",Park,Coffee Shop,Trail,Donut Shop,Diner,Discount Store,Distribution Center,Dive Bar,Dog Run,Doner Restaurant
98,Woburn,Coffee Shop,Park,Business Service,College Stadium,College Theater,Diner,Discount Store,Distribution Center,Dive Bar,Dog Run
99,Woodbine Heights,Skating Rink,Athletics & Sports,Beer Store,Bus Line,Video Store,Park,Curling Ice,Donut Shop,Discount Store,Distribution Center
100,York Mills West,Park,Pet Store,Intersection,Gym,Bowling Alley,Tennis Court,Convenience Store,Women's Store,Doner Restaurant,Diner


In [31]:
storeddata=pd.ExcelWriter('storeddata.xlsx')
neighborhoods_venues_sorted.to_excel(storeddata)
storeddata.save()

In [32]:
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()  # for plot styling
from matplotlib import style

In [33]:
# Using K-Means to cluster neighborhood into 4 clusters
kclusters = 4
from sklearn.cluster import KMeans
Northyork_grouped_clustering = Northyork_grouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0)
kmeans.fit(Northyork_grouped_clustering)
# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [34]:
# add clustering labels

neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)


In [35]:
northyork_merged = northyork_data

northyork_merged
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
northyork_merged = northyork_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
northyork_merged["Cluster Labels"].astype(int)
northyork_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0,Fast Food Restaurant,Pet Store,Park,Food & Drink Shop,Women's Store,Donut Shop,Diner,Discount Store,Distribution Center,Dive Bar
1,M4A,North York,Victoria Village,43.725882,-79.315572,1,Pizza Place,Hockey Arena,Sporting Goods Shop,Park,Portuguese Restaurant,Coffee Shop,Playground,College Stadium,Donut Shop,Diner
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,1,Clothing Store,Coffee Shop,Vietnamese Restaurant,Fast Food Restaurant,Paper / Office Supplies Store,Boutique,Bowling Alley,Seafood Restaurant,Café,Park
7,M3B,North York,Don Mills North,43.745906,-79.352188,1,Japanese Restaurant,Café,Gym,Paper / Office Supplies Store,Caribbean Restaurant,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center
10,M6B,North York,Glencairn,43.709577,-79.445073,1,Pizza Place,Gas Station,Metro Station,Coffee Shop,Fish Market,Latin American Restaurant,Sandwich Place,Restaurant,Ice Cream Shop,Italian Restaurant


In [49]:
mergeddata=pd.ExcelWriter('merged.xlsx')
northyork_merged.to_excel(mergeddata)
mergeddata.save()

In [36]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude_x, longitude_y], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(northyork_merged['Latitude'], northyork_merged['Longitude'], northyork_merged['Neighborhood'], northyork_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [37]:
map_clusters.save("clustermap.html")

### cluster1 

In [38]:
northyork_merged.loc[northyork_merged['Cluster Labels'] == 0, northyork_merged.columns[[2] + list(range(6, northyork_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Parkwoods,Fast Food Restaurant,Pet Store,Park,Food & Drink Shop,Women's Store,Donut Shop,Diner,Discount Store,Distribution Center,Dive Bar
52,"Willowdale, Newtonbrook",Park,Coffee Shop,Trail,Donut Shop,Diner,Discount Store,Distribution Center,Dive Bar,Dog Run,Doner Restaurant
66,York Mills West,Park,Pet Store,Intersection,Gym,Bowling Alley,Tennis Court,Convenience Store,Women's Store,Doner Restaurant,Diner


In [39]:
cluster1=northyork_merged.loc[northyork_merged['Cluster Labels'] == 0, northyork_merged.columns[[2] + list(range(6, northyork_merged.shape[1]))]]
cluster1data=pd.ExcelWriter('cluster1.xlsx')
cluster1.to_excel(cluster1data)
cluster1data.save()

### cluster2

In [40]:
northyork_merged.loc[northyork_merged['Cluster Labels'] == 1, northyork_merged.columns[[2] + list(range(6, northyork_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Victoria Village,Pizza Place,Hockey Arena,Sporting Goods Shop,Park,Portuguese Restaurant,Coffee Shop,Playground,College Stadium,Donut Shop,Diner
3,"Lawrence Manor, Lawrence Heights",Clothing Store,Coffee Shop,Vietnamese Restaurant,Fast Food Restaurant,Paper / Office Supplies Store,Boutique,Bowling Alley,Seafood Restaurant,Café,Park
7,Don Mills North,Japanese Restaurant,Café,Gym,Paper / Office Supplies Store,Caribbean Restaurant,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Distribution Center
10,Glencairn,Pizza Place,Gas Station,Metro Station,Coffee Shop,Fish Market,Latin American Restaurant,Sandwich Place,Restaurant,Ice Cream Shop,Italian Restaurant
13,Don Mills South,Gym,Restaurant,Intersection,Sporting Goods Shop,Supermarket,Italian Restaurant,Sandwich Place,Asian Restaurant,Art Gallery,Dim Sum Restaurant
27,Hillcrest Village,Pharmacy,Sandwich Place,Fast Food Restaurant,Chinese Restaurant,Bakery,Tennis Court,Restaurant,Dog Run,Dim Sum Restaurant,Diner
28,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Park,Bank,Ice Cream Shop,Community Center,Sandwich Place,Diner,Shopping Mall,Bridal Shop,Pharmacy
33,"Fairview, Henry Farm, Oriole",Clothing Store,Coffee Shop,Fast Food Restaurant,Restaurant,Bakery,Japanese Restaurant,Bank,Baseball Field,Jewelry Store,Park
34,"Northwood Park, York University",Coffee Shop,Japanese Restaurant,Caribbean Restaurant,Massage Studio,Bar,Furniture / Home Store,Fast Food Restaurant,Vietnamese Restaurant,Department Store,Dessert Shop
39,Bayview Village,Bank,Japanese Restaurant,Grocery Store,Intersection,Café,Skating Rink,Chinese Restaurant,Playground,Dry Cleaner,Dumpling Restaurant


In [41]:
cluster2=northyork_merged.loc[northyork_merged['Cluster Labels'] == 1, northyork_merged.columns[[2] + list(range(6, northyork_merged.shape[1]))]]
cluster2data=pd.ExcelWriter('cluster2.xlsx')
cluster2.to_excel(cluster2data)
cluster2data.save()

### cluster3

In [42]:
northyork_merged.loc[northyork_merged['Cluster Labels'] == 2, northyork_merged.columns[[2] + list(range(6, northyork_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


In [43]:
cluster3=northyork_merged.loc[northyork_merged['Cluster Labels'] == 2, northyork_merged.columns[[2] + list(range(6, northyork_merged.shape[1]))]]
cluster3data=pd.ExcelWriter('cluster3.xlsx')
cluster3.to_excel(cluster2data)
cluster3data.save()

### cluster4 

In [44]:
northyork_merged.loc[northyork_merged['Cluster Labels'] == 3, northyork_merged.columns[[2] + list(range(6, northyork_merged.shape[1]))]]

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
45,"York Mills, Silver Hills",Cafeteria,Women's Store,Dessert Shop,Diner,Discount Store,Distribution Center,Dive Bar,Dog Run,Doner Restaurant,Donut Shop


In [46]:
cluster4=northyork_merged.loc[northyork_merged['Cluster Labels'] == 3, northyork_merged.columns[[2] + list(range(6, northyork_merged.shape[1]))]]
cluster4data=pd.ExcelWriter('cluster4.xlsx')
cluster4.to_excel(cluster4data)
cluster4data.save()