<h1> Coursera Capstone Notebook </h1>

In [1]:
### import libraries
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup as bs
import requests
from sklearn.cluster import KMeans

In [2]:
!pip install folium
### start requests

[33mYou are using pip version 9.0.1, however version 19.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [3]:
import folium

In [4]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url)
### check code output
response.status_code

200

In [5]:
### convert output into soup object
soup = bs(response.text, 'html.parser')
### scan object for table class
table = soup.find('table',class_ = 'wikitable sortable' )

In [6]:
### use list comprehension to split up table text
table = [i.strip('\n') for i in table.text.split('\n') if i !='']
### turn table from a list into an array to reshape
table = np.array(table)
table

array(['Postcode', 'Borough', 'Neighbourhood', 'M1A', 'Not assigned',
       'Not assigned', 'M2A', 'Not assigned', 'Not assigned', 'M3A',
       'North York', 'Parkwoods', 'M4A', 'North York', 'Victoria Village',
       'M5A', 'Downtown Toronto', 'Harbourfront', 'M5A',
       'Downtown Toronto', 'Regent Park', 'M6A', 'North York',
       'Lawrence Heights', 'M6A', 'North York', 'Lawrence Manor', 'M7A',
       "Queen's Park", 'Not assigned', 'M8A', 'Not assigned',
       'Not assigned', 'M9A', 'Etobicoke', 'Islington Avenue', 'M1B',
       'Scarborough', 'Rouge', 'M1B', 'Scarborough', 'Malvern', 'M2B',
       'Not assigned', 'Not assigned', 'M3B', 'North York',
       'Don Mills North', 'M4B', 'East York', 'Woodbine Gardens', 'M4B',
       'East York', 'Parkview Hill', 'M5B', 'Downtown Toronto', 'Ryerson',
       'M5B', 'Downtown Toronto', 'Garden District', 'M6B', 'North York',
       'Glencairn', 'M7B', 'Not assigned', 'Not assigned', 'M8B',
       'Not assigned', 'Not assigned', 'M9

In [7]:
### currnetly table is one dimenson
table.shape

(867,)

In [8]:
### input reshaped table into dataframe
df = pd.DataFrame(table.reshape(289,3),columns=['PostCode','Borough','Neighborhood'])
df.drop(df.index[0],inplace=True)

In [9]:
df.reset_index(inplace=True)

In [10]:
df.drop('index',axis=1, inplace=True)

In [11]:
### Check DataFrame
df.head(10)

Unnamed: 0,PostCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


In [12]:
df.columns

Index(['PostCode', 'Borough', 'Neighborhood'], dtype='object')

In [13]:
### Boolean index dataframe to return our criteria:: Only PostCodes with Boroughs
df = df.loc[((df['Borough'] != 'Not assigned')&(df['Neighborhood'] == 'Not assigned'))|
            ((df['Borough'] != 'Not assigned'))]

In [14]:
df.head(10)

Unnamed: 0,PostCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [15]:
### Check for Not assigned Neighborhood.
df.loc[df['Neighborhood'] == 'Not assigned']

Unnamed: 0,PostCode,Borough,Neighborhood
8,M7A,Queen's Park,Not assigned


In [16]:
### If Neighbhood is Not Assigned then set to Borough
df.loc[df['Neighborhood'] == 'Not assigned','Neighborhood'] = df['Borough']
df.head(10)
        

Unnamed: 0,PostCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [17]:
### exercise groupby method to aggregae by PostCode
df2 = df.groupby('PostCode').apply(lambda x: x+str(', ')).groupby('PostCode').sum()
df2.head()

Unnamed: 0_level_0,Borough,Neighborhood
PostCode,Unnamed: 1_level_1,Unnamed: 2_level_1
"M1B,","Scarborough, Scarborough,","Rouge, Malvern,"
"M1C,","Scarborough, Scarborough, Scarborough,","Highland Creek, Rouge Hill, Port Union,"
"M1E,","Scarborough, Scarborough, Scarborough,","Guildwood, Morningside, West Hill,"
"M1G,","Scarborough,","Woburn,"
"M1H,","Scarborough,","Cedarbrae,"


In [18]:
### Clean up Fields
def splitter(x):
    return x.split(',')[0]

def stripper(x):
    return x.strip(', ')

df2['Borough'] = df2['Borough'].apply(splitter)
df2['Neighborhood'] = df2['Neighborhood'].apply(stripper)
df2.reset_index(inplace=True)
df2['PostCode'] = df2['PostCode'].apply(stripper)

In [19]:
### Now lets check our DataFrame
df2

Unnamed: 0,PostCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [20]:
### Assignment request for DataFrame shape.
df2.shape

(103, 3)

In [21]:
### attempt to scrape lat and long data from geocoder
!pip install geocoder

[33mYou are using pip version 9.0.1, however version 19.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [22]:
import geocoder
lats = []
for i in df.PostCode[:3]:
    lats.append(geocoder.google(i+'Toronto, Ontario'))

In [23]:
### at the time of the assignment, Google returned an empty list
lats

[<[REQUEST_DENIED] Google - Geocode [empty]>,
 <[REQUEST_DENIED] Google - Geocode [empty]>,
 <[REQUEST_DENIED] Google - Geocode [empty]>]

In [24]:
### Using the provided csv file.
df3 = pd.read_csv('Geospatial_Coordinates.csv')

In [25]:
df.shape

(211, 3)

In [26]:
df3.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [27]:
### merge dataframes by PostCode/Postal Code, how is an implicit 'inner'
df4 = pd.merge(df2,df3, left_on='PostCode',
                        right_on='Postal Code')

df4

Unnamed: 0,PostCode,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,M1J,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",M1K,43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",M1L,43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",M1M,43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",M1N,43.692657,-79.264848


In [28]:
### quick boolean reconsile check to see if all the Postal Codes Match
[i for i in df4['PostCode'] == df4['Postal Code'] if i == False]

[]

In [29]:
### Final DataFrame for Assignemnt
df4[['Postal Code','Borough','Neighborhood','Latitude','Longitude']]

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [30]:
### Repeat Analysis from New York Assignment on Toronto Neighborhoods.

In [31]:
##!conda install -c conda-forge geopy --yes

In [32]:
from geopy.geocoders import Nominatim

In [33]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent='my-application')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinate of Toronto is {} {}'.format(longitude,latitude))

The geographical coordinate of Toronto is -79.387207 43.653963


In [34]:
### Map of Toronto Neighborhoods
map_toronto = folium.Map(location=[latitude, longitude],zoom_start=10)

for lat,lng,label in zip(df4['Latitude'],df4['Longitude'],df4['Neighborhood']):
    label = folium.Popup(label,parse_html=True)
    folium.CircleMarker([lat,lng],
                       radius=5,
                       popup=label,
                       color='blue',
                       fill=True,
                       fill_color='#3186cc',
                       fill_opacity=0.7,
                       parse_html=False).add_to(map_toronto)
    
map_toronto

In [35]:
### Folium Variables for url request; Extended radius to 750.
CLIENT_ID = 'XQR4HET2P2HOKVY2DUHNJ1SVSTEZAZRBUCTOWCZIDFWT2QBZ'
CLIENT_SECRET = 'VD4NUAAJYD041TPQTW3CQRZAZRMTGAAG422GJGKOTRJQFNTY'
VERSION = '20180605'
LIMIT = 200
radius = 900
search_query = 'trending'

In [36]:
### Function to build venue dataframe, added try,except block to skip empty venue results.
def getvenues(name, latitude, longitude):
    venues_list = []

    for name, lat, lng in zip(name, latitude, longitude):
                print(name)

                url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, lat, lng, VERSION, radius, LIMIT)
                try:
                    results = requests.get(url).json()['response']['groups'][0]['items']
                except:
                    print('Results Failure')
                    continue
                venues_list.append(
                    [(name,
                     lat,
                     lng,
                     v['venue']['name'],
                     v['venue']['location']['lat'],
                     v['venue']['location']['lng'],
                     v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([i for m in venues_list for i in m])
    nearby_venues.columns = ['Neighborhood','Neighborhood Latitude','Neighborhood Longitude','Venue','Venue Latitude','Venue Longitude','Venue Category']
    return(nearby_venues)

In [37]:
torontodf = getvenues(df4['Neighborhood'], df4['Latitude'],df4['Longitude'])

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, 

In [38]:
torontodf

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Images Salon & Spa,43.802283,-79.198565,Spa
1,"Rouge, Malvern",43.806686,-79.194353,Staples Morningside,43.800285,-79.196607,Paper / Office Supplies Store
2,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.802008,-79.198080,Fast Food Restaurant
3,"Rouge, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
4,"Rouge, Malvern",43.806686,-79.194353,Harvey's,43.800106,-79.198258,Fast Food Restaurant
5,"Rouge, Malvern",43.806686,-79.194353,Tim Hortons,43.802000,-79.198169,Coffee Shop
6,"Rouge, Malvern",43.806686,-79.194353,Lee Valley,43.803161,-79.199681,Hobby Shop
7,"Rouge, Malvern",43.806686,-79.194353,Mr Jerk,43.801262,-79.199758,African Restaurant
8,"Rouge, Malvern",43.806686,-79.194353,FIX AUTO SCARBOROUGH EAST ROUGE,43.799775,-79.192305,Auto Workshop
9,"Rouge, Malvern",43.806686,-79.194353,Charley's Exotic Cuisine,43.800982,-79.200233,Chinese Restaurant


In [39]:
len(torontodf['Venue Category'].unique())

334

In [40]:
torontodf_onehot = pd.get_dummies(torontodf[['Venue Category']], prefix='',prefix_sep='')
torontodf_onehot['Neighborhood'] = torontodf['Neighborhood']

In [41]:
fixed_columns = [torontodf_onehot.columns[list(torontodf_onehot.columns).index('Neighborhood')]] + list(torontodf_onehot.columns[:list(torontodf_onehot.columns).index('Neighborhood'):])
torontodf_onehot = torontodf_onehot[fixed_columns]
torontodf_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,...,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Motorcycle Shop,Movie Theater,Museum,Music School,Music Store,Music Venue,Nail Salon
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [42]:
toronto_grouped = torontodf_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,...,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Motorcycle Shop,Movie Theater,Museum,Music School,Music Store,Music Venue,Nail Salon
0,"Adelaide, King, Richmond",0.00,0.00,0.000000,0.000000,0.000000,0.00,0.0,0.0,0.0,...,0.00000,0.00,0.01,0.000000,0.020000,0.00,0.00,0.000000,0.000000,0.000000
1,Agincourt,0.00,0.00,0.000000,0.000000,0.000000,0.00,0.0,0.0,0.0,...,0.00000,0.00,0.00,0.029412,0.000000,0.00,0.00,0.000000,0.000000,0.000000
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.00,0.00,0.000000,0.000000,0.000000,0.00,0.0,0.0,0.0,...,0.00000,0.00,0.00,0.000000,0.000000,0.00,0.00,0.000000,0.000000,0.000000
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.00,0.00,0.000000,0.000000,0.000000,0.00,0.0,0.0,0.0,...,0.00000,0.00,0.00,0.000000,0.000000,0.00,0.00,0.000000,0.000000,0.000000
4,"Alderwood, Long Branch",0.00,0.00,0.000000,0.000000,0.000000,0.00,0.0,0.0,0.0,...,0.00000,0.00,0.00,0.000000,0.000000,0.00,0.00,0.000000,0.000000,0.000000
5,"Bathurst Manor, Downsview North, Wilson Heights",0.00,0.00,0.000000,0.000000,0.000000,0.00,0.0,0.0,0.0,...,0.00000,0.00,0.00,0.000000,0.000000,0.00,0.00,0.000000,0.000000,0.000000
6,Bayview Village,0.00,0.00,0.000000,0.000000,0.000000,0.00,0.0,0.0,0.0,...,0.00000,0.00,0.00,0.000000,0.000000,0.00,0.00,0.000000,0.000000,0.000000
7,"Bedford Park, Lawrence Manor East",0.00,0.00,0.000000,0.000000,0.000000,0.00,0.0,0.0,0.0,...,0.00000,0.00,0.00,0.000000,0.000000,0.00,0.00,0.000000,0.000000,0.000000
8,Berczy Park,0.00,0.00,0.000000,0.000000,0.000000,0.00,0.0,0.0,0.0,...,0.00000,0.00,0.00,0.000000,0.010000,0.01,0.00,0.000000,0.000000,0.000000
9,"Birch Cliff, Cliffside West",0.00,0.00,0.000000,0.000000,0.000000,0.00,0.0,0.0,0.0,...,0.00000,0.00,0.00,0.000000,0.000000,0.00,0.00,0.000000,0.000000,0.000000


In [43]:
toronto_grouped.shape

(102, 222)

In [44]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [45]:
num_top_venues = 10

indicators = ['st','nd','rd']

columns = ['Neighborhood']
for i in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(i+1, indicators[i]))
    except:
        columns.append('{}th Most Common Venue'.format(i+1))
        
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for i in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[i,1:] = return_most_common_venues(toronto_grouped.iloc[i,:],num_top_venues)
    
    
neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Hotel,Café,American Restaurant,Cosmetics Shop,Bar,Asian Restaurant,Clothing Store,Concert Hall,Furniture / Home Store
1,Agincourt,Chinese Restaurant,Bakery,Bubble Tea Shop,Clothing Store,Malay Restaurant,Caribbean Restaurant,Mediterranean Restaurant,Hong Kong Restaurant,Breakfast Spot,American Restaurant
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Chinese Restaurant,Korean Restaurant,Hardware Store,Gym,Bakery,Malay Restaurant,Coffee Shop,Fast Food Restaurant,Event Space,Caribbean Restaurant
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Coffee Shop,Fried Chicken Joint,Fast Food Restaurant,Beer Store,Construction & Landscaping,Hardware Store,Bus Line,Clothing Store,Climbing Gym
4,"Alderwood, Long Branch",Discount Store,Donut Shop,Dance Studio,Gas Station,Convenience Store,Gym,Coffee Shop,Cajun / Creole Restaurant,Camera Store,Cocktail Bar


In [46]:
### create cluster of neighborhoods
kclusters = 4

toronto_grouped_clustering = toronto_grouped.iloc[:,1:]

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)
### cluster labels
kmeans.labels_

array([0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0,
       0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 3, 1, 0, 1, 1,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1,
       0, 0, 0, 0, 0, 3, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 2, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 3, 1, 0, 1], dtype=int32)

In [47]:
### Organize and assign labels to final dataframe for folium map.
df4 = df4.sort_values(ascending=True, by='Neighborhood').reset_index().drop('index',axis=1)
df4 = df4[df4['Neighborhood'].isin(toronto_grouped['Neighborhood'])]
df4['labels'] = kmeans.labels_
df4

Unnamed: 0,PostCode,Borough,Neighborhood,Postal Code,Latitude,Longitude,labels
0,M5H,Downtown Toronto,"Adelaide, King, Richmond",M5H,43.650571,-79.384568,0
1,M1S,Scarborough,Agincourt,M1S,43.794200,-79.262029,1
2,M1V,Scarborough,"Agincourt North, L'Amoreaux East, Milliken, St...",M1V,43.815252,-79.284577,1
3,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",M9V,43.739416,-79.588437,1
4,M8W,Etobicoke,"Alderwood, Long Branch",M8W,43.602414,-79.543484,1
5,M3H,North York,"Bathurst Manor, Downsview North, Wilson Heights",M3H,43.754328,-79.442259,1
6,M2K,North York,Bayview Village,M2K,43.786947,-79.385975,1
7,M5M,North York,"Bedford Park, Lawrence Manor East",M5M,43.733283,-79.419750,1
8,M5E,Downtown Toronto,Berczy Park,M5E,43.644771,-79.373306,0
9,M1N,Scarborough,"Birch Cliff, Cliffside West",M1N,43.692657,-79.264848,0


In [48]:
### make final map of clusters
map_clusters = folium.Map(location=[latitude,longitude], zoom_start=10)

import matplotlib.cm as cm
import matplotlib.colors as colors

x = np.arange(kclusters)
ys = [i + x +(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0,1,len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []

for lat, lng, poi, cluster in zip(df4['Latitude'],df4['Longitude'],df4['Neighborhood'],df4['labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster),parse_html=True)
    folium.CircleMarker([lat,lng],radius=5,popup=label,color=rainbow[cluster-1],fill=True).add_to(map_clusters)



In [49]:
### Final Analysis of Clusterd Neighborhoods
map_clusters