In [2]:
from bs4 import BeautifulSoup
import requests
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as colors
import json 
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
from sklearn.cluster import KMeans

print('Libraries imported.')

Libraries imported.


## Scraping data from wikipedia and form a table showing Postcode, Borough and Neighborhoods of Toronto

In [4]:
url= "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wikiPage=requests.get(url).text
soup = BeautifulSoup(wikiPage, 'lxml');
table=soup.table.find_all("td")
df = pd.DataFrame([],columns=["Postcode","Borough","Neighborhood"])

for i in np.arange(0,len(table)-3,3):
    nb= table[i+2].text[:table[i+2].text.index("\n")]
    
    df=df.append({"Postcode": table[i].text, "Borough": table[i+1].text, "Neighborhood":nb}, ignore_index=True)

df1=df.groupby(["Postcode","Borough" ], group_keys=False)['Neighborhood'].apply(", ".join).reset_index()
df1=df1.loc[df1['Borough']!="Not assigned"]
df1.head()

Unnamed: 0,Postcode,Borough,Neighborhood
1,M1B,Scarborough,"Rouge, Malvern"
2,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
3,M1E,Scarborough,"Guildwood, Morningside, West Hill"
4,M1G,Scarborough,Woburn
5,M1H,Scarborough,Cedarbrae


In [5]:
df1.shape

(103, 3)

## Download Geospatial_data and get the Latitude and Longitude. 


In [3]:
import wget
url="https://cocl.us/Geospatial_data";
geoFile=wget.download(url)
geoDf=pd.read_csv(geoFile);
geoDf.head()

  0% [                                                                                ]    0 / 2891100% [................................................................................] 2891 / 2891

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Merge to get Latitude and Longitude for each Postal Code

In [6]:
locationDf=pd.DataFrame([], columns= ['Latitude','Longitude']);

locationDf.head()



for postal_code in df1['Postcode'].values:
    locationDf=locationDf.append(geoDf.loc[geoDf["Postal Code"]==postal_code, ['Latitude','Longitude']])


df1["Latitude"]=locationDf["Latitude"]
df1["Longitude"]=locationDf["Longitude"]

df1.head(11)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
1,M1B,Scarborough,"Rouge, Malvern",43.784535,-79.160497
2,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.763573,-79.188711
3,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.770992,-79.216917
4,M1G,Scarborough,Woburn,43.773136,-79.239476
5,M1H,Scarborough,Cedarbrae,43.744734,-79.239476
6,M1J,Scarborough,Scarborough Village,43.727929,-79.262029
7,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.711112,-79.284577
8,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.716316,-79.239476
9,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.692657,-79.264848
10,M1N,Scarborough,"Birch Cliff, Cliffside West",43.75741,-79.273304


## Find out the rows in which Borough column contains the word of "Toronto"

In [7]:
toronto_con=[False if borough.find("Toronto")==-1 else True for borough in df1['Borough'].values  ]
df1= df1[toronto_con].reset_index()

## Find out the Latitude and Longitutde of Toronto and use it as the center of the folium map

In [8]:
from geopy.geocoders import Nominatim
import folium

address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, Canada {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto, Canada 43.653963, -79.387207.


## Create a map of Toronto and its neighborhoods

In [9]:
# create map of Toronto using latitude and longitude values

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
# add markers to map
for lat, lng, borough, neighborhood in zip(df1['Latitude'], df1['Longitude'], df1['Borough'], df1['Neighborhood']):
    if np.isfinite(lat):
        label = '{}, {}'.format(neighborhood, borough)
        label = folium.Popup(label, parse_html=True)
        
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(map_toronto)  

    
map_toronto

#### Define Foursquare Credentials and Version

In [15]:
CLIENT_ID = 'QHBHP1GPCCVISNF2EUARCMFCWCUYUEGMUNXKDCOJV12XYAGM' # your Foursquare ID
CLIENT_SECRET = 'ONCR20YJQYUJPVXEDMBFY1EXDTOHVC5OSAJQF1HD5ADK2V2Z' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: QHBHP1GPCCVISNF2EUARCMFCWCUYUEGMUNXKDCOJV12XYAGM
CLIENT_SECRET:ONCR20YJQYUJPVXEDMBFY1EXDTOHVC5OSAJQF1HD5ADK2V2Z


#### Let's explore the first neighborhood in our dataframe.

In [16]:
df1.loc[0, 'Neighborhood']

'The Beaches'

In [17]:
neighborhood_latitude = df1.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df1.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df1.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of The Beaches are 43.7116948, -79.41693559999999.


In [18]:


LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=QHBHP1GPCCVISNF2EUARCMFCWCUYUEGMUNXKDCOJV12XYAGM&client_secret=ONCR20YJQYUJPVXEDMBFY1EXDTOHVC5OSAJQF1HD5ADK2V2Z&v=20180605&ll=43.7116948,-79.41693559999999&radius=500&limit=100'

In [19]:
results = requests.get(url).json()


In [20]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [21]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# nearby_venues

In [22]:
# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

nearby_venues

Unnamed: 0,venue.name,venue.categories,venue.location.lat,venue.location.lng
0,Rosalind's Garden Oasis,"[{'id': '4bf58dd8d48988d15a941735', 'name': 'G...",43.712189,-79.411978


In [23]:
# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Rosalind's Garden Oasis,Garden,43.712189,-79.411978


In [24]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

1 venues were returned by Foursquare.


In [25]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        try:
            results = requests.get(url).json()["response"]['groups'][0]['items']
        except:
            pass
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

## Find out venues for all neighborhoods

In [26]:

toronto_venues = getNearbyVenues(names=df1['Neighborhood'],
                                   latitudes=df1['Latitude'],
                                   longitudes=df1['Longitude']
                                  )


The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North, Forest Hill West
The Annex, North Midtown, Yorkville
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The Junction Sout

In [27]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",42,42,42,42,42,42
Berczy Park,13,13,13,13,13,13
"Brockton, Exhibition Place, Parkdale Village",0,0,1,1,1,1
Business Reply Mail Processing Centre 969 Eastern,0,0,1,1,1,1
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",8,8,8,8,8,8
"Cabbagetown, St. James Town",55,55,55,55,55,55
Central Bay Street,37,37,37,37,37,37
Christie,0,0,1,1,1,1
Church and Wellesley,23,23,23,23,23,23
"Commerce Court, Victoria Hotel",11,11,11,11,11,11


In [28]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 193 uniques categories.


In [30]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theater,Thrift / Vintage Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"The Danforth West, Riverdale",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Theater,Thrift / Vintage Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.02381,0.0,0.0,0.0,0.02381,0.0,0.0,0.02381,0.0,0.02381
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.018182,0.0,0.0,0.0,0.018182,0.036364,0.036364,0.0,0.0,0.018182
6,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.027027,0.0,0.0,0.0,0.0,0.027027
7,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
                   venue  freq
0            Coffee Shop  0.21
1                   Park  0.05
2  General Entertainment  0.02
3    Japanese Restaurant  0.02
4                   Café  0.02


----Berczy Park----
                         venue  freq
0                    Gift Shop  0.15
1                Movie Theater  0.08
2  Eastern European Restaurant  0.08
3                      Dog Run  0.08
4                          Bar  0.08


----Brockton, Exhibition Place, Parkdale Village----
                venue  freq
0      Baseball Field   1.0
1   Accessories Store   0.0
2       Metro Station   0.0
3  Light Rail Station   0.0
4        Liquor Store   0.0


----Business Reply Mail Processing Centre 969 Eastern----
                venue  freq
0      Baseball Field   1.0
1   Accessories Store   0.0
2       Metro Station   0.0
3  Light Rail Station   0.0
4        Liquor Store   0.0


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Ra

In [33]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [34]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Park,Yoga Studio,Distribution Center,Portuguese Restaurant,Nightclub,Music Venue,Mexican Restaurant,Juice Bar,Japanese Restaurant
1,Berczy Park,Gift Shop,Eastern European Restaurant,Cuban Restaurant,Coffee Shop,Italian Restaurant,Dog Run,Restaurant,Bar,Breakfast Spot,Dessert Shop
2,"Brockton, Exhibition Place, Parkdale Village",Baseball Field,Yoga Studio,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
3,Business Reply Mail Processing Centre 969 Eastern,Baseball Field,Yoga Studio,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Coffee Shop,Shopping Plaza,Convenience Store,Cosmetics Shop,Café,Pizza Place,Liquor Store,Beer Store,Dessert Shop,Doner Restaurant


## Clustering the neighborhoods based on venues

In [35]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [36]:


toronto_merged = df1

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
neighborhoods_venues_sorted.set_index(['Neighborhood'], inplace=True);
neighborhoods_venues_sorted.head()




Unnamed: 0_level_0,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"Adelaide, King, Richmond",0,Coffee Shop,Park,Yoga Studio,Distribution Center,Portuguese Restaurant,Nightclub,Music Venue,Mexican Restaurant,Juice Bar,Japanese Restaurant
Berczy Park,0,Gift Shop,Eastern European Restaurant,Cuban Restaurant,Coffee Shop,Italian Restaurant,Dog Run,Restaurant,Bar,Breakfast Spot,Dessert Shop
"Brockton, Exhibition Place, Parkdale Village",1,Baseball Field,Yoga Studio,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
Business Reply Mail Processing Centre 969 Eastern,1,Baseball Field,Yoga Studio,Eastern European Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",0,Coffee Shop,Shopping Plaza,Convenience Store,Cosmetics Shop,Café,Pizza Place,Liquor Store,Beer Store,Dessert Shop,Doner Restaurant


In [37]:
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted, on='Neighborhood')




In [38]:
toronto_merged=toronto_merged.dropna()

In [39]:
toronto_merged.head()

Unnamed: 0,index,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,63,M4E,East Toronto,The Beaches,43.711695,-79.416936,3.0,Garden,Yoga Studio,Cupcake Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop
1,67,M4K,East Toronto,"The Danforth West, Riverdale",43.653206,-79.400049,0.0,Bar,Vietnamese Restaurant,Café,Bakery,Coffee Shop,Chinese Restaurant,Vegetarian / Vegan Restaurant,Mexican Restaurant,Dessert Shop,Dumpling Restaurant
2,68,M4L,East Toronto,"The Beaches West, India Bazaar",43.628947,-79.39442,0.0,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Coffee Shop,Boutique,Boat or Ferry,Bar,Harbor / Marina,Rental Car Location
3,69,M4M,East Toronto,Studio District,43.646435,-79.374846,0.0,Coffee Shop,Café,Restaurant,Beer Bar,Seafood Restaurant,Hotel,Cocktail Bar,Japanese Restaurant,Lounge,Farmers Market
4,70,M4N,Central Toronto,Lawrence Park,43.648429,-79.38228,0.0,Coffee Shop,Café,Restaurant,Asian Restaurant,American Restaurant,Gastropub,Seafood Restaurant,Japanese Restaurant,Hotel,Gym


In [40]:
toronto_merged=toronto_merged[np.isfinite(toronto_merged['Cluster Labels'])]
toronto_merged['Cluster Labels']=toronto_merged['Cluster Labels'].astype('int')

In [41]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [42]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]
toronto_merged.head()

Unnamed: 0,Postcode,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,M4K,-79.400049,0,Bar,Vietnamese Restaurant,Café,Bakery,Coffee Shop,Chinese Restaurant,Vegetarian / Vegan Restaurant,Mexican Restaurant,Dessert Shop,Dumpling Restaurant
2,M4L,-79.39442,0,Airport Service,Airport Lounge,Airport Terminal,Sculpture Garden,Coffee Shop,Boutique,Boat or Ferry,Bar,Harbor / Marina,Rental Car Location
3,M4M,-79.374846,0,Coffee Shop,Café,Restaurant,Beer Bar,Seafood Restaurant,Hotel,Cocktail Bar,Japanese Restaurant,Lounge,Farmers Market
4,M4N,-79.38228,0,Coffee Shop,Café,Restaurant,Asian Restaurant,American Restaurant,Gastropub,Seafood Restaurant,Japanese Restaurant,Hotel,Gym
5,M4P,-79.464763,0,Clothing Store,Furniture / Home Store,Boutique,Women's Store,Gift Shop,Event Space,Miscellaneous Shop,Coffee Shop,Accessories Store,Vietnamese Restaurant
6,M4R,-79.445073,0,Japanese Restaurant,Pizza Place,Metro Station,Pub,Cupcake Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
7,M4S,-79.428191,0,Hockey Arena,Trail,Field,Cupcake Shop,Doner Restaurant,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
9,M4V,-79.422564,0,Grocery Store,Café,Park,Gas Station,Athletics & Sports,Nightclub,Candy Store,Diner,Restaurant,Coffee Shop
10,M4W,-79.442259,0,Bakery,Pharmacy,Supermarket,Bar,Café,Gym / Fitness Center,Park,Grocery Store,Pool,Portuguese Restaurant
11,M4X,-79.41975,0,Bar,Coffee Shop,Asian Restaurant,Restaurant,Pizza Place,Wine Bar,Vietnamese Restaurant,Café,Men's Store,Bakery
