# Segmenting and Clustering Neighborhoods in Toronto - week 3 - Peer-graded

In [1]:
import pandas as pd
import numpy as np

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
data = pd.read_html(url,header=0) #Get the table and transform in pandas dataframe

In [4]:
df = data[0] #The position 0 contain the data of post code

In [5]:
df.head(15)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
8,M8A,Not assigned,Not assigned
9,M9A,Queen's Park,Not assigned


In [6]:
df = df[df.Borough != 'Not assigned'] #Exclude Borough that has the value "Not assigned"

In [7]:
df.head(15)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Not assigned
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


In [8]:
df[df.Neighbourhood == 'Not assigned'].size #Number of Neighbourhood == 'Not assigned'

3

In [9]:
df.loc[df.Neighbourhood == 'Not assigned', 'Neighbourhood'] = df['Borough']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [10]:
df.head(15)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Queen's Park
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


In [11]:
postcodes = df.Postcode.unique()

In [12]:
postcodes

array(['M3A', 'M4A', 'M5A', 'M6A', 'M7A', 'M9A', 'M1B', 'M3B', 'M4B',
       'M5B', 'M6B', 'M9B', 'M1C', 'M3C', 'M4C', 'M5C', 'M6C', 'M9C',
       'M1E', 'M4E', 'M5E', 'M6E', 'M1G', 'M4G', 'M5G', 'M6G', 'M1H',
       'M2H', 'M3H', 'M4H', 'M5H', 'M6H', 'M1J', 'M2J', 'M3J', 'M4J',
       'M5J', 'M6J', 'M1K', 'M2K', 'M3K', 'M4K', 'M5K', 'M6K', 'M1L',
       'M2L', 'M3L', 'M4L', 'M5L', 'M6L', 'M9L', 'M1M', 'M2M', 'M3M',
       'M4M', 'M5M', 'M6M', 'M9M', 'M1N', 'M2N', 'M3N', 'M4N', 'M5N',
       'M6N', 'M9N', 'M1P', 'M2P', 'M4P', 'M5P', 'M6P', 'M9P', 'M1R',
       'M2R', 'M4R', 'M5R', 'M6R', 'M7R', 'M9R', 'M1S', 'M4S', 'M5S',
       'M6S', 'M1T', 'M4T', 'M5T', 'M1V', 'M4V', 'M5V', 'M8V', 'M9V',
       'M1W', 'M4W', 'M5W', 'M8W', 'M9W', 'M1X', 'M4X', 'M5X', 'M8X',
       'M4Y', 'M7Y', 'M8Y', 'M8Z'], dtype=object)

In [13]:
neighs = []
boroughs = []
for pc in postcodes:
    neigh = ', '.join(np.array(df['Neighbourhood'][df.Postcode == pc]))
    boroughs.append(np.array(df['Borough'][df.Postcode == pc])[0])
    neighs.append(neigh)

New data frame wih the Neighbourhood combined:

In [14]:
df_new = pd.DataFrame({'Postalcode':postcodes, 'Borough':boroughs, 'Neighbourhood':neighs}) #New data frame wih the Neighbourhood combined

In [15]:
df_new.head(10)

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park
5,M9A,Queen's Park,Queen's Park
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [16]:
df_new.shape

(103, 3)

### Coordinates:

In [17]:
data = df_new.copy()
data.head(5)

Unnamed: 0,Postalcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park


In [18]:
url_coord = 'http://cocl.us/Geospatial_data'

In [19]:
df_coord = pd.read_csv(url_coord)

In [20]:
df_coord.head(5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [21]:
data = data.join(df_coord.set_index('Postal Code'), on='Postalcode')

In [22]:
data

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.654260,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
5,M9A,Queen's Park,Queen's Park,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


In [23]:
msk = []
for i in data.Borough:
    msk.append('Toronto' in i)

In [24]:
toronto_data = data[msk]  #Selection only Toronto Borough
toronto_data.head()

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [76]:
toronto_data

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
4,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,M5H,Downtown Toronto,"Adelaide, King, Richmond",43.650571,-79.384568
31,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259


In [79]:
toronto_data.shape

(39, 5)

### Woring with Toronto Data

In [26]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner


# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    folium-0.5.0               |             py_0          45 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    altair-4.0.1               |             py_0         575 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.0 MB

The following NEW packages will be 

In [27]:
CLIENT_ID = 'NZK3QRRG3IUTPSX4SVZNE02Z22VDSFRCJNWY4X3PLMSU3N5Y' # your Foursquare ID
CLIENT_SECRET = 'NCPPPWELYYZLI0FPHRMNVUIQQH1SZDZ0N5JZ4F5NGKYQYQBL' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: NZK3QRRG3IUTPSX4SVZNE02Z22VDSFRCJNWY4X3PLMSU3N5Y
CLIENT_SECRET:NCPPPWELYYZLI0FPHRMNVUIQQH1SZDZ0N5JZ4F5NGKYQYQBL


In [28]:
toronto_loc = {'latitude': 43.6532,'longitude': -79.3832}
latitude = toronto_loc['latitude']
longitude = toronto_loc['longitude']
print(latitude, longitude)

43.6532 -79.3832


In [29]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=12, height='60%', width='50%') # generate map centred around Toronto City and in blue are each postcode center


# add Toronto Center as a red circle mark
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    popup='Toronto',
    fill=True,
    color='red',
    fill_color='red',
    fill_opacity=0.6
    ).add_to(venues_map)

# add Boroughs
for lat, lng, label in zip(toronto_data.Latitude, toronto_data.Longitude, toronto_data.Neighbourhood):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        #popup=label,
        fill=True,
        color='blue',
        fill_color='blue',
        fill_opacity=0.6
        ).add_to(venues_map)

    

# display map
venues_map

In [30]:
radius = 50000
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?client_id=NZK3QRRG3IUTPSX4SVZNE02Z22VDSFRCJNWY4X3PLMSU3N5Y&client_secret=NCPPPWELYYZLI0FPHRMNVUIQQH1SZDZ0N5JZ4F5NGKYQYQBL&ll=43.6532,-79.3832&v=20180604&radius=50000&limit=100'

In [31]:
import requests

In [32]:
results = requests.get(url).json()
'There are {} around Toronto Centered.'.format(len(results['response']['groups'][0]['items']))

'There are 100 around Toronto Centered.'

In [33]:
items = results['response']['groups'][0]['items']
items[0]

{'reasons': {'count': 0,
  'items': [{'summary': 'This spot is popular',
    'type': 'general',
    'reasonName': 'globalInteractionReason'}]},
 'venue': {'id': '5227bb01498e17bf485e6202',
  'name': 'Downtown Toronto',
  'location': {'lat': 43.65323167517444,
   'lng': -79.38529600606677,
   'labeledLatLngs': [{'label': 'display',
     'lat': 43.65323167517444,
     'lng': -79.38529600606677}],
   'distance': 168,
   'cc': 'CA',
   'city': 'Toronto',
   'state': 'ON',
   'country': 'Canada',
   'formattedAddress': ['Toronto ON', 'Canada']},
  'categories': [{'id': '4f2a25ac4b909258e854f55f',
    'name': 'Neighborhood',
    'pluralName': 'Neighborhoods',
    'shortName': 'Neighborhood',
    'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/neighborhood_',
     'suffix': '.png'},
    'primary': True}],
  'photos': {'count': 0, 'groups': []}},
 'referralId': 'e-0-5227bb01498e17bf485e6202-0'}

In [34]:
dataframe = json_normalize(items) # flatten JSON

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter columns
filtered_columns = ['venue.name', 'venue.categories'] + [col for col in dataframe.columns if col.startswith('venue.location.')] + ['venue.id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# filter the category for each row
dataframe_filtered['venue.categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean columns
dataframe_filtered.columns = [col.split('.')[-1] for col in dataframe_filtered.columns]

dataframe_filtered.head(10)

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,neighborhood,postalCode,state,id
0,Downtown Toronto,Neighborhood,,CA,Toronto,Canada,,168,"[Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65323167517444...",43.653232,-79.385296,,,ON,5227bb01498e17bf485e6202
1,Pai,Thai Restaurant,18 Duncan St,CA,Toronto,Canada,Adelaide and Duncan,729,"[18 Duncan St (Adelaide and Duncan), Toronto O...","[{'label': 'display', 'lat': 43.64792310735613...",43.647923,-79.388579,Entertainment District,M5H 3G6,ON,529612de11d2ab526191ccc9
2,Byblos Toronto,Mediterranean Restaurant,11 Duncan Street,CA,Toronto,Canada,,748,"[11 Duncan Street, Toronto ON M5V 3M2, Canada]","[{'label': 'display', 'lat': 43.64761505417176...",43.647615,-79.388381,,M5V 3M2,ON,5321f4d9e4b07946702e6e08
3,Art Gallery of Ontario,Art Gallery,317 Dundas St W,CA,Toronto,Canada,at Beverley St,788,"[317 Dundas St W (at Beverley St), Toronto ON ...","[{'label': 'display', 'lat': 43.65400286033738...",43.654003,-79.392922,,M5T 1G4,ON,4ad4c05ef964a520daf620e3
4,Hogtown Smoke,Food Truck,1959 Queen St E,CA,Toronto,Canada,,812,"[1959 Queen St E, Toronto ON M4L 1H7, Canada]","[{'label': 'display', 'lat': 43.64928693557788...",43.649287,-79.374689,,M4L 1H7,ON,4fcb97f7e4b00a0e8520b055
5,St. Lawrence Market (South Building),Farmers Market,93 Front St E,CA,Toronto,Canada,at Lower Jarvis St,1058,"[93 Front St E (at Lower Jarvis St), Toronto O...","[{'label': 'display', 'lat': 43.64874320223593...",43.648743,-79.371597,St. Lawrence,M5E 1C3,ON,4ad4c062f964a520fbf720e3
6,Delta Hotels by Marriott Toronto,Hotel,75 Lower Simcoe Street,CA,Toronto,Canada,,1150,"[75 Lower Simcoe Street, Toronto ON M5J 3A6, C...","[{'label': 'display', 'lat': 43.6428819, 'lng'...",43.642882,-79.383949,,M5J 3A6,ON,53357710498e20817350cfb4
7,UNIQLO ユニクロ,Clothing Store,220 Yonge St,CA,Toronto,Canada,at Dundas St W,365,"[220 Yonge St (at Dundas St W), Toronto ON M5B...","[{'label': 'display', 'lat': 43.65591027779457...",43.65591,-79.380641,Downtown Toronto,M5B 2H1,ON,57eda381498ebe0e6ef40972
8,SOMA chocolatemaker,Dessert Shop,443 King St. W,CA,Toronto,Canada,,1335,"[443 King St. W, Toronto ON M5V 1K4, Canada]","[{'label': 'display', 'lat': 43.64532829235515...",43.645328,-79.395714,,M5V 1K4,ON,4df796f1aeb7da11e1a974d4
9,Roundhouse Park,Park,255 Bremner Blvd.,CA,Toronto,Canada,at Lower Simcoe St.,1278,"[255 Bremner Blvd. (at Lower Simcoe St.), Toro...","[{'label': 'display', 'lat': 43.64174513889102...",43.641745,-79.384279,Entertainment District,M5V 3M9,ON,4b642db1f964a520b7a22ae3


In [35]:
df_clean = dataframe_filtered.dropna(subset=['postalCode'])

In [36]:
df_clean = df_clean[['name','categories','lat','lng','postalCode']]

In [37]:
df_clean.head(15)

Unnamed: 0,name,categories,lat,lng,postalCode
1,Pai,Thai Restaurant,43.647923,-79.388579,M5H 3G6
2,Byblos Toronto,Mediterranean Restaurant,43.647615,-79.388381,M5V 3M2
3,Art Gallery of Ontario,Art Gallery,43.654003,-79.392922,M5T 1G4
4,Hogtown Smoke,Food Truck,43.649287,-79.374689,M4L 1H7
5,St. Lawrence Market (South Building),Farmers Market,43.648743,-79.371597,M5E 1C3
6,Delta Hotels by Marriott Toronto,Hotel,43.642882,-79.383949,M5J 3A6
7,UNIQLO ユニクロ,Clothing Store,43.65591,-79.380641,M5B 2H1
8,SOMA chocolatemaker,Dessert Shop,43.645328,-79.395714,M5V 1K4
9,Roundhouse Park,Park,43.641745,-79.384279,M5V 3M9
10,Steam Whistle Brewing,Brewery,43.641752,-79.387089,M5V 3M9


In [38]:
postalcodenew = []
postal = np.array(df_clean.postalCode)
for i in np.arange(len(postal)):            #Using standard postalcode with 3 characters
    postalcodenew.append(postal[i][0:3])

In [39]:
df_clean['postalCode']=postalcodenew

In [40]:
df_clean.head(10)

Unnamed: 0,name,categories,lat,lng,postalCode
1,Pai,Thai Restaurant,43.647923,-79.388579,M5H
2,Byblos Toronto,Mediterranean Restaurant,43.647615,-79.388381,M5V
3,Art Gallery of Ontario,Art Gallery,43.654003,-79.392922,M5T
4,Hogtown Smoke,Food Truck,43.649287,-79.374689,M4L
5,St. Lawrence Market (South Building),Farmers Market,43.648743,-79.371597,M5E
6,Delta Hotels by Marriott Toronto,Hotel,43.642882,-79.383949,M5J
7,UNIQLO ユニクロ,Clothing Store,43.65591,-79.380641,M5B
8,SOMA chocolatemaker,Dessert Shop,43.645328,-79.395714,M5V
9,Roundhouse Park,Park,43.641745,-79.384279,M5V
10,Steam Whistle Brewing,Brewery,43.641752,-79.387089,M5V


In [41]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=12, height='60%', width='50%') # generate map centred around Toronto City


# add Toronto Center as a red circle mark
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    popup='Toronto',
    fill=True,
    color='red',
    fill_color='red',
    fill_opacity=0.6
    ).add_to(venues_map)

# add Boroughs
for lat, lng, label in zip(toronto_data.Latitude, toronto_data.Longitude, toronto_data.Neighbourhood):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        #popup=label,
        fill=True,
        color='blue',
        fill_color='blue',
        fill_opacity=0.6
        ).add_to(venues_map)

# add venues
for lat, lng, label in zip(df_clean.lat, df_clean.lng, df_clean.name):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        #popup=label,
        fill=True,
        color='green',
        fill_color='green',
        fill_opacity=0.6
        ).add_to(venues_map)
    
    

# display map
venues_map

In [65]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

def get_data_frame(url):
    results = requests.get(url).json()
    items = results['response']['groups'][0]['items']
    dataframe = json_normalize(items) # flatten JSON
    filtered_columns = ['venue.name', 'venue.categories'] + [col for col in dataframe.columns if col.startswith('venue.location.')] + ['venue.id']
    dataframe_filtered = dataframe.loc[:, filtered_columns]
    dataframe_filtered['venue.categories'] = dataframe_filtered.apply(get_category_type, axis=1)
    dataframe_filtered.columns = [col.split('.')[-1] for col in dataframe_filtered.columns]
    try:
        df_clean = dataframe_filtered.dropna(subset=['postalCode'])
        df_clean = df_clean[['name','categories','lat','lng','postalCode']]
        postalcodenew = []
        postal = np.array(df_clean.postalCode)
        for i in np.arange(len(postal)):            #Using standard postalcode with 3 characters
            postalcodenew.append(postal[i][0:3])
        df_clean['postalCode']=postalcodenew
        return df_clean
    except:
        pass
    
radius = 3000
LIMIT = 30 #Number max o venue per neigh
    
data_total = pd.DataFrame(columns=['name','categories','lat','lng','postalCode'])
    
for neigh_lat, neigh_long, neigh_label in zip(toronto_data.Latitude, toronto_data.Longitude, toronto_data.Neighbourhood):
    url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, neigh_lat, neigh_long, VERSION, radius, LIMIT)
    data_total = pd.concat([data_total,get_data_frame(url)])
    #print (data_total.size)
    


In [69]:
data_total.head(10)

Unnamed: 0,name,categories,lat,lng,postalCode
0,Roselle Desserts,Bakery,43.653447,-79.362017,M5A
3,The Distillery Historic District,Historic Site,43.650244,-79.359323,M5A
4,Impact Kitchen,Restaurant,43.656369,-79.35698,M5A
5,Cooper Koo Family YMCA,Gym / Fitness Center,43.653191,-79.357947,M5A
6,Rooster Coffee,Coffee Shop,43.6519,-79.365609,M5A
7,SOMA chocolatemaker,Chocolate Shop,43.650622,-79.358127,M5A
9,Arvo,Coffee Shop,43.649963,-79.361442,M5A
10,Young Centre for the Performing Arts,Performing Arts Venue,43.650825,-79.357593,M5A
11,Cacao 70,Dessert Shop,43.650067,-79.360723,M5A
13,Mangia and Bevi Resto-Bar,Italian Restaurant,43.65225,-79.366355,M5R


In [80]:
data_total.shape

(210, 5)

In [71]:
data_total.drop_duplicates(subset ="name", keep = False, inplace = True) #Drop duplicate veues

In [81]:
data_total.shape

(210, 5)

In [73]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=12, height='60%', width='50%') # generate map centred around Toronto City

# add Toronto Center as a red circle mark
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    popup='Toronto',
    fill=True,
    color='red',
    fill_color='red',
    fill_opacity=0.6
    ).add_to(venues_map)

# add Boroughs postal code
for lat, lng, label in zip(toronto_data.Latitude, toronto_data.Longitude, toronto_data.Neighbourhood):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        #popup=label,
        fill=True,
        color='blue',
        fill_color='blue',
        fill_opacity=0.6
        ).add_to(venues_map)

# add venues
for lat, lng, label in zip(data_total.lat, data_total.lng, data_total.name):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        #popup=label,
        fill=True,
        color='green',
        color_opacity=0.1,
        fill_color='green',
        fill_opacity=0.6
        ).add_to(venues_map)    

        
venues_map

In [84]:
pd.unique(toronto_data.Borough) #There are 4 regions in Toronto in the data

array(['Downtown Toronto', 'East Toronto', 'West Toronto',
       'Central Toronto'], dtype=object)

### Clustering Toronto Venues

In [85]:
import matplotlib.pyplot as plt # plotting library
# backend for rendering plots within the browser
%matplotlib inline 
from sklearn.cluster import KMeans 
print('Libraries imported.')

Libraries imported.


In [86]:
k_means = KMeans(init="k-means++", n_clusters=4, n_init=12)

In [90]:
X = np.array(data_total[['lat','lng']])

In [91]:
k_means.fit(X)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=4, n_init=12, n_jobs=None, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

In [94]:
k_means_labels = k_means.labels_
k_means_labels

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 3, 3, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2,
       2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2], dtype=int32)

In [92]:
k_means_cluster_centers = k_means.cluster_centers_
k_means_cluster_centers

array([[ 43.6513766 , -79.40718227],
       [ 43.65673584, -79.46144418],
       [ 43.66720016, -79.34318361],
       [ 43.72657274, -79.3958444 ]])

In [97]:
data_total['k_labels']=k_means_labels

In [108]:
colors = ['green','yellow','red','blue']

venues_map = folium.Map(location=[latitude, longitude], zoom_start=12, height='70%', width='50%') # generate map centred around Toronto City


# add Boroughs postal code
for lat, lng, label in zip(toronto_data.Latitude, toronto_data.Longitude, toronto_data.Neighbourhood):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        #popup=label,
        fill=True,
        color='black',
        fill_color='black',
        fill_opacity=0.6
        ).add_to(venues_map)

# add venues
for lat, lng, col in zip(data_total.lat, data_total.lng, data_total.k_labels):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        #popup=label,
        fill=True,
        color=colors[col],
        color_opacity=0.1,
        fill_color=colors[col],
        fill_opacity=0.6
        ).add_to(venues_map)    

        
venues_map


In [101]:
colors

array([[0.61960784, 0.00392157, 0.25882353, 1.        ],
       [0.99346405, 0.74771242, 0.43529412, 1.        ],
       [0.74771242, 0.89803922, 0.62745098, 1.        ],
       [0.36862745, 0.30980392, 0.63529412, 1.        ]])