# Peer-Graded

## One notebook for all three parts.

## Part 1:  transform the data in the table on the Wikipedia page into dataframe

#### Import request library and scrape from wikipedia url.

In [1]:
import requests

In [2]:
web_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

#### Import BeautifulSoup.

In [3]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(web_url,'lxml')
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className=document.documentElement.className.replace(/(^|\s)client-nojs(\s|$)/,"$1client-js$2");RLCONF={"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":906439794,"wgRevisionId":906439794,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Communications in Ontario","Postal codes in Canada","Toronto","Ontario-related lists"],"wgBreakFrames":!1,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June",

#### When inspect the HTML, we can find that info that we are looking for is under class wikitable sortable.

In [4]:
T_table = soup.find('table',{'class':'wikitable sortable'})
T_table

<table class="wikitable sortable">
<tbody><tr>
<th>Postcode</th>
<th>Borough</th>
<th>Neighbourhood
</th></tr>
<tr>
<td>M1A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M2A</td>
<td>Not assigned</td>
<td>Not assigned
</td></tr>
<tr>
<td>M3A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Parkwoods" title="Parkwoods">Parkwoods</a>
</td></tr>
<tr>
<td>M4A</td>
<td><a href="/wiki/North_York" title="North York">North York</a></td>
<td><a href="/wiki/Victoria_Village" title="Victoria Village">Victoria Village</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Harbourfront_(Toronto)" title="Harbourfront (Toronto)">Harbourfront</a>
</td></tr>
<tr>
<td>M5A</td>
<td><a href="/wiki/Downtown_Toronto" title="Downtown Toronto">Downtown Toronto</a></td>
<td><a href="/wiki/Regent_Park" title="Regent Park">Regent Park</a>
</td></tr>
<tr>
<td>M6A</td>

In [5]:
print(T_table.tr.text)


Postcode
Borough
Neighbourhood



In [6]:
headers = "PostalCode, Borough, Neighborhood"

#### Getting values.

In [7]:
table1=""
for tr in T_table.find_all('tr'):
    row1=""
    for tds in tr.find_all('td'):
        row1=row1+","+tds.text
    table1=table1+row1[1:]
print(table1)

M1A,Not assigned,Not assigned
M2A,Not assigned,Not assigned
M3A,North York,Parkwoods
M4A,North York,Victoria Village
M5A,Downtown Toronto,Harbourfront
M5A,Downtown Toronto,Regent Park
M6A,North York,Lawrence Heights
M6A,North York,Lawrence Manor
M7A,Queen's Park,Not assigned
M8A,Not assigned,Not assigned
M9A,Etobicoke,Islington Avenue
M1B,Scarborough,Rouge
M1B,Scarborough,Malvern
M2B,Not assigned,Not assigned
M3B,North York,Don Mills North
M4B,East York,Woodbine Gardens
M4B,East York,Parkview Hill
M5B,Downtown Toronto,Ryerson
M5B,Downtown Toronto,Garden District
M6B,North York,Glencairn
M7B,Not assigned,Not assigned
M8B,Not assigned,Not assigned
M9B,Etobicoke,Cloverdale
M9B,Etobicoke,Islington
M9B,Etobicoke,Martin Grove
M9B,Etobicoke,Princess Gardens
M9B,Etobicoke,West Deane Park
M1C,Scarborough,Highland Creek
M1C,Scarborough,Rouge Hill
M1C,Scarborough,Port Union
M2C,Not assigned,Not assigned
M3C,North York,Flemingdon Park
M3C,North York,Don Mills South
M4C,East York,Woodbine Heights
M

In [8]:
file=open("toronto01.csv","wb")
file.write(bytes(table1,encoding="ascii",errors="ignore"))

8738

#### Convert into pandas dataframe.

In [9]:
import pandas as pd
df = pd.read_csv('toronto01.csv',header=None)
df.columns=["Postalcode","Borough","Neighborhood"]

In [10]:
df.head(5)

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


#### Process only the cells that have an assigned borough. Drop rows with "Not Assigned" borough.

In [11]:
# Only process the cells that have an assigned borough
indexNames = df[ df['Borough'] =='Not assigned'].index

# drop these rows
df.drop(indexNames , inplace=True)

In [12]:
df.head(5)

Unnamed: 0,Postalcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


#### If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

In [13]:
df.loc[df['Neighborhood'] =='Not assigned' , 'Neighborhood'] = df['Borough']
df.head(5)

Unnamed: 0,Postalcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


#### rows with same postcode will be combined into one row.

In [14]:
result = df.groupby(['Postalcode','Borough'], sort=False).agg( ', '.join)

In [15]:
df_new=result.reset_index()
df_new.head(10)

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


#### Print the number of rows of your dataframe

In [16]:
df_new.shape

(103, 3)

## Part 2:  get the latitude and the longitude coordinates of each neighborhood

#### Downloaded the csv file that has the geographical coordinates of each postal code from http://cocl.us/Geospatial_data.  

In [17]:
df_lolat = pd.read_csv('Geospatial_Coordinates.csv')
df_lolat.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [18]:
df_lolat.columns=['Postalcode','Latitude','Longitude']

In [19]:
df_lolat.head()

Unnamed: 0,Postalcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [20]:
Toronto_df = pd.merge(df_new,
                 df_lolat[['Postalcode','Latitude', 'Longitude']],
                 on='Postalcode')
Toronto_df

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.654260,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


## Part 3:  Explore and cluster the neighborhoods

#### Get latitude and longitude values of New York.

In [21]:
# import libraries
from geopy.geocoders import Nominatim
import matplotlib.colors as colors
import matplotlib.cm as cm
from sklearn.cluster import KMeans
import folium

In [22]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="Toronto")
location = geolocator.geocode(address)
lati_toronto = location.latitude
long_toronto = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(lati_toronto, long_toronto))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [23]:
map_toronto = folium.Map(location=[lati_toronto, long_toronto], zoom_start=10)

# add markers to map
for lat, lng, borough, Neighborhood in zip(Toronto_df['Latitude'], Toronto_df['Longitude'], Toronto_df['Borough'], Toronto_df['Neighborhood']):
    label = '{}, {}'.format(Neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### Foursquare Account and credentials.

In [24]:
CLIENT_ID = '2MBLWPCF03MPNX50MJBLOD1LIJZRYU3NIAWB4GUVCMX3FPQ1' # your Foursquare ID
CLIENT_SECRET = 'UEMUQIT4POBRNMAZ4Y1Y1N3EYBIM12D1MCBY5RBNWBY3PY34' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 2MBLWPCF03MPNX50MJBLOD1LIJZRYU3NIAWB4GUVCMX3FPQ1
CLIENT_SECRET:UEMUQIT4POBRNMAZ4Y1Y1N3EYBIM12D1MCBY5RBNWBY3PY34


#### define radius & limit

In [25]:
radius=500
LIMIT=100

In [26]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id=2MBLWPCF03MPNX50MJBLOD1LIJZRYU3NIAWB4GUVCMX3FPQ1&client_secret=UEMUQIT4POBRNMAZ4Y1Y1N3EYBIM12D1MCBY5RBNWBY3PY34&v=20180604&ll=43.653963,-79.387207&radius=500&limit=100'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [27]:
toronto_venues = getNearbyVenues(names=Toronto_df['Neighborhood'],
                                   latitudes=Toronto_df['Latitude'],
                                   longitudes=Toronto_df['Longitude']
                                  )

Parkwoods
Victoria Village
Harbourfront, Regent Park
Lawrence Heights, Lawrence Manor
Queen's Park
Islington Avenue
Rouge, Malvern
Don Mills North
Woodbine Gardens, Parkview Hill
Ryerson, Garden District
Glencairn
Cloverdale, Islington, Martin Grove, Princess Gardens, West Deane Park
Highland Creek, Rouge Hill, Port Union
Flemingdon Park, Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Downsview North, Wilson Heights
Thorncliffe Park
Adelaide, King, Richmond
Dovercourt Village, Dufferin
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto
Harbourfront East, Toronto Islands, Union Station
Little Portugal, Trinity
East Birchmount Park, Ionview, Kennedy Park
Bayview Village
CFB Toronto, Downsview East
The D

#### Check dataframe's size.

In [28]:
print(toronto_venues.shape)
toronto_venues.head()

(7931, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Downtown Toronto,43.653232,-79.385296,Neighborhood
1,Parkwoods,43.753259,-79.329656,Japango,43.655268,-79.385165,Sushi Restaurant
2,Parkwoods,43.753259,-79.329656,Cafe Plenty,43.654571,-79.38945,Café
3,Parkwoods,43.753259,-79.329656,Textile Museum of Canada,43.654396,-79.3865,Art Museum
4,Parkwoods,43.753259,-79.329656,Poke Guys,43.654895,-79.385052,Poke Place


#### How many values returned for each neighborhood

In [29]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",77,77,77,77,77,77
Agincourt,77,77,77,77,77,77
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",77,77,77,77,77,77
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",77,77,77,77,77,77
"Alderwood, Long Branch",77,77,77,77,77,77
"Bathurst Manor, Downsview North, Wilson Heights",77,77,77,77,77,77
Bayview Village,77,77,77,77,77,77
"Bedford Park, Lawrence Manor East",77,77,77,77,77,77
Berczy Park,77,77,77,77,77,77
"Birch Cliff, Cliffside West",77,77,77,77,77,77


#### Analyze each neighborhood

In [30]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot.head()

Unnamed: 0,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Bar,Breakfast Spot,Bubble Tea Shop,Café,Chinese Restaurant,...,Seafood Restaurant,Smoke Shop,Steakhouse,Sushi Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Toy / Game Store,University,Vegetarian / Vegan Restaurant
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### size of new data frame

In [31]:
toronto_onehot.shape

(7931, 56)

#### Group rows by neighborhoods. Take the mean of the frequency.

In [32]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Bar,Breakfast Spot,Bubble Tea Shop,Café,...,Seafood Restaurant,Smoke Shop,Steakhouse,Sushi Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Toy / Game Store,University,Vegetarian / Vegan Restaurant
0,"Adelaide, King, Richmond",0.012987,0.038961,0.012987,0.012987,0.012987,0.025974,0.025974,0.025974,0.051948,...,0.012987,0.012987,0.012987,0.038961,0.012987,0.025974,0.012987,0.012987,0.012987,0.012987
1,Agincourt,0.012987,0.038961,0.012987,0.012987,0.012987,0.025974,0.025974,0.025974,0.051948,...,0.012987,0.012987,0.012987,0.038961,0.012987,0.025974,0.012987,0.012987,0.012987,0.012987
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.012987,0.038961,0.012987,0.012987,0.012987,0.025974,0.025974,0.025974,0.051948,...,0.012987,0.012987,0.012987,0.038961,0.012987,0.025974,0.012987,0.012987,0.012987,0.012987
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.012987,0.038961,0.012987,0.012987,0.012987,0.025974,0.025974,0.025974,0.051948,...,0.012987,0.012987,0.012987,0.038961,0.012987,0.025974,0.012987,0.012987,0.012987,0.012987
4,"Alderwood, Long Branch",0.012987,0.038961,0.012987,0.012987,0.012987,0.025974,0.025974,0.025974,0.051948,...,0.012987,0.012987,0.012987,0.038961,0.012987,0.025974,0.012987,0.012987,0.012987,0.012987
5,"Bathurst Manor, Downsview North, Wilson Heights",0.012987,0.038961,0.012987,0.012987,0.012987,0.025974,0.025974,0.025974,0.051948,...,0.012987,0.012987,0.012987,0.038961,0.012987,0.025974,0.012987,0.012987,0.012987,0.012987
6,Bayview Village,0.012987,0.038961,0.012987,0.012987,0.012987,0.025974,0.025974,0.025974,0.051948,...,0.012987,0.012987,0.012987,0.038961,0.012987,0.025974,0.012987,0.012987,0.012987,0.012987
7,"Bedford Park, Lawrence Manor East",0.012987,0.038961,0.012987,0.012987,0.012987,0.025974,0.025974,0.025974,0.051948,...,0.012987,0.012987,0.012987,0.038961,0.012987,0.025974,0.012987,0.012987,0.012987,0.012987
8,Berczy Park,0.012987,0.038961,0.012987,0.012987,0.012987,0.025974,0.025974,0.025974,0.051948,...,0.012987,0.012987,0.012987,0.038961,0.012987,0.025974,0.012987,0.012987,0.012987,0.012987
9,"Birch Cliff, Cliffside West",0.012987,0.038961,0.012987,0.012987,0.012987,0.025974,0.025974,0.025974,0.051948,...,0.012987,0.012987,0.012987,0.038961,0.012987,0.025974,0.012987,0.012987,0.012987,0.012987


#### top 5 most common venues.

In [33]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.05
2  Japanese Restaurant  0.04
3     Sushi Restaurant  0.04
4          Art Gallery  0.04


----Agincourt----
                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.05
2  Japanese Restaurant  0.04
3     Sushi Restaurant  0.04
4          Art Gallery  0.04


----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.05
2  Japanese Restaurant  0.04
3     Sushi Restaurant  0.04
4          Art Gallery  0.04


----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.05
2  Japanese Restaurant  0.04
3     Sushi Restaurant  0.04
4          Art Gallery  0.04


----Alderwood, Long Branch----
                 venue  freq
0 

                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.05
2  Japanese Restaurant  0.04
3     Sushi Restaurant  0.04
4          Art Gallery  0.04


----East Birchmount Park, Ionview, Kennedy Park----
                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.05
2  Japanese Restaurant  0.04
3     Sushi Restaurant  0.04
4          Art Gallery  0.04


----East Toronto----
                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.05
2  Japanese Restaurant  0.04
3     Sushi Restaurant  0.04
4          Art Gallery  0.04


----Emery, Humberlea----
                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.05
2  Japanese Restaurant  0.04
3     Sushi Restaurant  0.04
4          Art Gallery  0.04


----Fairview, Henry Farm, Oriole----
                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.05
2  Japanese Restaurant  0.04
3     Sushi Restaurant  0.04
4          Art G

                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.05
2  Japanese Restaurant  0.04
3     Sushi Restaurant  0.04
4          Art Gallery  0.04


----Scarborough Village----
                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.05
2  Japanese Restaurant  0.04
3     Sushi Restaurant  0.04
4          Art Gallery  0.04


----Silver Hills, York Mills----
                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.05
2  Japanese Restaurant  0.04
3     Sushi Restaurant  0.04
4          Art Gallery  0.04


----St. James Town----
                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.05
2  Japanese Restaurant  0.04
3     Sushi Restaurant  0.04
4          Art Gallery  0.04


----Stn A PO Boxes 25 The Esplanade----
                 venue  freq
0          Coffee Shop  0.08
1                 Café  0.05
2  Japanese Restaurant  0.04
3     Sushi Restaurant  0.04
4          Art Gallery  0.0

#### Put into dataframe. Sort in descending order.

In [34]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

#### Display top 10 venues.

In [35]:
import numpy as np

In [36]:
num_top_venues = 10
indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

In [37]:
# create data frame.
neighborhoods_sorted = pd.DataFrame(columns=columns)
neighborhoods_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
1,Agincourt,Coffee Shop,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Coffee Shop,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Coffee Shop,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
4,"Alderwood, Long Branch",Coffee Shop,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel


#### Cluster the neighborhoods into 5 clusters.

In [38]:
kclusters = 5
toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)
kmeans.labels_[0:10]

  return_n_iter=True)


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [43]:
neighborhoods_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
toronto_merged = df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_sorted.set_index('Neighborhood'), on='Neighborhood')

# check the last columns!
toronto_merged.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M3A,North York,Parkwoods,0.0,Coffee Shop,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
3,M4A,North York,Victoria Village,0.0,Coffee Shop,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
4,M5A,Downtown Toronto,Harbourfront,,,,,,,,,,,
5,M5A,Downtown Toronto,Regent Park,,,,,,,,,,,
6,M6A,North York,Lawrence Heights,,,,,,,,,,,


#### Visualize.

In [44]:
map_toronto = folium.Map(location=[lati_toronto, long_toronto], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

KeyError: 'Latitude'

#### Examine clusters.

#### Cluster 1

In [45]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,North York,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
3,North York,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
8,Queen's Park,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
10,Etobicoke,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
14,North York,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
19,North York,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
33,East York,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
34,Downtown Toronto,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
35,York,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel
47,East Toronto,Café,Sushi Restaurant,Japanese Restaurant,Art Gallery,Tea Room,Bubble Tea Shop,Ice Cream Shop,Chinese Restaurant,Hotel


#### Cluster 2

In [46]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


#### Cluster 3

In [47]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


#### Cluster 4

In [48]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


#### Cluster 5

In [49]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
