Importing required libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

### Getting data from wikipedia webpage and extracting table data

In [3]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

#Creating BeautifulSoup object with lxml parser
soup = BeautifulSoup(source, 'lxml')

#Storing table data in a variable, first table appearance has the date
table = soup.find('table')

#Storing column header in a variable, from the header row of the wikipedia table
header = table.find_all('th')

columns = []

for head in header:
    columns.append(head.text.rstrip())
    
columns


['Postal code', 'Borough', 'Neighborhood']

#### Inserting table rows into a dataframe

In [33]:
#parsing through table rows to create a list and creating a dataframe
table_rows = table.find_all('tr')

l = []
for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text.rstrip() for tr in td]
    l.append(row)
    
table_df = pd.DataFrame(l, columns = columns)
table_df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,,,
1,M1A,Not assigned,
2,M2A,Not assigned,
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


Rename column 'Postal code' to 'PostalCode'

In [34]:
table_df.rename(columns={'Postal code': 'PostalCode'}, inplace=True)
table_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,,,
1,M1A,Not assigned,
2,M2A,Not assigned,
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.

In [35]:
#drop 'Not assigned' Boroughs
table_df = table_df[table_df['Borough'] != 'Not assigned']
table_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,,,
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Regent Park / Harbourfront
6,M6A,North York,Lawrence Manor / Lawrence Heights


In [37]:
#drop rows with 'None' values (first row has 'None' value for all columns)

table_df.dropna(inplace=True)
table_df.head()


Unnamed: 0,PostalCode,Borough,Neighborhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Regent Park / Harbourfront
6,M6A,North York,Lawrence Manor / Lawrence Heights
7,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


In [38]:
#reset index
table_df.reset_index(drop=True, inplace=True)
table_df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,The Kingsway / Montgomery Road / Old Mill North
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,Business reply mail Processing CentrE
101,M8Y,Etobicoke,Old Mill South / King's Mill Park / Sunnylea /...


In [60]:
#replacing '/' with ',' in Neighborhood column
table_df['Neighborhood'] = table_df['Neighborhood'].str.replace('/',',')
table_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


More than one neighborhood can exist in one postal code area. For example, in the table on the Wikipedia page, you will notice that M5A is listed twice and has two neighborhoods: Harbourfront and Regent Park. These two rows will be combined into one row with the neighborhoods separated with a comma as shown in row 11 in the above table.

In [15]:
#There is no PostalCode assigned to more than one Neighborhoods in the Wikipedia table data

If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

In [16]:
#There is no Borough with 'Not assigned' Neighborhood

In the last cell of your notebook, use the .shape method to print the number of rows of your dataframe.

In [61]:
#Printing the no. of rows in the DataFrame
table_df.shape

(103, 3)

### end of first section

#### Starting section two

## Adding latitude, longitude to the dataframe

##### importing lat, long csv file from http://cocl.us/Geospatial_data

In [23]:
loc_df = pd.read_csv('http://cocl.us/Geospatial_data')
loc_df.head()

#renaming column 'Postal Code' to 'PostalCode'
loc_df.rename(columns={'Postal Code':'PostalCode'}, inplace=True)
loc_df.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


##### Creating new dataframe with lat, long

In [62]:
neighborhood_df = pd.merge(left=table_df, right=loc_df)
print(neighborhood_df.shape)
neighborhood_df.head()

(103, 5)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494


### End of section 2

#### Starting section 3

## Explore and cluster the neighborhoods in Toronto. 

In [63]:
# Filter the dataframe and get the rows with word 'Toronto' in it

toronto_df = neighborhood_df[neighborhood_df['Borough'].str.contains('Toronto')]
print(toronto_df.shape)
toronto_df.reset_index(drop=True, inplace=True)
toronto_df

(39, 5)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond , Adelaide , King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin , Dovercourt Village",43.669005,-79.442259


##### importing required libraries

In [47]:
!conda install -c conda-forge geopy --yes

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... 
  - anaconda/win-64::openssl-1.1.1d-he774522_2
  - defaults/win-64::openssl-1.1.1d-he774522_2done

# All requested packages already installed.



In [76]:
from geopy.geocoders import Nominatim
import folium
import json
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

#### Getting latitude and longitude values of Toronto

In [52]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent = 'explore_Toronto')
location = geolocator.geocode(address)
lat = location.latitude
long = location.longitude
print("Coordinates for Toronto are {}, {}.".format(lat, long))

Coordinates for Toronto are 43.6534817, -79.3839347.


### Create a map of Toronto

In [64]:
map_Toronto = folium.Map(location=[lat,long], zoom_start=10)

# add markers to map
for lat, long, borough, neighborhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough'], toronto_df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat,long],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False
    ).add_to(map_Toronto)

map_Toronto

##### Define Foursquare credentials and version
hidden cell

In [68]:
# @hidden_cell

CLIENT_ID = '2IVD1U2HZ2PRIUELJ20EYMI2KBE0F1L1CXNG2JMV0YLXFLWX' # your Foursquare ID
CLIENT_SECRET = 'OSM21SJDF5TVMGGGKU0KBHMRPZY5XGUGLTSKEGVPN4EIZVTZ' # your Foursquare Secret

VERSION = '20180605' # Foursquare API version


#### Exploring the first neighborhood in the dataframe

In [66]:
toronto_df.loc[0, 'Neighborhood']

'Regent Park , Harbourfront'

Getting Neighborhood's latitude and longitude values

In [67]:
neighborhood_lat = toronto_df.loc[0,'Latitude']
neighborhood_long = toronto_df.loc[0,'Longitude']

neighborhood_name = toronto_df.loc[0,'Neighborhood']

print('Latitude and Longitude values of {} are {}, {}'.format(neighborhood_name, neighborhood_lat, neighborhood_long))

Latitude and Longitude values of Regent Park , Harbourfront are 43.6542599, -79.3606359


##### Top 10 venues that are in Regent Park , Harbourfront within a radius of 500 meters.

In [83]:
# Create the URL
RADIUS = 500
LIMIT=50

url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET,neighborhood_lat, neighborhood_long, VERSION, RADIUS, LIMIT) 

In [190]:
# Send the GET request
results = requests.get(url).json()
#results

function to extract the category of the venue

In [79]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

clean the json data

In [85]:
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues)
# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,Body Blitz Spa East,Spa,43.654735,-79.359874
4,Morning Glory Cafe,Breakfast Spot,43.653947,-79.361149


In [86]:
nearby_venues.shape

(45, 4)

### Explore neighborhoods in Toronto

Function to process all neighborhoods in Toronto

In [92]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    venues_list = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        #create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
        
        #get request
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
        
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    return (nearby_venues)

Using above method, creating a Dataframe of venues

In [93]:

toronto_venues = getNearbyVenues(names=toronto_df['Neighborhood'],
                                   latitudes=toronto_df['Latitude'],
                                   longitudes=toronto_df['Longitude']
                                  )

Regent Park , Harbourfront
Queen's Park , Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond , Adelaide , King
Dufferin , Dovercourt Village
Harbourfront East , Union Station , Toronto Islands
Little Portugal , Trinity
The Danforth West , Riverdale
Toronto Dominion Centre , Design Exchange
Brockton , Parkdale Village , Exhibition Place
India Bazaar , The Beaches West
Commerce Court , Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West
High Park , The Junction South
North Toronto West
The Annex , North Midtown , Yorkville
Parkdale , Roncesvalles
Davisville
University of Toronto , Harbord
Runnymede , Swansea
Moore Park , Summerhill East
Kensington Market , Chinatown , Grange Park
Summerhill West , Rathnelly , South Hill , Forest Hill SE , Deer Park
CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst
 Quay , South Niagara , Island airport
Rosed

Size of resulting dataframe

In [94]:
print(toronto_venues.shape)
toronto_venues.head()

(1204, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park , Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park , Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park , Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park , Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park , Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


Number of venues for each neighborhood

In [95]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,50,50,50,50,50,50
"Brockton , Parkdale Village , Exhibition Place",23,23,23,23,23,23
Business reply mail Processing CentrE,15,15,15,15,15,15
"CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst\n Quay , South Niagara , Island airport",16,16,16,16,16,16
Central Bay Street,50,50,50,50,50,50
Christie,18,18,18,18,18,18
Church and Wellesley,50,50,50,50,50,50
"Commerce Court , Victoria Hotel",50,50,50,50,50,50
Davisville,36,36,36,36,36,36
Davisville North,9,9,9,9,9,9


In [96]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 219 uniques categories.


### Analyze each neighborhood

In [122]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 
toronto_onehot.head()

toronto_onehot.shape

(1204, 219)

In [130]:
fixed_columns = list(toronto_onehot.columns)
fixed_columns

['Airport',
 'Airport Food Court',
 'Airport Gate',
 'Airport Lounge',
 'Airport Service',
 'Airport Terminal',
 'American Restaurant',
 'Antique Shop',
 'Aquarium',
 'Art Gallery',
 'Arts & Crafts Store',
 'Asian Restaurant',
 'Athletics & Sports',
 'Auto Workshop',
 'BBQ Joint',
 'Baby Store',
 'Bagel Shop',
 'Bakery',
 'Bank',
 'Bar',
 'Baseball Stadium',
 'Basketball Stadium',
 'Beach',
 'Beer Bar',
 'Beer Store',
 'Belgian Restaurant',
 'Bistro',
 'Boat or Ferry',
 'Bookstore',
 'Boutique',
 'Brazilian Restaurant',
 'Breakfast Spot',
 'Brewery',
 'Bubble Tea Shop',
 'Burger Joint',
 'Burrito Place',
 'Bus Line',
 'Butcher',
 'Café',
 'Cajun / Creole Restaurant',
 'Camera Store',
 'Candy Store',
 'Caribbean Restaurant',
 'Cheese Shop',
 'Chinese Restaurant',
 'Chocolate Shop',
 'Church',
 'Climbing Gym',
 'Clothing Store',
 'Cocktail Bar',
 'Coffee Shop',
 'College Arts Building',
 'College Auditorium',
 'College Gym',
 'College Rec Center',
 'Colombian Restaurant',
 'Comfort Food 

Move 'Neighborhood' column to the first column

In [132]:
type(fixed_columns)
#fixed_columns = toronto_onehot.columns[0]+list(toronto_onehot.columns[1:])
fixed_columns.remove('Neighborhood')
fixed_columns.insert(0,'Neighborhood')
fixed_columns

['Neighborhood',
 'Airport',
 'Airport Food Court',
 'Airport Gate',
 'Airport Lounge',
 'Airport Service',
 'Airport Terminal',
 'American Restaurant',
 'Antique Shop',
 'Aquarium',
 'Art Gallery',
 'Arts & Crafts Store',
 'Asian Restaurant',
 'Athletics & Sports',
 'Auto Workshop',
 'BBQ Joint',
 'Baby Store',
 'Bagel Shop',
 'Bakery',
 'Bank',
 'Bar',
 'Baseball Stadium',
 'Basketball Stadium',
 'Beach',
 'Beer Bar',
 'Beer Store',
 'Belgian Restaurant',
 'Bistro',
 'Boat or Ferry',
 'Bookstore',
 'Boutique',
 'Brazilian Restaurant',
 'Breakfast Spot',
 'Brewery',
 'Bubble Tea Shop',
 'Burger Joint',
 'Burrito Place',
 'Bus Line',
 'Butcher',
 'Café',
 'Cajun / Creole Restaurant',
 'Camera Store',
 'Candy Store',
 'Caribbean Restaurant',
 'Cheese Shop',
 'Chinese Restaurant',
 'Chocolate Shop',
 'Church',
 'Climbing Gym',
 'Clothing Store',
 'Cocktail Bar',
 'Coffee Shop',
 'College Arts Building',
 'College Auditorium',
 'College Gym',
 'College Rec Center',
 'Colombian Restaurant'

In [133]:
toronto_onehot = toronto_onehot[fixed_columns]
toronto_onehot

Unnamed: 0,Neighborhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,"Regent Park , Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park , Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park , Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park , Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park , Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1199,Business reply mail Processing CentrE,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1200,Business reply mail Processing CentrE,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1201,Business reply mail Processing CentrE,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1202,Business reply mail Processing CentrE,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


New DataFrame size

In [134]:
toronto_onehot.shape

(1204, 219)

#### Grouping rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [135]:
tor_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
tor_grouped

Unnamed: 0,Neighborhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0
1,"Brockton , Parkdale Village , Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business reply mail Processing CentrE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667
3,"CN Tower , King and Spadina , Railway Lands , ...",0.0625,0.0625,0.0625,0.125,0.125,0.125,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.04
7,"Commerce Court , Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [136]:
tor_grouped.shape

(39, 219)

##### Printing Neighborhoods along with the top 5 venues and putting this information in a DataFrame

In [147]:
num_top_venues = 5

for l in tor_grouped['Neighborhood']:
    print("--"+l+"--")
    temp = tor_grouped[tor_grouped['Neighborhood'] == l].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq':2})
    print(temp.sort_values('freq',ascending=False).reset_index(drop=True).head(num_top_venues))
    print('')

--Berczy Park--
                venue  freq
0         Coffee Shop  0.10
1                Café  0.04
2         Cheese Shop  0.04
3            Beer Bar  0.04
4  Seafood Restaurant  0.04

--Brockton , Parkdale Village , Exhibition Place--
            venue  freq
0            Café  0.13
1  Breakfast Spot  0.09
2       Nightclub  0.09
3     Coffee Shop  0.09
4         Stadium  0.04

--Business reply mail Processing CentrE--
           venue  freq
0    Yoga Studio  0.07
1  Auto Workshop  0.07
2     Comic Shop  0.07
3           Park  0.07
4     Restaurant  0.07

--CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst
 Quay , South Niagara , Island airport--
              venue  freq
0    Airport Lounge  0.12
1   Airport Service  0.12
2  Airport Terminal  0.12
3           Airport  0.06
4     Boat or Ferry  0.06

--Central Bay Street--
                 venue  freq
0          Coffee Shop  0.18
1   Italian Restaurant  0.06
2      Bubble Tea Shop  0.04
3  Japanese Restaurant  

In [148]:
#function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [155]:
#creating  new dataframe and display the top 10 venues for each neighborhood.
import numpy as np
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

#creating columns for number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most common venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most common venue'.format(ind+1))
        
#Creating a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns = columns)
neighborhoods_venues_sorted['Neighborhood'] = tor_grouped['Neighborhood']
neighborhoods_venues_sorted

for ind in np.arange(tor_grouped.shape[0]):
    #print(tor_grouped.iloc[ind, :])
    neighborhoods_venues_sorted.iloc[ind,1:] = return_most_common_venues(tor_grouped.iloc[ind, :], num_top_venues)
    
neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,Berczy Park,Coffee Shop,Café,Restaurant,Bakery,Beer Bar,Cocktail Bar,Farmers Market,Seafood Restaurant,Cheese Shop,Beach
1,"Brockton , Parkdale Village , Exhibition Place",Café,Breakfast Spot,Nightclub,Coffee Shop,Pet Store,Climbing Gym,Italian Restaurant,Intersection,Restaurant,Stadium
2,Business reply mail Processing CentrE,Yoga Studio,Auto Workshop,Park,Comic Shop,Pizza Place,Restaurant,Burrito Place,Light Rail Station,Brewery,Skate Park
3,"CN Tower , King and Spadina , Railway Lands , ...",Airport Lounge,Airport Service,Airport Terminal,Airport,Harbor / Marina,Coffee Shop,Plane,Rental Car Location,Sculpture Garden,Boat or Ferry
4,Central Bay Street,Coffee Shop,Italian Restaurant,Spa,Bubble Tea Shop,Burger Joint,Japanese Restaurant,Thai Restaurant,Modern European Restaurant,Bookstore,Sandwich Place


## Cluster Neighborhood

Running k-means to cluster the neighborhood into 5 cluster

In [176]:
#import
from sklearn.cluster import KMeans

import matplotlib.cm as cm
import matplotlib.colors as colors

In [162]:
# number of clusters
kclusters = 5
tor_grouped
tor_grouped_clustering = tor_grouped.drop('Neighborhood', 1)
tor_grouped_clustering

#running k-menas clustering
kmeans = KMeans(n_clusters = kclusters, random_state = 0).fit(tor_grouped_clustering)

#cluster labels
kmeans.labels_[0:50]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 3, 0, 0, 0, 0, 0, 3, 0, 1, 0,
       0, 0, 0, 0, 4, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0])

In [172]:
#adding clustering labels
type(neighborhoods_venues_sorted)
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
neighborhoods_venues_sorted.head(2)

Unnamed: 0,Cluster Labels,Neighborhood,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,0,Berczy Park,Coffee Shop,Café,Restaurant,Bakery,Beer Bar,Cocktail Bar,Farmers Market,Seafood Restaurant,Cheese Shop,Beach
1,0,"Brockton , Parkdale Village , Exhibition Place",Café,Breakfast Spot,Nightclub,Coffee Shop,Pet Store,Climbing Gym,Italian Restaurant,Intersection,Restaurant,Stadium


In [173]:


#merging Borough, latitude, longitude to the new dataframe
toronto_merged = toronto_df

toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
#toronto_df.head()
#neighborhoods_venues_sorted.head()
toronto_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,0,Coffee Shop,Pub,Park,Theater,Mexican Restaurant,Breakfast Spot,Café,Restaurant,Bakery,Shoe Store
1,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Yoga Studio,Café,Beer Bar,Music Venue,Italian Restaurant,Burger Joint,Burrito Place,Juice Bar,Distribution Center
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Coffee Shop,Café,Bookstore,Restaurant,Cosmetics Shop,Clothing Store,Theater,Italian Restaurant,Ramen Restaurant,Beer Bar
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Italian Restaurant,Café,Coffee Shop,Park,Farmers Market,Bakery,BBQ Joint,Japanese Restaurant,Restaurant,Thai Restaurant
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,3,Trail,Pub,Health Food Store,Yoga Studio,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Café,Restaurant,Bakery,Beer Bar,Cocktail Bar,Farmers Market,Seafood Restaurant,Cheese Shop,Beach
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Italian Restaurant,Spa,Bubble Tea Shop,Burger Joint,Japanese Restaurant,Thai Restaurant,Modern European Restaurant,Bookstore,Sandwich Place
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564,0,Grocery Store,Café,Park,Coffee Shop,Italian Restaurant,Diner,Candy Store,Restaurant,Athletics & Sports,Baby Store
8,M5H,Downtown Toronto,"Richmond , Adelaide , King",43.650571,-79.384568,0,Café,Asian Restaurant,Hotel,Bar,Vegetarian / Vegan Restaurant,Gastropub,American Restaurant,Steakhouse,Restaurant,Pizza Place
9,M6H,West Toronto,"Dufferin , Dovercourt Village",43.669005,-79.442259,0,Bakery,Pharmacy,Café,Middle Eastern Restaurant,Bar,Supermarket,Bank,Brewery,Fast Food Restaurant,Pool


#### Visualizing the resuling clusters

In [183]:
# create map
map_clusters = folium.Map(location=[lat,long], zoom_start=11)

#color scheme for clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, long, neighbor, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(neighbor) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat,long],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.5).add_to(map_clusters)
    
map_clusters

### Examining Clusters

In [185]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,0,Coffee Shop,Pub,Park,Theater,Mexican Restaurant,Breakfast Spot,Café,Restaurant,Bakery,Shoe Store
1,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Yoga Studio,Café,Beer Bar,Music Venue,Italian Restaurant,Burger Joint,Burrito Place,Juice Bar,Distribution Center
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Coffee Shop,Café,Bookstore,Restaurant,Cosmetics Shop,Clothing Store,Theater,Italian Restaurant,Ramen Restaurant,Beer Bar
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Italian Restaurant,Café,Coffee Shop,Park,Farmers Market,Bakery,BBQ Joint,Japanese Restaurant,Restaurant,Thai Restaurant
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Café,Restaurant,Bakery,Beer Bar,Cocktail Bar,Farmers Market,Seafood Restaurant,Cheese Shop,Beach
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Italian Restaurant,Spa,Bubble Tea Shop,Burger Joint,Japanese Restaurant,Thai Restaurant,Modern European Restaurant,Bookstore,Sandwich Place
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564,0,Grocery Store,Café,Park,Coffee Shop,Italian Restaurant,Diner,Candy Store,Restaurant,Athletics & Sports,Baby Store
8,M5H,Downtown Toronto,"Richmond , Adelaide , King",43.650571,-79.384568,0,Café,Asian Restaurant,Hotel,Bar,Vegetarian / Vegan Restaurant,Gastropub,American Restaurant,Steakhouse,Restaurant,Pizza Place
9,M6H,West Toronto,"Dufferin , Dovercourt Village",43.669005,-79.442259,0,Bakery,Pharmacy,Café,Middle Eastern Restaurant,Bar,Supermarket,Bank,Brewery,Fast Food Restaurant,Pool
10,M5J,Downtown Toronto,"Harbourfront East , Union Station , Toronto Is...",43.640816,-79.381752,0,Aquarium,Coffee Shop,Café,Bar,Park,Hotel,Plaza,Fried Chicken Joint,Italian Restaurant,Beer Bar


In [184]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
0,Downtown Toronto,0,Coffee Shop,Pub,Park,Theater,Mexican Restaurant,Breakfast Spot,Café,Restaurant,Bakery,Shoe Store
1,Downtown Toronto,0,Coffee Shop,Yoga Studio,Café,Beer Bar,Music Venue,Italian Restaurant,Burger Joint,Burrito Place,Juice Bar,Distribution Center
2,Downtown Toronto,0,Coffee Shop,Café,Bookstore,Restaurant,Cosmetics Shop,Clothing Store,Theater,Italian Restaurant,Ramen Restaurant,Beer Bar
3,Downtown Toronto,0,Italian Restaurant,Café,Coffee Shop,Park,Farmers Market,Bakery,BBQ Joint,Japanese Restaurant,Restaurant,Thai Restaurant
5,Downtown Toronto,0,Coffee Shop,Café,Restaurant,Bakery,Beer Bar,Cocktail Bar,Farmers Market,Seafood Restaurant,Cheese Shop,Beach
6,Downtown Toronto,0,Coffee Shop,Italian Restaurant,Spa,Bubble Tea Shop,Burger Joint,Japanese Restaurant,Thai Restaurant,Modern European Restaurant,Bookstore,Sandwich Place
7,Downtown Toronto,0,Grocery Store,Café,Park,Coffee Shop,Italian Restaurant,Diner,Candy Store,Restaurant,Athletics & Sports,Baby Store
8,Downtown Toronto,0,Café,Asian Restaurant,Hotel,Bar,Vegetarian / Vegan Restaurant,Gastropub,American Restaurant,Steakhouse,Restaurant,Pizza Place
9,West Toronto,0,Bakery,Pharmacy,Café,Middle Eastern Restaurant,Bar,Supermarket,Bank,Brewery,Fast Food Restaurant,Pool
10,Downtown Toronto,0,Aquarium,Coffee Shop,Café,Bar,Park,Hotel,Plaza,Fried Chicken Joint,Italian Restaurant,Beer Bar


In [186]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
29,Central Toronto,1,Playground,Gym,Yoga Studio,Deli / Bodega,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run


In [187]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
19,Central Toronto,2,Pool,Garden,Health & Beauty Service,Yoga Studio,Deli / Bodega,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run


In [188]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
4,East Toronto,3,Trail,Pub,Health Food Store,Yoga Studio,Dance Studio,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run
18,Central Toronto,3,Park,Lawyer,Bus Line,Swim School,Yoga Studio,Department Store,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run
20,Central Toronto,3,Park,Asian Restaurant,Sandwich Place,Food & Drink Shop,Hotel,Department Store,Breakfast Spot,Gym,Convenience Store,Cosmetics Shop
21,Central Toronto,3,Jewelry Store,Trail,Bus Line,Sushi Restaurant,Home Service,Park,Gaming Cafe,Dance Studio,Donut Shop,Doner Restaurant


In [189]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most common venue,2nd Most common venue,3rd Most common venue,4th Most common venue,5th Most common venue,6th Most common venue,7th Most common venue,8th Most common venue,9th Most common venue,10th Most common venue
33,Downtown Toronto,4,Park,Playground,Trail,Yoga Studio,Dance Studio,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center
