# Capstone Project: Determination of Toronto Neighborhood that Best Suits Mr. Smith Preferences

#### Step 1: Install necessary packages for the analysis (Geopy and Folium)

In [1]:
!conda install -c conda-forge geopy --yes
!conda install -c conda-forge folium=0.5.0 --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    openssl-1.1.1f             |       h516909a_0         2.1 MB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1

#### Step 2: Import necessary packages for analysis (i.e. pandas, numpy, etc.)

In [3]:
import pandas as pd
import numpy as np
import io
import requests
from sklearn.cluster import KMeans
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import json
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize

#### Step 3: Import lists of Postal codes, Borough and Neighborhoods in Toronto from Wikipedia. All postal codes beginning with M are located within the city of Toronto. 

In [4]:
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]

#### Step 4: Data cleaning performed. 
1. Postal codes with no Boroughs assigned are removed from the list. This means that neighborhood does not exist in those areas. 
2. For postal codes with assigned boroughs but not assigned neighborhoods, the neighborhood name is assumed to be the borough name. 

In [5]:
df['Borough'].replace("Not assigned", np.NaN, inplace=True)
df.dropna(subset=['Borough'], axis=0, inplace=True)
df["Neighborhood"] = df["Neighborhood"].str.replace(" /",",")
df["Neighborhood"].replace("Not assigned", df["Borough"], inplace=True)
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [6]:
df.shape

(103, 3)

After data cleaning, 103 unique postal codes (i.e. neighborhoods) exist in Toronto City. 

#### Step 5: Extract data from the csv sheet containing the geospatial coordinates of each postcode. Join both tables together based on the postal codes to create a new dataframe. 

In [7]:
url="http://cocl.us/Geospatial_data"
s=requests.get(url).content
data_lat_log=pd.read_csv(io.StringIO(s.decode('utf-8')))
df_new = df.merge(data_lat_log, left_on='Postal code', right_on="Postal Code")
df_new.drop('Postal Code', 1, inplace=True)
df_new.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [15]:
df_new["Borough"].value_counts()

North York          24
Downtown Toronto    19
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
York                 5
East York            5
East Toronto         5
Mississauga          1
Name: Borough, dtype: int64

In [16]:
toronto_data=df_new;

#### Step 6: Determine Toronto, Canada Coordinates for Folium Map

In [17]:
address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto, ON, Canada are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto, ON, Canada are 43.6534817, -79.3839347.


#### Step 7: Show Toronto on Map and show neighborhoods on the map. 

In [20]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### Step 8: Foursquare API Client ID and SECRET for data extraction 

In [26]:
CLIENT_ID = '0NEISBFG0NMNCFEV3VSB453GQ2OWCD4EOXWCSUBZ0XW3I1AU' # your Foursquare ID
CLIENT_SECRET = 'LZJEMZVSDGVN434JTJ0MJZV2TDO3AYMRCBDSVMONVIC5REH1' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 0NEISBFG0NMNCFEV3VSB453GQ2OWCD4EOXWCSUBZ0XW3I1AU
CLIENT_SECRET:LZJEMZVSDGVN434JTJ0MJZV2TDO3AYMRCBDSVMONVIC5REH1


#### Step 9: Get the nearby venues for each neighborhood centre (based on coordinates) on Foursquare. Radius from the coordinates is set as 1000 m and limit results to 100. 

In [29]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000, LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [30]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  );

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview
The Danforth West, Ri

In [33]:
print(toronto_venues.shape)
toronto_venues.head()

(4952, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Allwyn's Bakery,43.75984,-79.324719,Caribbean Restaurant
1,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
2,Parkwoods,43.753259,-79.329656,Tim Hortons,43.760668,-79.326368,Café
3,Parkwoods,43.753259,-79.329656,A&W,43.760643,-79.326865,Fast Food Restaurant
4,Parkwoods,43.753259,-79.329656,Bruno's valu-mart,43.746143,-79.32463,Grocery Store


In [34]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,50,50,50,50,50,50
"Alderwood, Long Branch",25,25,25,25,25,25
"Bathurst Manor, Wilson Heights, Downsview North",31,31,31,31,31,31
Bayview Village,15,15,15,15,15,15
"Bedford Park, Lawrence Manor East",41,41,41,41,41,41
Berczy Park,100,100,100,100,100,100
"Birch Cliff, Cliffside West",12,12,12,12,12,12
"Brockton, Parkdale Village, Exhibition Place",100,100,100,100,100,100
Business reply mail Processing CentrE,48,48,48,48,48,48
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",15,15,15,15,15,15


#### Step 10: Check how many unique venue categories are there in each neighborhood. 

In [35]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 327 uniques categories.


#### Step 11: Apply one hot encoding to transform venue categories to categorical variables. 

In [40]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Zoo,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,Arcade,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [37]:
toronto_onehot.shape

(4952, 327)

#### Step 12: Determine the mean frequency of each venue category in the neighborhood. 

In [41]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Zoo,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,...,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.000000,0.000000,0.000000,0.020000,0.0,0.0,0.00,0.00,...,0.000000,0.000000,0.020000,0.000000,0.0,0.0000,0.000000,0.000000,0.000000,0.000000
1,"Alderwood, Long Branch",0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.00,0.00,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0000,0.000000,0.000000,0.000000,0.000000
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.00,0.00,...,0.000000,0.032258,0.000000,0.000000,0.0,0.0000,0.000000,0.000000,0.000000,0.000000
3,Bayview Village,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.00,0.00,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0000,0.000000,0.000000,0.000000,0.000000
4,"Bedford Park, Lawrence Manor East",0.0,0.000000,0.000000,0.000000,0.024390,0.0,0.0,0.00,0.00,...,0.000000,0.024390,0.000000,0.000000,0.0,0.0000,0.000000,0.024390,0.000000,0.000000
5,Berczy Park,0.0,0.000000,0.000000,0.000000,0.020000,0.0,0.0,0.00,0.00,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0000,0.000000,0.000000,0.000000,0.000000
6,"Birch Cliff, Cliffside West",0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.00,0.00,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0000,0.000000,0.000000,0.000000,0.000000
7,"Brockton, Parkdale Village, Exhibition Place",0.0,0.010000,0.000000,0.000000,0.010000,0.0,0.0,0.00,0.00,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0000,0.000000,0.000000,0.000000,0.000000
8,Business reply mail Processing CentrE,0.0,0.000000,0.000000,0.000000,0.020833,0.0,0.0,0.00,0.00,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0000,0.000000,0.000000,0.000000,0.000000
9,"CN Tower, King and Spadina, Railway Lands, Har...",0.0,0.000000,0.000000,0.066667,0.000000,0.0,0.0,0.00,0.00,...,0.000000,0.000000,0.000000,0.000000,0.0,0.0000,0.000000,0.000000,0.000000,0.000000


In [42]:
toronto_grouped.shape

(97, 327)

#### Step 13: Find the top 5 (most common) venues in each neighborhood. 

In [44]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [80]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted_most = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted_most['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted_most.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted_most.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Shopping Mall,Bakery,Coffee Shop,Sandwich Place,Pizza Place,Caribbean Restaurant,Sri Lankan Restaurant,Indian Restaurant,Discount Store
1,"Alderwood, Long Branch",Discount Store,Pharmacy,Pizza Place,Skating Rink,Sandwich Place,Garden Center,Gas Station,Donut Shop,Liquor Store,Shopping Mall
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Pizza Place,Convenience Store,Coffee Shop,Trail,Sandwich Place,Dog Run,Fried Chicken Joint,Frozen Yogurt Shop,Middle Eastern Restaurant
3,Bayview Village,Grocery Store,Bank,Gas Station,Japanese Restaurant,Intersection,Chinese Restaurant,Park,Shopping Mall,Restaurant,Café
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Restaurant,Fast Food Restaurant,Sandwich Place,Bank,Juice Bar,Thai Restaurant,Bagel Shop,Bakery


#### Step 14: Find the bottom 5 (least common) venues in each neighborhood. 

In [81]:
def return_least_common_venues(row, num_bottom_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=True)
    
    return row_categories_sorted.index.values[0:num_bottom_venues]

In [84]:
num_bottom_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of least common venues
columns = ['Neighborhood']
for ind in np.arange(num_bottom_venues):
    try:
        columns.append('{}{} Least Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Least Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted_least = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted_least['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted_least.iloc[ind, 1:] = return_least_common_venues(toronto_grouped.iloc[ind, :], num_bottom_venues)

neighborhoods_venues_sorted_least.head()

Unnamed: 0,Neighborhood,1st Least Common Venue,2nd Least Common Venue,3rd Least Common Venue,4th Least Common Venue,5th Least Common Venue,6th Least Common Venue,7th Least Common Venue,8th Least Common Venue,9th Least Common Venue,10th Least Common Venue
0,Agincourt,Zoo,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Music Venue,Monument / Landmark,Mobile Phone Shop
1,"Alderwood, Long Branch",Zoo,Nightclub,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater
2,"Bathurst Manor, Wilson Heights, Downsview North",Zoo,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark
3,Bayview Village,Zoo,Optical Shop,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Organic Grocery,Music Venue
4,"Bedford Park, Lawrence Manor East",Zoo,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark


#### Step 15: Join the top (most common) 10 venue categories and bottom (least common) 10 venue categories together to create a new dataframe consisting of both most common and least common data. 

In [190]:
data_toronto_filtered = neighborhoods_venues_sorted_most.merge(neighborhoods_venues_sorted_least, left_on='Neighborhood', right_on="Neighborhood")
#df_new.drop('Postal Code', 1, inplace=True)
data_toronto_filtered.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,...,1st Least Common Venue,2nd Least Common Venue,3rd Least Common Venue,4th Least Common Venue,5th Least Common Venue,6th Least Common Venue,7th Least Common Venue,8th Least Common Venue,9th Least Common Venue,10th Least Common Venue
0,Agincourt,Chinese Restaurant,Shopping Mall,Bakery,Coffee Shop,Sandwich Place,Pizza Place,Caribbean Restaurant,Sri Lankan Restaurant,Indian Restaurant,...,Zoo,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Music Venue,Monument / Landmark,Mobile Phone Shop
1,"Alderwood, Long Branch",Discount Store,Pharmacy,Pizza Place,Skating Rink,Sandwich Place,Garden Center,Gas Station,Donut Shop,Liquor Store,...,Zoo,Nightclub,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Pizza Place,Convenience Store,Coffee Shop,Trail,Sandwich Place,Dog Run,Fried Chicken Joint,Frozen Yogurt Shop,...,Zoo,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark
3,Bayview Village,Grocery Store,Bank,Gas Station,Japanese Restaurant,Intersection,Chinese Restaurant,Park,Shopping Mall,Restaurant,...,Zoo,Optical Shop,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Organic Grocery,Music Venue
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Restaurant,Fast Food Restaurant,Sandwich Place,Bank,Juice Bar,Thai Restaurant,Bagel Shop,...,Zoo,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark


In [104]:
data_toronto_filtered.shape

(97, 21)

#### Step 16: Run K-means clustering algorithm for the entire dataset. 

In [189]:
# set number of clusters
from sklearn.cluster import KMeans
kclusters = 7
toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
# kmeans.labels_[0:20] 

In [191]:
# add clustering labels
data_toronto_filtered.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(data_toronto_filtered.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,1st Least Common Venue,2nd Least Common Venue,3rd Least Common Venue,4th Least Common Venue,5th Least Common Venue,6th Least Common Venue,7th Least Common Venue,8th Least Common Venue,9th Least Common Venue,10th Least Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,3.0,Park,Convenience Store,Pharmacy,Shopping Mall,...,Zoo,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,3.0,Coffee Shop,Boxing Gym,Hockey Arena,Sporting Goods Shop,...,Zoo,Optical Shop,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Organic Grocery,Music Venue
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1.0,Coffee Shop,Theater,Diner,Café,...,Zoo,Music Venue,Music Store,Music School,Moving Target,Movie Theater,Moroccan Restaurant,Nail Salon,Monument / Landmark,Mobile Phone Shop
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0.0,Fast Food Restaurant,Coffee Shop,Restaurant,Accessories Store,...,Zoo,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,1.0,Coffee Shop,Park,Sushi Restaurant,Italian Restaurant,...,Zoo,Music Venue,Music Store,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark,Nail Salon,Mobile Phone Shop,Metro Station


#### Step 17: Let's have a look on the results cluster by cluster.

# Cluster 1 (below) are neighborhoods most common for restaurants / pubs / coffee shops. It is definitely not somewhere where Mr. Smith wants to live in.

In [247]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,...,1st Least Common Venue,2nd Least Common Venue,3rd Least Common Venue,4th Least Common Venue,5th Least Common Venue,6th Least Common Venue,7th Least Common Venue,8th Least Common Venue,9th Least Common Venue,10th Least Common Venue
3,North York,0.0,Fast Food Restaurant,Coffee Shop,Restaurant,Accessories Store,Dessert Shop,Vietnamese Restaurant,Fried Chicken Joint,Sushi Restaurant,...,Zoo,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark
6,Scarborough,0.0,Fast Food Restaurant,Trail,Coffee Shop,Hobby Shop,Restaurant,Bus Station,Supermarket,Caribbean Restaurant,...,Zoo,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Music Venue,Music Store,Opera House,Music School
7,North York,0.0,Restaurant,Coffee Shop,Japanese Restaurant,Asian Restaurant,Burger Joint,Gym,Supermarket,Bank,...,Zoo,Noodle House,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater
10,North York,0.0,Grocery Store,Fast Food Restaurant,Gym,Pizza Place,Gas Station,Park,Coffee Shop,Bank,...,Zoo,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School,Museum
13,North York,0.0,Restaurant,Coffee Shop,Japanese Restaurant,Asian Restaurant,Burger Joint,Gym,Supermarket,Bank,...,Zoo,Noodle House,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater
16,York,0.0,Convenience Store,Pizza Place,Coffee Shop,Sushi Restaurant,Grocery Store,Sandwich Place,Bakery,Field,...,Zoo,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School,Museum
18,Scarborough,0.0,Pizza Place,Bank,Coffee Shop,Fast Food Restaurant,Grocery Store,Sports Bar,Food & Drink Shop,Electronics Store,...,Zoo,Noodle House,Nightclub,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target
23,East York,0.0,Coffee Shop,Sporting Goods Shop,Furniture / Home Store,Grocery Store,Electronics Store,Brewery,Bank,Restaurant,...,Zoo,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark
29,East York,0.0,Coffee Shop,Indian Restaurant,Grocery Store,Brewery,Gym,Supermarket,Burger Joint,Shopping Mall,...,Zoo,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant
32,Scarborough,0.0,Ice Cream Shop,Fast Food Restaurant,Coffee Shop,Train Station,Grocery Store,Restaurant,Bowling Alley,Japanese Restaurant,...,Zoo,Optical Shop,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Organic Grocery,Music Venue


 # Cluster 2 (below) neighborhoods have a lot of coffee shops but has a park too. Looks like a busy neighborhood too. Mr. Smith do not prefer this cluster. 

In [248]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,...,1st Least Common Venue,2nd Least Common Venue,3rd Least Common Venue,4th Least Common Venue,5th Least Common Venue,6th Least Common Venue,7th Least Common Venue,8th Least Common Venue,9th Least Common Venue,10th Least Common Venue
2,Downtown Toronto,1.0,Coffee Shop,Theater,Diner,Café,Park,Pub,Breakfast Spot,Italian Restaurant,...,Zoo,Music Venue,Music Store,Music School,Moving Target,Movie Theater,Moroccan Restaurant,Nail Salon,Monument / Landmark,Mobile Phone Shop
4,Downtown Toronto,1.0,Coffee Shop,Park,Sushi Restaurant,Italian Restaurant,Japanese Restaurant,Gastropub,Ramen Restaurant,Restaurant,...,Zoo,Music Venue,Music Store,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark,Nail Salon,Mobile Phone Shop,Metro Station
9,Downtown Toronto,1.0,Coffee Shop,Gastropub,Japanese Restaurant,Café,Theater,Italian Restaurant,Restaurant,Plaza,...,Zoo,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Nail Salon,Monument / Landmark,Miscellaneous Shop
15,Downtown Toronto,1.0,Coffee Shop,Café,Restaurant,Gastropub,Seafood Restaurant,Hotel,Italian Restaurant,Theater,...,Zoo,Nail Salon,Music Store,Music School,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark,Modern European Restaurant,Mobile Phone Shop
19,East Toronto,1.0,Pub,Coffee Shop,Pizza Place,Breakfast Spot,Bakery,Beach,Japanese Restaurant,Burger Joint,...,Zoo,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Music Venue,Monument / Landmark,Mobile Phone Shop
20,Downtown Toronto,1.0,Coffee Shop,Café,Hotel,Japanese Restaurant,Restaurant,Seafood Restaurant,Park,Gastropub,...,Zoo,Music Store,Music School,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark,Music Venue,Modern European Restaurant,Miscellaneous Shop
24,Downtown Toronto,1.0,Coffee Shop,Ramen Restaurant,Diner,Café,Clothing Store,Sushi Restaurant,Japanese Restaurant,Gastropub,...,Zoo,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark,Mobile Phone Shop,Music School,Miscellaneous Shop,Men's Store
25,Downtown Toronto,1.0,Korean Restaurant,Café,Coffee Shop,Grocery Store,Ice Cream Shop,Mexican Restaurant,Cocktail Bar,Indian Restaurant,...,Zoo,Music Venue,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Nail Salon,Monument / Landmark,Mobile Phone Shop
30,Downtown Toronto,1.0,Coffee Shop,Café,Theater,Hotel,Japanese Restaurant,American Restaurant,Restaurant,Concert Hall,...,Zoo,Moroccan Restaurant,Monument / Landmark,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant,Moving Target,Mexican Restaurant,Men's Store
31,West Toronto,1.0,Café,Park,Coffee Shop,Italian Restaurant,Sushi Restaurant,Bar,Bakery,Portuguese Restaurant,...,Zoo,New American Restaurant,Nail Salon,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark


 # Cluster 3 (below) neighborhoods has a park but nothing much that meets Mr. Smith's preferences. It does not have a grocery store and public transport ammenities nearby

In [252]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,...,1st Least Common Venue,2nd Least Common Venue,3rd Least Common Venue,4th Least Common Venue,5th Least Common Venue,6th Least Common Venue,7th Least Common Venue,8th Least Common Venue,9th Least Common Venue,10th Least Common Venue
12,Scarborough,2.0,Breakfast Spot,Park,Playground,Burger Joint,Italian Restaurant,Yoga Studio,Event Space,Dumpling Restaurant,...,Zoo,Organic Grocery,Optical Shop,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Other Great Outdoors,Nail Salon
101,Etobicoke,2.0,Park,Italian Restaurant,Eastern European Restaurant,Ice Cream Shop,Gym / Fitness Center,Ethiopian Restaurant,Donut Shop,Dry Cleaner,...,Zoo,Other Great Outdoors,Organic Grocery,Optical Shop,Opera House,Office,Noodle House,Nightclub,Other Repair Shop,New American Restaurant


# Cluster 4 (below) shows neighborhood that is quite balanced. Although some neighborhoods still have the most common venue as coffee shop, is not that common compared to Cluster 1 and 2. Mr. Smith can choose from a neighborhood from this list!

In [253]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,...,1st Least Common Venue,2nd Least Common Venue,3rd Least Common Venue,4th Least Common Venue,5th Least Common Venue,6th Least Common Venue,7th Least Common Venue,8th Least Common Venue,9th Least Common Venue,10th Least Common Venue
0,North York,3.0,Park,Convenience Store,Pharmacy,Shopping Mall,Bus Stop,Food & Drink Shop,Laundry Service,Caribbean Restaurant,...,Zoo,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant
1,North York,3.0,Coffee Shop,Boxing Gym,Hockey Arena,Sporting Goods Shop,Park,Portuguese Restaurant,Pizza Place,Golf Course,...,Zoo,Optical Shop,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Organic Grocery,Music Venue
5,Etobicoke,3.0,Pharmacy,Grocery Store,Skating Rink,Bank,Convenience Store,Playground,Bakery,Shopping Mall,...,Zoo,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Music Venue,Music Store,Opera House,Music School
8,East York,3.0,Pizza Place,Brewery,Fast Food Restaurant,Athletics & Sports,Soccer Stadium,Bus Line,Breakfast Spot,Café,...,Zoo,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Music Venue,Optical Shop,Music Store
11,Etobicoke,3.0,Park,Hotel,Pizza Place,Theater,Restaurant,Bank,Fish & Chips Shop,Clothing Store,...,Zoo,Optical Shop,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Organic Grocery,Music Venue
14,East York,3.0,Coffee Shop,Park,Pizza Place,Sandwich Place,Thai Restaurant,Cosmetics Shop,Athletics & Sports,Bus Line,...,Zoo,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant
17,Etobicoke,3.0,Coffee Shop,Fish & Chips Shop,Shopping Mall,Shopping Plaza,Liquor Store,Beer Store,Café,Park,...,Zoo,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Music Venue,Optical Shop,Music Store
21,York,3.0,Mexican Restaurant,Pharmacy,Park,Bank,Grocery Store,Beer Store,Bakery,Falafel Restaurant,...,Zoo,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School
22,Scarborough,3.0,Coffee Shop,Park,Mobile Phone Shop,Chinese Restaurant,Fast Food Restaurant,Indian Restaurant,Pharmacy,Dumpling Restaurant,...,Zoo,Organic Grocery,Optical Shop,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Other Great Outdoors,Nail Salon
26,Scarborough,3.0,Bakery,Coffee Shop,Bank,Gas Station,Indian Restaurant,Yoga Studio,Burger Joint,Board Shop,...,Zoo,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Music Venue,Music School,Museum


# Cluster 5 (below) only has park that fits to Mr. Smith preferences. Maybe not the best!

In [254]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 5, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,...,1st Least Common Venue,2nd Least Common Venue,3rd Least Common Venue,4th Least Common Venue,5th Least Common Venue,6th Least Common Venue,7th Least Common Venue,8th Least Common Venue,9th Least Common Venue,10th Least Common Venue
45,North York,5.0,Park,Pool,Yoga Studio,Falafel Restaurant,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,...,Zoo,Organic Grocery,Optical Shop,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Other Great Outdoors,Nail Salon


# Cluster 6 most common venue is a hotel. Nothing from the top 10 meets Mr. Smith preferences. Not this cluster!

In [256]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 6, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,...,1st Least Common Venue,2nd Least Common Venue,3rd Least Common Venue,4th Least Common Venue,5th Least Common Venue,6th Least Common Venue,7th Least Common Venue,8th Least Common Venue,9th Least Common Venue,10th Least Common Venue
94,Etobicoke,6.0,Hotel,Rental Car Location,Coffee Shop,Yoga Studio,Falafel Restaurant,Dry Cleaner,Dumpling Restaurant,Eastern European Restaurant,...,Zoo,Other Great Outdoors,Organic Grocery,Optical Shop,Opera House,Office,Noodle House,Nightclub,Other Repair Shop,New American Restaurant


# From the analysis, it seems like neighborhoods from Cluster 4 seems like the best for Mr. Smith to settle down. He can use this cluster as a starting point to choose the neighborhood that suits him and his family the best. Summary of the neighborhoods and the location of each one is pointed in the map below. 

In [295]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
toronto_merged=toronto_merged[(toronto_merged['Cluster Labels'])==3]
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
        label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
        folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.7).add_to(map_clusters)
map_clusters

In [294]:
toronto_merged

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,1st Least Common Venue,2nd Least Common Venue,3rd Least Common Venue,4th Least Common Venue,5th Least Common Venue,6th Least Common Venue,7th Least Common Venue,8th Least Common Venue,9th Least Common Venue,10th Least Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,3.0,Park,Convenience Store,Pharmacy,Shopping Mall,...,Zoo,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant
1,M4A,North York,Victoria Village,43.725882,-79.315572,3.0,Coffee Shop,Boxing Gym,Hockey Arena,Sporting Goods Shop,...,Zoo,Optical Shop,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Organic Grocery,Music Venue
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242,3.0,Pharmacy,Grocery Store,Skating Rink,Bank,...,Zoo,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Music Venue,Music Store,Opera House,Music School
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,3.0,Pizza Place,Brewery,Fast Food Restaurant,Athletics & Sports,...,Zoo,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Music Venue,Optical Shop,Music Store
11,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724,3.0,Park,Hotel,Pizza Place,Theater,...,Zoo,Optical Shop,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Organic Grocery,Music Venue
14,M4C,East York,Woodbine Heights,43.695344,-79.318389,3.0,Coffee Shop,Park,Pizza Place,Sandwich Place,...,Zoo,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School,Museum,Moving Target,Movie Theater,Moroccan Restaurant
17,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201,3.0,Coffee Shop,Fish & Chips Shop,Shopping Mall,Shopping Plaza,...,Zoo,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Music Venue,Optical Shop,Music Store
21,M6E,York,Caledonia-Fairbanks,43.689026,-79.453512,3.0,Mexican Restaurant,Pharmacy,Park,Bank,...,Zoo,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Music Venue,Music Store,Music School
22,M1G,Scarborough,Woburn,43.770992,-79.216917,3.0,Coffee Shop,Park,Mobile Phone Shop,Chinese Restaurant,...,Zoo,Organic Grocery,Optical Shop,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Other Great Outdoors,Nail Salon
26,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,3.0,Bakery,Coffee Shop,Bank,Gas Station,...,Zoo,Opera House,Office,Noodle House,Nightclub,New American Restaurant,Nail Salon,Music Venue,Music School,Museum
