### **Segmenting and Clustering Neighborhoods in Toronto (Third Step)**

##### **Import Libraries and Data from Second Step**

In [1]:
import pandas as pd
import numpy as np
import json
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
import requests

df_toronto = pd.read_csv('toronto_part2.csv')
df_toronto = df_toronto.drop(['Unnamed: 0'], axis=1)
df_toronto.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M9W,Etobicoke,Northwest,43.706748,-79.594054
1,M5S,Downtown Toronto,"Harbord, University of Toronto",43.662696,-79.400049
2,M3J,North York,"Northwood Park, York University",43.76798,-79.487262
3,M2H,North York,Hillcrest Village,43.803762,-79.363452
4,M9C,Etobicoke,"Bloordale Gardens, Eringate, Markland Wood, Ol...",43.643515,-79.577201
5,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
6,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
7,M2N,North York,Willowdale South,43.77012,-79.408493
8,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437
9,M2R,North York,Willowdale West,43.782736,-79.442259


##### **Part 1: Create a Map of Toronto**

In [2]:
# create map of Toronto
map_toronto = folium.Map(location=[43.6532, -79.3832], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto

##### **Part 2: Explore the Borough North York**

In [None]:
#API Details
CLIENT_ID = 'MXLSID4D2C4YPFA1KZWJKY1QM4FJXXDXYHQZPPYF2V5PVFOI'
CLIENT_SECRET = 'F5CM2PTG4HREA3NY5311D2YKFIPHQO1EMWDQVEJLMN32LJMT'
VERSION = '20180605'

#New dataframe
northwest_data = df_toronto[df_toronto['Borough'] == 'North York'].reset_index(drop=True)

#Location Details
neighborhood_latitude = 43.706748
neighborhood_longitude = -79.594054
neighborhood_name = "Nort York"


#Top 100 venues
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

#Get results
results = requests.get(url).json()


#Extract the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

#Create pandas
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues)
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head()

print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

##### **Part 3: Explore all Neighborhoods in Toronto**

In [None]:
#Create the function
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)


#Create new dataframe
toronto_venues = getNearbyVenues(names=df_toronto['Neighbourhood'],
                                   latitudes=df_toronto['Latitude'],
                                   longitudes=df_toronto['Longitude']
                                  )

#Identify unique categories
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

##### **Part 4: Analyze Each Neighborhood**

In [57]:
#Encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

#Group rows by neighborhood
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()

#Top 5 common venues
num_top_venues = 5
for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    
#Sort venues in descending
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

#New dataframe
num_top_venues = 10
indicators = ['st', 'nd', 'rd']
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

----Adelaide, King, Richmond----
----Agincourt----
----Agincourt North, L'Amoreaux East, Milliken, Steeles East----
----Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown----
----Alderwood, Long Branch----
----Bathurst Manor, Downsview North, Wilson Heights----
----Bayview Village----
----Bedford Park, Lawrence Manor East----
----Berczy Park----
----Birch Cliff, Cliffside West----
----Bloordale Gardens, Eringate, Markland Wood, Old Burnhamthorpe----
----Brockton, Exhibition Place, Parkdale Village----
----Business reply mail Processing Centre969 Eastern----
----CFB Toronto, Downsview East----
----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
----Cabbagetown, St. James Town----
----Caledonia-Fairbanks----
----Canada Post Gateway Processing Centre----
----Cedarbrae----
----Central Bay Street----
----Chinatown, Grange Park, Kensington Market----
----Christie----
----

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Thai Restaurant,American Restaurant,Steakhouse,Bar,Bakery,Gym,Clothing Store,Hotel
1,Agincourt,Breakfast Spot,Lounge,Clothing Store,Skating Rink,Women's Store,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Playground,Park,Women's Store,Doner Restaurant,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Pizza Place,Beer Store,Fried Chicken Joint,Japanese Restaurant,Fast Food Restaurant,Discount Store,Pharmacy,Sandwich Place,Coffee Shop
4,"Alderwood, Long Branch",Pizza Place,Pharmacy,Skating Rink,Coffee Shop,Pool,Pub,Sandwich Place,Gym,Airport Lounge,Dance Studio
5,"Bathurst Manor, Downsview North, Wilson Heights",Coffee Shop,Frozen Yogurt Shop,Diner,Sandwich Place,Bridal Shop,Fast Food Restaurant,Deli / Bodega,Bank,Restaurant,Middle Eastern Restaurant
6,Bayview Village,Chinese Restaurant,Japanese Restaurant,Café,Bank,Doner Restaurant,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Women's Store
7,"Bedford Park, Lawrence Manor East",Fast Food Restaurant,Juice Bar,Coffee Shop,Italian Restaurant,Restaurant,Sandwich Place,Sushi Restaurant,Pub,Thai Restaurant,Butcher
8,Berczy Park,Coffee Shop,Restaurant,Cocktail Bar,Steakhouse,Café,Bakery,Italian Restaurant,Pub,Cheese Shop,Farmers Market
9,"Birch Cliff, Cliffside West",General Entertainment,College Stadium,Skating Rink,Café,Women's Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run


##### **Part 5: Cluster Neighborhoods**

In [1]:
#K-cluster
kclusters = 5
toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)
kmeans.labels_[0:10]

#New Dataframe
toronto_merged = northwest_data.drop(24)
toronto_merged['Cluster Labels'] = kmeans.labels_
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Borough'), on='Borough')

#Create map
map_clusters = folium.Map(location=[43.6532, -79.3832], zoom_start=11)
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Borough'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

NameError: name 'toronto_grouped' is not defined

In [None]:
northwest_data