In [80]:
import os

import folium
import geocoder
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from sklearn.cluster import KMeans

import project_part1 as p1
import project_part2 as p2

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
# Import the data from part 1 and pass it straight to part 2, which adds the
# coordinates (presumably the Latitude/Longitude columns used further down).
df = p2.getCoordinates(p1.getDataframe())

In [4]:
# Keep only the rows whose borough name contains "Toronto".
is_toronto_borough = df['Borough'].str.contains("Toronto")
df_toronto = df[is_toronto_borough]

In [5]:
# Geocode the city itself; the result's latlng is read as [latitude, longitude]
# and is used below to centre the maps.
g = geocoder.arcgis("Toronto, Canada")
coordinates = g.latlng
tor_lat, tor_lon = coordinates[0], coordinates[1]

In [179]:
# Base map centred on the geocoded Toronto coordinates.
toronto_center = [tor_lat, tor_lon]
map_toronto = folium.Map(location=toronto_center, zoom_start=12)

In [204]:
# Add one circle marker per Toronto neighbourhood; the popup shows
# "<neighbourhood>, <borough>".
toronto_rows = zip(
    df_toronto['Neighbourhood'],
    df_toronto['Borough'],
    df_toronto['Latitude'],
    df_toronto['Longitude'],
)
for neighbourhood, borough, lat, lon in toronto_rows:
    popup_text = '{}, {}'.format(neighbourhood, borough)
    marker = folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

In [209]:
# Display the Toronto neighbourhood map as this cell's output.
map_toronto

In [182]:
# Foursquare API settings.
# Credentials are read from the environment so that they are never stored in
# the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel. Any key that was previously committed in this file
# should be treated as leaked and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

if not CLIENT_ID or not CLIENT_SECRET:
    # Say so here rather than letting the first API request fail obscurely.
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will be rejected.')

In [202]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, timeout=10):
    """Collect Foursquare venues around each location into one DataFrame.

    Sends one request to the Foursquare ``venues/explore`` endpoint per
    (name, latitude, longitude) triple, using the module-level CLIENT_ID,
    CLIENT_SECRET, VERSION and LIMIT settings.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences, consumed together with ``zip``; each triple
        describes one location to query.
    radius : int, optional
        Passed through unchanged as the API's ``radius`` parameter.
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        If no venues are found at all, an empty frame with these same
        columns is returned. A venue without any category gets ``None``
        in 'Venue Category'.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and escape the query string.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=timeout,
        )
        # Fail with the HTTP error itself instead of a KeyError on the payload.
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # Keep only the relevant information for each nearby venue.
        for item in results:
            venue = item['venue']
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing columns= keeps the schema stable even when rows is empty.
    return pd.DataFrame(rows, columns=columns)

In [203]:
# Query the venues around every Toronto neighbourhood (default search radius).
neighbourhood_names = df_toronto['Neighbourhood']
neighbourhood_lats = df_toronto['Latitude']
neighbourhood_lons = df_toronto['Longitude']
toronto_venues = getNearbyVenues(
    names=neighbourhood_names,
    latitudes=neighbourhood_lats,
    longitudes=neighbourhood_lons,
)

In [190]:
# One-hot encoding: one indicator column per venue category.
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
# A venue category that is itself called "Neighborhood" would clash with the
# key column added below. Drop it only if it is present, using the keyword
# form (the positional axis argument is rejected by pandas 2.x).
toronto_onehot = toronto_onehot.drop(columns="Neighborhood", errors="ignore")
# Put the neighbourhood name back as the first column so rows can be grouped.
toronto_onehot.insert(0, 'Neighborhood', toronto_venues['Neighborhood'])

In [191]:
# Mean of the one-hot indicators per neighbourhood, i.e. the share of that
# neighbourhood's venues falling in each category.
category_frequency = toronto_onehot.groupby('Neighborhood').mean()
toronto_grouped = category_frequency.reset_index()

In [192]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped (callers pass rows whose first
    field is the neighbourhood name); the remaining entries are sorted in
    descending order and the index labels of the first ``num_top_venues``
    are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [193]:
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank gets 'th'.
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # An explicit bounds check replaces the former bare `except:`, which would
    # also have hidden any unrelated error raised while formatting.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe: one row per neighbourhood, one column per rank
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

# fill each row with that neighbourhood's top venue categories, best first
for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

In [194]:
# set number of clusters
kclusters = 5

# Feature matrix: the per-neighbourhood category frequencies without the name
# column. The keyword form is used because pandas 2.x rejects a positional axis.
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighborhood')

# run k-means clustering
# n_init is pinned to 10 (the long-standing default) so results do not shift
# between scikit-learn versions, where the default changed to 'auto'.
kmeans = KMeans(n_clusters=kclusters, n_init=10, random_state=0).fit(toronto_grouped_clustering)

In [195]:
# add clustering labels
# Drop any label column left by an earlier run of this cell first; a plain
# insert() raises when 'Cluster Labels' already exists.
neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# Join the cluster label and ranked venue columns onto the Toronto
# neighbourhood rows, which carry the latitude/longitude. Neighbourhoods
# with no venue data get NaN in the joined columns.
toronto_merged = df_toronto.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

In [196]:
# Discard rows that contain missing values (e.g. neighbourhoods that received
# no cluster label in the join).
toronto_merged = toronto_merged.dropna()

In [207]:
# create map
map_clusters = folium.Map(location=[tor_lat, tor_lon], zoom_start=12)

# set color scheme for the clusters: one evenly spaced rainbow colour per label
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(rgba) for rgba in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    # Labels come back from the join as floats (0.0, 1.0, ...). Convert once so
    # the popup reads "Cluster 0" and the label indexes the palette directly,
    # without the previous `-1` offset that relied on negative-index wraparound.
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

# In the following cluster, the most common venue appears to be coffee shops.

In [198]:
# First rows of cluster 0: the borough, followed by the cluster label and the
# ranked venue columns (everything from column 5 onwards).
cluster_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
in_cluster = toronto_merged['Cluster Labels'] == 0
toronto_merged.loc[in_cluster, cluster_columns].head()

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,0.0,Coffee Shop,Breakfast Spot,Yoga Studio,Italian Restaurant,Spa,Event Space,Food Truck,Electronics Store,Bakery,Thai Restaurant
4,Downtown Toronto,0.0,Coffee Shop,Sandwich Place,Mediterranean Restaurant,Italian Restaurant,Café,Falafel Restaurant,Fried Chicken Joint,Bank,Theater,Gastropub
9,Downtown Toronto,0.0,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Japanese Restaurant,Furniture / Home Store,Theater,Ramen Restaurant,Bookstore,Movie Theater
15,Downtown Toronto,0.0,Coffee Shop,Cocktail Bar,Restaurant,Clothing Store,Gastropub,Café,Hotel,Cosmetics Shop,Beer Bar,Lingerie Store
20,Downtown Toronto,0.0,Coffee Shop,Beer Bar,Farmers Market,Seafood Restaurant,Cheese Shop,Breakfast Spot,Restaurant,Cocktail Bar,Bakery,Museum


# The most common venue in this cluster seems to be bus lines.

In [199]:
# First rows of cluster 1: the borough, followed by the cluster label and the
# ranked venue columns (everything from column 5 onwards).
cluster_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
in_cluster = toronto_merged['Cluster Labels'] == 1
toronto_merged.loc[in_cluster, cluster_columns].head()

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
61,Central Toronto,1.0,Bus Line,Swim School,Yoga Studio,Food,Flea Market,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm


# In the following cluster, the most common venues look to be places of exercise or play (parks, playgrounds, pools, etc.)

In [200]:
# First rows of cluster 2: the borough, followed by the cluster label and the
# ranked venue columns (everything from column 5 onwards).
cluster_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
in_cluster = toronto_merged['Cluster Labels'] == 2
toronto_merged.loc[in_cluster, cluster_columns].head()

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
68,Central Toronto,2.0,French Restaurant,Park,Yoga Studio,Eastern European Restaurant,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
73,Central Toronto,2.0,Playground,Gym Pool,Park,Yoga Studio,Dumpling Restaurant,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant


# The most common venue in this cluster seems to be health food stores.

In [201]:
# First rows of cluster 3: the borough, followed by the cluster label and the
# ranked venue columns (everything from column 5 onwards).
cluster_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
in_cluster = toronto_merged['Cluster Labels'] == 3
toronto_merged.loc[in_cluster, cluster_columns].head()

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,East Toronto,3.0,Health Food Store,Pub,Trail,Yoga Studio,Dumpling Restaurant,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant


# In the following cluster, the most common venues appear to be outdoor and recreational venues (playgrounds, gyms, trails).

In [178]:
# First rows of cluster 4: the borough, followed by the cluster label and the
# ranked venue columns (everything from column 5 onwards).
cluster_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
in_cluster = toronto_merged['Cluster Labels'] == 4
toronto_merged.loc[in_cluster, cluster_columns].head()

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
83,Central Toronto,4.0,Playground,Gym,Trail,Dumpling Restaurant,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
