# Capstone Project

This notebook will be used for my IBM Data Science Capstone Project!

In [1]:
import pandas as pd
import numpy as np
import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
from geopy.geocoders import Nominatim
import requests

In [2]:
tor_data=pd.read_csv('toronto_geo_data.csv').drop('Unnamed: 0', axis=1)
tor_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [3]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geographical coordinate of Toronto are 43.653963, -79.387207.


In [4]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighbourhood in zip(tor_data['Latitude'], tor_data['Longitude'], tor_data['Borough'], tor_data['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [5]:
CLIENT_ID = 'YNC4DJD21CJ0M3BKMNQO5V021W3T1UO5MIAHAUWMFEBQJF0R' # your Foursquare ID
CLIENT_SECRET = 'NK5IXFBQH2WZKPIYCFVP2LLA2RVWTQVFSEIFMDEB3XDEY4Z1' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT=100

In [7]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        #print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Postcode', 
                  'Postcode Latitude', 
                  'Postcode Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Run the above function and save the forusquare data as a csv for faster performace in later runs of code.

In [8]:
#tor_venues = getNearbyVenues(names=tor_data['Postcode'],
                                   #latitudes=tor_data['Latitude'],
                                   #longitudes=tor_data['Longitude']
                                  #)
#tor_venues.to_csv('tor_venues_all.csv')

In [9]:
tor_venues=pd.read_csv('tor_venues_all.csv').drop('Unnamed: 0', axis=1)
tor_venues.head()

Unnamed: 0,Postcode,Postcode Latitude,Postcode Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1B,43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,M1C,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,M1C,43.784535,-79.160497,Scarborough Historical Society,43.788755,-79.162438,History Museum
3,M1E,43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
4,M1E,43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


In [10]:
# one hot encoding
tor_onehot = pd.get_dummies(tor_venues[['Venue Category']], prefix="", prefix_sep="")

# add postcode column back to dataframe
tor_onehot['Postcode']=tor_venues["Postcode"]
# move neighborhood column to the first column
fixed_columns = [tor_onehot.columns[-1]] + list(tor_onehot.columns[:-1])
tor_onehot = tor_onehot[fixed_columns]

print(tor_onehot.shape)
tor_onehot.head()

(2236, 270)


Unnamed: 0,Postcode,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M1C,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M1C,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M1E,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M1E,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [29]:
tor_rest_all=tor_onehot.groupby(by='Postcode').count().reset_index()

tor_rest_all['Total Restaurants']=tor_rest_all.sum(axis=1)
tor_data['isin']=tor_data['Postcode'].isin(tor_rest_all['Postcode'])
tor_data_isin=tor_data[tor_data['isin']==True]
tor_rest_all['Postcode Latitude']=tor_data_isin["Latitude"]
tor_rest_all["Postcode Longitude"]=tor_data_isin["Longitude"]
tor_rest_all.head()

Unnamed: 0,Postcode,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio,Total Restaurants,Postcode Latitude,Postcode Longitude
0,M1B,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,269,43.806686,-79.194353
1,M1C,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,538,43.784535,-79.160497
2,M1E,8,8,8,8,8,8,8,8,8,...,8,8,8,8,8,8,8,2152,43.763573,-79.188711
3,M1G,3,3,3,3,3,3,3,3,3,...,3,3,3,3,3,3,3,807,43.770992,-79.216917
4,M1H,8,8,8,8,8,8,8,8,8,...,8,8,8,8,8,8,8,2152,43.773136,-79.239476


In [31]:
tor_rest_all.columns.str.contains('Restaurants')

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [13]:
tor_mex=tor_onehot.drop(tor_onehot.columns[~tor_onehot.columns.str.contains('Mexican')], axis=1)
# add postcode column back to dataframe
tor_mex['Postcode']=tor_venues["Postcode"]

# move neighborhood column to the first column
fixed_columns = [tor_mex.columns[-1]] + list(tor_mex.columns[:-1])
tor_mex = tor_mex[fixed_columns]
tor_mex=tor_mex.groupby('Postcode').count().reset_index()
tor_data['isin']=tor_data['Postcode'].isin(tor_mex['Postcode'])
tor_data_isin=tor_data[tor_data['isin']==True]
tor_mex['Postcode Latitude']=tor_data_isin["Latitude"]
tor_mex["Postcode Longitude"]=tor_data_isin["Longitude"]
tor_mex.head()

Unnamed: 0,Postcode,Mexican Restaurant,Postcode Latitude,Postcode Longitude
0,M1B,1,43.806686,-79.194353
1,M1C,2,43.784535,-79.160497
2,M1E,8,43.763573,-79.188711
3,M1G,3,43.770992,-79.216917
4,M1H,8,43.773136,-79.239476
