## Applied Data Science Capstone - Battle of Neigbourhood - New Coffee Shop in Toronto


This notebook is to provide solution for Applied Data Science Capstone Project 

### Import Libraries


In [2]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


### Third Party Libraries


In [3]:
!pip install wikipedia



### Toronto City breakup by Postal Code

In [106]:
import wikipedia as wp
 
#Get the html source
html = wp.page("List_of_postal_codes_of_Canada:_M").html().encode("UTF-8")
postalCodes = pd.read_html(html)[0]
postalCodes.columns = postalCodes.iloc[0]
postalCodes = postalCodes.reindex(postalCodes.index.drop(0))
postalCodes=postalCodes[postalCodes.Borough != 'Not assigned']
postalCodes = postalCodes.reset_index(drop=True)
print (postalCodes.shape)
postalCodes = postalCodes.groupby(['Postcode','Borough'])['Neighbourhood'].apply(', '.join).reset_index()
geoLocation = pd.read_csv('https://cocl.us/Geospatial_data')
geoLocation.columns = ['Postcode','Latitude','Longitude']
geoLocation.head()
neighborhoods = pd.merge(postalCodes,
                 geoLocation,left_on='Postcode', right_on='Postcode')
neighborhoods.columns = column_names = ['Postcode','Borough', 'Neighborhood', 'Latitude', 'Longitude'] 
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(neighborhoods['Borough'].unique()),
        neighborhoods.shape[0]
    )
)
neighborhoods.head(5)


(212, 3)
The dataframe has 11 boroughs and 103 neighborhoods.


Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


#### Define Foursquare Credentials and Version

In [11]:
CLIENT_ID = '4LF1JFD5K2R3VSKEKVVYYVJ0ZAU3XADJMOSVF4MCXO2ZH5AC' # your Foursquare ID
CLIENT_SECRET = '3ITP4YAR5BRPDEMFLEVZGYI4F04A2LI3ITEI1JVPMVQINMBN' # your Foursquare Secret
VERSION = '20180604'


print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 4LF1JFD5K2R3VSKEKVVYYVJ0ZAU3XADJMOSVF4MCXO2ZH5AC
CLIENT_SECRET:3ITP4YAR5BRPDEMFLEVZGYI4F04A2LI3ITEI1JVPMVQINMBN


### Get Venues data From FourSquare API

In [121]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            1000, 
            100)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)


neighborhoods_venues = getNearbyVenues(names=neighborhoods['Neighborhood'],
                                   latitudes=neighborhoods['Latitude'],
                                   longitudes=neighborhoods['Longitude']
                                  )
neighborhoods_venues.head()


1

### Coffee Shop in Each NeighbourHood

In [92]:
neighborhoods_coffeeshop = neighborhoods_venues [neighborhoods_venues['Venue Category'] == 'Coffee Shop']
neighborhoods_coffeeshop_byneighbourhood = neighborhoods_coffeeshop.groupby('Neighborhood').count()
neighborhoods_coffeeshop_byneighbourhood.head()


Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",4,4,4,4,4,4
Agincourt,1,1,1,1,1,1
"Agincourt North, L'Amoreaux East, Milliken, Steeles East",1,1,1,1,1,1
"Albion Gardens, Beaumond Heights, Humbergate, Jamestown, Mount Olive, Silverstone, South Steeles, Thistletown",1,1,1,1,1,1
"Alderwood, Long Branch",1,1,1,1,1,1


## Map showing Coffee Shops in Toronto

In [116]:
address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

# create map of Manhattan using latitude and longitude values
map_toronto_coffee = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, label in zip(neighborhoods_coffeeshop['Venue Latitude'], neighborhoods_coffeeshop['Venue Longitude'], neighborhoods_coffeeshop['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto_coffee)  
    
 
    
map_toronto_coffee

The geograpical coordinate of Toronto City are 43.653963, -79.387207.


### Neigbourhood with low density Coffee Shops

In [120]:
neighborhoods_coffeeshop_byneighbourhood.sort_values(['Venue'],ascending=False)
prospective_venues = neighborhoods_coffeeshop_byneighbourhood[neighborhoods_coffeeshop_byneighbourhood['Venue'] == 1]
prospective_venues.reset_index(level=0, inplace=True)
prospective_venues = pd.merge(prospective_venues,
                 neighborhoods,left_on='Neighborhood', right_on='Neighborhood')
prospective_venues = prospective_venues.drop(['Neighborhood Latitude','Neighborhood Longitude','Venue','Venue Latitude','Venue Longitude','Venue Category'],axis=1)
prospective_venues

Unnamed: 0,Neighborhood,Postcode,Borough,Latitude,Longitude
0,Agincourt,M1S,Scarborough,43.7942,-79.262029
1,"Agincourt North, L'Amoreaux East, Milliken, St...",M1V,Scarborough,43.815252,-79.284577
2,"Albion Gardens, Beaumond Heights, Humbergate, ...",M9V,Etobicoke,43.739416,-79.588437
3,"Alderwood, Long Branch",M8W,Etobicoke,43.602414,-79.543484
4,"Bloordale Gardens, Eringate, Markland Wood, Ol...",M9C,Etobicoke,43.643515,-79.577201
5,Caledonia-Fairbanks,M6E,York,43.689026,-79.453512
6,"Del Ray, Keelesdale, Mount Dennis, Silverthorn",M6M,York,43.691116,-79.476013
7,Downsview West,M3L,North York,43.739015,-79.506944
8,"Kingsview Village, Martin Grove Gardens, Richv...",M9R,Etobicoke,43.688905,-79.554724
9,Lawrence Park,M4N,Central Toronto,43.72802,-79.38879


### Suggestions for Neigbourhood's For new Coffee Shop

In [124]:
address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

# create map of Manhattan using latitude and longitude values
map_toronto_coffee_prospect = folium.Map(location=[latitude, longitude], zoom_start=11)
 
    
# add markers to map
for lat, lng, label in zip(prospective_venues['Latitude'], prospective_venues['Longitude'], prospective_venues['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=25,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto_coffee_prospect)  
# add markers to map
for lat, lng, label in zip(neighborhoods_coffeeshop['Venue Latitude'], neighborhoods_coffeeshop['Venue Longitude'], neighborhoods_coffeeshop['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=2,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto_coffee_prospect)  
    
map_toronto_coffee_prospect

The geograpical coordinate of Toronto City are 43.653963, -79.387207.
