### Import necessary libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Folium installed')
print('Libraries imported.')

Folium installed
Libraries imported.


# 2. Data

Data that will be used to solve the problem:

1. Toronto neighborhood data
scraped from a Wikipedia page: https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

2. a csv file that has the geographical coordinates of each postal code: https://cocl.us/Geospatial_data

3. Foursquare location data, with latitude and longitude coordinates of each neighborhood.

4. Foursquare API to explore neighborhoods in Toronto.

The Toronto neighborhood data is collected from a Wikipedia page, which provides all the information we need to explore and cluster the neighborhoods in Toronto.

#### Data wrangling

In [2]:
# Scrape every table on the Wikipedia page; the postal-code table is the first one
data = pd.read_html("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")

# Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
# The rows are filtered into a new frame so the scraped table in `data` is not mutated in place.
df = data[0]
df = df[df['Borough'] != "Not assigned"]
df1 = df.reset_index(drop=True)

# More than one neighborhood can exist in one postal code area: join the neighbourhood
# names with commas, but keep a single borough per postal code. (Joining every column
# would turn the borough of a multi-row postal code into "Borough,Borough".)
df2 = df1.groupby("Postal Code").agg({'Borough': 'first', 'Neighbourhood': ','.join})

# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough
df2.loc[df2['Neighbourhood']=="Not assigned",'Neighbourhood']=df2.loc[df2['Neighbourhood']=="Not assigned",'Borough']

# Bring the postal code back as a regular column and normalise the column names
df3 = df2.reset_index().rename(columns={'Postal Code': 'PostalCode', 'Neighbourhood': 'Neighborhood'})

df3.head(12)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


First, we build a dataframe containing the postal code, borough name, and neighborhood name of each neighborhood. Then, we load a CSV file that has the geographical coordinates of each postal code (https://cocl.us/Geospatial_data). Finally, we merge these two dataframes on the postal code.

In [3]:
# Load the coordinates of each postal code and align the key column name with df3
df_geo_coor = pd.read_csv("https://cocl.us/Geospatial_data").rename(
    columns={'Postal Code': 'PostalCode'}
)

# Attach latitude/longitude to the neighborhood table via the shared 'PostalCode' column
df_toronto = df3.merge(df_geo_coor, on='PostalCode')
print(df_toronto.shape)
df_toronto.head(12)

(103, 5)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


Also, we use the Foursquare API to explore neighborhoods in Toronto.

In [4]:
# Define Foursquare Credentials and Version
# The credentials are read from the environment so that secrets are never stored in
# (or published with) the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting the kernel.
# NOTE(review): the key pair that used to be hard-coded in this cell has been exposed
# and should be revoked and regenerated.
import os
import warnings

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20200729' # Foursquare API version

if not (CLIENT_ID and CLIENT_SECRET):
    # Warn early instead of letting the API calls further down fail with an opaque error
    warnings.warn(
        'Foursquare credentials are missing: set the FOURSQUARE_CLIENT_ID and '
        'FOURSQUARE_CLIENT_SECRET environment variables before calling the API.'
    )

In [5]:
# Coordinates of the neighborhood stored under row label 0 of df_toronto
neighborhood_latitude = df_toronto.at[0, 'Latitude']    # latitude of that neighborhood
neighborhood_longitude = df_toronto.at[0, 'Longitude']  # longitude of that neighborhood

#### Explore Neighborhoods in Toronto

In [6]:
# define a function for getting the top 100 nearby venues with a radius of 1 km

def getNearbyVenues(names, latitudes, longitudes, radius=1000, LIMIT=100, timeout=30):
    """Collect the venues returned by the Foursquare "explore" endpoint for each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Neighborhood names and their coordinates; the three are walked in parallel.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 1000).
    LIMIT : int, optional
        Value sent as the ``limit`` query parameter (default 100).
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood', 'Neighborhood Latitude',
        'Neighborhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude' and
        'Venue Category'. When no venue is found the frame is empty but still has
        these columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    column_names = ['Neighborhood',
                    'Neighborhood Latitude',
                    'Neighborhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # let requests build and escape the query string instead of formatting it by hand
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request; fail loudly on HTTP errors rather than on a later KeyError
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params,
                                timeout=timeout)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        venue_rows.extend(
            (name,
             lat,
             lng,
             v['venue']['name'],
             v['venue']['location']['lat'],
             v['venue']['location']['lng'],
             v['venue']['categories'][0]['name'])
            for v in results
        )

    # Passing the column names to the constructor (instead of assigning them afterwards)
    # also works when no venue was collected at all.
    return pd.DataFrame(venue_rows, columns=column_names)

In [7]:
# Query the venues around every neighborhood listed in df_toronto
toronto_venues = getNearbyVenues(
    df_toronto['Neighborhood'],
    df_toronto['Latitude'],
    df_toronto['Longitude'],
)

In [8]:
# Print the (rows, columns) size of the venue table, then display its first rows
print(toronto_venues.shape)
toronto_venues.head()

(4880, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Images Salon & Spa,43.802283,-79.198565,Spa
1,"Malvern, Rouge",43.806686,-79.194353,Harvey's,43.80002,-79.198307,Restaurant
2,"Malvern, Rouge",43.806686,-79.194353,Staples Morningside,43.800285,-79.196607,Paper / Office Supplies Store
3,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
4,"Malvern, Rouge",43.806686,-79.194353,RBC Royal Bank,43.798782,-79.19709,Bank


#### Get details of a venue, including rating (0-10), tips and likes

In [9]:
def get_venue_details(venue_id, timeout=30):
    """Fetch name, like count, rating and tip count of a single Foursquare venue.

    Parameters
    ----------
    venue_id : str
        Identifier inserted into the venue-details URL.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        Columns 'ID', 'Name', 'Likes', 'Rating', 'Tips'. Holds one row when the
        response contains all four fields, and no rows when any of them is missing.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    KeyError
        If the response body has no ``response``/``venue`` entry.
    """
    column_names = ['ID', 'Name', 'Likes', 'Rating', 'Tips']

    url = 'https://api.foursquare.com/v2/venues/{}'.format(venue_id)
    params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'v': VERSION,
    }
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    venue_data = response.json()['response']['venue']

    venue_details = []
    try:
        # The row must carry one value per declared column; the requested id fills 'ID'.
        venue_details.append([
            venue_id,
            venue_data['name'],
            venue_data['likes']['count'],
            venue_data['rating'],
            venue_data['tips']['count'],
        ])
    except KeyError:
        # Best effort: a venue lacking one of the fields yields an empty frame
        pass

    return pd.DataFrame(venue_details, columns=column_names)

In [10]:
# Keep only the venues whose category contains "Restaurant"
to_res = (
    toronto_venues
    .loc[lambda venues: venues['Venue Category'].str.contains("Restaurant")]
    .reset_index(drop=True)
)
print(to_res.shape)

to_res.head(12)

(1177, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Harvey's,43.80002,-79.198307,Restaurant
1,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
2,"Malvern, Rouge",43.806686,-79.194353,Wendy's,43.802008,-79.19808,Fast Food Restaurant
3,"Malvern, Rouge",43.806686,-79.194353,Caribbean Wave,43.798558,-79.195777,Caribbean Restaurant
4,"Malvern, Rouge",43.806686,-79.194353,Charley's Exotic Cuisine,43.800982,-79.200233,Chinese Restaurant
5,"Malvern, Rouge",43.806686,-79.194353,Mr. Greek,43.799853,-79.198234,Greek Restaurant
6,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Fratelli Village Pizzeria,43.784008,-79.169787,Italian Restaurant
7,"Guildwood, Morningside, West Hill",43.763573,-79.188711,Swiss Chalet,43.768122,-79.190493,Restaurant
8,"Guildwood, Morningside, West Hill",43.763573,-79.188711,KFC,43.7689,-79.1856,Fast Food Restaurant
9,"Guildwood, Morningside, West Hill",43.763573,-79.188711,McDonald's,43.768334,-79.188288,Fast Food Restaurant
