In [1]:
import pandas as pd
import numpy as np

#### Reading the table from the link provided and creating a dataframe

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
data = pd.read_html(url, header=0)
df = data[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


#### Removing the rows that contain 'Not assigned' in the 'Borough' column

In [3]:
df = df.drop(df.index[df['Borough'] == 'Not assigned'])
# resetting the index values
df.reset_index(drop=True, inplace = True)
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


#### Renaming Neighborhoods that are "Not assigned"

In [4]:
# Renaming the neighborhoods that are "Not assigned" with Borough's name
df["Neighborhood"] = np.where((df.Neighborhood == 'Not assigned'), df.Borough,df.Neighborhood)

#### Grouping same postcodes into one line and reindexing

In [5]:
df = df.groupby(["Postcode", 'Borough'])['Neighborhood'].apply(lambda tags: ', '.join(tags)).to_frame().reset_index()

In [6]:
df.head(20)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [7]:
# shape of the dataframe
df.shape

(103, 3)

In [8]:
print('There are ' + str(df.shape[0]) + ' rows.')

There are 103 rows.


In [9]:
# getting coordinates from file provided
geospatial_data = pd.read_csv('Geospatial_Coordinates.csv')
geospatial_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:
# renaming "Postal Code" to "Postcode"
geospatial_data = geospatial_data.rename(columns={geospatial_data.columns[0]: "Postcode" })

In [11]:
# merge the two dataframes using "Postcode column"
df = df.merge(geospatial_data, on = 'Postcode')

df.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Exploring and Clustering

In [12]:
# !conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 

# !conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Folium installed
Libraries imported.


In [13]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

In [14]:
# analizing dataframe
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        df['Neighborhood'].shape[0]
    )
)

The dataframe has 11 boroughs and 103 neighborhoods.


In [15]:
address = 'Toronto, Ontario, Canada'

geolocator = Nominatim(user_agent="TO_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


### Creating Map of Toronto

In [16]:
# create a map of Toronto that displays all the Neighborhoods and Borowughs
map_toronto = folium.Map(location = [latitude, longitude], zoom_start = 11)

#add red circles to represent Toronto's Neighbourhoods
for lat, long, bor, neigh in zip(df['Latitude'], df['Longitude'], 
                                 df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neigh, bor)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius = 7, 
        popup = label,
        color = 'red',
        fill = True,
        fill_color = 'yellow',
        fill_opacity = 0.25,
        parse_html = False).add_to(map_toronto)
        
map_toronto

### Clustering the Boroughs

In [17]:
list_boroughs = df['Borough'].unique()
list_boroughs

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'Central Toronto', 'Downtown Toronto', 'York', 'West Toronto',
       'Mississauga', 'Etobicoke', "Queen's Park"], dtype=object)

In [18]:
def borough_loc(list_of_places):
    for place in list_of_places:
        address = (place + ", Ontario, Canada")
        geolocator = Nominatim(user_agent="TO_explorer")
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude
        print('{''}, {}, {},'.format(place,latitude,longitude))

In [19]:
borough_loc(list_boroughs)

Scarborough, 43.773077, -79.257774,
North York, 43.7543263, -79.4491169663959,
East York, 43.699971, -79.3325199626159,
East Toronto, 43.626243, -79.396962,
Central Toronto, 43.653963, -79.387207,
Downtown Toronto, 43.6563221, -79.3809161,
York, 44.0007518, -79.4372217,
West Toronto, 43.653963, -79.387207,
Mississauga, 43.590338, -79.645729,


GeocoderTimedOut: Service timed out

In [20]:
boroughs = ["Scarborough", 43.773077, -79.257774,
"North York", 43.7543263, -79.4491169663959,
"East York", 43.699971, -79.3325199626159,
"East Toronto", 43.626243, -79.396962,
"Central Toronto", 43.653963, -79.387207,
"Downtown Toronto", 43.6563221, -79.3809161,
"York", 44.0007518, -79.4372217,
"West Toronto", 43.653963, -79.387207,
"Mississauga", 43.590338, -79.645729,
"Etobicoke", 43.67145915, -79.5524920661167,
"Queen's Park", 43.6606092, -79.3905725,]

In [21]:
#creating borough dataframe
boroughs_df = pd.DataFrame(np.array(boroughs).reshape(11,3), columns = ["Borough","Latitude","Longitude"])
#changing strings to floats
boroughs_df["Latitude"] = pd.to_numeric(boroughs_df["Latitude"])
boroughs_df["Longitude"] = pd.to_numeric(boroughs_df["Longitude"])
boroughs_df.head(10)

Unnamed: 0,Borough,Latitude,Longitude
0,Scarborough,43.773077,-79.257774
1,North York,43.754326,-79.449117
2,East York,43.699971,-79.33252
3,East Toronto,43.626243,-79.396962
4,Central Toronto,43.653963,-79.387207
5,Downtown Toronto,43.656322,-79.380916
6,York,44.000752,-79.437222
7,West Toronto,43.653963,-79.387207
8,Mississauga,43.590338,-79.645729
9,Etobicoke,43.671459,-79.552492


### Create a map of Toronto's Boroughs

In [22]:
# create a map of Toronto's Boroughs
map_toronto_boroughs = folium.Map(location = [43.653963, -79.387207], zoom_start = 10)

#add neighborhood markers to the Toronto map
for lat, long, bor in zip(boroughs_df['Latitude'], boroughs_df['Longitude'], 
                                 boroughs_df['Borough']):
    label = '{}'.format(bor)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius = 7, 
        popup = label,
        color = 'red',
        fill = True,
        fill_color = 'white',
        fill_opacity = 0.7,
        parse_html = False).add_to(map_toronto_boroughs)
        
map_toronto_boroughs

In [23]:
import requests # library to handle requests
import random # library for random number generation

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

#!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Folium installed
Libraries imported.


## Using Foursquare

In [24]:
# Foursquare Login credentials
CLIENT_ID = '3HKW1CZU4NJOFN0W2VGXP0VG3E1FTVYDRIJPSXR1ZMUVDO4P' # your Foursquare ID
CLIENT_SECRET = '44KWZDKR2Q5YN3RECI2M0DIRJ25GWMYNCMXNPOIR002434DP' # your Foursquare Secret
VERSION = '20180604'

#### getting data for Queen's Park Neighborhood

In [25]:
queens_park_lat = boroughs_df.iloc[10,1]
queens_park_long = boroughs_df.iloc[10,2]

In [26]:
queens_park_df = df[df['Borough'] == "Queen's Park"].reset_index(drop = True)

In [27]:
neighborhood_latitude = queens_park_df.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = queens_park_df.loc[0, 'Longitude'] # neighborhood longitude value
neighborhood_name = queens_park_df.loc[0, 'Neighborhood'] # neighborhood name
print(neighborhood_name)
print('Coordinates of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Queen's Park
Coordinates of Queen's Park are 43.6678556, -79.53224240000002.


#### Searching for all McDonalds Restaurants in the neighbourhood

In [28]:
search_query = 'McDonald'
radius = 500
print(search_query + ' .... OK!')
LIMIT = 100

McDonald .... OK!


In [29]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION,  search_query, radius, LIMIT)
url

'https://api.foursquare.com/v2/venues/search?client_id=3HKW1CZU4NJOFN0W2VGXP0VG3E1FTVYDRIJPSXR1ZMUVDO4P&client_secret=44KWZDKR2Q5YN3RECI2M0DIRJ25GWMYNCMXNPOIR002434DP&ll=43.653963,-79.387207&v=20180604&query=McDonald&radius=500&limit=100'

In [30]:
# storing results in results
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e12492347b43d0024204901'},
 'response': {'venues': [{'id': '4ae879d4f964a520aaaf21e3',
    'name': "McDonald's",
    'location': {'address': '260 Yonge Street, Unit J-002, Urban Eatery',
     'crossStreet': 'in Toronto Eaton Centre Urban Eatery',
     'lat': 43.654699,
     'lng': -79.381063,
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.654699,
       'lng': -79.381063}],
     'distance': 501,
     'postalCode': 'M5B 2L9',
     'cc': 'CA',
     'city': 'Toronto',
     'state': 'ON',
     'country': 'Canada',
     'formattedAddress': ['260 Yonge Street, Unit J-002, Urban Eatery (in Toronto Eaton Centre Urban Eatery)',
      'Toronto ON M5B 2L9',
      'Canada']},
    'categories': [{'id': '4bf58dd8d48988d16e941735',
      'name': 'Fast Food Restaurant',
      'pluralName': 'Fast Food Restaurants',
      'shortName': 'Fast Food',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/fastfood_',
       'suffix': '.png'

#### Define information of interest and filter dataframe

In [35]:
# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe.head()

Unnamed: 0,id,name,categories,referralId,hasPerk,location.address,location.crossStreet,location.lat,location.lng,location.labeledLatLngs,location.distance,location.postalCode,location.cc,location.city,location.state,location.country,location.formattedAddress
0,4ae879d4f964a520aaaf21e3,McDonald's,"[{'id': '4bf58dd8d48988d16e941735', 'name': 'F...",v-1578256659,False,"260 Yonge Street, Unit J-002, Urban Eatery",in Toronto Eaton Centre Urban Eatery,43.654699,-79.381063,"[{'label': 'display', 'lat': 43.654699, 'lng':...",501,M5B 2L9,CA,Toronto,ON,Canada,"[260 Yonge Street, Unit J-002, Urban Eatery (i..."
1,4c82f365dc018cfabeccd46c,McDonald's,"[{'id': '4bf58dd8d48988d16e941735', 'name': 'F...",v-1578256659,False,109 McCaul Street,Grange Food Court,43.653744,-79.390915,"[{'label': 'display', 'lat': 43.6537443145172,...",299,M5T 3K5,CA,Toronto,ON,Canada,"[109 McCaul Street (Grange Food Court), Toront..."
2,4b59f3bff964a52055a328e3,McDonald's,"[{'id': '4bf58dd8d48988d16e941735', 'name': 'F...",v-1578256659,False,595 Bay St,btwn Edward St & Dundas St W,43.656258,-79.383269,"[{'label': 'display', 'lat': 43.65625754381657...",407,M5G 1M5,CA,Toronto,ON,Canada,"[595 Bay St (btwn Edward St & Dundas St W), To..."
3,4b213e29f964a520d93824e3,McDonald's,"[{'id': '4bf58dd8d48988d16e941735', 'name': 'F...",v-1578256659,False,78 St. Patrick Street,,43.653697,-79.390954,"[{'label': 'display', 'lat': 43.65369665916784...",303,,CA,Toronto,ON,Canada,"[78 St. Patrick Street, Toronto ON, Canada]"
4,4bace7b1f964a520d3173be3,McDonald's,"[{'id': '4bf58dd8d48988d16e941735', 'name': 'F...",v-1578256659,False,Richmond and Bay,on the PATH,43.651137,-79.383256,"[{'label': 'display', 'lat': 43.65113714894541...",447,,CA,Toronto,ON,Canada,"[Richmond and Bay (on the PATH), Toronto ON, C..."


In [36]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered

Unnamed: 0,name,categories,address,crossStreet,lat,lng,labeledLatLngs,distance,postalCode,cc,city,state,country,formattedAddress,id
0,McDonald's,Fast Food Restaurant,"260 Yonge Street, Unit J-002, Urban Eatery",in Toronto Eaton Centre Urban Eatery,43.654699,-79.381063,"[{'label': 'display', 'lat': 43.654699, 'lng':...",501,M5B 2L9,CA,Toronto,ON,Canada,"[260 Yonge Street, Unit J-002, Urban Eatery (i...",4ae879d4f964a520aaaf21e3
1,McDonald's,Fast Food Restaurant,109 McCaul Street,Grange Food Court,43.653744,-79.390915,"[{'label': 'display', 'lat': 43.6537443145172,...",299,M5T 3K5,CA,Toronto,ON,Canada,"[109 McCaul Street (Grange Food Court), Toront...",4c82f365dc018cfabeccd46c
2,McDonald's,Fast Food Restaurant,595 Bay St,btwn Edward St & Dundas St W,43.656258,-79.383269,"[{'label': 'display', 'lat': 43.65625754381657...",407,M5G 1M5,CA,Toronto,ON,Canada,"[595 Bay St (btwn Edward St & Dundas St W), To...",4b59f3bff964a52055a328e3
3,McDonald's,Fast Food Restaurant,78 St. Patrick Street,,43.653697,-79.390954,"[{'label': 'display', 'lat': 43.65369665916784...",303,,CA,Toronto,ON,Canada,"[78 St. Patrick Street, Toronto ON, Canada]",4b213e29f964a520d93824e3
4,McDonald's,Fast Food Restaurant,Richmond and Bay,on the PATH,43.651137,-79.383256,"[{'label': 'display', 'lat': 43.65113714894541...",447,,CA,Toronto,ON,Canada,"[Richmond and Bay (on the PATH), Toronto ON, C...",4bace7b1f964a520d3173be3
5,McDonald's,Fast Food Restaurant,123 Queen St. West,,43.65089,-79.38375,"[{'label': 'display', 'lat': 43.65089, 'lng': ...",441,,CA,Toronto,ON,Canada,"[123 Queen St. West, Toronto ON, Canada]",4ce1a706df986ea880afed16
6,McMurtry Gardens of Justice,Monument / Landmark,361 University Avenue,,43.653133,-79.38739,"[{'label': 'display', 'lat': 43.65313279999999...",93,,CA,Toronto,ON,Canada,"[361 University Avenue, Toronto ON, Canada]",525ea70f11d285cb8db4f221
7,McCaul & Dundas,Intersection,Dundas St W,& McCaul St,43.654376,-79.390442,"[{'label': 'display', 'lat': 43.65437587151985...",264,,CA,Toronto,ON,Canada,"[Dundas St W (& McCaul St), Toronto ON, Canada]",51b03cd2498ecc9c45bc0d5b
8,MCO Downtown Toronto,Dentist's Office,101 McCaul St,,43.653425,-79.39083,"[{'label': 'display', 'lat': 43.653425, 'lng':...",297,M5T 2X3,CA,Toronto,ON,Canada,"[101 McCaul St, Toronto ON M5T 2X3, Canada]",5d9d323511e06d000734fa4c
9,Mo'Ramyun,Korean Restaurant,1 Baldwin St.,at Elm St.,43.656148,-79.392282,"[{'label': 'display', 'lat': 43.65614757920083...",475,M5T 1L1,CA,Toronto,ON,Canada,"[1 Baldwin St. (at Elm St.), Toronto ON M5T 1L...",552db032498e660b6506230c


In [37]:
dataframe_filtered = dataframe_filtered.drop(dataframe_filtered[dataframe_filtered["name"] != "McDonald's"].index)

### Creating a Map of Queen's McDonalds Restaurants

In [40]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=16) # generate map 

# add the McDonald restaurants as blue circle markers
for lat, lng in zip(dataframe_filtered['lat'], dataframe_filtered['lng']):
    folium.features.CircleMarker(
        [lat, lng],
        radius=7,
        color='blue',        
        popup = "Mc Donalds",        
        fill = True,
        fill_color='blue',
        fill_opacity=0.3
    ).add_to(venues_map)

# display map of Queen's Park McDonalds Restaurants In Queen's Park
venues_map