In [3]:
# import 
from bs4 import BeautifulSoup
import requests
import csv
import pandas as pd
from pandas import DataFrame as df

In [4]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(source,'lxml')
wiki = soup.find('table')

In [6]:
neigh = wiki.find_all('tr')
d = []
for neigh in neigh:
    cols=neigh.find_all('td')
    cols=[x.text.strip() for x in cols]
    d.append(cols)

In [7]:
columns=['PostalCode','Borough','Neighbourhood']
df_TO = pd.DataFrame(data=d,columns=columns)[1:]
print(df_TO.count())
df_TO.head()

PostalCode       288
Borough          288
Neighbourhood    288
dtype: int64


Unnamed: 0,PostalCode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


In [8]:
#Ignore cells with a borough that is Not assigned.
df_TO_clean =df_TO[df_TO['Borough'] !='Not assigned']

# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough
for index, row in df_TO_clean.iterrows():
    if row['Neighbourhood']=='Not assigned':
        row['Neighbourhood']=row['Borough']


In [9]:
df_TO_clean_gr = df_TO_clean.groupby(['PostalCode','Borough'], sort=False).agg( ','.join)
df_TO_clean_gr = df_TO_clean_gr.reset_index()
df_TO_clean_gr.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


In [10]:
df_TO_clean_gr.shape
df_TO_clean_gr['PostalCode'].head()

0    M3A
1    M4A
2    M5A
3    M6A
4    M7A
Name: PostalCode, dtype: object

In [11]:
pip install geocoder

Note: you may need to restart the kernel to use updated packages.


In [12]:
import geocoder
import numpy as np

In [14]:
# convert postal code to list to iterate
pc_list = df_TO_clean_gr['PostalCode'].tolist()

In [15]:
# create empty lists to store values
lat_vals=[]
lon_vals=[]

for pc in pc_list:
    lat_lng_coords=None
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(pc))
        lat_lng_coords = g.latlng

    lat_vals.append(lat_lng_coords[0])
    lon_vals.append(lat_lng_coords[1])
    
df_TO_clean_gr['Latitude'] = lat_vals
df_TO_clean_gr['Longitude'] = lon_vals

df_TO_clean_gr.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75242,-79.329242
1,M4A,North York,Victoria Village,43.7306,-79.313265
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.650295,-79.359166
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.72327,-79.451286
4,M7A,Queen's Park,Queen's Park,43.66115,-79.391715


In [16]:
DTO_data = df_TO_clean_gr[df_TO_clean_gr['Borough'].str.contains("Toronto")].reset_index(drop=True)
DTO_data.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.650295,-79.359166
1,M5B,Downtown Toronto,"Ryerson,Garden District",43.657363,-79.37818
2,M5C,Downtown Toronto,St. James Town,43.65121,-79.375481
3,M4E,East Toronto,The Beaches,43.676531,-79.295425
4,M5E,Downtown Toronto,Berczy Park,43.64516,-79.373675


In [17]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(DTO_data['Borough'].unique()),
        DTO_data.shape[0]
    )
)

The dataframe has 4 boroughs and 38 neighborhoods.


# Explore Neighborhoods in Toronto

### Create a function to repeat the same process to all the neighborhoods in Toronto

In [18]:
!conda install -c conda-forge geopy --yes

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



In [19]:
pip install folium

Note: you may need to restart the kernel to use updated packages.


In [20]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from geopy.geocoders import Nominatim

print('Libraries imported.')

Libraries imported.


In [21]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [22]:
# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(DTO_data['Latitude'], DTO_data['Longitude'], DTO_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

In [29]:
CLIENT_ID = 'U1FGIWTI0KHEV5JJL5ZUSMVFHG2C413OXBS210OIFVX4LUNO' # your Foursquare ID
CLIENT_SECRET = 'A0EP5GWESZAYDUXGDBPFMLT5DDCJ3P1D2EOP3QRFISGGW040' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: U1FGIWTI0KHEV5JJL5ZUSMVFHG2C413OXBS210OIFVX4LUNO
CLIENT_SECRET:A0EP5GWESZAYDUXGDBPFMLT5DDCJ3P1D2EOP3QRFISGGW040


In [39]:
import urllib
def getNearbyVenues(names, latitudes, longitudes, radius=5000, categoryIds=''):
    try:
        venues_list=[]
        for name, lat, lng in zip(names, latitudes, longitudes):
            #print(name)

            # create the API request URL
            url = 'https://api.foursquare.com/v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, VERSION, lat, lng, radius, LIMIT)

            if (categoryIds != ''):
                url = url + '&categoryId={}'
                url = url.format(categoryIds)

            # make the GET request
            response = requests.get(url).json()
            results = response["response"]['venues']

            # return only relevant information for each nearby venue
            for v in results:
                success = False
                try:
                    category = v['categories'][0]['name']
                    success = True
                except:
                    pass

                if success:
                    venues_list.append([(
                        name, 
                        lat, 
                        lng, 
                        v['name'], 
                        v['location']['lat'], 
                        v['location']['lng'],
                        v['categories'][0]['name']
                    )])

        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['Localidad', 
                  'Localidad Latitude', 
                  'Localidad Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    except:
        print(url)
        print(response)
        print(results)
        print(nearby_venues)

    return(nearby_venues)

In [32]:
LIMIT = 500 
radius = 5000 

In [54]:
#https://developer.foursquare.com/docs/resources/categories
#preschool =52e81612bcbc57f1066b7a45

preschool  = getNearbyVenues(names=DTO_data['Neighbourhood'], latitudes=DTO_data['Latitude'], longitudes=DTO_data['Longitude'], radius=5000, categoryIds='52e81612bcbc57f1066b7a45')
preschool.head()

Unnamed: 0,Localidad,Localidad Latitude,Localidad Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Harbourfront,Regent Park",43.650295,-79.359166,The Wright Spot,43.672679,-79.319917,Preschool
1,"Harbourfront,Regent Park",43.650295,-79.359166,Creative Preschool,43.679454,-79.336772,Preschool
2,"Ryerson,Garden District",43.657363,-79.37818,The Wright Spot,43.672679,-79.319917,Preschool
3,"Ryerson,Garden District",43.657363,-79.37818,Creative Preschool,43.679454,-79.336772,Preschool
4,St. James Town,43.65121,-79.375481,The Wright Spot,43.672679,-79.319917,Preschool


In [56]:
preschool.shape

(39, 7)

In [42]:
#https://developer.foursquare.com/docs/resources/categories
#elementary =4f4533804b9074f6e4fb0105

elementary = getNearbyVenues(names=DTO_data['Neighbourhood'], latitudes=DTO_data['Latitude'], longitudes=DTO_data['Longitude'], radius=5000, categoryIds='4f4533804b9074f6e4fb0105')
elementary.head()

Unnamed: 0,Localidad,Localidad Latitude,Localidad Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Harbourfront,Regent Park",43.650295,-79.359166,Market Lane Public School,43.653344,-79.364758,Elementary School
1,"Harbourfront,Regent Park",43.650295,-79.359166,Ecole Elementaire La Mosaique,43.687073,-79.330219,Elementary School
2,"Harbourfront,Regent Park",43.650295,-79.359166,Nelson Mandela Public School,43.658111,-79.360072,Elementary School
3,"Harbourfront,Regent Park",43.650295,-79.359166,Lord Lansdowne Public School,43.658731,-79.402031,Elementary School
4,"Harbourfront,Regent Park",43.650295,-79.359166,Givins Shaw Elementary School,43.646395,-79.418096,Elementary School


In [62]:
#https://developer.foursquare.com/docs/resources/categories
#industry  =56aa371be4b08b9a8d5734d7

industry = getNearbyVenues(names=DTO_data['Neighbourhood'], latitudes=DTO_data['Latitude'], longitudes=DTO_data['Longitude'], radius=5000, categoryIds='56aa371be4b08b9a8d5734d7')
industry

Unnamed: 0,Localidad,Localidad Latitude,Localidad Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Harbourfront,Regent Park",43.650295,-79.359166,Salt Dome,43.640874,-79.408053,Industrial Estate
1,"Ryerson,Garden District",43.657363,-79.37818,Salt Dome,43.640874,-79.408053,Industrial Estate
2,St. James Town,43.65121,-79.375481,Salt Dome,43.640874,-79.408053,Industrial Estate
3,Berczy Park,43.64516,-79.373675,Salt Dome,43.640874,-79.408053,Industrial Estate
4,Central Bay Street,43.656091,-79.38493,Salt Dome,43.640874,-79.408053,Industrial Estate
5,Christie,43.668781,-79.42071,Salt Dome,43.640874,-79.408053,Industrial Estate
6,"Adelaide,King,Richmond",43.6497,-79.382582,Salt Dome,43.640874,-79.408053,Industrial Estate
7,"Dovercourt Village,Dufferin",43.665087,-79.438705,Salt Dome,43.640874,-79.408053,Industrial Estate
8,"Harbourfront East,Toronto Islands,Union Station",43.63021,-79.362433,Salt Dome,43.640874,-79.408053,Industrial Estate
9,"Little Portugal,Trinity",43.648525,-79.417757,Salt Dome,43.640874,-79.408053,Industrial Estate


In [60]:
#https://developer.foursquare.com/docs/resources/categories
#gas  =56aa371be4b08b9a8d5734d7

gas = getNearbyVenues(names=DTO_data['Neighbourhood'], latitudes=DTO_data['Latitude'], longitudes=DTO_data['Longitude'], radius=1000, categoryIds='4bf58dd8d48988d113951735')
gas.head()
print(gas.shape)

(211, 7)


In [52]:
def addToMap(df, color, existingMap):
    for lat, lng, local, venue, venueCat in zip(df['Venue Latitude'], df['Venue Longitude'], df['Localidad'], df['Venue'], df['Venue Category']):
        label = '{} ({}) - {}'.format(venue, venueCat, local)
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker(
            [lat, lng],
            radius=5,
            popup=label,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7).add_to(existingMap)

In [53]:
map_preschool = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(preschool, 'red', map_preschool)
map_preschool

In [59]:
map_industry = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(industry, 'gold', map_industry)
map_industry

In [58]:
map_gas = folium.Map(location=[latitude, longitude], zoom_start=12)
addToMap(gas, 'green', map_gas)
map_gas

In [63]:
def addColumn(startDf, columnTitle, dataDf):
    grouped = dataDf.groupby('Localidad').count()
    
    for n in startDf['Localidad']:
        try:
            startDf.loc[startDf['Localidad'] == n,columnTitle] = grouped.loc[n, 'Venue']
        except:
            startDf.loc[startDf['Localidad'] == n,columnTitle] = 0

In [65]:
df_data =DTO_data.copy()
df_data.rename(columns={'Neighbourhood':'Localidad'}, inplace=True)
addColumn(df_data, 'preschool', preschool)
addColumn(df_data, 'elementary', elementary)
addColumn(df_data, 'industry', industry)
addColumn(df_data, 'gas', gas)
df_data

Unnamed: 0,PostalCode,Borough,Localidad,Latitude,Longitude,preschool,elementary,industry,gas
0,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.650295,-79.359166,2.0,34.0,1.0,6.0
1,M5B,Downtown Toronto,"Ryerson,Garden District",43.657363,-79.37818,2.0,44.0,1.0,11.0
2,M5C,Downtown Toronto,St. James Town,43.65121,-79.375481,2.0,38.0,1.0,10.0
3,M4E,East Toronto,The Beaches,43.676531,-79.295425,2.0,23.0,0.0,7.0
4,M5E,Downtown Toronto,Berczy Park,43.64516,-79.373675,1.0,31.0,1.0,6.0
5,M5G,Downtown Toronto,Central Bay Street,43.656091,-79.38493,2.0,43.0,1.0,7.0
6,M6G,Downtown Toronto,Christie,43.668781,-79.42071,0.0,49.0,1.0,6.0
7,M5H,Downtown Toronto,"Adelaide,King,Richmond",43.6497,-79.382582,1.0,32.0,1.0,8.0
8,M6H,West Toronto,"Dovercourt Village,Dufferin",43.665087,-79.438705,0.0,47.0,1.0,7.0
9,M5J,Downtown Toronto,"Harbourfront East,Toronto Islands,Union Station",43.63021,-79.362433,0.0,19.0,1.0,1.0


### Applying Weights

In [66]:
# positive weight
weight_elementary = 5

# negative weight
weight_preschool = -10
weight_industry = -5
weight_gas = -5


In [67]:
df_weighted = df_data[['Localidad']].copy()

In [68]:
df_weighted['Score'] = df_data['elementary'] * weight_elementary + df_data['preschool'] * weight_preschool + df_data['industry'] *weight_industry + df_data['gas'] * weight_gas
df_weighted = df_weighted.sort_values(by=['Score'], ascending=False)
df_weighted

Unnamed: 0,Localidad,Score
17,Lawrence Park,210.0
6,Christie,210.0
28,"Moore Park,Summerhill East",205.0
26,"Harbord,University of Toronto",205.0
30,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",200.0
20,"Forest Hill North,Forest Hill West",200.0
18,Roselawn,200.0
23,"The Annex,North Midtown,Yorkville",200.0
32,Rosedale,200.0
8,"Dovercourt Village,Dufferin",195.0
