In [74]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
import folium
from pandas.io.json import json_normalize

In [2]:
url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

In [3]:
soup = BeautifulSoup(url,'lxml')

In [11]:
# Scrape the data and build a pandas dataframe

prop = pd.DataFrame()

for i in soup.find_all('td'):
    try:
        postalcode = i.p.b.text
    except:
        None
    try:
        borough = i.span.text.split("(")[0]
    except:
        None
    try:
        neighborhood = i.span.text.split("(")[1].rstrip(")")
    except:
        None
    data_dict = {'PostalCode':postalcode,
                'Borough':borough,
                'Neighborhood':neighborhood}
    prop = prop.append(data_dict, ignore_index=True)
prop.head()

Unnamed: 0,Borough,Neighborhood,PostalCode
0,Not assigned,Mimico NW / The Queensway West / South of Bloo...,M1A
1,Not assigned,Mimico NW / The Queensway West / South of Bloo...,M2A
2,North York,Parkwoods,M3A
3,North York,Victoria Village,M4A
4,Downtown Toronto,Regent Park / Harbourfront,M5A


In [12]:
# Dropping the NaN values from the dataframe

prop.replace(['Not assigned',""],np.nan,inplace=True)
prop.dropna(inplace=True)

In [13]:
# Assigning the Borough Name for neighbourhoods that don't have a neighborhood.

prop.loc[168]['Neighborhood'] = "East Toronto"
prop.loc[6]['Neighborhood'] = "Queen's Park"
prop.loc[148]['Neighborhood'] = "Downtown Toronto"
prop.loc[114]['Neighborhood'] = "Mississauga"

In [14]:
# Cleaning the names of the neighborhoods

borough = []
for i in prop.Borough:
    if i == "MississaugaCanada Post Gateway Processing Centre":
        borough.append("Mississauga")
    elif i == "East TorontoBusiness reply mail Processing Centre969 Eastern":
        borough.append("East Toronto")
    elif i == "Downtown TorontoStn A PO Boxes25 The Esplanade":
        borough.append("Downtown Toronto")
    elif i == "Queen's Park / Ontario Provincial Government":
        borough.append("Queen's Park")
    else:
        borough.append(i)

prop['Borough'] = borough

In [15]:
# Separating multiple neighborhoods  by comma

neigh = []
for i in prop.Neighborhood:
    neigh.append(",".join(i.split("/")))
    
prop['Neighborhood'] = neigh

In [16]:
# Printing the final dataframe

prop.head()

Unnamed: 0,Borough,Neighborhood,PostalCode
2,North York,Parkwoods,M3A
3,North York,Victoria Village,M4A
4,Downtown Toronto,"Regent Park , Harbourfront",M5A
5,North York,"Lawrence Manor , Lawrence Heights",M6A
6,Queen's Park,Queen's Park,M7A


In [17]:
# Printing the final shape of the dataframe
print(prop.shape)

(103, 3)


In [22]:
# getting the csv for the coordinates of the postalcode

coords = pd.read_csv('Geospatial_Coordinates_Toronto.csv')
coords.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [27]:
# Merging dataframes to get the combined dataframe with latitudes and longitudes

final_df = pd.merge(left=prop,right=coords,left_on=prop['PostalCode'],right_on=coords['Postal Code'])
final_df.drop(['Postal Code','key_0'],axis=1,inplace=True)
final_df.head()

Unnamed: 0,Borough,Neighborhood,PostalCode,Latitude,Longitude
0,North York,Parkwoods,M3A,43.753259,-79.329656
1,North York,Victoria Village,M4A,43.725882,-79.315572
2,Downtown Toronto,"Regent Park , Harbourfront",M5A,43.65426,-79.360636
3,North York,"Lawrence Manor , Lawrence Heights",M6A,43.718518,-79.464763
4,Queen's Park,Queen's Park,M7A,43.662301,-79.389494


#### Visualize Toronto Data

In [81]:
# create map of Canada using latitude and longitude values
map_can = folium.Map(location=[43.65189,-79.38171], zoom_start=10)

# add markers to map
for lat, lng, label in zip(final_df['Latitude'], final_df['Longitude'], final_df['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_can)  
    
map_can

In [53]:
# Work with boroughs that only contain the word Toronto

toronto_data = final_df[final_df['Borough'].str.contains("Toronto")]
toronto_data = toronto_data.reset_index()
toronto_data.drop('index',axis=1,inplace=True)
toronto_data

Unnamed: 0,Borough,Neighborhood,PostalCode,Latitude,Longitude
0,Downtown Toronto,"Regent Park , Harbourfront",M5A,43.65426,-79.360636
1,Downtown Toronto,"Garden District, Ryerson",M5B,43.657162,-79.378937
2,Downtown Toronto,St. James Town,M5C,43.651494,-79.375418
3,East Toronto,The Beaches,M4E,43.676357,-79.293031
4,Downtown Toronto,Berczy Park,M5E,43.644771,-79.373306
5,Downtown Toronto,Central Bay Street,M5G,43.657952,-79.387383
6,Downtown Toronto,Christie,M6G,43.669542,-79.422564
7,Downtown Toronto,"Richmond , Adelaide , King",M5H,43.650571,-79.384568
8,West Toronto,"Dufferin , Dovercourt Village",M6H,43.669005,-79.442259
9,East YorkEast Toronto,The Danforth East,M4J,43.685347,-79.338106


In [55]:
# Let's get the coordinates for Toronto

import geocoder

address = 'Toronto, ON'
from geopy.geocoders import TomTom 
g = geocoder.tomtom(address, key = 'PEBXY1EgzTWBLUuDT5xFbHFPZn8Qjt90')
latitude = g.lat
longitude = g.lng

print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude,longitude))

The geograpical coordinate of Toronto are 43.65189, -79.38171.


In [56]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [65]:
# Lets explore a random neighborhood in our dataset

toronto_data.loc[0,'Neighborhood']

'Regent Park , Harbourfront'

In [66]:
# Define FourSquare Credentials

CLIENT_ID = 'PVROPIFHA4AMW5TP0K0NE2MA4LSA0B3UTKLSODV50SJBY3GP' # your Foursquare ID
CLIENT_SECRET = 'OFLWNESJ0IHAU33I01G5GO1SIG33012IXPJ2USSFXVALR5VT' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: PVROPIFHA4AMW5TP0K0NE2MA4LSA0B3UTKLSODV50SJBY3GP
CLIENT_SECRET:OFLWNESJ0IHAU33I01G5GO1SIG33012IXPJ2USSFXVALR5VT


In [67]:
# Get the neighborhood latitude and longitude

neighborhood_latitude = toronto_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = toronto_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = toronto_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Regent Park , Harbourfront are 43.6542599, -79.3606359.


In [68]:
# Let's get the top 100 venues near Regent Park

LIMIT = 100
radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=PVROPIFHA4AMW5TP0K0NE2MA4LSA0B3UTKLSODV50SJBY3GP&client_secret=OFLWNESJ0IHAU33I01G5GO1SIG33012IXPJ2USSFXVALR5VT&v=20180605&ll=43.6542599,-79.3606359&radius=500&limit=100'

In [70]:
# Send the GET requests and examine the results

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e7dde6c60ba08001b7b591e'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 46,
  'suggestedBounds': {'ne': {'lat': 43.6587599045, 'lng': -79.3544279001486},
   'sw': {'lat': 43.6497598955, 'lng': -79.36684389985142}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label': 'display',
 

In [71]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [72]:
# Clean the json sructure and convert it into pandas dataframe

In [75]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,Body Blitz Spa East,Spa,43.654735,-79.359874
4,Impact Kitchen,Restaurant,43.656369,-79.35698


In [76]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

46 venues were returned by Foursquare.
