Import Libraries

In [2]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

Read the table

In [3]:
wiki = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(wiki,'xml')
table = soup.find('table',{'class':'wikitable sortable'})
rows = table.find_all('tr')


Extract raw tables

In [4]:
frame = []
for row in rows:
    frame.append([t.text.strip() for t in row.find_all('td')])


Create a dataframe

In [5]:
df = pd.DataFrame(frame, columns=['PostalCode', 'Borough', 'Neighbourhood'])
df = df[~df['Borough'].isnull()]  

Clean not assigned rows of column Borough

In [6]:
df.drop(df[df['Borough']=="Not assigned"].index,axis=0, inplace=True) 
new_df = df.reset_index()

Group by postal code

In [7]:
grouped_df= new_df.groupby('PostalCode').agg(lambda x: ','.join(x))

Fix the repeating Boroughs

In [8]:
grouped_df.loc[grouped_df['Neighbourhood']=="Not assigned",'Neighbourhood']=grouped_df.loc[grouped_df['Neighbourhood']=="Not assigned",'Borough']
new_grouped_df = grouped_df.reset_index()
new_grouped_df['Borough']= new_grouped_df['Borough'].str.replace('nan|[{}\s]','').str.split(',').apply(set).str.join(',').str.strip(',').str.replace(",{2,}",",")


Shape

In [9]:
new_grouped_df.shape

(103, 3)

In [10]:
new_grouped_df

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


# Second part of the week 3

Import CSV file

In [11]:
latlng_df = pd.read_csv('http://cocl.us/Geospatial_data')
latlng_df.columns = ['PostalCode', 'Latitude', 'Longitude']

Inner join with previous data frame

In [12]:
joined_df = pd.merge(new_grouped_df, latlng_df, on=['PostalCode'], how='inner')
joined_df

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


# Part 3

In [13]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1.20.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

In [15]:
toronto_df= joined_df[joined_df['Borough'].str.contains('Toronto')].reset_index(drop=True)
toronto_df

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,EastToronto,The Beaches,43.676357,-79.293031
1,M4K,EastToronto,"The Danforth West,Riverdale",43.679557,-79.352188
2,M4L,EastToronto,"The Beaches West,India Bazaar",43.668999,-79.315572
3,M4M,EastToronto,Studio District,43.659526,-79.340923
4,M4N,CentralToronto,Lawrence Park,43.72802,-79.38879
5,M4P,CentralToronto,Davisville North,43.712751,-79.390197
6,M4R,CentralToronto,North Toronto West,43.715383,-79.405678
7,M4S,CentralToronto,Davisville,43.704324,-79.38879
8,M4T,CentralToronto,"Moore Park,Summerhill East",43.689574,-79.38316
9,M4V,CentralToronto,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",43.686412,-79.400049


Generate map

In [16]:
address = 'Toronto, ON, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Manhattan are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Manhattan are 43.653963, -79.387207.


Visualize the neighborhoods

In [17]:
# create map of Manhattan using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Search for a specific venue category

In [18]:
CLIENT_ID = '3AHE3XLNNXWTYZ4TQAXYAPCOKLNVPTDBQB1VWDV2OQ5WBDU0' # your Foursquare ID
CLIENT_SECRET = 'K00YOXUSY3EUXKRF22QWBTOGAIMDCVWK0DNUHUP4LMOME3YP' # your Foursquare Secret
VERSION = '20200123'
LIMIT = 30
radius = 500
search_query = 'Italian'
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)

Get the results

In [20]:
results = requests.get(url).json()

Get relevant part of JSON and transform it into a pandas dataframe

In [21]:
venues = results['response']['venues']
dataframe = json_normalize(venues)
dataframe.head()

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.postalCode,location.state,name,referralId,venuePage.id
0,"[{'id': '4bf58dd8d48988d12c951735', 'name': 'E...",False,4bfc0289c3ba9521c00f9653,136 Beverley St,CA,Toronto,Canada,Dundas Street,555,"[136 Beverley St (Dundas Street), Toronto ON, ...","[{'label': 'display', 'lat': 43.65402694219784...",43.654027,-79.394104,,ON,Italian Consulate Toronto,v-1580620969,
1,"[{'id': '4bf58dd8d48988d110941735', 'name': 'I...",False,51bf3866498e55ee55df8db0,,CA,Toronto,Canada,,127,"[Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65499143746528...",43.654991,-79.387897,,ON,The Fresh Italian,v-1580620969,
2,"[{'id': '4bf58dd8d48988d110941735', 'name': 'I...",False,526fe29411d2aeb3803013b0,"109 McCaul Street, Unit #42",CA,Toronto,Canada,Dundas Street West,288,"[109 McCaul Street, Unit #42 (Dundas Street We...","[{'label': 'display', 'lat': 43.653889, 'lng':...",43.653889,-79.390785,M5T 3K5,ON,The Fresh Italian Eatery,v-1580620969,
3,"[{'id': '4bf58dd8d48988d110941735', 'name': 'I...",False,573df789498e03dd8e54b166,595 Bay St,CA,Toronto,Canada,Dundas St,405,"[595 Bay St (Dundas St), Toronto ON M5G 2C2, C...","[{'label': 'display', 'lat': 43.65616, 'lng': ...",43.65616,-79.38319,M5G 2C2,ON,Mustachio Italian Eatery,v-1580620969,
4,"[{'id': '4bf58dd8d48988d110941735', 'name': 'I...",False,4f88cf84e4b002b90ab3b9b9,,CA,,Canada,,434,[Canada],"[{'label': 'display', 'lat': 43.65053979517576...",43.65054,-79.384603,,,LA's Italian + Bar,v-1580620969,


Filter Dataframe

In [22]:
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state,id
0,Italian Consulate Toronto,Embassy / Consulate,136 Beverley St,CA,Toronto,Canada,Dundas Street,555,"[136 Beverley St (Dundas Street), Toronto ON, ...","[{'label': 'display', 'lat': 43.65402694219784...",43.654027,-79.394104,,ON,4bfc0289c3ba9521c00f9653
1,The Fresh Italian,Italian Restaurant,,CA,Toronto,Canada,,127,"[Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65499143746528...",43.654991,-79.387897,,ON,51bf3866498e55ee55df8db0
2,The Fresh Italian Eatery,Italian Restaurant,"109 McCaul Street, Unit #42",CA,Toronto,Canada,Dundas Street West,288,"[109 McCaul Street, Unit #42 (Dundas Street We...","[{'label': 'display', 'lat': 43.653889, 'lng':...",43.653889,-79.390785,M5T 3K5,ON,526fe29411d2aeb3803013b0
3,Mustachio Italian Eatery,Italian Restaurant,595 Bay St,CA,Toronto,Canada,Dundas St,405,"[595 Bay St (Dundas St), Toronto ON M5G 2C2, C...","[{'label': 'display', 'lat': 43.65616, 'lng': ...",43.65616,-79.38319,M5G 2C2,ON,573df789498e03dd8e54b166
4,LA's Italian + Bar,Italian Restaurant,,CA,,Canada,,434,[Canada],"[{'label': 'display', 'lat': 43.65053979517576...",43.65054,-79.384603,,,4f88cf84e4b002b90ab3b9b9
5,Classic italian style pizza food truck,Food Truck,CNE Midway,CA,Toronto,Canada,,530,"[CNE Midway, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.652144, 'lng':...",43.652144,-79.381118,,ON,4c787c9181bca0936180fa14
6,john's italian cafe,Italian Restaurant,27 Baldwin Street,CA,Toronto,Canada,,546,"[27 Baldwin Street, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65612672798775...",43.656127,-79.393301,,ON,53daae5b498e9c9597c19b23
7,Little Anthony's,Italian Restaurant,121 Richmond St. W,CA,Toronto,Canada,at York St.,462,"[121 Richmond St. W (at York St.), Toronto ON ...","[{'label': 'display', 'lat': 43.65029624519052...",43.650296,-79.384513,M5H 2K1,ON,4b846dd4f964a520dc3431e3
8,Sbarro,Pizza Place,220 Yonge Street,CA,Toronto,Canada,in Toronto Eaton Centre,533,"[220 Yonge Street (in Toronto Eaton Centre), T...","[{'label': 'display', 'lat': 43.655413, 'lng':...",43.655413,-79.380896,M5B 2H1,ON,4b4a2d09f964a520687d26e3


In [23]:
dataframe_filtered.name

0                 Italian Consulate Toronto
1                         The Fresh Italian
2                  The Fresh Italian Eatery
3                  Mustachio Italian Eatery
4                        LA's Italian + Bar
5    Classic italian style pizza food truck
6                       john's italian cafe
7                          Little Anthony's
8                                    Sbarro
Name: name, dtype: object

In [24]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=13) # generate map centred around the Conrad Hotel

# add a red circle marker to represent the Conrad Hotel
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Conrad Hotel',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the Italian restaurants as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map