# Toronto analysis

## First part

#### Scraping from Wiki

In [4]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd

url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# We use try-except incase the request was unsuccessful because of 
# wrong URL
try:
   page = urlopen(url)
except:
   print("Error opening the URL")

soup = BeautifulSoup(page, 'html.parser')

content = soup.find('table', {"class": "wikitable"})

df = pd.read_html(str(content))[0]
df

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
7,M8A,Not assigned,
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,Malvern / Rouge


#### Remove not assigned Borough

In [5]:
df = df[df.Borough != 'Not assigned']
df

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,Malvern / Rouge
11,M3B,North York,Don Mills
12,M4B,East York,Parkview Hill / Woodbine Gardens
13,M5B,Downtown Toronto,"Garden District, Ryerson"


#### Group by Postal Code and concat Neighborhoods

In [6]:
#df['Neighborhood'] = df.groupby(['Postal code'])['Neighborhood'].transform(lambda x: ','.join(x))
#df.'Postal code'.drop_duplicates()

df = pd.DataFrame(df.groupby(['Postal code','Borough'])['Neighborhood'].apply(','.join)).reset_index()

duplicateRowsDF = df[df.duplicated(subset=['Postal code'])]
duplicateRowsDF

Unnamed: 0,Postal code,Borough,Neighborhood


No Postal Code is replicated along the dataframe.

#### Check for not assigned Neighborhood & assigned Borough

In [7]:
for index, row in df.iterrows():  
    if (row.Neighborhood == 'Not assigned' or row.Neighborhood == ''):
        row.Neighborhood = row.Borough
    
df

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1B,Scarborough,Malvern / Rouge
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,Scarborough,Guildwood / Morningside / West Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,Kennedy Park / Ionview / East Birchmount Park
7,M1L,Scarborough,Golden Mile / Clairlea / Oakridge
8,M1M,Scarborough,Cliffside / Cliffcrest / Scarborough Village West
9,M1N,Scarborough,Birch Cliff / Cliffside West


In [8]:
df.shape

(103, 3)

# Second part

In [9]:

import types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.
client_430eedbebd9d43938a15c55706259f34 = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='0zLkvEzeGO8-Wduda6Pg44fMXp9spcp02CfYngnPoHfW',
    ibm_auth_endpoint="https://iam.ng.bluemix.net/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3-api.us-geo.objectstorage.service.networklayer.com')

body = client_430eedbebd9d43938a15c55706259f34.get_object(Bucket='courseracapstone-donotdelete-pr-5drqqszwhsw9nq',Key='Geospatial_Coordinates.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

coords = pd.read_csv(body)
coords.head()


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [10]:
df = pd.merge(df, coords, left_on='Postal code', right_on='Postal Code')
df = df.drop('Postal Code', 1)
df

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek,43.784535,-79.160497
2,M1E,Scarborough,Guildwood / Morningside / West Hill,43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,Kennedy Park / Ionview / East Birchmount Park,43.727929,-79.262029
7,M1L,Scarborough,Golden Mile / Clairlea / Oakridge,43.711112,-79.284577
8,M1M,Scarborough,Cliffside / Cliffcrest / Scarborough Village West,43.716316,-79.239476
9,M1N,Scarborough,Birch Cliff / Cliffside West,43.692657,-79.264848


# Third part

#### Filter Toronto

In [11]:
toronto = df[df['Borough'].str.contains("Toronto")]
toronto

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
37,M4E,East Toronto,The Beaches,43.676357,-79.293031
41,M4K,East Toronto,The Danforth West / Riverdale,43.679557,-79.352188
42,M4L,East Toronto,India Bazaar / The Beaches West,43.668999,-79.315572
43,M4M,East Toronto,Studio District,43.659526,-79.340923
44,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
45,M4P,Central Toronto,Davisville North,43.712751,-79.390197
46,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
47,M4S,Central Toronto,Davisville,43.704324,-79.38879
48,M4T,Central Toronto,Moore Park / Summerhill East,43.689574,-79.38316
49,M4V,Central Toronto,Summerhill West / Rathnelly / South Hill / For...,43.686412,-79.400049


#### Import libraries

In [14]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

!pip install --user geopy
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

!pip install --user folium
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 15.6MB/s eta 0:00:01
[?25hCollecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/81/6d/31c83485189a2521a75b4130f1fee5364f772a0375f81afff619004e5237/branca-0.4.0-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.0 folium-0.10.1
Folium installed
Libraries imported.


#### Setting parameters

In [12]:
CLIENT_ID = 'ZK1EZ3ZPVHG5UIR3QBSOFE1CA31JOGQLAO1HRIKXBPZPZO24' # your Foursquare ID
CLIENT_SECRET = 'V5UPV3TIEI5PLQH5UM2NOCKYZVC1AJS44N3WJ2B5A2PPOXSQ' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('My credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

My credentials:
CLIENT_ID: ZK1EZ3ZPVHG5UIR3QBSOFE1CA31JOGQLAO1HRIKXBPZPZO24
CLIENT_SECRET:V5UPV3TIEI5PLQH5UM2NOCKYZVC1AJS44N3WJ2B5A2PPOXSQ


I assume I am at Hilton Toronto hotel.

In [15]:
address = '145 Richmond St W, Toronto, ON'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print(latitude, longitude)

43.649938649999996 -79.38538037286145


I want to find a close Japanese restaurant

In [16]:
search_query = 'Japanese Restaurant'
radius = 500
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)

results = requests.get(url).json()
results
# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe

Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.neighborhood,location.postalCode,location.state,name,referralId,venuePage.id
0,"[{'id': '4bf58dd8d48988d111941735', 'name': 'J...",False,4afa1f33f964a520e91622e3,100 Simcoe St.,CA,Toronto,Canada,at Pearl St.,180,"[100 Simcoe St. (at Pearl St.), Toronto ON, Ca...","[{'label': 'display', 'lat': 43.64851365544565...",43.648514,-79.386457,,,ON,Fune Japanese Restaurant,v-1586417715,
1,"[{'id': '4bf58dd8d48988d1d2941735', 'name': 'S...",False,4ae4b055f964a520229d21e3,143 Dundas St. West,CA,Toronto,Canada,,605,"[143 Dundas St. West, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65538110598594...",43.655381,-79.38527,,,ON,Kyoto House Japanese Restaurant,v-1586417715,
2,"[{'id': '4bf58dd8d48988d111941735', 'name': 'J...",False,4ae73054f964a5203ca921e3,181 Bay St,CA,Toronto,Canada,at Wellington St. W,558,"[181 Bay St (at Wellington St. W), Toronto ON ...","[{'label': 'display', 'lat': 43.647167, 'lng':...",43.647167,-79.379608,,M5J 2T3,ON,Ki Modern Japanese + Bar,v-1586417715,
3,"[{'id': '4bf58dd8d48988d1c4941735', 'name': 'R...",False,50ca02c0245f2d4aa8c2b313,145 Richmond Street West,CA,Toronto,Canada,Hilton Toronto,14,"[145 Richmond Street West (Hilton Toronto), To...","[{'label': 'display', 'lat': 43.65006885787859...",43.650069,-79.385409,,M5H 2L2,ON,Tundra Restaurant,v-1586417715,
4,"[{'id': '4bf58dd8d48988d1c4941735', 'name': 'R...",False,50ca02be245f2d4aa8c2ab5b,145 Richmond Street West,CA,Toronto,Canada,Hilton Toronto,37,"[145 Richmond Street West (Hilton Toronto), To...","[{'label': 'display', 'lat': 43.64979669604817...",43.649797,-79.385807,,M5H 2L2,ON,Barristers Bar,v-1586417715,
5,"[{'id': '4bf58dd8d48988d111941735', 'name': 'J...",False,4c7a99b6794e224bc3026928,,CA,,Canada,,548,[Canada],"[{'label': 'display', 'lat': 43.645011, 'lng':...",43.645011,-79.38565,,,,Japanese 101,v-1586417715,
6,"[{'id': '4bf58dd8d48988d1c4941735', 'name': 'R...",False,4b223f5af964a520ba4424e3,225 Front St W,CA,Toronto,Canada,in InterContinental Toronto Centre,578,[225 Front St W (in InterContinental Toronto C...,"[{'label': 'display', 'lat': 43.64474919591934...",43.644749,-79.385113,Entertainment District,M5V 2X3,ON,Azure Restaurant & Bar,v-1586417715,136175835.0
7,"[{'id': '4bf58dd8d48988d145941735', 'name': 'C...",False,4b2027b5f964a520f82d24e3,195 Dundas St W,CA,Toronto,Canada,at University Ave,571,"[195 Dundas St W (at University Ave), Toronto ...","[{'label': 'display', 'lat': 43.65492521335936...",43.654925,-79.387089,,M5G 1C7,ON,Hong Shing Chinese Restaurant,v-1586417715,60327598.0
8,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",False,4ad4c05df964a5203ff620e3,30 Mercer Street,CA,Toronto,Canada,at John St,666,"[30 Mercer Street (at John St), Toronto ON M5V...","[{'label': 'display', 'lat': 43.64563436248102...",43.645634,-79.391125,,M5V 1H3,ON,Victor Restaurant & Bar,v-1586417715,
9,"[{'id': '4bf58dd8d48988d14e941735', 'name': 'A...",False,4ad4c05ff964a52048f720e3,110 Chestnut Street,CA,Toronto,Canada,,552,"[110 Chestnut Street, Toronto ON M5G 1R3, Canada]","[{'label': 'display', 'lat': 43.65488413420439...",43.654884,-79.385931,,M5G 1R3,ON,Hemispheres Restaurant & Bistro,v-1586417715,


#### Filter info

In [17]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]
dataframe_filtered = dataframe_filtered.sort_values(by='distance', ascending = False)
dataframe_filtered

Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,neighborhood,postalCode,state,id
29,Uncle Tetsu's Japanese Cheesecake,Bakery,,CA,Toronto,Canada,,695,"[Toronto ON M5J, Canada]","[{'label': 'display', 'lat': 43.64442750617172...",43.644428,-79.381311,,M5J,ON,57c76c1b498e26057391344c
8,Victor Restaurant & Bar,Bar,30 Mercer Street,CA,Toronto,Canada,at John St,666,"[30 Mercer Street (at John St), Toronto ON M5V...","[{'label': 'display', 'lat': 43.64563436248102...",43.645634,-79.391125,,M5V 1H3,ON,4ad4c05df964a5203ff620e3
22,Flexday At Victor Restaurant,Coworking Space,30 Mercer St,CA,Toronto,Canada,,664,"[30 Mercer St, Toronto ON M5V 1H3, Canada]","[{'label': 'display', 'lat': 43.645652, 'lng':...",43.645652,-79.391123,,M5V 1H3,ON,5b74375433e118002cf5d511
23,Mix Bistro Restaurant and Bar at the Hyatt Reg...,Restaurant,370 King St. W,CA,Toronto,Canada,at Peter St.,639,"[370 King St. W (at Peter St.), Toronto ON, Ca...","[{'label': 'display', 'lat': 43.64624476965642...",43.646245,-79.391459,,,ON,4cb4bf4064998cfaa7f608a2
27,Anoush Restaurant,Restaurant,250 Dundas St W,CA,Toronto,Canada,,623,"[250 Dundas St W, Toronto ON M5T 2Z5, Canada]","[{'label': 'display', 'lat': 43.654588, 'lng':...",43.654588,-79.389692,,M5T 2Z5,ON,4b82c671f964a52016e430e3
1,Kyoto House Japanese Restaurant,Sushi Restaurant,143 Dundas St. West,CA,Toronto,Canada,,605,"[143 Dundas St. West, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65538110598594...",43.655381,-79.38527,,,ON,4ae4b055f964a520229d21e3
6,Azure Restaurant & Bar,Restaurant,225 Front St W,CA,Toronto,Canada,in InterContinental Toronto Centre,578,[225 Front St W (in InterContinental Toronto C...,"[{'label': 'display', 'lat': 43.64474919591934...",43.644749,-79.385113,Entertainment District,M5V 2X3,ON,4b223f5af964a520ba4424e3
21,Cali Restaurant,Vietnamese Restaurant,179 Dundas St. W.,CA,Toronto,Canada,at Chestnut,576,"[179 Dundas St. W. (at Chestnut), Toronto ON M...","[{'label': 'display', 'lat': 43.65506808, 'lng...",43.655068,-79.386375,,M5G,ON,4c476d6719fde21e32410876
25,Ali Baba's,Middle Eastern Restaurant,199 Dundas St. W,CA,Toronto,Canada,at Centre Ave.,572,"[199 Dundas St. W (at Centre Ave.), Toronto ON...","[{'label': 'display', 'lat': 43.65491647880546...",43.654916,-79.387172,,M5g 1c8,ON,4ddd83c788779c82beb061fc
7,Hong Shing Chinese Restaurant,Chinese Restaurant,195 Dundas St W,CA,Toronto,Canada,at University Ave,571,"[195 Dundas St W (at University Ave), Toronto ...","[{'label': 'display', 'lat': 43.65492521335936...",43.654925,-79.387089,,M5G 1C7,ON,4b2027b5f964a520f82d24e3


Japanese restaurants nearby

In [18]:
restaurants = pd.DataFrame(dataframe_filtered.name[0:9])

restaurants

Unnamed: 0,name
29,Uncle Tetsu's Japanese Cheesecake
8,Victor Restaurant & Bar
22,Flexday At Victor Restaurant
23,Mix Bistro Restaurant and Bar at the Hyatt Reg...
27,Anoush Restaurant
1,Kyoto House Japanese Restaurant
6,Azure Restaurant & Bar
21,Cali Restaurant
25,Ali Baba's


#### Map

In [19]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=13) # generate map centred around the hotel

# add a red circle marker to represent the Hotel
folium.vector_layers.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='Hilton Toronto',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the Japanese restaurants as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.vector_layers.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map

#### Find the top rated among the 10 closest

In [25]:
ids = pd.DataFrame(dataframe_filtered[dataframe_filtered.name.isin(restaurants.name)].id)

column_names = ["Name", "Rating"]

ratings = pd.DataFrame(columns = column_names)
ratings.Name = restaurants.name

r = []

for i in ids.id:
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(i, CLIENT_ID, CLIENT_SECRET, VERSION)
    result = requests.get(url).json()
    #print(result['response']['venue']['rating'])
    try:
        r.append(result['response']['venue']['rating'])
    except:
        r.append('0')
        
ratings.Rating = [float(i) for i in r]

ratings 

Unnamed: 0,Name,Rating
29,Uncle Tetsu's Japanese Cheesecake,7.5
8,Victor Restaurant & Bar,8.2
22,Flexday At Victor Restaurant,0.0
23,Mix Bistro Restaurant and Bar at the Hyatt Reg...,5.7
27,Anoush Restaurant,0.0
1,Kyoto House Japanese Restaurant,5.9
6,Azure Restaurant & Bar,5.9
21,Cali Restaurant,0.0
25,Ali Baba's,7.3


In [31]:
print("I will go to ", ratings.sort_values(by='Rating', ascending = False).iloc[0].Name, "!")

I will go to  Victor Restaurant & Bar !
