In [6]:
import requests
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
page = requests.get(url)

In [13]:
from bs4 import BeautifulSoup
import pandas as pd
from pandas import DataFrame
Borough = []
postal_code = []
neighborhood = []

content = page.text
soup = BeautifulSoup(content)
#print(soup.prettify())

table = soup.find("table", attrs = {"class":"wikitable"})
table_data = table.tbody.find_all("tr")
#print(table_data)

counter = 0
for tr in table_data:
    for td in tr.find_all("td"):
        if counter % 3 == 0:
            postal_code.append(td.text.replace('\n','').strip())
        elif counter % 3 == 1:
            Borough.append(td.text.replace('\n','').strip())
        else:
            neighborhood.append(td.text.replace('\n','').strip())
        counter += 1

###Convert three lists into a dataframe and clean the dataframe
list1 = list(zip(postal_code, Borough, neighborhood))
df = DataFrame(list1, columns = ['PostalCode', 'Borough', 'Neighborhood'])
df = df[df.Borough != 'Not assigned']
df = df.sort_values(by=['PostalCode'])
df = df.reset_index()
df = df.drop(columns = ['index'])
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [14]:
df.shape

(103, 3)

In [16]:
file = pd.read_csv('Geospatial_Coordinates.csv')
file.head()
data = df.merge(file,left_on='PostalCode', right_on='Postal Code')
data = data.drop(columns=['Postal Code'])
data

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437


In [31]:
from geopy.geocoders import Nominatim 
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
from pandas.io.json import json_normalize

address = 'Toronto, ON'
geolocator = Nominatim(user_agent="tr_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Creates a map of Toronto with neighborhoods

In [25]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(data['Latitude'], data['Longitude'], data['Borough'], data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Explore neighborhood

In [22]:
CLIENT_ID = 'FE5TNYK1LUWUO1GJ1UGHLV2HXCVEA5XE5EFAU0BHFKPU45SW' # your Foursquare ID
CLIENT_SECRET = 'LFUNVGPUGSHP2FILOIVBGCHDCDKE3XXPFCXHVTRYL5C544HS' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FE5TNYK1LUWUO1GJ1UGHLV2HXCVEA5XE5EFAU0BHFKPU45SW
CLIENT_SECRET:LFUNVGPUGSHP2FILOIVBGCHDCDKE3XXPFCXHVTRYL5C544HS


In [26]:
data.loc[0, 'Neighborhood']
neighborhood_latitude = data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Malvern, Rouge are 43.806686299999996, -79.19435340000001.


In [37]:
radius = 1000
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
results = requests.get(url).json()

In [35]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  


Unnamed: 0,name,categories,lat,lng
0,Images Salon & Spa,Spa,43.802283,-79.198565
1,Harvey's,Restaurant,43.80002,-79.198307
2,Staples Morningside,Paper / Office Supplies Store,43.800285,-79.196607
3,Wendy's,Fast Food Restaurant,43.802008,-79.19808
4,Wendy’s,Fast Food Restaurant,43.807448,-79.199056


In [36]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

18 venues were returned by Foursquare.
