In [10]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
from pandas.io.json import json_normalize

In [11]:
#Scrape clean data with Beautiful Soup and request for the url

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# A timeout keeps the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() stops here on an HTTP error response instead of handing an
# error page to the parser.
response = requests.get(url, timeout=30)
response.raise_for_status()
page = response.text

# Parse the downloaded HTML with the lxml parser.
cdf = BeautifulSoup(page, 'lxml')

In [12]:
# Take the first <table> element found in the parsed page; it is treated as the
# postal codes table by the cells that follow.
datatable = cdf.find(name='table')
datatable;

In [13]:
# Column names for the scraped table, and an empty frame that carries them.
headers = [
    'Postal Code',
    'Borough',
    'Neighborhood',
]
new_cdf = pd.DataFrame(data=None, columns=headers)
new_cdf;

In [14]:
#convert the table from html code to a pandas dataframe

# Gather the stripped text of the <td> cells of each <tr>, keeping only rows that
# have one cell per header. The frame is built once from the collected rows:
# this avoids growing it row by row and makes the cell safe to re-run, because
# new_cdf is rebuilt rather than appended to.
table_rows = []
for tr_cell in datatable.find_all('tr'):
    row_data = [td_cell.text.strip() for td_cell in tr_cell.find_all('td')]
    if len(row_data) == len(headers):
        table_rows.append(row_data)

new_cdf = pd.DataFrame(table_rows, columns=headers)

In [15]:
# Preview the first five rows of the scraped table.
new_cdf.head(n=5)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [37]:
#get rid of all the not assigned boroughs from our table

# .copy() makes the filtered result an independent frame, so the later cells that
# modify new_cdf are not operating on a selection of the unfiltered table.
new_cdf = new_cdf[new_cdf['Borough'] != 'Not assigned'].copy()

new_cdf.head(20)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.753259,-79.329656
3,M4A,North York,Victoria Village,43.725882,-79.315572
4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
5,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
9,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
11,M3B,North York,Don Mills,43.745906,-79.352188
12,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
13,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [17]:
#check if any neighborhoods are not assigned inside an assigned borough
# The result is deliberately not called `display`: that name is the IPython display
# helper available in notebooks, and rebinding it would hide the helper.
unassigned_neighborhoods = new_cdf.loc[new_cdf['Neighborhood'] == 'Not assigned']
unassigned_neighborhoods.head();

In [18]:
#appropriate code if a neighborhood was unassigned inside of a borough
#a not assigned neighborhood takes on the name of the borough in the same row

# Series.mask replaces values where the condition is True with the row-aligned value
# from Borough. The result is assigned back through DataFrame.assign instead of an
# in-place replace on a column selection, which is chained assignment and is not
# guaranteed to update new_cdf itself.
neighborhood_missing = new_cdf['Neighborhood'] == 'Not assigned'
new_cdf = new_cdf.assign(
    Neighborhood=new_cdf['Neighborhood'].mask(neighborhood_missing, new_cdf['Borough'])
)
new_cdf.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [19]:
# Rename 'Postal Code' to 'PostalCode'. The renamed frame is rebound to new_cdf
# rather than modified with inplace=True, so the cell does not mutate a frame that
# was derived from a filter.
new_cdf = new_cdf.rename(columns={'Postal Code': 'PostalCode'})
new_cdf.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [20]:
# (number of rows, number of columns) of new_cdf
len(new_cdf.index), len(new_cdf.columns)

(103, 3)

In [21]:
# Read the CSV of latitude and longitude per postal code directly from its URL,
# then preview the first ten rows.

cdf_cordinates = pd.read_csv(filepath_or_buffer='http://cocl.us/Geospatial_data')
cdf_cordinates.head(n=10)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [22]:
#used the join statement to add the latitude and longitude columns to the dataframe created in part 1

# Index the coordinates by postal code so they can be joined on new_cdf's PostalCode.
coords_by_code = cdf_cordinates.set_index('Postal Code')

# Any coordinate columns left on new_cdf by an earlier run of this cell are dropped
# first: joining onto a frame that already has them fails with a column-overlap
# error, so without this the cell cannot be re-run.
new_cdf = (
    new_cdf
    .drop(columns=coords_by_code.columns, errors='ignore')
    .join(coords_by_code, on='PostalCode')
)
new_cdf.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.753259,-79.329656
3,M4A,North York,Victoria Village,43.725882,-79.315572
4,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
5,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [6]:
!conda install -c conda-forge geocoder --yes
import geocoder
from geopy.geocoders import Nominatim

Solving environment: done

# All requested packages already installed.



In [4]:
def get_geocode(postal_code, max_attempts=10):
    """Look up the latitude and longitude for a Toronto postal code.

    Queries ``geocoder.google`` with '<postal_code>, Toronto, Ontario' and repeats
    the query while the result's ``latlng`` is None, up to ``max_attempts`` times.
    The bound prevents the lookup from looping forever when the service never
    returns coordinates.

    Args:
        postal_code: Value formatted into the query string.
        max_attempts: Maximum number of queries to issue; must be at least 1.

    Returns:
        A ``(latitude, longitude)`` tuple taken from the first two items of ``latlng``.

    Raises:
        ValueError: If ``max_attempts`` is less than 1.
        RuntimeError: If none of the attempts returned coordinates.
    """
    if max_attempts < 1:
        raise ValueError('max_attempts must be at least 1')

    # The query text does not change between attempts, so build it once.
    query = '{}, Toronto, Ontario'.format(postal_code)
    for _ in range(max_attempts):
        lat_lng_coords = geocoder.google(query).latlng
        if lat_lng_coords is not None:
            latitude = lat_lng_coords[0]
            longitude = lat_lng_coords[1]
            return latitude, longitude

    raise RuntimeError(
        'No coordinates returned for {!r} after {} attempts'.format(query, max_attempts)
    )

In [1]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    folium-0.5.0               |             py_0          45 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         713 KB

The following NEW packages will be INSTALLED:

    altair:  4.1.0-py_1 conda-forge
    branca:  0.4.1-py_0 conda-forge
    folium:  0.5.0-py_0 conda-forge
    vincent: 0.4.4-py_1 conda-forge


Downloading and Extracting Packages
folium-0.5.0         | 45 KB     | #####

In [7]:
# Geocode the city itself; its coordinates are used as the centre of the maps below.
address = 'Toronto, CA'

geolocator = Nominatim(user_agent='Toronto')
location = geolocator.geocode(address)
# Fail with a clear message when the lookup yields no result, rather than with an
# AttributeError on the attribute reads below.
if location is None:
    raise ValueError('No geocoding result for {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The cordinates of Toronto are {}, {}.'.format(latitude,longitude))


The cordinates of Toronto are 43.6534817, -79.3839347.


In [42]:
# Plot every row of new_cdf as a small circle on a folium map centred on the
# Toronto coordinates. Each circle's popup reads "<neighborhood>, <borough>".

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in zip(*(new_cdf[name] for name in marker_columns)):
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        location=[lat, lng],
        radius=3,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    )
    marker.add_to(map_toronto)
map_toronto

In [31]:
# Rows of new_cdf whose Borough is 'Downtown Toronto', re-indexed from 0.

is_downtown = new_cdf['Borough'].eq('Downtown Toronto')
toronto_data = new_cdf.loc[is_downtown].reset_index(drop=True)
toronto_data.head(n=8)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
5,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
6,M6G,Downtown Toronto,Christie,43.669542,-79.422564
7,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568


In [47]:
# Plot only the rows of toronto_data on a closer-zoomed map centred on the Toronto
# coordinates. Circles have a black outline (the fill colour is '#3186cc') and a
# popup reading "<neighborhood>, <borough>".

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

marker_columns = ['Latitude', 'Longitude', 'Borough', 'Neighborhood']
for lat, lng, borough, neighborhood in zip(*(toronto_data[name] for name in marker_columns)):
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        location=[lat, lng],
        radius=3,
        popup=folium.Popup(popup_text, parse_html=True),
        color='black',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    )
    marker.add_to(map_toronto)
map_toronto