In [25]:
#conda config --env --set always_yes true
#!conda install -c conda-forge geopy
#!conda install -c conda-forge lxml -y
#!conda install -c conda-forge beautifulsoup4
import urllib.request
import numpy as np
import bs4 as bs
import pandas as pd
from geopy.geocoders import Nominatim
import folium

In [29]:
#Typically we would want to download the Geocoder package as well, but since the package has been deemed 'unstable' by 
#some of the instructors, we've been given supplementary data. While irrelevant to this particular project, the code 
#for grabbing geolocations is as follows:

#!conda install -c conda-forge geocoder
#initialize your variable to None

#lat_lng_coords = None

#loop until you get the coordinates

#while(lat_lng_coords is None):
    #g = geocoder.google('{}, Toronto, Ontario'.format('Postal Code'))
    #lat_lng_coords = g.latlng

#Latitude= lat_lng_coords[0]

#Longitude= lat_lng_coords[1]

#### We start by processing the web scrape from the specified Wikipedia Page

In [4]:
# Wikipedia page listing the Canadian postal codes that start with "M".
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# Open the response in a context manager so the HTTP connection is released
# as soon as the document has been parsed.
with urllib.request.urlopen(url) as page:
    soup = bs.BeautifulSoup(page, 'lxml')
#print(soup.prettify())

In [5]:
# Gather every <table> element in the parsed page (handy for inspection).
all_tables=soup.find_all("table")
#all_tables

In [6]:
# Select the table by its CSS classes rather than by the exact value of the
# class attribute, so the lookup still succeeds if the table carries extra
# classes or lists them in a different order.
right_table = soup.select_one('table.wikitable.sortable')
# Fail here with a clear message instead of with an AttributeError on None
# when the rows are read later.
if right_table is None:
    raise ValueError('No table with classes "wikitable sortable" was found on the page')
#right_table

#### We loop over the rows of the table and collect the three cell values from each row

In [7]:

# Column buffers filled from the table rows:
# A -> first cell, B -> second cell, C -> third cell of each row.
A = []
B = []
C = []

for row in right_table.find_all('tr'):
    cells = row.find_all('td')
    # Only rows with exactly three <td> cells are data rows; anything else
    # (presumably the header row, which has no <td>) is skipped.
    if len(cells) == 3:
        # find(string=True) returns the first text node inside the cell,
        # including any trailing newline; that is cleaned up later.
        A.append(cells[0].find(string=True))
        B.append(cells[1].find(string=True))
        C.append(cells[2].find(string=True))

In [8]:
# Assemble the three scraped columns into a single dataframe holding the
# data from the Wikipedia table.
df = pd.DataFrame({
    'Postal Code': A,
    'Borough': B,
    'Neighborhood': C,
})
df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A\n,Not assigned\n,Not assigned\n
1,M2A\n,Not assigned\n,Not assigned\n
2,M3A\n,North York\n,Parkwoods\n
3,M4A\n,North York\n,Victoria Village\n
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront\n"


#### Now we clean the dataframe

In [9]:
cols_to_check = ['Postal Code', 'Borough', 'Neighborhood']

# Clean in one chain: strip the newline left over from the HTML cells, turn the
# 'Not assigned' placeholder into NaN, then drop every row that has a NaN.
# The original row index is kept on purpose.
df = (
    df[cols_to_check]
    .replace({'\n': ''}, regex=True)
    .replace({'Not assigned': np.nan}, regex=True)
    .dropna(axis=0, how='any')
)
df.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [10]:
# A bare df.groupby('Postal Code') builds a GroupBy object and throws it away,
# so it neither checks nor changes anything. The later merges on 'Postal Code'
# rely on one row per code, so verify that invariant explicitly.
assert df['Postal Code'].is_unique, 'Duplicate postal codes found; combine their neighborhoods before merging'
df.head()


Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [11]:
# Sanity check: (rows, columns) remaining after cleaning.
df.shape

(103, 3)

#### Now that we have our dataframe, we'll add in geolocations from a separate csv. 
#### As mentioned before, normally we would pull this with the geocoder, but for purposes
#### of this project, we've been given a csv which we'll join on our dataframe

In [14]:
# Load the supplied coordinates table; it has one row per postal code with
# 'Postal Code', 'Latitude' and 'Longitude' columns.
df2=pd.read_csv('http://cocl.us/Geospatial_data')
df2.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [15]:
# Attach latitude/longitude to every cleaned row. This is an inner join, so
# only postal codes present in both tables are kept.
df3 = df.merge(df2, how='inner', on='Postal Code')
df3.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [18]:
# Keep only the rows whose borough name contains "Toronto".
is_toronto_borough = df['Borough'].astype(str).str.contains('Toronto')
df4 = df[is_toronto_borough]

In [19]:
# Preview the rows whose borough name contains "Toronto".
df4.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
13,M5B,Downtown Toronto,"Garden District, Ryerson"
22,M5C,Downtown Toronto,St. James Town
30,M4E,East Toronto,The Beaches


In [20]:
# Attach coordinates to the Toronto-only rows (inner join on the postal code).
df5 = df4.merge(df2, how='inner', on='Postal Code')

In [21]:
# Preview the Toronto rows with their coordinates attached.
df5.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


#### Now we create map of Toronto using latitude and longitude values

In [23]:
# Look up the coordinates of Toronto to use as the centre of the map.
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; stop with a clear
# error instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Nominatim returned no result for {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [31]:

# Base map centred on the geocoded Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# Add one circle marker per row of df5, labelled "<neighborhood>, <borough>".
for lat, lng, borough, neighborhood in zip(df5['Latitude'], df5['Longitude'], df5['Borough'], df5['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html is a Popup option; it is not an option of CircleMarker, so it
    # is set here only.
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

# Last expression of the cell: renders the interactive map.
map_toronto

#### As you can see, our neighborhoods fall into four main clusters. I would say
#### that they are Central, North, East, and West