In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import csv
import geocoder

## Use BeautifulSoup to scrape the website to get the table of neighborhoods

In [2]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# Fail fast instead of hanging or silently parsing an error page:
# `timeout` bounds the wait and `raise_for_status` turns 4xx/5xx into an exception.
req = requests.get(url, timeout=30)
req.raise_for_status()
soup = BeautifulSoup(req.text, "html.parser")
# The neighborhood table is located by its "wikitable sortable" class.
iTable = soup.find('table', attrs={"class":"wikitable sortable"})
if iTable is None:
    # Surface a clear error here rather than an AttributeError further down.
    raise ValueError("Could not find the 'wikitable sortable' table at " + url)

## Write the contents of the table to a CSV file

In [3]:
# Dump the scraped table to CSV, skipping postal codes with no borough.
# Explicit UTF-8 keeps the file independent of the platform's default encoding
# and matches the default encoding `pd.read_csv` uses when reading it back.
with open('tasks/neighborhoods_6.csv', 'w', newline='', encoding='utf-8') as csvfile:
    output_file = csv.writer(csvfile, delimiter=',')
    output_file.writerow(['PostalCode','Borough','Neighborhood'])

    # handle each row
    for tr in iTable.tbody.find_all("tr"):
        tds = [td.text.replace('\n','').strip() for td in tr.find_all("td")]

        # Rows made only of <th> cells yield an empty list, and a row with a
        # single cell has no borough column to inspect, so skip both.
        if len(tds) < 2:
            continue

        if tds[1] == 'Not assigned':
            print("drop this line: ", tds[0])
        else:
            output_file.writerow(tds)

drop this line:  M1A
drop this line:  M2A
drop this line:  M8A
drop this line:  M2B
drop this line:  M7B
drop this line:  M8B
drop this line:  M2C
drop this line:  M7C
drop this line:  M8C
drop this line:  M2E
drop this line:  M3E
drop this line:  M7E
drop this line:  M8E
drop this line:  M9E
drop this line:  M2G
drop this line:  M3G
drop this line:  M7G
drop this line:  M8G
drop this line:  M9G
drop this line:  M7H
drop this line:  M8H
drop this line:  M9H
drop this line:  M7J
drop this line:  M8J
drop this line:  M9J
drop this line:  M7K
drop this line:  M8K
drop this line:  M9K
drop this line:  M7L
drop this line:  M8L
drop this line:  M7M
drop this line:  M8M
drop this line:  M7N
drop this line:  M8N
drop this line:  M3P
drop this line:  M7P
drop this line:  M8P
drop this line:  M3R
drop this line:  M8R
drop this line:  M2S
drop this line:  M3S
drop this line:  M7S
drop this line:  M8S
drop this line:  M9S
drop this line:  M2T
drop this line:  M3T
drop this line:  M6T
drop this lin

## Read the CSV file into a pandas DataFrame

In [4]:
# Reload the CSV written by the scraping step and preview its first rows.
df = pd.read_csv(filepath_or_buffer='tasks/neighborhoods_6.csv')
print(df.head(n=5))

  PostalCode           Borough                                 Neighborhood
0        M3A        North York                                    Parkwoods
1        M4A        North York                             Victoria Village
2        M5A  Downtown Toronto                    Regent Park, Harbourfront
3        M6A        North York             Lawrence Manor, Lawrence Heights
4        M7A  Downtown Toronto  Queen's Park, Ontario Provincial Government


In [5]:
# (rows, columns) of the loaded table, shown as the cell's output.
df.shape

(103, 3)

## Get the latitude/longitude of each postal code with geocoder

In [6]:
# Pull out the postal-code column and show it.
postal_code = df.loc[:, 'PostalCode']
print(postal_code)


0      M3A
1      M4A
2      M5A
3      M6A
4      M7A
      ... 
98     M8X
99     M4Y
100    M7Y
101    M8Y
102    M8Z
Name: PostalCode, Length: 103, dtype: object


## Geocoder doesn't work, so use coordinates from an existing CSV file instead

In [10]:
# Disabled geocoder lookup, kept for reference.
# NOTE(review): as written it formats the entire `postal_code` Series into a
# single query string and retries until a result comes back, so it would loop
# forever if the service never answers. Rework before re-enabling.
#lat_lng_coords = None
#while(lat_lng_coords is None):
#    g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
#    lat_lng_coords = g.latlng
#latitude = lat_lng_coords[0]
#longitude = lat_lng_coords[1]

# read coordinates from file
df_coordinate = pd.read_csv('tasks/Geospatial_Coordinates.csv')
print(df_coordinate.head())
print("done")

  Postal Code   Latitude  Longitude
0         M1B  43.806686 -79.194353
1         M1C  43.784535 -79.160497
2         M1E  43.763573 -79.188711
3         M1G  43.770992 -79.216917
4         M1H  43.773136 -79.239476
done


In [11]:
# Give the key column the same name as in `df` so the tables can be merged on it.
# The result is rebound instead of using `inplace=True`, which avoids mutating
# in place a frame that an earlier cell already displayed.
df_coordinate = df_coordinate.rename(columns={'Postal Code': 'PostalCode'})

In [12]:
# List the column labels to confirm the key column is now named 'PostalCode'.
df_coordinate.columns.values

array(['PostalCode', 'Latitude', 'Longitude'], dtype=object)

In [13]:
# Attach latitude/longitude to each neighborhood row; the inner join keeps only
# postal codes present in both tables.
df_total = df.merge(df_coordinate, how='inner', on='PostalCode')

In [15]:
# Print the merged table (pandas abbreviates long frames in the printed form).
print(df_total)

    PostalCode           Borough  \
0          M3A        North York   
1          M4A        North York   
2          M5A  Downtown Toronto   
3          M6A        North York   
4          M7A  Downtown Toronto   
..         ...               ...   
98         M8X         Etobicoke   
99         M4Y  Downtown Toronto   
100        M7Y      East Toronto   
101        M8Y         Etobicoke   
102        M8Z         Etobicoke   

                                          Neighborhood   Latitude  Longitude  
0                                            Parkwoods  43.753259 -79.329656  
1                                     Victoria Village  43.725882 -79.315572  
2                            Regent Park, Harbourfront  43.654260 -79.360636  
3                     Lawrence Manor, Lawrence Heights  43.718518 -79.464763  
4          Queen's Park, Ontario Provincial Government  43.662301 -79.389494  
..                                                 ...        ...        ...  
98       The K