In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Download the Wikipedia page listing Canadian postal codes that start with "M".
page_link = 'https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M'
page_response = requests.get(page_link, timeout=5)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page_response.raise_for_status()
page_content = BeautifulSoup(page_response.content, "html.parser")

# The data lives in the first table carrying the "wikitable" class.
table = page_content.find('table', {'class': 'wikitable'})
if table is None:
    raise ValueError("No table with class 'wikitable' found at {}".format(page_link))
table_rows = table.find_all('tr')

# Collect one list of non-empty cell texts per table row. Rows that yield no
# non-empty <td> text (e.g. a header row made only of <th> cells) are skipped.
res = []
for tr in table_rows:
    cell_texts = [cell.text.strip() for cell in tr.find_all('td')]
    row = [text for text in cell_texts if text]
    if row:
        res.append(row)

df = pd.DataFrame(res, columns=['Postal Code', 'Borough', 'Neighborhood'])

# Drop rows whose Borough is 'Not assigned'.
df0 = df[~df['Borough'].isin(['Not assigned'])]
df0.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


I first used `count` to check how many rows had Neighborhood = 'Not assigned'. Only one record did, so a single replacement was enough to fix it.

In [2]:
# A row whose Neighborhood is 'Not assigned' takes the name of its own Borough.
# This yields the same value as the previously hard-coded "Queen's Park" for
# that record, but keeps working if the source table changes.
unassigned = df0['Neighborhood'] == 'Not assigned'
df1 = df0.assign(Neighborhood=df0['Neighborhood'].mask(unassigned, df0['Borough']))

# Sanity check: every count below should be 0.
df1[df1['Neighborhood'] == 'Not assigned'].count()

Postal Code     0
Borough         0
Neighborhood    0
dtype: int64

Used an aggregation to combine the neighborhood names that share a postal code, with a lambda that joins them using a comma as the separator.

In [3]:
# Joiner for the aggregation: glues the names of one group together with " , ".
foo = lambda names: " , ".join(names)

# One row per (Postal Code, Borough) pair; the group keys come back as ordinary
# columns through reset_index().
neighborhoods_by_code = df1.groupby(['Postal Code', 'Borough'])['Neighborhood']
df1 = neighborhoods_by_code.agg(foo).reset_index()
df1

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Rouge , Malvern"
1,M1C,Scarborough,"Highland Creek , Rouge Hill , Port Union"
2,M1E,Scarborough,"Guildwood , Morningside , West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park , Ionview , Kennedy Park"
7,M1L,Scarborough,"Clairlea , Golden Mile , Oakridge"
8,M1M,Scarborough,"Cliffcrest , Cliffside , Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff , Cliffside West"


In [4]:
# Dimensions of the aggregated frame as (rows, columns).
df1.shape

(103, 3)

Read the coordinates from the given link.

In [5]:
# Fetch the coordinates table straight from its URL.
geo_url = 'http://cocl.us/Geospatial_data'
df2 = pd.read_csv(geo_url)
print('Done')

Done


Used merge by Postal Code since both CSV files have the column 'Postal Code'. Similar to vlookup in Excel.

In [6]:
# Persist the aggregated neighbourhoods, then load them back from disk.
df1.to_csv("df1.csv", index=False)
df1 = pd.read_csv("df1.csv")

# Coordinates table, minus any column that contains a missing value.
df2 = pd.read_csv('http://cocl.us/Geospatial_data').dropna(axis=1)

# Join on the shared 'Postal Code' column and save the result.
merged = pd.merge(df1, df2, on='Postal Code')
merged.to_csv("merged_data.csv", index=False)
print('Done')

Done


Merged the CSVs to add the coordinates to the postal codes.

In [7]:
# Reload the merged table from the CSV written by the previous cell and
# preview its first rows.
df3 = pd.read_csv("merged_data.csv")
df3.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge , Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek , Rouge Hill , Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


Install folium to create the map.

In [8]:
# Install the pinned folium release from conda-forge, then import it.
!conda install -c conda-forge folium=0.5.0 --yes # comment this line out if folium 0.5.0 is already installed
import folium

Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    altair-2.3.0               |        py36_1001         533 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         631 KB

The following NEW packages will be INSTALLED:

    altair:  2.3.0-py36_1001 conda-forge
    branca:  0.3.1-py_0      conda-forge
    folium:  0.5.0-py_0      conda-forge
    vincent: 0.4.4-py_1      conda-forge


Downloading and Extracting Packages
vincent-0.4.4        |

Install geopy for the coordinates.

In [9]:
# Install geopy from conda-forge (version not pinned; the recorded run resolved
# geopy 1.18.1), then import the Nominatim geocoder class.
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim

Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geopy-1.18.1               |             py_0          51 KB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          84 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.18.1-py_0 conda-forge


Downloading and Extracting Packages
geopy-1.18.1         | 51 KB     | ##################################### | 100% 
geographiclib-1.49   | 32 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done


Get the coordinates of Toronto so the map can be centered on the city.

In [10]:
address = 'Toronto, ON'

# Nominatim expects an identifying user agent: omitting it is deprecated in
# geopy 1.x and rejected by newer geopy releases.
geolocator = Nominatim(user_agent='toronto_explorer')
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; report that clearly
# instead of failing with an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

  This is separate from the ipykernel package so we can avoid doing imports until


The geograpical coordinate of Toronto are 43.653963, -79.387207.


Used folium to show the map.
Each marker's popup label shows the postal code and the neighborhood.

In [22]:
# Fixed (latitude, longitude) used as the initial centre of the map.
coordinates_toronto = (43.65, -79.38)

map_toronto = folium.Map(location=coordinates_toronto, zoom_start=10)

# Add one circle marker per postal code; its popup reads "<postal code>:<neighborhoods>".
for lat, lng, neighborhood, postal_code in zip(df3['Latitude'], df3['Longitude'], df3['Neighborhood'], df3['Postal Code']):
    # parse_html=True makes folium treat the label as plain text, so characters
    # such as apostrophes in neighbourhood names cannot break the popup.
    popup = folium.Popup('{}:{}'.format(postal_code, neighborhood), parse_html=True)
    # parse_html is a Popup option, not a CircleMarker option, so it is no
    # longer passed to the marker.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

display(map_toronto)