# Capstone Project Week 3

## Part 1

### Import the packages

In [1]:
from bs4 import BeautifulSoup
from urllib.request import urlopen

import re
import pandas as pd

### Parse the Wikipedia page with BeautifulSoup

In [2]:
pc_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Parse inside a `with` block so the HTTP response is closed as soon as
# BeautifulSoup has consumed it.
with urlopen(pc_url) as pc_html:
    pc_bs = BeautifulSoup(pc_html, 'html.parser')

# The postal-code data is taken from the first <table> on the page.
pc_tbl = pc_bs.find_all('table')
if not pc_tbl:
    raise ValueError("No <table> element found at " + pc_url)
pc_tbl2 = pc_tbl[0]

### Extract the data and convert it into a DataFrame
`find_all('th')` extracts all the table headers into a list called `header`, which is `['Postal Code','Borough','Neighbourhood']`.

`find_all('td')` extracts all the table cells into a flat list called `cell`.
As there are 3 elements in the header, every 3 consecutive elements of `cell` form one row of the table.
Rows whose `Borough` is `Not assigned` are skipped; the remaining rows are stored in the DataFrame `pc_df`.

In [3]:
# Column names come from the table's header cells.
header = [th.text.strip() for th in pc_tbl2.find_all('th')]
cell = pc_tbl2.find_all('td')

# The <td> cells arrive as one flat list; every len(header) consecutive
# cells make up one table row. Incomplete trailing chunks are ignored.
n_cols = len(header)
records = []
for start in range(0, len(cell) - n_cols + 1, n_cols):
    row = [td.text.strip() for td in cell[start:start + n_cols]]
    # Second column is the borough; skip postal codes with no borough.
    if row[1] != "Not assigned":
        records.append(dict(zip(header, row)))

# Build the frame once from the collected records: DataFrame.append was
# removed in pandas 2.0, and growing a frame row by row is quadratic.
pc_df = pd.DataFrame(records, columns=header)

pc_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [4]:
# Sanity check: (number of rows, number of columns) of the filtered table.
pc_df.shape

(103, 3)

## Part 2

### Read the CSV file and merge with Postal Code DataFrame

In [5]:
# Latitude/longitude per postal code, read from the remote CSV.
geo_df = pd.read_csv("https://cocl.us/Geospatial_data")

# Inner join (the pandas default) on the shared "Postal Code" column.
pc_df2 = pd.merge(pc_df, geo_df, how="inner", on="Postal Code")

pc_df2.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


## Part 3

### Install Folium

In [6]:
!conda install -c conda-forge folium -y

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



### Configure the Jupyter Notebook plot

In [7]:
# Render matplotlib figures inline in the notebook output.
# NOTE(review): no matplotlib plotting is visible in this notebook — confirm
# this magic is still needed.
%matplotlib inline

### Create a Map with Folium

In [8]:
import folium

# Base map: centre given as [latitude, longitude], plus the initial zoom level.
map_center = [43.6011, -79.3470]
m = folium.Map(zoom_start=10, location=map_center)

### Create a color map keyed by borough

In [9]:
# Palette of colour names, assigned to boroughs in order of first appearance.
# NOTE(review): 'lightred' is a folium.Icon colour name, not a CSS colour
# keyword; CircleMarker passes colours through to the browser, so this entry
# may not render as intended — confirm and consider e.g. 'lightcoral'.
cmap = ['red', 'blue', 'beige', 'purple', 'orange', 'darkred', 'lightred', 'green', 'darkblue', 'darkgreen']

borough = pc_df2["Borough"].unique()

# Iterate over the boroughs (not the palette) so every borough gets a colour:
# fewer boroughs than colours no longer raises IndexError, and extra boroughs
# wrap around the palette instead of being left without an entry.
cmap_dict = {name: cmap[idx % len(cmap)] for idx, name in enumerate(borough)}

cmap_dict

{'North York': 'red',
 'Downtown Toronto': 'blue',
 'Etobicoke': 'beige',
 'Scarborough': 'purple',
 'East York': 'orange',
 'York': 'darkred',
 'East Toronto': 'lightred',
 'West Toronto': 'green',
 'Central Toronto': 'darkblue',
 'Mississauga': 'darkgreen'}

### Helper function to add a marker

In [10]:
def helper(row):
    """Draw one circle marker on the global map ``m`` for a DataFrame row.

    Reads ``Latitude``, ``Longitude``, ``Postal Code`` and ``Borough`` from
    ``row``. The borough selects the marker colour through the global
    ``cmap_dict``, and the postal code is used as the popup text.
    Returns ``None``; the marker is added to ``m`` as a side effect.
    """
    position = [row["Latitude"], row["Longitude"]]
    label = row["Postal Code"]
    borough_color = cmap_dict[row["Borough"]]

    marker = folium.CircleMarker(
        location=position,
        radius=8,
        popup=label,
        color=borough_color,
        fill_color=borough_color,
        fill_opacity=0.5,
    )
    marker.add_to(m)

### Add a marker to the map for each row in the DataFrame

In [11]:
# Place one marker per postal-code row; `helper` draws directly onto `m`.
for _idx, pc_row in pc_df2.iterrows():
    helper(pc_row)

m