# Request HTML

In [1]:
!pip install requests



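Side note: a different ZIP-code listing (the request in the next cell uses the Wikipedia page instead): https://florida.hometownlocator.com/zip-codes/zipcodes,city,saint%20petersburg.cfm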

In [2]:
import requests

# Wikipedia page: list of postal codes of Canada, letter M
url = r'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# A timeout keeps the cell from hanging indefinitely on a stalled connection
r = requests.get(url, timeout=30)

# Stop here on an HTTP error (4xx/5xx) instead of parsing an error page further down
r.raise_for_status()

# Show the first 100 bytes as a quick check that HTML came back
print (r.content[:100])

b'<!DOCTYPE html>\n<html class="client-nojs" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title'


# Process as BeautifulSoup

In [3]:
!pip install beautifulsoup4



In [4]:
from bs4 import BeautifulSoup

# Build the parse tree from the raw response bytes using Python's built-in HTML parser
soup = BeautifulSoup(r.content, features='html.parser')

# Parse elements into Pandas DataFrame

In [5]:
# CSS selector: every <tr> inside a <tbody> inside an element with class "wikitable"
rows = soup.select('.wikitable tbody tr')

In [6]:
# The first selected row carries the column headings; its cell texts are
# separated by newlines
row = rows[0]
name = row.get_text().strip().split('\n')

print(name)

['Postcode', 'Borough', 'Neighbourhood']


In [7]:
# Drop the first row (the column headings) so only data rows remain.
# NOTE(review): this reassigns `rows` from itself, so re-running this cell on its
# own removes one more row each time -- re-run from the cell that selects the rows.
rows = rows[1:]

In [8]:
# Turn every table row into a record with Postcode / Borough / Neighborhood keys
data = []

for row in rows:
    # Cell texts within a row are separated by newlines
    pc = row.get_text().strip().split('\n')
    print(pc)

    d = {
        'Postcode': pc[0],
        'Borough': pc[1],
        'Neighborhood': pc[2],
    }
    data.append(d)

['M1A', 'Not assigned', 'Not assigned']
['M2A', 'Not assigned', 'Not assigned']
['M3A', 'North York', 'Parkwoods']
['M4A', 'North York', 'Victoria Village']
['M5A', 'Downtown Toronto', 'Harbourfront']
['M5A', 'Downtown Toronto', 'Regent Park']
['M6A', 'North York', 'Lawrence Heights']
['M6A', 'North York', 'Lawrence Manor']
['M7A', "Queen's Park", 'Not assigned']
['M8A', 'Not assigned', 'Not assigned']
['M9A', 'Etobicoke', 'Islington Avenue']
['M1B', 'Scarborough', 'Rouge']
['M1B', 'Scarborough', 'Malvern']
['M2B', 'Not assigned', 'Not assigned']
['M3B', 'North York', 'Don Mills North']
['M4B', 'East York', 'Woodbine Gardens']
['M4B', 'East York', 'Parkview Hill']
['M5B', 'Downtown Toronto', 'Ryerson']
['M5B', 'Downtown Toronto', 'Garden District']
['M6B', 'North York', 'Glencairn']
['M7B', 'Not assigned', 'Not assigned']
['M8B', 'Not assigned', 'Not assigned']
['M9B', 'Etobicoke', 'Cloverdale']
['M9B', 'Etobicoke', 'Islington']
['M9B', 'Etobicoke', 'Martin Grove']
['M9B', 'Etobicoke',

In [9]:
!pip install pandas
!pip install numpy



In [10]:
import pandas as pd
import numpy as np

In [11]:
# Build the DataFrame from the list of per-row dicts; the dict keys become the columns
df = pd.DataFrame(data)

In [12]:
# Preview the first 10 rows of the newly built DataFrame
df.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Not assigned
9,M8A,Not assigned,Not assigned


# Process Postcodes

## Filter out Unassigned Boroughs

In [13]:
# Keep only the rows whose Borough is something other than the 'Not assigned' placeholder
has_borough = df['Borough'].ne('Not assigned')
df = df[has_borough]

## If a Neighborhood is not assigned, use the Borough name as the Neighborhood name

In [14]:
# List the rows whose Neighborhood is still the 'Not assigned' placeholder
df.loc[df['Neighborhood'].eq('Not assigned')]

Unnamed: 0,Postcode,Borough,Neighborhood
8,M7A,Queen's Park,Not assigned


In [15]:
# Where Neighborhood is missing or is the 'Not assigned' placeholder, use the
# Borough name instead.
# This replaces the earlier np.NaN + chained `fillna(..., inplace=True)` version:
# np.NaN was removed in NumPy 2.0, and an in-place fillna on `df['Neighborhood']`
# does not update `df` under pandas copy-on-write. assign() returns a new frame,
# so it also avoids writing into the filtered slice from the previous step.
unnamed = df['Neighborhood'].isna() | df['Neighborhood'].eq('Not assigned')
df = df.assign(Neighborhood=df['Neighborhood'].mask(unnamed, df['Borough']))

In [16]:
# Preview the first 10 rows after the Neighborhood column was filled in
df.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


## Combine multiple Neighborhoods on same line, separated by commas

In [17]:
# One row per (Postcode, Borough) pair; the Neighborhood names of each pair are
# joined into a single ', '-separated string
by_postcode = df.groupby(['Postcode', 'Borough'])['Neighborhood']
df = by_postcode.apply(lambda names: ', '.join(map(str, names))).reset_index()

In [18]:
# Preview the first 10 rows of the grouped result
df.head(10)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


# Save DataFrame as JSON file

In [19]:
# Convert the DataFrame to a plain dict so it can be passed to json.dump;
# pandas' default orient ('dict') maps each column name to an {index: value} dict
j = df.to_dict()

In [20]:
import json

# Serialise the dict and write it to YYZ_Neighborhoods.json in the working directory
with open('YYZ_Neighborhoods.json', 'w') as f:
    f.write(json.dumps(j))

# Shape of DataFrame

In [21]:
# (number of rows, number of columns) of df at this point
df.shape

(103, 3)