# Segmenting and Clustering Neighbourhoods in Toronto - Part 1

In [5]:
# Importing the necessary libraries
from bs4 import BeautifulSoup
import requests
import numpy as np
import pandas as pd

In [6]:
# Downloading the Wikipedia page with requests
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# requests has no default timeout, so without one a stalled connection
# would block this cell forever.
result_url = requests.get(url, timeout=30)
# Stop here on a 4xx/5xx response instead of handing an error page to the parser.
result_url.raise_for_status()

In [7]:
# Extracting the Table using BeautifulSoup
soup = BeautifulSoup(result_url.content, 'html.parser')
table = soup.find('table')
# soup.find returns None when the document has no <table>; fail with a clear
# message rather than an AttributeError on the find_all call below.
if table is None:
    raise ValueError('No <table> element found in the downloaded page')

trs = table.find_all('tr')
rows = []
for tr in trs:
    cells = tr.find_all('td')
    # Rows with no <td> cells are skipped (as before); rows with fewer than
    # the three cells read below are skipped too, instead of raising IndexError.
    if len(cells) >= 3:
        rows.append(cells)

neighborhood_list = []
for row in rows:
    # strip() rather than rstrip(): leading whitespace would otherwise make
    # the 'Not assigned' comparisons below miss.
    postalcode = row[0].text.strip()
    borough = row[1].text.strip()
    neighborhood = row[2].text.strip()

    # Rows without an assigned borough are dropped entirely.
    if borough == 'Not assigned':
        continue

    # In case the neighborhood is not assigned, fall back to the borough name
    if neighborhood == 'Not assigned':
        neighborhood = borough
    neighborhood_list.append([postalcode, borough, neighborhood])

In [8]:
# Build a pandas DataFrame from the scraped rows, then collapse it to one row
# per postal code: keep the first borough and comma-join the neighborhoods.
column_names = ['Postal Code', 'Borough', 'Neighborhood']
df = (
    pd.DataFrame(neighborhood_list, columns=column_names)
    .groupby('Postal Code', as_index=False)
    .agg({'Borough': 'first', 'Neighborhood': ', '.join})
)
df.head(20)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,Malvern / Rouge
1,M1C,Scarborough,Rouge Hill / Port Union / Highland Creek
2,M1E,Scarborough,Guildwood / Morningside / West Hill
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,Kennedy Park / Ionview / East Birchmount Park
7,M1L,Scarborough,Golden Mile / Clairlea / Oakridge
8,M1M,Scarborough,Cliffside / Cliffcrest / Scarborough Village West
9,M1N,Scarborough,Birch Cliff / Cliffside West


In [9]:
# Checking the shape of the DataFrame, displayed as a (rows, columns) tuple.
# After the groupby on 'Postal Code', each postal code occupies exactly one row.
df.shape

(103, 3)