# Generate a DataFrame by scraping the Canadian postal codes from [Wikipedia](https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M)

***

In [1]:
import urllib.request
from bs4 import BeautifulSoup
import pandas as pd

### The function GetCanadaPostalCode scrapes the postal codes from the Wikipedia page using the [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/#calling-a-tag-is-like-calling-find-all) library

In [2]:
def searchPostalCode(postalcodes, postalcode):
    '''
    Search for postalcode and return the position of its row in postalcodes.

    Parameters:
        postalcodes: list of rows; position zero of each row is the postal code
        postalcode: the postal code to look for

    Returns:
        The index of the first row whose postal code equals postalcode,
        or -1 when postalcodes is empty or no row matches.
    '''
    for position, row in enumerate(postalcodes):
        # Position zero is the postal code; empty rows can never match
        if row and row[0] == postalcode:
            return position
    return -1

In [3]:
def GetCanadaPostalCode(baseurl):
    '''
    Download the web page at baseurl and scrape its postal code table.

    Parameters:
        baseurl: URL of the page to read

    Returns:
        column_names: list with the text of every "th" cell of the first
            table in the page (empty when the page has no table)
        postalcodes: list of rows, one list of three cell texts per row,
            with the postal code at position zero and the neighborhood
            name at position two. Rows containing a blank or
            "Not assigned" cell are skipped. When a postal code shows up
            again, its neighborhood name is appended (comma separated)
            to the row already stored for that postal code.
    '''
    # Read Web Page; the context manager closes the connection when done
    with urllib.request.urlopen(baseurl) as response:
        content = response.read()

    # Parse the document once and reuse it for the header and the rows
    soup = BeautifulSoup(content, "lxml")

    #---------------------------------------------
    #Scrape Table Header from Web Page
    #We need just the first table, the page has two tables
    column_names = []
    first_table = soup.find("table")
    if first_table is not None:
        column_names = [
            header.get_text().rstrip("\n")
            for header in first_table.find_all("th")
        ]

    #---------------------------------------------
    #Scrape Table Content from Web Page
    postalcodes = []
    # Maps each stored postal code to its row position in postalcodes,
    # so repeated codes are found without rescanning the whole list
    positions = {}

    #Loop through all the rows (tr tags) in the page
    for row in soup.find_all("tr"):
        cells = row.find_all("td")

        # Only rows with exactly three td cells are postal code rows
        if len(cells) != 3:
            continue

        #cleanup carriage return
        values = [cell.get_text().rstrip("\n") for cell in cells]

        #skip rows holding blank values or "Not assigned"
        if any(value == 'Not assigned' or value == "" for value in values):
            continue

        code = values[0]
        pos = positions.get(code)
        if pos is None:
            #append the new postalcode in the array
            positions[code] = len(postalcodes)
            postalcodes.append(values)
        else:
            #append the neighborhood name to the row already stored
            postalcodes[pos][2] += ", " + values[2]

    return column_names, postalcodes

### Read data

In [4]:
# Download the Toronto ("M") postal code page and split it into header and rows
Header, PostalCodes = GetCanadaPostalCode(
    baseurl="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
)

### Generate dataframe

In [5]:
# One DataFrame row per scraped postal code row, labelled with the scraped header
df_canada = pd.DataFrame(data=PostalCodes, columns=Header)

### Check results

In [6]:
'''
Sorting options columns: Postal Code, Borough, Neighborhood
'''
# Display the rows ordered by postal code; the sorted result is not assigned back to df_canada
df_canada.sort_values(by='Postal Code', ascending=True)

Unnamed: 0,Postal Code,Borough,Neighborhood
6,M1B,Scarborough,"Malvern, Rouge"
12,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
18,M1E,Scarborough,"Guildwood, Morningside, West Hill"
22,M1G,Scarborough,Woburn
26,M1H,Scarborough,Cedarbrae
32,M1J,Scarborough,Scarborough Village
38,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
44,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
51,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
58,M1N,Scarborough,"Birch Cliff, Cliffside West"
