# Transform HTML Table into DataFrame

In [1]:
# import required libraries
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
import re

In [2]:
# Wikipedia page listing the Canadian postal codes that start with "M"
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"


In [3]:
# send http request and get response using requests
# A timeout keeps a stalled connection from hanging the kernel forever, and
# raise_for_status() turns a 4xx/5xx reply into an immediate, visible error
# instead of letting an error page be parsed as if it were the table.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [4]:
# sanity check: the printed repr shows the HTTP status code of the reply
print(response)

<Response [200]>


In [5]:
# parse the raw response bytes into a BeautifulSoup tree using Python's
# built-in 'html.parser' backend
html_page = bs(response.content, 'html.parser')

In [6]:
# Take the first <table> tag in the page and collect all of its <tr> tags
# as a list (find_all returns every match, so any header rows are included)
tableData = html_page.table.find_all('tr')

In [7]:
# check the tableData: extract the parenthesised neighbourhood list from the
# first cell of one row and turn its ' / ' separators into commas.
# The pattern is a raw string so that `\(` reaches the regex engine as an
# escaped parenthesis instead of being an invalid Python string escape.
re.findall(r"\((.*?)\)", tableData[3].find('td').find('span').text)[0].replace(' / ', ',')

'Guildwood,Morningside,West Hill'

In [8]:
def parse_table(tableData):
    """Collect postal code, borough and neighbourhood from table rows.

    Every ``<td>`` cell is expected to carry the postal code in a ``<b>`` tag
    and, in a ``<span>`` tag, text shaped like ``Borough(Name / Name)``.

    Parameters
    ----------
    tableData : iterable
        Row elements exposing ``find_all('td')``. Each returned cell must
        expose ``find(tag_name)`` yielding an object with a ``.text``
        attribute, or ``None`` when the tag is absent.

    Returns
    -------
    dict
        ``{'Postal Code': [...], 'Borough': [...], 'Neighborhood': [...]}``
        with three position-aligned lists of strings, ready to be passed to
        ``pd.DataFrame``.

    Notes
    -----
    * Cells whose span text is ``'Not assigned'`` are skipped.
    * Cells missing the ``<b>`` or ``<span>`` tag are skipped instead of
      raising ``AttributeError``.
    * When the span text has no parenthesised part, the borough name is used
      as the neighbourhood instead of raising ``IndexError``.
    * Multiple neighbourhoods separated by ``' / '`` are joined with ``','``.
    """
    data = {'Postal Code': [], 'Borough': [], 'Neighborhood': []}

    for row in tableData:
        # Header rows have no <td> cells, so find_all simply yields nothing.
        for td in row.find_all('td'):
            code_tag = td.find('b')
            span_tag = td.find('span')
            if code_tag is None or span_tag is None:
                # Cell does not follow the expected layout; nothing to extract.
                continue

            # Strip first so stray whitespace/newlines cannot defeat the
            # 'Not assigned' comparison below.
            borough_ = span_tag.text.strip()
            if borough_ == 'Not assigned':
                continue

            # Everything before the first '(' is the borough name.
            borough = borough_.partition('(')[0].strip()

            # Raw string: `\(` must be a regex escape, not a string escape.
            match = re.search(r'\((.*?)\)', borough_)
            if match:
                neighborhood = match.group(1).replace(' / ', ',')
            else:
                # No parenthesised list: reuse the borough name.
                neighborhood = borough

            # append into the dict
            data['Postal Code'].append(code_tag.text.strip())
            data['Borough'].append(borough)
            data['Neighborhood'].append(neighborhood)

    return data
        

In [9]:
# run the parser over the collected <tr> rows to get a dict of column lists
data = parse_table(tableData)


In [10]:
# build the DataFrame; the dict keys become the column names
df = pd.DataFrame(data)

In [14]:
df

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park,Harbourfront"
3,M6A,North York,"Lawrence Manor,Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway,Montgomery Road,Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East TorontoBusiness reply mail Processing Cen...,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South,King's Mill Park,Sunnylea,Humbe..."


....

# `any()` in Pandas DataFrame

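Syntax: `DataFrame.any(axis=0, bool_only=None, skipna=True, level=None, **kwargs)`

Note: the `level` parameter was deprecated in pandas 1.3 and removed in pandas 2.0; on current versions the signature is `DataFrame.any(*, axis=0, bool_only=False, skipna=True, **kwargs)`.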

In [2]:
# Example: a small all-boolean frame used to demonstrate `any()`
import pandas as pd

data = [
    [True, False, True],    # row 0 holds at least one True
    [False, False, False],  # row 1 is entirely False
]
df = pd.DataFrame(data=data, columns=['col1', 'col2', 'col3'])

In [3]:
df

Unnamed: 0,col1,col2,col3
0,True,False,True
1,False,False,False


Check whether any value is `True`, reducing along the index (`axis=0`), which gives one result per column.

Whether each column contains at least one `True` element (the default).

In [5]:
df.any()

col1     True
col2    False
col3     True
dtype: bool

Q: What is `any()` used for in a pandas DataFrame? A: It reports whether at least one element along the chosen axis is `True` (or truthy).

Aggregating over the columns with `axis='columns'`: whether each row contains at least one `True` element.

In [8]:
df.any(axis='columns')

0     True
1    False
dtype: bool

Aggregating over the entire DataFrame with `axis=None`.

In [9]:
df.any(axis=None)

True