In [1]:
import pandas as pd  # For data manipulation and analysis
from bs4 import BeautifulSoup  # For web scraping and HTML parsing
import requests  # To make HTTP requests to access webpage content
import matplotlib.pyplot as plt  # For plotting (not currently used in this code)

# Define the URL of the webpage to scrape mortgage rate data
url = 'https://www.ratehub.ca/best-mortgage-rates/5-year/variable'

# Send a GET request to fetch the HTML content of the webpage.
# A timeout is passed explicitly because requests otherwise waits indefinitely,
# which would hang the notebook if the server never answers.
page = requests.get(url, timeout=30)

# Stop on 4xx/5xx responses instead of silently parsing an error page
page.raise_for_status()

# Parse the HTML content with BeautifulSoup. The parser is named explicitly so
# the parse tree does not depend on which optional parsers are installed.
soup = BeautifulSoup(page.text, 'html.parser')

# Gather every <table> tag that carries the 'table-container' class and keep
# only its text content, with leading/trailing whitespace removed
table = [
    element.text.strip()
    for element in soup.find_all('table', class_='table-container')
]

# Locate the first <thead> on the page; its <th> cells carry the column names
header = soup.find('thead')
if header is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the lookup below
    raise ValueError('No <thead> element found on the page; cannot determine column names')

# Read each header cell's text once and drop cells that are empty after stripping
header_text = [
    text
    for text in (th.get_text(strip=True) for th in header.find_all('th'))
    if text
]

# Create an empty DataFrame with the extracted column headers
# (placeholder only: df is rebuilt from the scraped rows further down)
df = pd.DataFrame(columns=header_text)

# Find all <tbody> elements on the page (they contain the actual rate rows)
table_rows = soup.find_all('tbody')

# Initialize an empty list to store extracted row data
data = []

# Walk every <tr> of every <tbody> and keep the first three cells:
# Rate, Provider, and Payment
for row in table_rows:
    for tr in row.find_all('tr'):
        columns = tr.find_all('td')
        # Skip rows that do not have the three expected cells
        if len(columns) < 3:
            continue
        rate = columns[0].get_text(strip=True)
        # The provider cell appears to hold more than one text fragment; without
        # a separator they are glued together (e.g. "CanwiseA Ratehub Company"),
        # so join the stripped fragments with a single space.
        provider = columns[1].get_text(' ', strip=True)
        payment = columns[2].get_text(strip=True)
        # Append the extracted values as a new row in the data list
        data.append([rate, provider, payment])

# Limit the data to the first 20 rows
data = data[:20]

# Build the final DataFrame; each row has three values, so header_text is
# expected to contain exactly three column names
df = pd.DataFrame(data, columns=header_text)

# Display the resulting DataFrame
df

Unnamed: 0,Rate,Provider,Payment
0,4.70%,Canadian Lender,"$2,231"
1,4.95%,CanwiseA Ratehub Company,"$2,287"
2,5.00%,Big 6 Bank,"$2,299"
3,5.00%,First National,"$2,299"
4,5.05%,CMLS Financial,"$2,310"
5,5.15%,CIBC,"$2,332"
6,5.20%,Desjardins,"$2,344"
7,5.25%,MCAP,"$2,355"
8,5.30%,Scotiabank,"$2,366"
9,5.30%,Alterna Savings,"$2,366"
