In [1]:
import pandas as pd  # For data manipulation and analysis
from bs4 import BeautifulSoup  # For web scraping and HTML parsing
import requests  # To make HTTP requests to access webpage content
import matplotlib.pyplot as plt  # For plotting (not currently used in this code)

# Scrape the 5-year variable mortgage rates listed on Ratehub into `df5v`
# (columns: 'Variable rate 5y', 'Provider').
url = 'https://www.ratehub.ca/best-mortgage-rates/5-year/variable'

# Fetch the page. The timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() stops on an HTTP error instead of letting
# an error page be parsed as if it were rate data.
page = requests.get(url, timeout=30)
page.raise_for_status()

# Name the parser explicitly: the generic 'html' feature lets BeautifulSoup
# pick whichever parser happens to be installed, so the parsed tree could
# differ from one machine to another.
soup = BeautifulSoup(page.text, 'html.parser')

# Column names are the non-empty header cells of the first <thead> on the page.
header = soup.find('thead')
if header is None:
    raise ValueError(f'No <thead> found at {url}; the page layout may have changed.')
header_text = [th.get_text(strip=True) for th in header.find_all('th') if th.get_text(strip=True)]

# Walk every <tr> of every <tbody> and keep the text of its first three cells
# (Rate, Provider, Payment). Rows with fewer than three cells are skipped.
table_rows = soup.find_all('tbody')
data = []
for row in table_rows:
    for tr in row.find_all('tr'):
        columns = tr.find_all('td')
        if len(columns) >= 3:
            # get_text(strip=True) joins nested text fragments with no
            # separator. Keep this call identical in every scraping cell so
            # the 'Provider' strings match when the tables are merged.
            data.append([td.get_text(strip=True) for td in columns[:3]])

# Only three cells are kept per row, so only the first three header names
# apply; slicing avoids a column-count mismatch if the page has extra headers.
df5v = (
    pd.DataFrame(data, columns=header_text[:3])
    .drop(columns=['Payment'])
    .rename(columns={'Rate': 'Variable rate 5y'})
    # 'Provider' is the merge key downstream; repeated providers would
    # multiply rows in every merge. Keep the first row per provider
    # (presumably the best rate, assuming the page lists rates best-first;
    # TODO confirm).
    .drop_duplicates(subset='Provider', keep='first')
    .reset_index(drop=True)
)

In [2]:
# Scrape the 3-year variable mortgage rates listed on Ratehub into `df3v`
# (columns: 'Variable rate 3y', 'Provider'), then start `result_df` by
# combining them with the 5-year variable rates in `df5v`.
url = 'https://www.ratehub.ca/best-mortgage-rates/3-year/variable'

# Fetch the page. The timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() stops on an HTTP error instead of letting
# an error page be parsed as if it were rate data.
page = requests.get(url, timeout=30)
page.raise_for_status()

# Name the parser explicitly: the generic 'html' feature lets BeautifulSoup
# pick whichever parser happens to be installed, so the parsed tree could
# differ from one machine to another.
soup = BeautifulSoup(page.text, 'html.parser')

# Column names are the non-empty header cells of the first <thead> on the page.
header = soup.find('thead')
if header is None:
    raise ValueError(f'No <thead> found at {url}; the page layout may have changed.')
header_text = [th.get_text(strip=True) for th in header.find_all('th') if th.get_text(strip=True)]

# Walk every <tr> of every <tbody> and keep the text of its first three cells
# (Rate, Provider, Payment). Rows with fewer than three cells are skipped.
table_rows = soup.find_all('tbody')
data = []
for row in table_rows:
    for tr in row.find_all('tr'):
        columns = tr.find_all('td')
        if len(columns) >= 3:
            # get_text(strip=True) joins nested text fragments with no
            # separator. Keep this call identical in every scraping cell so
            # the 'Provider' strings match when the tables are merged.
            data.append([td.get_text(strip=True) for td in columns[:3]])

# Only three cells are kept per row, so only the first three header names
# apply; slicing avoids a column-count mismatch if the page has extra headers.
df3v = (
    pd.DataFrame(data, columns=header_text[:3])
    .drop(columns=['Payment'])
    .rename(columns={'Rate': 'Variable rate 3y'})
    # 'Provider' is the merge key; repeated providers would multiply rows in
    # every merge. Keep the first row per provider (presumably the best rate,
    # assuming the page lists rates best-first; TODO confirm).
    .drop_duplicates(subset='Provider', keep='first')
    .reset_index(drop=True)
)

# Outer merge keeps every provider found in either table; a provider missing
# from one table gets NaN in that table's rate column.
result_df = df5v.merge(df3v, on='Provider', how='outer')
result_df = result_df[['Provider', 'Variable rate 3y', 'Variable rate 5y']]

In [3]:
# Scrape the 1-year fixed mortgage rates listed on Ratehub into `df1f`
# (columns: 'Fixed rate 1y', 'Provider') and add them to `result_df`.
url = 'https://www.ratehub.ca/best-mortgage-rates/1-year/fixed'

# Fetch the page. The timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() stops on an HTTP error instead of letting
# an error page be parsed as if it were rate data.
page = requests.get(url, timeout=30)
page.raise_for_status()

# Name the parser explicitly: the generic 'html' feature lets BeautifulSoup
# pick whichever parser happens to be installed, so the parsed tree could
# differ from one machine to another.
soup = BeautifulSoup(page.text, 'html.parser')

# Column names are the non-empty header cells of the first <thead> on the page.
header = soup.find('thead')
if header is None:
    raise ValueError(f'No <thead> found at {url}; the page layout may have changed.')
header_text = [th.get_text(strip=True) for th in header.find_all('th') if th.get_text(strip=True)]

# Walk every <tr> of every <tbody> and keep the text of its first three cells
# (Rate, Provider, Payment). Rows with fewer than three cells are skipped.
table_rows = soup.find_all('tbody')
data = []
for row in table_rows:
    for tr in row.find_all('tr'):
        columns = tr.find_all('td')
        if len(columns) >= 3:
            # get_text(strip=True) joins nested text fragments with no
            # separator. Keep this call identical in every scraping cell so
            # the 'Provider' strings match when the tables are merged.
            data.append([td.get_text(strip=True) for td in columns[:3]])

# Only three cells are kept per row, so only the first three header names
# apply; slicing avoids a column-count mismatch if the page has extra headers.
df1f = (
    pd.DataFrame(data, columns=header_text[:3])
    .drop(columns=['Payment'])
    .rename(columns={'Rate': 'Fixed rate 1y'})
    # 'Provider' is the merge key; repeated providers would multiply rows in
    # the merge. Keep the first row per provider (presumably the best rate,
    # assuming the page lists rates best-first; TODO confirm).
    .drop_duplicates(subset='Provider', keep='first')
    .reset_index(drop=True)
)

# Remove any 'Fixed rate 1y' column left by an earlier run of this cell so
# that re-running it does not produce 'Fixed rate 1y_x' / 'Fixed rate 1y_y'.
# The outer merge keeps providers that appear in either frame.
result_df = (
    result_df
    .drop(columns=['Fixed rate 1y'], errors='ignore')
    .merge(df1f, on='Provider', how='outer')
)

In [4]:
# Scrape the 2-year fixed mortgage rates listed on Ratehub into `df2f`
# (columns: 'Fixed rate 2y', 'Provider') and add them to `result_df`.
url = 'https://www.ratehub.ca/best-mortgage-rates/2-year/fixed'

# Fetch the page. The timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() stops on an HTTP error instead of letting
# an error page be parsed as if it were rate data.
page = requests.get(url, timeout=30)
page.raise_for_status()

# Name the parser explicitly: the generic 'html' feature lets BeautifulSoup
# pick whichever parser happens to be installed, so the parsed tree could
# differ from one machine to another.
soup = BeautifulSoup(page.text, 'html.parser')

# Column names are the non-empty header cells of the first <thead> on the page.
header = soup.find('thead')
if header is None:
    raise ValueError(f'No <thead> found at {url}; the page layout may have changed.')
header_text = [th.get_text(strip=True) for th in header.find_all('th') if th.get_text(strip=True)]

# Walk every <tr> of every <tbody> and keep the text of its first three cells
# (Rate, Provider, Payment). Rows with fewer than three cells are skipped.
table_rows = soup.find_all('tbody')
data = []
for row in table_rows:
    for tr in row.find_all('tr'):
        columns = tr.find_all('td')
        if len(columns) >= 3:
            # get_text(strip=True) joins nested text fragments with no
            # separator. Keep this call identical in every scraping cell so
            # the 'Provider' strings match when the tables are merged.
            data.append([td.get_text(strip=True) for td in columns[:3]])

# Only three cells are kept per row, so only the first three header names
# apply; slicing avoids a column-count mismatch if the page has extra headers.
df2f = (
    pd.DataFrame(data, columns=header_text[:3])
    .drop(columns=['Payment'])
    .rename(columns={'Rate': 'Fixed rate 2y'})
    # 'Provider' is the merge key; repeated providers would multiply rows in
    # the merge. Keep the first row per provider (presumably the best rate,
    # assuming the page lists rates best-first; TODO confirm).
    .drop_duplicates(subset='Provider', keep='first')
    .reset_index(drop=True)
)

# Remove any 'Fixed rate 2y' column left by an earlier run of this cell so
# that re-running it does not produce 'Fixed rate 2y_x' / 'Fixed rate 2y_y'.
# The outer merge keeps providers that appear in either frame.
result_df = (
    result_df
    .drop(columns=['Fixed rate 2y'], errors='ignore')
    .merge(df2f, on='Provider', how='outer')
)

In [5]:
# Scrape the 3-year fixed mortgage rates listed on Ratehub into `df3f`
# (columns: 'Fixed rate 3y', 'Provider') and add them to `result_df`.
url = 'https://www.ratehub.ca/best-mortgage-rates/3-year/fixed'

# Fetch the page. The timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() stops on an HTTP error instead of letting
# an error page be parsed as if it were rate data.
page = requests.get(url, timeout=30)
page.raise_for_status()

# Name the parser explicitly: the generic 'html' feature lets BeautifulSoup
# pick whichever parser happens to be installed, so the parsed tree could
# differ from one machine to another.
soup = BeautifulSoup(page.text, 'html.parser')

# Column names are the non-empty header cells of the first <thead> on the page.
header = soup.find('thead')
if header is None:
    raise ValueError(f'No <thead> found at {url}; the page layout may have changed.')
header_text = [th.get_text(strip=True) for th in header.find_all('th') if th.get_text(strip=True)]

# Walk every <tr> of every <tbody> and keep the text of its first three cells
# (Rate, Provider, Payment). Rows with fewer than three cells are skipped.
table_rows = soup.find_all('tbody')
data = []
for row in table_rows:
    for tr in row.find_all('tr'):
        columns = tr.find_all('td')
        if len(columns) >= 3:
            # get_text(strip=True) joins nested text fragments with no
            # separator. Keep this call identical in every scraping cell so
            # the 'Provider' strings match when the tables are merged.
            data.append([td.get_text(strip=True) for td in columns[:3]])

# Only three cells are kept per row, so only the first three header names
# apply; slicing avoids a column-count mismatch if the page has extra headers.
df3f = (
    pd.DataFrame(data, columns=header_text[:3])
    .drop(columns=['Payment'])
    .rename(columns={'Rate': 'Fixed rate 3y'})
    # 'Provider' is the merge key; repeated providers would multiply rows in
    # the merge. Keep the first row per provider (presumably the best rate,
    # assuming the page lists rates best-first; TODO confirm).
    .drop_duplicates(subset='Provider', keep='first')
    .reset_index(drop=True)
)

# Remove any 'Fixed rate 3y' column left by an earlier run of this cell so
# that re-running it does not produce 'Fixed rate 3y_x' / 'Fixed rate 3y_y'.
# The outer merge keeps providers that appear in either frame.
result_df = (
    result_df
    .drop(columns=['Fixed rate 3y'], errors='ignore')
    .merge(df3f, on='Provider', how='outer')
)

In [6]:
# Scrape the 4-year fixed mortgage rates listed on Ratehub into `df4f`
# (columns: 'Fixed rate 4y', 'Provider') and add them to `result_df`.
url = 'https://www.ratehub.ca/best-mortgage-rates/4-year/fixed'

# Fetch the page. The timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() stops on an HTTP error instead of letting
# an error page be parsed as if it were rate data.
page = requests.get(url, timeout=30)
page.raise_for_status()

# Name the parser explicitly: the generic 'html' feature lets BeautifulSoup
# pick whichever parser happens to be installed, so the parsed tree could
# differ from one machine to another.
soup = BeautifulSoup(page.text, 'html.parser')

# Column names are the non-empty header cells of the first <thead> on the page.
header = soup.find('thead')
if header is None:
    raise ValueError(f'No <thead> found at {url}; the page layout may have changed.')
header_text = [th.get_text(strip=True) for th in header.find_all('th') if th.get_text(strip=True)]

# Walk every <tr> of every <tbody> and keep the text of its first three cells
# (Rate, Provider, Payment). Rows with fewer than three cells are skipped.
table_rows = soup.find_all('tbody')
data = []
for row in table_rows:
    for tr in row.find_all('tr'):
        columns = tr.find_all('td')
        if len(columns) >= 3:
            # get_text(strip=True) joins nested text fragments with no
            # separator. Keep this call identical in every scraping cell so
            # the 'Provider' strings match when the tables are merged.
            data.append([td.get_text(strip=True) for td in columns[:3]])

# Only three cells are kept per row, so only the first three header names
# apply; slicing avoids a column-count mismatch if the page has extra headers.
df4f = (
    pd.DataFrame(data, columns=header_text[:3])
    .drop(columns=['Payment'])
    .rename(columns={'Rate': 'Fixed rate 4y'})
    # 'Provider' is the merge key; repeated providers would multiply rows in
    # the merge. Keep the first row per provider (presumably the best rate,
    # assuming the page lists rates best-first; TODO confirm).
    .drop_duplicates(subset='Provider', keep='first')
    .reset_index(drop=True)
)

# Remove any 'Fixed rate 4y' column left by an earlier run of this cell so
# that re-running it does not produce 'Fixed rate 4y_x' / 'Fixed rate 4y_y'.
# The outer merge keeps providers that appear in either frame.
result_df = (
    result_df
    .drop(columns=['Fixed rate 4y'], errors='ignore')
    .merge(df4f, on='Provider', how='outer')
)

In [7]:
# Scrape the 5-year fixed mortgage rates listed on Ratehub into `df5f`
# (columns: 'Fixed rate 5y', 'Provider') and add them to `result_df`.
url = 'https://www.ratehub.ca/best-mortgage-rates/5-year/fixed'

# Fetch the page. The timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() stops on an HTTP error instead of letting
# an error page be parsed as if it were rate data.
page = requests.get(url, timeout=30)
page.raise_for_status()

# Name the parser explicitly: the generic 'html' feature lets BeautifulSoup
# pick whichever parser happens to be installed, so the parsed tree could
# differ from one machine to another.
soup = BeautifulSoup(page.text, 'html.parser')

# Column names are the non-empty header cells of the first <thead> on the page.
header = soup.find('thead')
if header is None:
    raise ValueError(f'No <thead> found at {url}; the page layout may have changed.')
header_text = [th.get_text(strip=True) for th in header.find_all('th') if th.get_text(strip=True)]

# Walk every <tr> of every <tbody> and keep the text of its first three cells
# (Rate, Provider, Payment). Rows with fewer than three cells are skipped.
table_rows = soup.find_all('tbody')
data = []
for row in table_rows:
    for tr in row.find_all('tr'):
        columns = tr.find_all('td')
        if len(columns) >= 3:
            # get_text(strip=True) joins nested text fragments with no
            # separator. Keep this call identical in every scraping cell so
            # the 'Provider' strings match when the tables are merged.
            data.append([td.get_text(strip=True) for td in columns[:3]])

# Only three cells are kept per row, so only the first three header names
# apply; slicing avoids a column-count mismatch if the page has extra headers.
df5f = (
    pd.DataFrame(data, columns=header_text[:3])
    .drop(columns=['Payment'])
    .rename(columns={'Rate': 'Fixed rate 5y'})
    # 'Provider' is the merge key; repeated providers would multiply rows in
    # the merge. Keep the first row per provider (presumably the best rate,
    # assuming the page lists rates best-first; TODO confirm).
    .drop_duplicates(subset='Provider', keep='first')
    .reset_index(drop=True)
)

# Remove any 'Fixed rate 5y' column left by an earlier run of this cell so
# that re-running it does not produce 'Fixed rate 5y_x' / 'Fixed rate 5y_y'.
# The outer merge keeps providers that appear in either frame.
result_df = (
    result_df
    .drop(columns=['Fixed rate 5y'], errors='ignore')
    .merge(df5f, on='Provider', how='outer')
)

In [8]:
# Show at most the first 20 rows of the combined rate table.
result_df.head(20)

Unnamed: 0,Provider,Variable rate 3y,Variable rate 5y,Fixed rate 1y,Fixed rate 2y,Fixed rate 3y,Fixed rate 4y,Fixed rate 5y
0,Canadian Lender,5.05%,4.70%,7.15%,5.49%,4.59%,4.89%,3.99%
1,CanwiseA Ratehub Company,,4.95%,,5.54%,4.44%,,4.29%
2,Big 6 Bank,,5.00%,5.89%,5.34%,4.39%,4.49%,4.44%
3,First National,,5.00%,6.63%,5.92%,4.94%,4.89%,4.54%
4,CMLS Financial,,5.05%,,,,,4.44%
5,CIBC,5.45%,5.15%,6.29%,5.59%,4.79%,4.64%,4.34%
6,Desjardins,,5.20%,6.64%,5.64%,4.54%,4.34%,4.29%
7,MCAP,,5.25%,7.44%,7.14%,5.09%,5.04%,4.74%
8,Scotiabank,6.30%,5.25%,6.24%,5.59%,4.64%,4.74%,4.69%
9,Alterna Savings,5.30%,5.30%,6.44%,6.34%,4.84%,5.54%,4.69%
