In [1]:
from io import StringIO

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.firefox import GeckoDriverManager

In [2]:
# Start a Firefox WebDriver session without a visible browser window.
options = webdriver.FirefoxOptions()
options.add_argument("--headless")  # Run in headless mode (no GUI)

# GeckoDriverManager().install() returns the driver path that Service is built from.
gecko_service = Service(GeckoDriverManager().install())
driver = webdriver.Firefox(service=gecko_service, options=options)

In [3]:
# Scrape the Statistics Canada table viewer (pid=1410028802) one reference
# month at a time. Each page load yields a small two-column frame: the row
# labels plus that month's values. The frames are collected in `dfs_list`.
# NOTE(review): no driver.quit() is visible in this notebook; close the
# browser session once scraping is finished.
dfs_list = []
month_names = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]

# One waiter (10 second timeout) is enough; it is reused for every page load.
wait = WebDriverWait(driver, 10)

for year in range(2016, 2025):  # 2016 through 2024 inclusive
    for month, month_name in enumerate(month_names, start=1):
        # The same period is used as both ends of the reference range (YYYYMM01).
        period = f"{year}{month:02d}01"
        url2 = (
            "https://www150.statcan.gc.ca/t1/tbl1/en/tv.action?pid=1410028802"
            "&pickMembers%5B0%5D=1.1&pickMembers%5B1%5D=3.1"
            f"&cubeTimeFrame.startMonth={month:02d}"
            f"&cubeTimeFrame.startYear={year}"
            f"&referencePeriods={period}%2C{period}"
        )
        driver.get(url2)

        # Block until the first <table> element is present in the DOM.
        table = wait.until(EC.presence_of_element_located((By.TAG_NAME, "table")))
        table_html = table.get_attribute('outerHTML')

        # Wrap the markup in StringIO: passing a literal HTML string to
        # pd.read_html is deprecated in recent pandas releases.
        df_temp = pd.read_html(StringIO(table_html))[0]

        # Keep parsed rows 1-5 (row 0 is skipped) and columns 0 and 3.
        # List indexers raise IndexError if the table is shorter than expected.
        df_temp = df_temp.iloc[[1, 2, 3, 4, 5], [0, 3]]
        df_temp.columns = ['Class of Employee', f"{month_name} {year}"]
        dfs_list.append(df_temp)

In [4]:
# Inspect the scraped frames: a list holding one two-column DataFrame per month.
dfs_list

[                        Class of Employee January 2016
 1  Total employed, all classes of workers      17871.6
 2                               Employees      15204.3
 3                Public sector employees8       3516.5
 4               Private sector employees9      11687.8
 5                         Self-employed10       2667.3,
                         Class of Employee February 2016
 1  Total employed, all classes of workers       17889.0
 2                               Employees       15213.1
 3                Public sector employees8        3513.0
 4               Private sector employees9       11700.1
 5                         Self-employed10        2675.9,
                         Class of Employee March 2016
 1  Total employed, all classes of workers    17892.8
 2                               Employees    15220.2
 3                Public sector employees8     3499.1
 4               Private sector employees9    11721.2
 5                         Self-employed10     267

In [6]:
# Move the shared label column into the index of every monthly frame so the
# frames can later be aligned on it. set_index returns new frames; the
# entries of dfs_list are left unmodified.
dfs = list(map(lambda monthly: monthly.set_index('Class of Employee'), dfs_list))

In [7]:
# Join the monthly frames side by side, aligned on their shared index,
# giving one column per month.
df_new = pd.concat(dfs, axis="columns")

In [8]:
# Display the combined frame built by concatenating the monthly frames column-wise.
df_new

Unnamed: 0_level_0,January 2016,February 2016,March 2016,April 2016,May 2016,June 2016,July 2016,August 2016,September 2016,October 2016,...,March 2024,April 2024,May 2024,June 2024,July 2024,August 2024,September 2024,October 2024,November 2024,December 2024
Class of Employee,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Total employed, all classes of workers",17871.6,17889.0,17892.8,17919.7,17947.4,17954.5,17943.2,17994.2,18032.5,18051.2,...,20614.5,20700.5,20698.3,20715.9,20712.9,20742.6,20779.3,20782.6,20826.4,20917.4
Employees,15204.3,15213.1,15220.2,15256.9,15273.4,15258.7,15256.9,15332.9,15349.5,15377.5,...,17957.0,18024.7,18020.1,18042.4,18048.1,18084.8,18095.5,18081.6,18130.4,18207.9
Public sector employees8,3516.5,3513.0,3499.1,3504.4,3525.7,3497.2,3489.0,3515.1,3506.4,3526.6,...,4434.9,4445.1,4441.2,4441.9,4490.6,4489.4,4465.2,4443.4,4492.3,4530.3
Private sector employees9,11687.8,11700.1,11721.2,11752.6,11747.7,11761.5,11767.9,11817.8,11843.1,11850.9,...,13522.1,13579.6,13578.9,13600.5,13557.5,13595.5,13630.2,13638.2,13638.1,13677.5
Self-employed10,2667.3,2675.9,2672.6,2662.8,2674.0,2695.8,2686.3,2661.3,2683.0,2673.8,...,2657.5,2675.8,2678.2,2673.5,2664.8,2657.7,2683.8,2701.0,2696.0,2709.5


In [13]:
# Persist the combined table to a CSV file in the current working directory.
df_new.to_csv(path_or_buf='canada_employees.csv')