In [2]:
import pandas as pd
import numpy as np
import os
import altair as alt
import eco_style
# Activate the Altair theme named "light" for every chart built below.
# NOTE(review): "light" is presumably registered by the `eco_style` import above — confirm.
alt.themes.enable("light")

ThemeRegistry.enable('light')

In [10]:
# Scrape every HTML table found on the Wikipedia polling page; the tables of
# interest are selected by position further down.
POLLS_URL = "https://en.wikipedia.org/wiki/Opinion_polling_for_the_2025_Canadian_federal_election#Pre-campaign_period"
dfs = pd.read_html(POLLS_URL)

In [67]:
# Columns shared by both scraped tables once their headers are normalised.
ID_COLS = ['firm', 'date', 'link']
PARTY_COLS = ['CPC', 'LPC', 'NDP', 'BQ', 'PPC', 'GPC']


def _tidy_poll_table(table):
    """Return a copy of one scraped polling table reduced to id + party columns.

    The first three columns are renamed to 'firm', 'date' and 'link'; the
    remaining column names are kept as scraped, and only the id columns plus
    the six party columns are returned (a missing party column raises KeyError).
    """
    tidy = table.copy()
    # The scraped header is expected to be a MultiIndex; keep only its top level.
    # Guarded so a table that arrives with a flat header does not raise.
    if isinstance(tidy.columns, pd.MultiIndex):
        tidy.columns = tidy.columns.droplevel(1)
    tidy.columns = ID_COLS + list(tidy.columns[3:])
    return tidy[ID_COLS + PARTY_COLS]


# Tables are picked by position on the page: [1] campaign period, [2] pre-campaign.
campaign_df = _tidy_poll_table(dfs[1])
pre_df = _tidy_poll_table(dfs[2])

df = pd.concat([campaign_df, pre_df], ignore_index=True)
df['date'] = pd.to_datetime(df['date'], format='mixed', errors='coerce')

for col in PARTY_COLS:
    cleaned = (
        df[col]
        # Cast first: after the concat a column can mix floats and strings, and
        # `.str` methods silently turn every non-string cell into NaN.
        .astype(str)
        # Drop footnote markers such as "43[a]" so the figure itself survives.
        .str.replace(r'\[[^\]]*\]', '', regex=True)
        .str.replace('%', '', regex=False)
        .str.strip()
    )
    # Anything non-numeric (including the "—" placeholder) becomes NaN rather
    # than 0, so an unreported party is treated as missing, not as 0 % support.
    df[col] = pd.to_numeric(cleaned, errors='coerce')

# Keep only real poll rows (numeric CPC figure, parseable date) from the cut-off on.
df = df.dropna(subset=['CPC', 'date'])
df = df.query("date >= '2024-04-24'")

# Long format: one row per (poll, party); missing party figures are removed so
# they are neither plotted nor fed to the smoother.
df = (
    df.drop(columns=['link'])
    .melt(id_vars=['firm', 'date'], var_name='party', value_name='value')
    .dropna(subset=['value'])
    .reset_index(drop=True)
)
# Percent points -> fractions, matching the '%' axis format used by the chart.
df['value'] = df['value'] / 100

# Hex colour assigned to each party code; insertion order fixes the legend order
party_colors = dict(
    CPC='#1A4782',
    LPC='#EA6D6A',
    NDP='#F37021',
    BQ='#00A7E1',
    PPC='#9B4F96',
    GPC='#3D9B35',
)


# Scatter layer: one semi-transparent dot per (poll, party) observation
base = alt.Chart(df).mark_circle(
    opacity=0.5,
    size=25).encode(
    x=alt.X('date:T', title='', axis=alt.Axis(format='%b %Y')),
    y=alt.Y('value:Q', title='', axis=alt.Axis(format='%')),
    color=alt.Color('party:N', scale=alt.Scale(domain=list(party_colors.keys()), range=list(party_colors.values())), title='Party'),
    tooltip=[
        alt.Tooltip('firm:N', title='Firm'),
        alt.Tooltip('party:N', title='Party'),
        alt.Tooltip('date:T', title='Date'),
        # `value` is stored as a fraction; format it as a percentage so the
        # tooltip agrees with its "Support (%)" label and with the y axis.
        alt.Tooltip('value:Q', title='Support (%)', format='.1%'),
    ]
)

# Trend layer: a LOESS curve fitted separately for each party
trend_color = alt.Color(
    'party:N',
    legend=alt.Legend(title=None, orient='top'),
    scale=alt.Scale(domain=list(party_colors), range=list(party_colors.values())),
    title='',
)

loess = (
    alt.Chart(df)
    .transform_loess('date', 'value', groupby=['party'], bandwidth=0.05)
    .mark_line(size=3)
    .encode(
        x=alt.X('date:T', title=''),
        y=alt.Y('value:Q', title=''),
        color=trend_color,
    )
)

# Overlay the smoothed trend lines on top of the individual poll points
layered = base + loess
chart = layered.properties(
    width="container",
    background="rgb(247,247,247)",
    height=400,
)

# Export the chart specification (width is left to the embedding container)
chart.save('can_polls_2025.json')

# Export a fixed-size copy as a PNG image
png_chart = chart.properties(width=700, height=400)
png_chart.save('can_polls_2025.png', scale_factor=3)

# Last expression of the cell: render the chart in the notebook
chart