# External Drivers vs. PC Prices

This notebook explores how external datasets (electricity prices, automotive demand, BPA capacity loss, etc.) relate to grouped PC prices. The goal is to quickly generate presentation-ready figures.


In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Relative path to the directory that holds the processed datasets.
DATA_DIR = Path("../..", "data", "processed")

# Apply seaborn's "whitegrid" theme to the figures created below.
sns.set_theme(style="whitegrid")

In [None]:
# Read the grouped dataset, parsing the "date" column into datetimes on load.
dataset_path = DATA_DIR / "uni_3m_grouped.csv"
df = pd.read_csv(dataset_path, parse_dates=["date"])

# Price series are recognised purely by their column-name prefix.
pc_prefixes = ("eu_pc", "asia_pc")
price_cols = [col for col in df.columns if col.startswith(pc_prefixes)]

# Columns treated as external drivers in the cells that follow.
external_cols = [
    "avg_price_eur_mwhe",
    "new_passenger_car_registration",
    "acetone_capacity_loss_kt",
    "phenol_capacity_loss_kt",
    "bpa_capacity_loss_kt",
]

# Trailing expression: the notebook renders the frame as the cell output.
df

In [None]:
# Dual-axis line chart: EU crystal price on the left axis, average electricity
# price on the right axis, both plotted against the "date" column.
fig, ax1 = plt.subplots(figsize=(10, 5))
price_series = df.set_index("date")
series_l = price_series["eu_pc_crystal_best_price"]
series_r = price_series["avg_price_eur_mwhe"]

# Left axis; the axis label and tick labels share the line colour so each
# scale can be matched to its series.
ax1.plot(series_l.index, series_l, color="#1f77b4", label="EU Crystal Price (€/kg)")
ax1.set_ylabel("EU Crystal Price (€/kg)", color="#1f77b4")
ax1.tick_params(axis="y", labelcolor="#1f77b4")
ax1.set_xlabel("Date")

# Right axis sharing the same x-axis.
ax2 = ax1.twinx()
ax2.plot(
    series_r.index, series_r, color="#ff7f0e", label="Avg. Electricity Price (€/MWh)"
)
ax2.set_ylabel("Avg. Electricity Price (€/MWh)", color="#ff7f0e")
ax2.tick_params(axis="y", labelcolor="#ff7f0e")
# The "whitegrid" theme turns the grid on for every new Axes. The twin's ticks
# do not line up with ax1's, so drawing both grids yields two interleaved sets
# of horizontal lines. Keep only the primary-axis grid.
ax2.grid(False)

# Each Axes only knows its own artists; merge the handles into one legend.
lines, labels = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines + lines2, labels + labels2, loc="upper right")
ax1.set_title("EU Crystal Price vs. European Electricity Price")
plt.tight_layout()
plt.show()

In [None]:
# Spearman (rank) correlation over every price and external-driver column.
all_series = [*price_cols, *external_cols]
corr_df = df[all_series].corr(method="spearman")

# Restrict the figure to two EU price series and three external drivers.
focus_cols = [
    "eu_pc_crystal_best_price",
    "eu_pc_white_best_price",
    "avg_price_eur_mwhe",
    "new_passenger_car_registration",
    "bpa_capacity_loss_kt",
]
heatmap = corr_df.loc[focus_cols, focus_cols]

# Annotated square heatmap with the colour scale centred on zero.
focus_fig, focus_ax = plt.subplots(figsize=(8, 6))
sns.heatmap(heatmap, annot=True, fmt=".2f", cmap="BrBG", center=0, ax=focus_ax)
focus_ax.set_title("Spearman Correlation: PC Prices vs. External Drivers")
focus_fig.tight_layout()
plt.show()

In [None]:
# Spearman heatmap: selected EU PC series as rows (Y-axis), external factors as
# columns (X-axis).
selected_eu_pcs = [
    "eu_pc_crystal_best_price",
    "eu_pc_white_best_price",
    "eu_pc_gf10_best_price",
    "eu_pc_gf20_best_price",
    "eu_pc_recycled_gf10_white_best_price",
    "eu_pc_recycled_gf10_grey_best_price",
    "eu_pc_si_best_price",
]
# Order follows price_cols; selected names that are not in price_cols are
# silently left out.
pc_cols_for_heatmap = [col for col in price_cols if col in selected_eu_pcs]
external_cols_for_heatmap = external_cols

subset = df[pc_cols_for_heatmap + external_cols_for_heatmap]
spearman = subset.corr(method="spearman")
# Slice the square correlation matrix down to the PC-rows x external-columns block.
heatmap_matrix = spearman.loc[pc_cols_for_heatmap, external_cols_for_heatmap]

plt.figure(figsize=(12, 6))
sns.heatmap(
    heatmap_matrix,
    annot=True,
    fmt=".2f",
    cmap="PuOr",
    center=0,
    cbar_kws={"label": "Spearman ρ"},
)
plt.xlabel("External Drivers")
plt.ylabel("Selected EU PC Series")
plt.title("Spearman Correlation: Selected EU PCs vs. External Factors")
plt.tight_layout()
plt.show()

In [None]:
# Pearson heatmap: selected EU PC series as rows (Y-axis), external factors as
# columns (X-axis). Reuses pc_cols_for_heatmap and external_cols_for_heatmap
# from the Spearman heatmap cell, so that cell must have been run first.
subset = df[pc_cols_for_heatmap + external_cols_for_heatmap]
pearson = subset.corr(method="pearson")
# Slice the square correlation matrix down to the PC-rows x external-columns block.
pearson_matrix = pearson.loc[pc_cols_for_heatmap, external_cols_for_heatmap]

plt.figure(figsize=(12, 6))
sns.heatmap(
    pearson_matrix,
    annot=True,
    fmt=".2f",
    cmap="RdBu_r",
    center=0,
    cbar_kws={"label": "Pearson r"},
)
plt.xlabel("External Drivers")
plt.ylabel("Selected EU PC Series")
plt.title("Pearson Correlation: Selected EU PCs vs. External Factors")
plt.tight_layout()
plt.show()

In [None]:
# Pearson and Spearman coefficients for BPA vs. phenol capacity loss, computed
# on the rows where both columns are present, followed by a scatter plot of the
# same rows.
bpa_col, phenol_col = "bpa_capacity_loss_kt", "phenol_capacity_loss_kt"
subset_loss = df[[bpa_col, phenol_col]].dropna()

# Pick the off-diagonal entry of each 2x2 correlation matrix by label.
pearson_loss = subset_loss.corr(method="pearson").loc[bpa_col, phenol_col]
spearman_loss = subset_loss.corr(method="spearman").loc[bpa_col, phenol_col]
print(f"Pearson correlation (BPA vs Phenol capacity loss): {pearson_loss:.3f}")
print(f"Spearman correlation (BPA vs Phenol capacity loss): {spearman_loss:.3f}")

loss_fig, loss_ax = plt.subplots(figsize=(6, 4))
sns.scatterplot(data=subset_loss, x=bpa_col, y=phenol_col, alpha=0.7, ax=loss_ax)
loss_ax.set_title("BPA vs Phenol Capacity Loss")
loss_ax.set_xlabel("BPA capacity loss (kt)")
loss_ax.set_ylabel("Phenol capacity loss (kt)")
loss_fig.tight_layout()
plt.show()