# Municipal Coupling Validation

This notebook trains the phase 1 national–municipal coupling model and validates its predictions on the 2021 municipal election (holdout set).

In [None]:
from pathlib import Path

import arviz as az
import pandas as pd
import matplotlib.pyplot as plt

from src.models.municipal_coupling_model import train_coupling_model, MunicipalCouplingModel
from src.data.municipal_coupling import build_municipal_coupling_dataset


In [None]:
# --- Input / output locations -------------------------------------------
OUTPUT_DIR = Path('outputs/municipal_coupling_validation')
TRACE_PATH = Path('outputs/latest/trace.zarr')

# --- Election years -----------------------------------------------------
ELECTION_YEARS = [2009, 2013, 2017, 2021]
# Train on every election except the last one (2021), which is the holdout.
TRAIN_YEARS = ELECTION_YEARS[:-1]

# --- Sampler settings (forwarded to train_coupling_model) ---------------
SEED = 42
TARGET_ACCEPT = 0.9
DRAW_COUNT = 500
TUNE_STEPS = 500

# Make sure the output directory exists; a no-op when it is already there.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
print(OUTPUT_DIR)


## Train the coupling model

> **Note**: This step can take several minutes depending on hardware. Adjust `DRAW_COUNT`/`TUNE_STEPS` above to trade accuracy for runtime.

In [None]:
%%time
model, idata, evaluation = train_coupling_model(
    trace_path=str(TRACE_PATH),
    election_years=ELECTION_YEARS,
    train_years=TRAIN_YEARS,
    output_dir=OUTPUT_DIR,
    draws=DRAW_COUNT,
    tune=TUNE_STEPS,
    target_accept=TARGET_ACCEPT,
    random_seed=SEED,
)
evaluation

## Validate forecasts for 2021

We compute winner accuracy and mean absolute error in vote shares using the held-out 2021 results.

In [None]:
# Headline hold-out metrics, each printed as a percentage with three decimals.
metric_attributes = (
    ('Winner accuracy', 'winner_accuracy'),
    ('Mean vote-share MAE', 'mean_vote_share_mae'),
)
for metric_label, attribute_name in metric_attributes:
    print(f'{metric_label}: {getattr(evaluation, attribute_name):.3%}')

# Preview of the predicted vote shares.
evaluation.predicted_vote_shares.head()

## Coupling parameter analysis

Municipalities with high coupling values lean heavily on the national signal, while municipalities with low values show more idiosyncratic local behaviour.

In [None]:
coupling_summary = evaluation.coupling_summary

# Columns shown in both ranking tables.
ranking_columns = ['municipality_name', 'district_name', 'coupling_mean', 'hdi_lower', 'hdi_upper']

# Ten municipalities with the largest / smallest posterior-mean coupling.
top_coupling = coupling_summary.nlargest(n=10, columns='coupling_mean')
bottom_coupling = coupling_summary.nsmallest(n=10, columns='coupling_mean')

# Render the two tables one after the other (highest first, then lowest).
display(top_coupling[ranking_columns], bottom_coupling[ranking_columns])


### Coupling distribution

In [None]:
# Histogram of the per-municipality posterior mean coupling.
fig, ax = plt.subplots(figsize=(10, 4))
ax.hist(
    coupling_summary['coupling_mean'],
    bins=30,
    color='steelblue',
    edgecolor='white',
)
ax.set_title('Posterior mean coupling per municipality')
ax.set_xlabel('Coupling (weight on national signal)')
ax.set_ylabel('Municipalities')
plt.show()

## Save artefacts

Store the inference data and key tables for downstream analysis.

In [None]:
# Persist the inference data as NetCDF next to the tabular outputs.
az.to_netcdf(idata, OUTPUT_DIR / 'posterior.nc')

# Tables written as CSV (without the index), keyed by target file name.
csv_exports = {
    'coupling_summary.csv': coupling_summary,
    'predictions_2021.csv': evaluation.predicted_vote_shares,
    'observed_2021.csv': evaluation.observed_vote_shares,
}
for csv_name, table in csv_exports.items():
    table.to_csv(OUTPUT_DIR / csv_name, index=False)

# Path.iterdir() yields entries in arbitrary order; sort so the listing
# shown below is the same on every run and every machine.
sorted(OUTPUT_DIR.iterdir())