## Import Needed Filepaths and Libraries

In [1]:
import json
import joblib
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import statsmodels.api as sm

from sklearn.metrics import (
    confusion_matrix, accuracy_score, balanced_accuracy_score,
    f1_score, mean_squared_error, r2_score, classification_report
)

from texas_gerrymandering_hb4.config import IMAGES_DIR

ART_DIR = Path("artifacts")
ART_DIR.mkdir(parents=True, exist_ok=True)

COMPACTNESS_PCA_META = {}
compactness_meta_path = ART_DIR / "compactness_pca.json"
if compactness_meta_path.exists():
    with compactness_meta_path.open() as f:
        COMPACTNESS_PCA_META = json.load(f)
    print("Loaded compactness PCA metadata for evaluation.")
else:
    print("Warning: compactness_pca.json not found.")


[32m2025-10-10 20:02:47.563[0m | [1mINFO    [0m | [36mtexas_gerrymandering_hb4.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: /home/aimlexpert/Documents/GitHub/texas-gerrymandering-HB4[0m


Loaded compactness PCA metadata for evaluation.


## Load in Test Split, Model, and Threshold

In [2]:
X_test = pd.read_parquet(ART_DIR / "X_test.parquet")
y_test = pd.read_parquet(ART_DIR / "y_test.parquet")["party"]

pipeline = joblib.load(ART_DIR / "active_model.pkl")
with open(ART_DIR / "train_threshold.json") as f:
    thr_info = json.load(f)
threshold = float(thr_info["threshold"])
variant = thr_info.get("variant", "unknown")
use_poly = thr_info.get("use_polynomial_features")
poly_degree = thr_info.get("polynomial_degree")
print(f"Evaluating model variant: {variant} with threshold={threshold:.3f}")
if use_poly is not None:
    print(f"Polynomial features enabled: {use_poly} (degree={poly_degree})")
if COMPACTNESS_PCA_META:
    print("Compactness PCA components:", COMPACTNESS_PCA_META.get("pca_components"))


Evaluating model variant: lasso with threshold=0.330
Polynomial features enabled: False (degree=1)
Compactness PCA components: [0.5326604170627146, 0.5420231196260704, 0.48495683113594357, 0.43278249712757744]


## Prediction and Classification

In [3]:
y_score = pipeline.predict(X_test).clip(0, 1)
y_pred  = (y_score >= threshold).astype(int)

## Print Metrics and Save Report

In [4]:
acc = accuracy_score(y_test, y_pred)
bal = balanced_accuracy_score(y_test, y_pred)
f1  = f1_score(y_test, y_pred, zero_division=0)
mse = mean_squared_error(y_test, y_score)
r2  = r2_score(y_test, y_score)
cm  = confusion_matrix(y_test, y_pred)

print(f"Accuracy          : {acc:.3f}")
print(f"Balanced Accuracy : {bal:.3f}")
print(f"F1 (Dem=1)        : {f1:.3f}")
print(f"MSE               : {mse:.4f}")
print(f"RÂ²               : {r2:.4f}")
print("Confusion Matrix:
", cm)

with open(ART_DIR / "metrics.json", "w") as f:
    json.dump({
        "variant": variant,
        "threshold": threshold,
        "accuracy": acc,
        "balanced_accuracy": bal,
        "f1": f1,
        "mse": mse,
        "r2": r2,
        "use_polynomial_features": use_poly,
        "polynomial_degree": poly_degree,
        "compactness_pca": COMPACTNESS_PCA_META,
    }, f, indent=2)

pd.DataFrame(classification_report(
    y_test, y_pred, target_names=["Republican(0)", "Democrat(1)"], output_dict=True, zero_division=0
)).to_csv(ART_DIR / "classification_report.csv")


SyntaxError: unterminated string literal (detected at line 14) (1702343232.py, line 14)

## Confusion Matrix

In [10]:
def save_confusion_matrix(cm, path, labels=("Rep(0)", "Dem(1)")):
    plt.figure()
    plt.imshow(cm, interpolation="nearest")
    plt.title("Confusion Matrix (Test)")
    plt.xticks([0, 1], labels)
    plt.yticks([0, 1], labels)
    for i in range(2):
        for j in range(2):
            plt.text(j, i, str(cm[i, j]), ha="center", va="center")
    plt.tight_layout()
    plt.savefig(path, dpi=200)
    plt.close()

save_confusion_matrix(cm, IMAGES_DIR / "confusion_matrix.png")

print("Evaluation complete. Saved metrics, report, and confusion matrix.")

Evaluation complete. Saved metrics, report, and confusion matrix.
