In [10]:
# evaluate_on_test.py
# This script loads the trained neural network model and evaluates it on a test dataset.
# It computes the confusion matrix (counts and normalized) and common performance metrics.

import numpy as np
import pandas as pd
import json
import os
from joblib import load
from sklearn.metrics import (
    confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, classification_report
)

# Paths
# Test dataset, stored as a CSV file.
TEST_CSV = "2halfmoonsTest.csv"
# Trained model saved from part 1.
MODEL = "double_moon_model.joblib"
# StandardScaler saved from training.
SCALER = "scaler.joblib"
# Folder that receives the evaluation outputs.
OUTDIR = "test_artifacts"

# Create the output folder up front; an already existing folder is not an error.
os.makedirs(OUTDIR, exist_ok=True)

# 1) Load test data (supports both 'X,Y,ClassLabel' or 'X,Y,label' column names)
df = pd.read_csv(TEST_CSV)
# Map lower-cased column names to their original spelling so the lookups
# below are case-insensitive.
cols = {c.lower(): c for c in df.columns}

# Pick the label column by name; "classlabel" takes priority over "label".
_label_key = next((k for k in ("classlabel", "label") if k in cols), None)
if _label_key is not None and {"x", "y"}.issubset(cols):
    X = df[[cols["x"], cols["y"]]].to_numpy(float)
    y = df[cols[_label_key]].astype(int).to_numpy()
else:
    # Default: assume first 2 columns are features, last one is label
    X = df.iloc[:, :2].to_numpy(float)
    y = df.iloc[:, -1].astype(int).to_numpy()

# Fail with a readable message instead of the NumPy reduction error that
# y.min() raises on an empty array.
if y.size == 0:
    raise ValueError(f"Test dataset contains no rows: {TEST_CSV}")

# Normalize labels to {0,1} if they are {1,2}.
# Shifting by the minimum handles both encodings when the lowest class is
# present. The exception is a file that holds only label 2: the minimum would
# turn it into class 0, although under the {1,2} encoding it is class 1.
_lowest = y.min()
_shift = 1 if (_lowest == 2 and y.max() == 2) else _lowest
y = y - _shift

# 2) Load trained model and scaler
# NOTE: joblib.load unpickles the files, so both paths must point to trusted
# artifacts produced by the training step.
mlp, scaler = (load(artifact) for artifact in (MODEL, SCALER))

# 3) Make predictions and compute metrics
# Apply the scaler loaded above before querying the model.
Xs = scaler.transform(X)
# Column 1 of predict_proba is the probability of the model's second class,
# which is treated as the positive class (label 1) here.
proba = mlp.predict_proba(Xs)[:, 1]
y_pred = (proba >= 0.5).astype(int)

# Confusion matrix
# Pin the label set so the matrix is always 2x2. Without it, a test set (and
# predictions) containing a single class yields a 1x1 matrix and the 4-way
# unpacking below raises ValueError.
cm = confusion_matrix(y, y_pred, labels=[0, 1])
# normalize="true" divides each row by the number of true samples of that class.
cm_norm = confusion_matrix(y, y_pred, labels=[0, 1], normalize="true")
# Row-major order of the 2x2 matrix: [[TN, FP], [FN, TP]].
tn, fp, fn, tp = cm.ravel()

# Metrics
# Precision, recall and F1 are reported for the positive class (label 1);
# zero_division=0 returns 0 instead of warning when a denominator is empty.
acc = accuracy_score(y, y_pred)
prec = precision_score(y, y_pred, zero_division=0)
rec = recall_score(y, y_pred, zero_division=0)
f1 = f1_score(y, y_pred, zero_division=0)

# Print results
# Both confusion matrices share the same "heading, matrix" layout.
for heading, matrix in (
    ("Confusion matrix (counts):\n", cm),
    ("\nConfusion matrix (normalized):\n", np.round(cm_norm, 3)),
):
    print(heading, matrix)

# Raw cell counts followed by the scalar metrics, four decimals each.
print(f"\nTP={tp}, FP={fp}, FN={fn}, TN={tn}")
print(f"Accuracy={acc:.4f}, Precision={prec:.4f}, Recall={rec:.4f}, F1={f1:.4f}")

# Per-class precision/recall/F1 table, three decimals.
report = classification_report(y, y_pred, digits=3, zero_division=0)
print("\nClassification report:\n", report)

# 4) Save results to files for report
counts_path = os.path.join(OUTDIR, "confusion_matrix_counts.csv")
normalized_path = os.path.join(OUTDIR, "confusion_matrix_normalized.csv")
metrics_path = os.path.join(OUTDIR, "test_metrics.json")

# Counts are written as integers, normalized values with six decimals.
np.savetxt(counts_path, cm, fmt="%d", delimiter=",")
np.savetxt(normalized_path, cm_norm, fmt="%.6f", delimiter=",")

with open(metrics_path, "w") as f:
    # Cast to built-in int/float so the values are JSON-serializable.
    metrics = {
        "TP": int(tp),
        "FP": int(fp),
        "FN": int(fn),
        "TN": int(tn),
        "accuracy": float(acc),
        "precision": float(prec),
        "recall": float(rec),
        "f1": float(f1),
    }
    json.dump(metrics, f, indent=2)

print("Saved test artifacts to:", OUTDIR)


Confusion matrix (counts):
 [[100   0]
 [  0 100]]

Confusion matrix (normalized):
 [[1. 0.]
 [0. 1.]]

TP=100, FP=0, FN=0, TN=100
Accuracy=1.0000, Precision=1.0000, Recall=1.0000, F1=1.0000

Classification report:
               precision    recall  f1-score   support

           0      1.000     1.000     1.000       100
           1      1.000     1.000     1.000       100

    accuracy                          1.000       200
   macro avg      1.000     1.000     1.000       200
weighted avg      1.000     1.000     1.000       200

Saved test artifacts to: test_artifacts
