<a href="https://colab.research.google.com/github/arnavdesai6143/Datathon_TM126/blob/main/inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
CTG Fetal Distress Classification - Testing/Inference Script
Loads the saved models from models/ and evaluates each one on the held-out test split
"""

import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
from sklearn.metrics import (
    balanced_accuracy_score, f1_score, classification_report,
    confusion_matrix, ConfusionMatrixDisplay, precision_recall_fscore_support
)

# Recreate the held-out test partition. A fixed seed, test fraction and
# stratification make the split deterministic, so the same rows land in the
# test set on every run (presumably matching the training script - confirm).
from sklearn.model_selection import train_test_split

RANDOM_STATE = 42
TEST_SIZE = 0.20

cleaned_df = pd.read_csv('ctg_cleaned.csv')

# Target is the NSP column cast to int; features are all remaining numeric
# columns of the cleaned frame.
y = cleaned_df['NSP'].astype(int)
X = (
    cleaned_df
    .drop(columns=['NSP'], errors='ignore')
    .select_dtypes(include=[np.number])
)

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
    stratify=y,
)

# ============================================================================
# LOAD TRAINED MODELS AND EVALUATE
# ============================================================================
# For every saved model: predict on X_test, print aggregate and per-class
# metrics, save and show a confusion-matrix figure, and collect one summary
# row per model in `test_results`.
print("="*70)
print("FINAL TEST SET EVALUATION")
print("="*70)

test_results = []

# NSP label codes and their display names, kept in matching order so every
# per-class metric call below reports the classes in the same sequence.
class_labels = [1, 2, 3]
class_names = ['Normal', 'Suspect', 'Pathologic']
# Position of label 3 (Pathologic) inside the per-class metric arrays.
pathologic_idx = class_labels.index(3)

# Load all trained models
model_names = ['Logistic_Regression', 'Random_Forest', 'Gradient_Boosting', 'MLP_Neural_Network']

for name in model_names:
    print(f"\n{'='*70}")
    print(f"Evaluating: {name}")
    print(f"{'='*70}")

    # Load model
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load model files that come from a trusted source.
    estimator = joblib.load(f'models/{name}_model.pkl')

    # Predictions
    y_pred = estimator.predict(X_test)

    # Core metrics
    bal_acc = balanced_accuracy_score(y_test, y_pred)
    macro_f1 = f1_score(y_test, y_pred, average='macro')
    weighted_f1 = f1_score(y_test, y_pred, average='weighted')

    print("\n📊  Test Set Performance:")
    print(f"  {'Balanced Accuracy:':<25} {bal_acc:.4f}")
    print(f"  {'Macro F1-Score:':<25} {macro_f1:.4f}")
    print(f"  {'Weighted F1-Score:':<25} {weighted_f1:.4f}")

    # Per-class metrics (arrays are ordered like class_labels)
    precision, recall, f1, support = precision_recall_fscore_support(
        y_test, y_pred, labels=class_labels, zero_division=0
    )

    print("\n📈  Per-Class Performance:")
    print(f"  {'Class':<15} {'Precision':<12} {'Recall':<12} {'F1-Score':<12} {'Support':<12}")
    print("  " + "-"*60)
    for cls_name, cls_precision, cls_recall, cls_f1, cls_support in zip(
        class_names, precision, recall, f1, support
    ):
        print(f"  {cls_name:<15} {cls_precision:<12.4f} {cls_recall:<12.4f} {cls_f1:<12.4f} {cls_support:<12}")

    # Classification report.
    # Pass the explicit label list so the report lines up with class_names
    # even if a class is missing from y_test/y_pred; without it, a missing
    # class makes the target_names length check raise ValueError.
    print("\n" + classification_report(
        y_test, y_pred, labels=class_labels, target_names=class_names,
        digits=4, zero_division=0
    ))

    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)

    fig, ax = plt.subplots(figsize=(10, 8))
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
    disp.plot(cmap='Blues', ax=ax, values_format='d')
    ax.set_title(f'{name} - Confusion Matrix\n' +
                 f'Balanced Acc: {bal_acc:.4f} | Macro F1: {macro_f1:.4f}',
                 fontsize=14, fontweight='bold', pad=20)
    plt.tight_layout()
    plt.savefig(f'{name}_confusion_matrix.png', dpi=300, bbox_inches='tight')
    plt.show()
    # Release the figure so one is not kept open per evaluated model.
    plt.close(fig)

    # Store results
    test_results.append({
        'Model': name,
        'Test_Balanced_Accuracy': float(bal_acc),
        'Test_Macro_F1': float(macro_f1),
        'Class_3_Recall': float(recall[pathologic_idx])
    })

# Rank the evaluated models by macro F1 (best first), print the comparison
# table under a banner, and persist it as a CSV file.
test_results_df = pd.DataFrame(test_results)
test_results_df = test_results_df.sort_values(by='Test_Macro_F1', ascending=False)

print("\n" + "=" * 70, "TEST SET RESULTS SUMMARY", "=" * 70, sep="\n")
print(test_results_df.to_string(index=False))

test_results_df.to_csv('test_results.csv', index=False)
print("\n✓ Test results saved to: test_results.csv")