In [10]:
import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import json
import logging

# Route INFO-level (and higher) records through the root logger.
logging.basicConfig(level=logging.INFO)

# Destination file for the collected evaluation metrics.
ANALYTICS_FILE = "analytics.json"

# Display name -> checkpoint location for each model being compared.
# These are placeholders; point them at your real model files.
# NOTE(review): the "RoBERTa" entry references a logistic-regression pickle —
# confirm that this is the intended file.
MODEL_PATHS = {
    "DeBERTA": "/Users/amalkurian/Desktop/Dissertation/Bias Detection/models/Deberta/final_model.pth",
    "DistilBERT": "/Users/amalkurian/Desktop/Dissertation/Bias Detection/models/DistilBERT/final_model_new(2).pth",
    "RoBERTa": "/Users/amalkurian/Desktop/Dissertation/Bias Detection/models/logistic_regression_model.pkl",
}

# Synthetic stand-in for the real test set (swap in your test_loader).
_NUM_SAMPLES = 100
_EMBEDDING_DIM = 768
# Random feature matrix: one row per sample, one column per embedding dim.
X_test = torch.randn(_NUM_SAMPLES, _EMBEDDING_DIM)
# Random binary labels drawn from {0, 1}, one per sample.
y_test = torch.randint(0, 2, (_NUM_SAMPLES,))

# -------------------------------
# Function to evaluate a model
# -------------------------------
def evaluate_model(model, X, y_true):
    """Evaluate a binary classifier and return its performance metrics.

    The model is switched to eval mode (and left there) and run on ``X`` in a
    single forward pass with gradient tracking disabled. The predicted class
    is the argmax over dim 1 of the model output.

    Args:
        model: A ``torch.nn.Module`` whose output has the class scores on
            dim 1.
        X: Input tensor handed to ``model`` as-is.
        y_true: Tensor of ground-truth labels; expected to be 0/1 because the
            precision/recall/F1 calls use scikit-learn's default binary
            averaging.

    Returns:
        dict: ``"accuracy"``, ``"precision"``, ``"recall"`` and ``"f1"`` as
        built-in floats, plus ``"confusion_matrix"`` as a 2x2 nested list
        (rows: true label 0/1, columns: predicted label 0/1).
    """
    model.eval()
    with torch.no_grad():
        y_pred = model(X).argmax(dim=1).cpu().numpy()

    y_true = y_true.cpu().numpy()
    results = {
        # float(...) keeps the values plain Python numbers for json.dump.
        "accuracy": float(accuracy_score(y_true, y_pred)),
        # zero_division=0 yields 0.0 without an UndefinedMetricWarning when
        # the positive class is never predicted (or never present).
        "precision": float(precision_score(y_true, y_pred, zero_division=0)),
        "recall": float(recall_score(y_true, y_pred, zero_division=0)),
        "f1": float(f1_score(y_true, y_pred, zero_division=0)),
        # Pin the label set so the matrix is always 2x2, even when only one
        # class shows up in y_true and y_pred.
        "confusion_matrix": confusion_matrix(
            y_true, y_pred, labels=[0, 1]
        ).tolist(),
    }
    return results

# -------------------------------
# Mock Model Loader (replace with actual)
# -------------------------------
class DummyModel(torch.nn.Module):
    """Placeholder classifier consisting of a single linear layer.

    Stands in for the real models until actual loading is implemented.

    Args:
        input_dim: Size of each input feature vector. Defaults to 768.
        num_classes: Number of output scores per sample. Defaults to 2
            (binary classifier).
    """

    def __init__(self, input_dim: int = 768, num_classes: int = 2):
        super().__init__()
        # Attribute name ``fc`` is kept so existing state-dict keys still match.
        self.fc = torch.nn.Linear(input_dim, num_classes)

    def forward(self, x):
        """Return the raw linear-layer outputs for ``x``."""
        return self.fc(x)

def load_model(path):
    """Return a model for ``path``.

    Mock loader: ``path`` is currently ignored and a freshly initialised
    ``DummyModel`` is returned on every call. Replace the body with real
    loading, e.g. ``torch.load(path)`` followed by
    ``model.load_state_dict(...)``.
    """
    return DummyModel()

# -------------------------------
# Main comparison
# -------------------------------
# Evaluate every configured model on the same test tensors and collect the
# metrics keyed by the model's display name.
all_results = {}
for model_name, model_path in MODEL_PATHS.items():
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info("Evaluating %s...", model_name)
    model = load_model(model_path)
    results = evaluate_model(model, X_test, y_test)
    all_results[model_name] = results

# Save to JSON. The encoding is explicit so the file does not depend on the
# platform's default text encoding.
with open(ANALYTICS_FILE, "w", encoding="utf-8") as f:
    json.dump(all_results, f, indent=4)

# Report the configured path rather than a hard-coded file name so the log
# stays correct if ANALYTICS_FILE changes.
logging.info("Evaluation complete. Results saved to %s.", ANALYTICS_FILE)


INFO:root:Evaluating DeBERTA...
INFO:root:Evaluating DistilBERT...


INFO:root:Evaluating RoBERTa...
INFO:root:Evaluation complete. Results saved to analytics.json.
