In [None]:
#Additional validation

In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torchvision.models import mobilenet_v3_large, MobileNet_V3_Large_Weights
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import numpy as np

# -----------------------------
# 1. Config
# -----------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#new_data_root = "/90daydata/nematode_ml/BLD/nematode_project/additional test/Aphids/"  # <-- change this
new_data_root = "/90daydata/nematode_ml/BLD/nematode_project/additional test/images/"  # <-- change this
batch_size = 16
img_size = 224

#weights_path = "/90daydata/nematode_ml/BLD/nematode_project/outputs/20 epoch wait/mobilenet_final_model.pth"
weights_path = "/90daydata/nematode_ml/BLD/nematode_project/outputs/5 epoch wait/mobilenet_aphids_final_model.pth"
#weights_path = "/90daydata/nematode_ml/BLD/nematode_project/outputs/MobileNet/mobile_net_final_model.pth"


In [2]:
# -----------------------------
# 2. Transforms (same as val set)
# -----------------------------
MEAN = (0.485, 0.456, 0.406)
STD = (0.229, 0.224, 0.225)
eval_tf = transforms.Compose([
    transforms.Resize((img_size, img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=MEAN, std=STD),
])

# -----------------------------
# 3. Dataset & Loader
# -----------------------------
dataset = torchvision.datasets.ImageFolder(new_data_root, transform=eval_tf)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=2)

In [3]:
# -----------------------------
# 4. Model definition & load weights
# -----------------------------
# Load MobileNetV3 Large with default pretrained weights
model = mobilenet_v3_large(weights=MobileNet_V3_Large_Weights.DEFAULT)

# Replace the final classification layer for binary classification
num_ftrs = model.classifier[3].in_features  # index 3 is the final Linear layer in MobileNetV3 Large
model.classifier[3] = nn.Linear(num_ftrs, 1)  # Binary classification: output logits for 1 class

# Load your own trained weights (if any)
model.load_state_dict(torch.load(weights_path, map_location=device))

# Send to device and set to evaluation mode
model.to(device)
model.eval()


  model.load_state_dict(torch.load(weights_path, map_location=device))


MobileNetV3(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): Conv2dNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bi

In [4]:
# -----------------------------
# 5. Evaluation loop (auto label alignment)
# -----------------------------
#Prepares lists to collect labels, predictions, and probabilities
all_labels = []
all_preds = []
all_probs = []

# Detect positive label value from dataset if possible
class_to_idx = getattr(getattr(loader, 'dataset', None), 'class_to_idx', None)
if class_to_idx is not None and "BLD" in class_to_idx:
    pos_label_val = class_to_idx["BLD"]
else:
    pos_label_val = 1  # fallback

with torch.no_grad():
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        probs = torch.sigmoid(outputs).squeeze() # convert logits to probabilities

        preds = (probs > 0.5).long() #Thresholds at 0.5 to get binary predictions

        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(preds.cpu().numpy())
        all_probs.extend(probs.cpu().numpy())

# -----------------------------
# 6. Metrics
# -----------------------------
#Converts lists to NumPy arrays.
y_true = np.array(all_labels)
y_pred = np.array(all_preds)
y_score = np.array(all_probs)

# Flip labels if dataset encodes BLD as 0
if pos_label_val != 1:
    y_true = 1 - y_true

acc  = accuracy_score(y_true, y_pred)
prec = precision_score(y_true, y_pred, zero_division=0)
rec  = recall_score(y_true, y_pred, zero_division=0)
f1   = f1_score(y_true, y_pred, zero_division=0)
try:
    auroc = roc_auc_score(y_true, y_score)
except ValueError:
    auroc = float("nan")

print(f"Unseen dataset results:")
print(f"Accuracy:  {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall:    {rec:.4f}")
print(f"F1 score:  {f1:.4f}")
print(f"AUROC:     {auroc:.4f}")


Unseen dataset results:
Accuracy:  0.8793
Precision: 0.8444
Recall:    0.8444
F1 score:  0.8444
AUROC:     0.9393
