In [33]:
# run_benchmark.py

import time
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


##############################
# 1. Count the number of parameters
##############################
def count_params_torch(model):
    return sum(p.numel() for p in model.parameters())


##############################
# 2. Evaluation of PyTorch Models (TinyCNN/ViT / 3-Layer CNN)
##############################
def evaluate_torch_model(model, loader, num_classes=3, calc_auc=True):
    model.eval()
    all_labels = []
    all_preds = []
    all_probs = []

    start = time.perf_counter()

    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device)

            logits = model(x)                # (B, C)
            probs = F.softmax(logits, dim=1)

            pred = logits.argmax(dim=1)

            all_labels.append(y.cpu().numpy())
            all_preds.append(pred.cpu().numpy())
            all_probs.append(probs.cpu().numpy())

    end = time.perf_counter()
    infer_time = (end - start) / sum(len(b) for b in all_labels) * 1000.0  # ms / image

    all_labels = np.concatenate(all_labels)
    all_preds  = np.concatenate(all_preds)
    all_probs  = np.concatenate(all_probs)

    acc = accuracy_score(all_labels, all_preds)
    f1  = f1_score(all_labels, all_preds, average="macro")

    if calc_auc:
        try:
            auc = roc_auc_score(all_labels, all_probs, multi_class="ovr")
        except Exception:
            auc = np.nan
    else:
        auc = np.nan

    return {
        "acc": acc,
        "f1": f1,
        "auc": auc,
        "infer_ms": infer_time,
    }


##############################
# 3. Evaluation of sklearn Models (SVM/RF)
##############################
def evaluate_sklearn_model(model, X_test, y_test, calc_auc=True):
    start = time.perf_counter()
    y_pred = model.predict(X_test)
    end = time.perf_counter()
    infer_time = (end - start) / len(y_test) * 1000.0

    acc = accuracy_score(y_test, y_pred)
    f1  = f1_score(y_test, y_pred, average="macro")

    if calc_auc:
        try:
            # RF has predict_probaï¼ŒSVM can use decision_function
            if hasattr(model, "predict_proba"):
                prob = model.predict_proba(X_test)
            elif hasattr(model, "decision_function"):
                df = model.decision_function(X_test)
                if df.ndim == 1:
                    df = np.vstack([-df, df]).T
                exp_df = np.exp(df - df.max(axis=1, keepdims=True))
                prob = exp_df / exp_df.sum(axis=1, keepdims=True)
            else:
                prob = None

            if prob is not None:
                auc = roc_auc_score(y_test, prob, multi_class="ovr")
            else:
                auc = np.nan
        except Exception:
            auc = np.nan
    else:
        auc = np.nan

    return {
        "acc": acc,
        "f1": f1,
        "auc": auc,
        "infer_ms": infer_time,
    }


Using device: cpu


In [34]:
########################################
# 4. Define the existing model structure
########################################

import torch.nn as nn
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os

# ====== TinySpineNet ======
class TinySpineNet(nn.Module):
    def __init__(self, num_classes=3):
        super(TinySpineNet, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 256, 3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Sequential(
            nn.Linear(256, 192),
            nn.ReLU(),
            nn.Dropout(0.35),
            nn.Linear(192, 96),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(96, num_classes)
        )


    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.pool(x)
        x = x.flatten(1)
        x = self.fc(x)
        return x


# ====== ViT ======
from torchvision.models import vit_b_16, ViT_B_16_Weights

class ViTSpineClassifier(nn.Module):
    def __init__(self, num_classes=3):
        super(ViTSpineClassifier, self).__init__()
        weights = ViT_B_16_Weights.IMAGENET1K_V1
        self.vit = vit_b_16(weights=weights)
        for p in self.vit.parameters():
            p.requires_grad = False
        in_features = self.vit.heads.head.in_features
        self.vit.heads.head = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.vit(x)


# ====== 3-layer CNN ======
class CNN3(nn.Module):
    def __init__(self, num_classes=3, dropout=0.5):
        super(CNN3, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.adapt_pool = nn.AdaptiveAvgPool2d((16, 16))
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 16 * 16, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.adapt_pool(x)
        x = self.fc_layers(x)
        return x
# ====== ResNet18 ======
import torch.nn.functional as F

class CommonBlock(nn.Module):
    def __init__(self, in_channel, out_channel, stride=1):
        super(CommonBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)

    def forward(self, x):
        identity = x
        x = F.relu(self.bn1(self.conv1(x)), inplace=True)
        x = self.bn2(self.conv2(x))
        x += identity
        return F.relu(x, inplace=True)


class SpecialBlock(nn.Module):
    def __init__(self, in_channel, out_channel, stride):
        super(SpecialBlock, self).__init__()
        self.change_channel = nn.Sequential(
            nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(out_channel)
        )
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)

    def forward(self, x):
        identity = self.change_channel(x)
        out = F.relu(self.bn1(self.conv1(x)), inplace=True)
        out = self.bn2(self.conv2(out))
        out += identity
        return F.relu(out, inplace=True)


class ResNet18(nn.Module):
    def __init__(self, classes_num=3):
        super(ResNet18, self).__init__()
        self.prepare = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=5, stride=1, padding=2, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        )
        self.layer1 = nn.Sequential(
            CommonBlock(64, 64),
            CommonBlock(64, 64),
        )
        self.layer2 = nn.Sequential(
            SpecialBlock(64, 128, stride=2),
            CommonBlock(128, 128),
        )
        self.layer3 = nn.Sequential(
            SpecialBlock(128, 256, stride=2),
            CommonBlock(256, 256),
        )
        self.layer4 = nn.Sequential(
            SpecialBlock(256, 512, stride=2),
            CommonBlock(512, 512),
        )
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, classes_num),
        )

    def forward(self, x):
        x = self.prepare(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.pool(x)
        x = x.flatten(1)
        x = self.fc(x)
        return x


In [35]:
########################################
# 5. Build a unified test_df
########################################
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
from sklearn.model_selection import train_test_split

img_dir = "Dataset Binary"
label_file = "Dataset_Labels.xlsx"
test_df = pd.read_csv("spine_test_split.csv")

df = pd.read_excel(label_file)
df.columns = ["Spine_Name", "Spine_Label"]

train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    shuffle=True,
    random_state=42,
    stratify=df["Spine_Label"],
)


label_map = {"Mushroom": 0, "Stubby": 1, "Thin": 2}


class SpineDatasetTorch(Dataset):
    def __init__(self, dataframe, root_dir, transform):
        self.data = dataframe.reset_index(drop=True)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        img_path = os.path.join(self.root_dir, row["Spine_Name"])
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        label = label_map[row["Spine_Label"]]
        return img, label


# TinyCNN use 250*250 size
tiny_transform = transforms.Compose([
    transforms.Resize((250, 250)),
    transforms.ToTensor(),
])

# ViT / CNN3 use 224*224 size
vit_cnn3_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

test_dataset_250 = SpineDatasetTorch(test_df, img_dir, tiny_transform)
test_dataset_224 = SpineDatasetTorch(test_df, img_dir, vit_cnn3_transform)

test_loader_250 = DataLoader(test_dataset_250, batch_size=32, shuffle=False)
test_loader_224 = DataLoader(test_dataset_224, batch_size=32, shuffle=False)



In [36]:
########################################
# 6. Build X_test, y_test for SVM/RF
########################################
from skimage import io

X_test = []
y_test = []

for _, row in test_df.iterrows():
    img_path = os.path.join(img_dir, row["Spine_Name"])
    img = io.imread(img_path, as_gray=True)
# Suppose your data is originally 250x250. If not, you need to resize it
    flat = img.flatten()
    X_test.append(flat)
    y_test.append(label_map[row["Spine_Label"]])

X_test = np.array(X_test)
y_test = np.array(y_test)


In [40]:
########################################
# 7. Load the model and evaluate it
########################################
import joblib

results = []

# ----- TinyCNN -----
tiny = TinySpineNet(num_classes=3).to(device)
checkpoint = torch.load("./Spine_tinyCNN.pth", map_location=device)
tiny.load_state_dict(checkpoint["model_state_dict"])
tiny.eval()

#tiny.load_state_dict(torch.load("./Spine_tinyCNN.pth", map_location=device))
tiny_metrics = evaluate_torch_model(tiny, test_loader_250)
tiny_params = count_params_torch(tiny)
results.append({
    "model": "TinyCNN",
    "params": tiny_params,
    **tiny_metrics
})

# Load ResNet18
##############################
resnet18 = ResNet18(classes_num=3).to(device)

ckpt = torch.load("./Spine_ResNet18.pth", map_location=device)
resnet18.load_state_dict(ckpt["model_state_dict"])
resnet18.eval()

resnet_metrics = evaluate_torch_model(resnet18, test_loader_250)
resnet_params = count_params_torch(resnet18)

results.append({
    "model": "ResNet18",
    "params": resnet_params,
    **resnet_metrics
})



# calculate parameters
res_params = sum(p.numel() for p in resnet18.parameters())
print("ResNet18 Params =", res_params)

# ----- ViT -----
vit = ViTSpineClassifier(num_classes=3).to(device)
vit.load_state_dict(torch.load("./Spine_ViT.pth", map_location=device))
vit_metrics = evaluate_torch_model(vit, test_loader_224)
vit_params = count_params_torch(vit)
results.append({
    "model": "ViT-B16 (frozen+head)",
    "params": vit_params,
    **vit_metrics
})

# ----- 3-layer CNN -----
cnn3 = CNN3(num_classes=3).to(device)
cnn3.load_state_dict(torch.load("./spine_cnn_pytorch.pth", map_location=device))
cnn3_metrics = evaluate_torch_model(cnn3, test_loader_224)
cnn3_params = count_params_torch(cnn3)
results.append({
    "model": "CNN-3Layer",
    "params": cnn3_params,
    **cnn3_metrics
})

# ----- SVM -----
svm_model = joblib.load("./spine_svm_model.joblib")
svm_metrics = evaluate_sklearn_model(svm_model, X_test, y_test)
results.append({
    "model": "SVM",
    "params": np.nan,
    **svm_metrics
})

# ----- Random Forest -----
rf_model = joblib.load("./random_forest_model.joblib")
rf_metrics = evaluate_sklearn_model(rf_model, X_test, y_test)
results.append({
    "model": "Random Forest",
    "params": np.nan,
    **rf_metrics
})

# collect to DataFrame
df_res = pd.DataFrame(results)
print("\n===== Benchmark Summary =====")
print(df_res)

df_res.to_csv("benchmark_results.csv", index=False)
print("Results saved to benchmark_results.csv")


  checkpoint = torch.load("./Spine_tinyCNN.pth", map_location=device)
  ckpt = torch.load("./Spine_ResNet18.pth", map_location=device)


ResNet18 Params = 11304003


  vit.load_state_dict(torch.load("./Spine_ViT.pth", map_location=device))
  cnn3.load_state_dict(torch.load("./spine_cnn_pytorch.pth", map_location=device))



===== Benchmark Summary =====
                   model      params       acc        f1       auc    infer_ms
0                TinyCNN    457539.0  0.945652  0.927125  0.986931   27.130960
1               ResNet18  11304003.0  0.934783  0.902900  0.984082  433.602751
2  ViT-B16 (frozen+head)  85800963.0  0.956522  0.946034  0.991255   95.583809
3             CNN-3Layer   8482883.0  0.902174  0.853082  0.967878   15.840471
4                    SVM         NaN  0.826087  0.748367  0.866585    6.697620
5          Random Forest         NaN  0.913043  0.882868  0.965506    0.136197
Results saved to benchmark_results.csv
