In [None]:
# HelmetGuard – Training ResNet18 on Synthetic Data Only

# This notebook trains a ResNet18 classifier on the synthetic dataset only and evaluates it on real test images.


In [None]:
# Mount Google Drive so the project folders under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')

import os

# Project root on Drive, and the image folder that the training cells read from.
BASE_DIR = "/content/drive/MyDrive/helmetguard"
DATA_DIR = os.path.join(BASE_DIR, "data_synth")

# Sanity check: show the resolved path and the entries found directly under it.
print("DATA_DIR:", DATA_DIR)
print("Folders:", os.listdir(DATA_DIR))


Mounted at /content/drive
DATA_DIR: /content/drive/MyDrive/helmetguard/data_synth
Folders: ['safe', 'unsafe']


In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models

from tqdm.auto import tqdm


In [None]:

# Augmenting pipeline, intended for training images only.
train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Deterministic pipeline (no random ops) for validation and test images.
val_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Two views over the same image folder, one per transform.
# The subsets returned by random_split only hold a reference to their parent
# dataset, so doing `val_dataset.dataset.transform = val_transform` on a single
# shared ImageFolder would replace the transform of the training subset as well
# and silently switch off augmentation.
full_dataset = datasets.ImageFolder(root=DATA_DIR, transform=train_transform)
full_dataset_eval = datasets.ImageFolder(root=DATA_DIR, transform=val_transform)

# The index-based split below is only valid if both views list the same files
# in the same order.
assert full_dataset_eval.samples == full_dataset.samples, \
    "train/eval views of DATA_DIR disagree on file listing"

class_names = full_dataset.classes
print("Classes:", class_names)
print("Total images:", len(full_dataset))

val_ratio = 0.2
val_size = int(len(full_dataset) * val_ratio)
train_size = len(full_dataset) - val_size

# Seeded generator so the train/val partition is the same on every run.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)

train_dataset, val_split = random_split(
    full_dataset, [train_size, val_size], generator=split_generator
)

# Re-point the validation indices at the eval-transform view; the training
# subset keeps reading through `full_dataset` with its augmentations intact.
val_dataset = torch.utils.data.Subset(full_dataset_eval, val_split.indices)

print(f"Train size: {len(train_dataset)}, Val size: {len(val_dataset)}")


Classes: ['safe', 'unsafe']
Total images: 614
Train size: 492, Val size: 122


In [None]:
batch_size = 32

# Options common to both loaders; only the shuffling flag differs between them.
loader_kwargs = dict(batch_size=batch_size, num_workers=2, pin_memory=True)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)


Using device: cuda


In [None]:

# Load a pretrained ResNet18.
# Newer torchvision exposes pretrained weights through the `ResNet18_Weights`
# enum; releases that predate it raise AttributeError on the lookup and only
# understand the legacy `pretrained=True` flag. Only that specific error
# triggers the fallback: a bare `except:` would also hide download failures
# and swallow KeyboardInterrupt.
try:
    weights = models.ResNet18_Weights.IMAGENET1K_V1
except AttributeError:
    model = models.resnet18(pretrained=True)
else:
    model = models.resnet18(weights=weights)

# Replace the final fully-connected layer with a fresh 2-output head.
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 2)

model = model.to(device)

# All parameters (backbone and new head) are handed to the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 192MB/s]


In [None]:
def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over `loader` and return averaged metrics.

    The model is put in training mode. For each batch the gradients are reset,
    the loss is back-propagated and the optimizer takes a single step.

    Args:
        model: network being trained; invoked as `model(inputs)`.
        loader: iterable yielding `(inputs, labels)` batches.
        optimizer: object whose `zero_grad()` and `step()` are called per batch.
        criterion: callable `criterion(outputs, labels)` returning the batch loss.
        device: target handed to `.to(...)` for both inputs and labels.

    Returns:
        `(epoch_loss, epoch_acc)`: the sum of `batch_loss * batch_size` divided
        by the number of samples seen, and the fraction of samples whose
        highest-scoring output index equals the label.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_x, batch_y in tqdm(loader, leave=False):
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        # Weight each batch loss by its size so a short final batch is not
        # over-represented in the epoch figure.
        loss_sum += batch_loss.item() * batch_x.size(0)
        predicted = logits.max(dim=1).indices
        n_correct += (predicted == batch_y).sum().item()
        n_seen += batch_y.size(0)

    return loss_sum / n_seen, n_correct / n_seen


@torch.no_grad()
def eval_one_epoch(model, loader, criterion, device):
    """Evaluate `model` over `loader` without tracking gradients.

    Args:
        model: network to evaluate; switched to eval mode first.
        loader: iterable yielding `(inputs, labels)` batches.
        criterion: callable `criterion(outputs, labels)` returning the batch loss.
        device: target handed to `.to(...)` for both inputs and labels.

    Returns:
        `(epoch_loss, epoch_acc)`: the sum of `batch_loss * batch_size` divided
        by the number of samples seen, and the fraction of samples whose
        highest-scoring output index equals the label.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_x, batch_y in loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)

        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)

        # Weight by batch size so the epoch loss is a per-sample average.
        loss_sum += batch_loss.item() * batch_x.size(0)
        predicted = logits.max(dim=1).indices
        n_correct += (predicted == batch_y).sum().item()
        n_seen += batch_y.size(0)

    return loss_sum / n_seen, n_correct / n_seen


In [None]:
# Number of full passes over the training loader.
num_epochs = 5

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")

    # One optimisation pass over the training loader, followed by a
    # gradient-free evaluation pass over the validation loader.
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, device)
    val_loss, val_acc = eval_one_epoch(model, val_loader, criterion, device)

    # Accuracies come back as fractions; scale to percent for display.
    print(f"Train  Loss: {train_loss:.4f} | Acc: {train_acc*100:.2f}%")
    print(f"Val    Loss: {val_loss:.4f} | Acc: {val_acc*100:.2f}%")



Epoch 1/5


  0%|          | 0/16 [00:00<?, ?it/s]

Train  Loss: 0.2977 | Acc: 89.63%
Val    Loss: 0.0991 | Acc: 97.54%

Epoch 2/5


  0%|          | 0/16 [00:00<?, ?it/s]

Train  Loss: 0.0316 | Acc: 99.39%
Val    Loss: 0.0965 | Acc: 96.72%

Epoch 3/5


  0%|          | 0/16 [00:00<?, ?it/s]

Train  Loss: 0.0054 | Acc: 100.00%
Val    Loss: 0.0618 | Acc: 97.54%

Epoch 4/5


  0%|          | 0/16 [00:00<?, ?it/s]

Train  Loss: 0.0036 | Acc: 100.00%
Val    Loss: 0.0490 | Acc: 97.54%

Epoch 5/5


  0%|          | 0/16 [00:00<?, ?it/s]

Train  Loss: 0.0035 | Acc: 100.00%
Val    Loss: 0.0688 | Acc: 97.54%


In [None]:
# Checkpoints live in a "models" folder under the project root; create it if missing.
MODEL_DIR = os.path.join(BASE_DIR, "models")
os.makedirs(MODEL_DIR, exist_ok=True)

# Persist only the parameters (state_dict), not the whole model object.
model_path = os.path.join(MODEL_DIR, "resnet18_synth_only.pt")
torch.save(model.state_dict(), model_path)

print("Saved model to:", model_path)


Saved model to: /content/drive/MyDrive/helmetguard/models/resnet18_synth_only.pt


In [None]:
from torchvision import datasets

# Folder with the real test images, kept separate from the synthetic DATA_DIR.
REAL_TEST_DIR = os.path.join(BASE_DIR, "data_real", "test")
print("REAL_TEST_DIR:", REAL_TEST_DIR)
# Sanity check: list the entries found directly under the test folder.
print("Folders:", os.listdir(REAL_TEST_DIR))


REAL_TEST_DIR: /content/drive/MyDrive/helmetguard/data_real/test
Folders: ['safe', 'unsafe']


In [None]:
# Real test images go through the deterministic validation transform (no augmentation).
test_dataset = datasets.ImageFolder(root=REAL_TEST_DIR, transform=val_transform)

# NOTE(review): label indices are only comparable with training if this class
# list matches the one printed for the synthetic dataset — confirm on each run.
print("Test classes:", test_dataset.classes)
print("Test size:", len(test_dataset))


Test classes: ['safe', 'unsafe']
Test size: 59


In [None]:
from torch.utils.data import DataLoader

# No shuffling: evaluation order is fixed across runs.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [None]:
import torch

@torch.no_grad()
def evaluate_on_loader(model, loader, device):
    """Return the overall accuracy of `model` on the batches of `loader`.

    Args:
        model: network to evaluate; switched to eval mode first.
        loader: iterable yielding `(inputs, labels)` batches.
        device: target handed to `.to(...)` for both inputs and labels.

    Returns:
        Fraction of samples whose highest-scoring output index equals the
        label, or 0 when the loader yields no samples.
    """
    model.eval()
    hits = 0
    seen = 0

    for batch_x, batch_y in loader:
        batch_y = batch_y.to(device)
        predicted = model(batch_x.to(device)).max(dim=1).indices

        hits += (predicted == batch_y).sum().item()
        seen += batch_y.size(0)

    # Guard against an empty loader instead of dividing by zero.
    if seen == 0:
        return 0
    return hits / seen

# Overall accuracy of the trained model on the real test loader (fraction, shown as percent).
test_acc = evaluate_on_loader(model, test_loader, device)
print(f"Real test accuracy: {test_acc*100:.2f}%")


Real test accuracy: 38.98%


In [None]:
from collections import Counter

@torch.no_grad()
def evaluate_per_class(model, loader, device, class_names):
    """Print, for every class, how many of its samples were predicted correctly.

    Args:
        model: network to evaluate; switched to eval mode first.
        loader: iterable yielding `(inputs, labels)` batches.
        device: target handed to `.to(...)` for both inputs and labels.
        class_names: sequence of display names; position `i` names label `i`.

    Returns:
        None. One line per entry of `class_names` is written to stdout; a
        class with no samples is reported as 0/0 with 0.00% accuracy.
    """
    model.eval()
    hits = Counter()
    seen = Counter()

    for batch_x, batch_y in loader:
        batch_y = batch_y.to(device)
        predicted = model(batch_x.to(device)).max(dim=1).indices

        truth = batch_y.cpu().tolist()
        guesses = predicted.cpu().tolist()

        # Tally every true label, then only those where the prediction matched.
        seen.update(truth)
        hits.update(t for t, g in zip(truth, guesses) if t == g)

    for class_idx, class_label in enumerate(class_names):
        n_seen = seen[class_idx]
        n_hit = hits[class_idx]
        ratio = n_hit / n_seen if n_seen > 0 else 0
        print(f"Class '{class_label}': {n_hit}/{n_seen} correct ({ratio*100:.2f}%)")

# Per-class breakdown on the real test loader, using the test dataset's own class names.
evaluate_per_class(model, test_loader, device, test_dataset.classes)


Class 'safe': 8/40 correct (20.00%)
Class 'unsafe': 15/19 correct (78.95%)


In [None]:
# Checkpoints live in a "models" folder under the project root; create it if missing.
MODEL_DIR = os.path.join(BASE_DIR, "models")
os.makedirs(MODEL_DIR, exist_ok=True)

# This notebook only trains on the synthetic dataset, so the weights are stored
# under the synth-only name. Do not save them as
# "resnet18_synth_plus_real_oversampled.pt" from here: that name would mislabel
# these weights and could overwrite a checkpoint of that name produced by a
# different training run.
model_path = os.path.join(MODEL_DIR, "resnet18_synth_only.pt")
torch.save(model.state_dict(), model_path)

print("Saved model to:", model_path)
