## Mount + Unzip + Subset (if required)

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import zipfile
with zipfile.ZipFile("/content/drive/MyDrive/data_trimmed_clean.zip", 'r') as zip_ref:
    zip_ref.extractall("/content")

In [3]:
# ONLY USED FOR TESTING AND DEBUGGING - for final model we will use the whole dataset

import os
import shutil

def create_debug_subset_sequential(source_dir, dest_dir, train_limit=6000, test_limit=2000):
    if os.path.exists(dest_dir):
        shutil.rmtree(dest_dir)
    os.makedirs(dest_dir, exist_ok=True)

    for split, limit in [("Train", train_limit), ("Test", test_limit)]:
        src_split_path = os.path.join(source_dir, split)
        dst_split_path = os.path.join(dest_dir, split)
        os.makedirs(dst_split_path, exist_ok=True)

        for class_name in os.listdir(src_split_path):
            class_src = os.path.join(src_split_path, class_name)
            class_dst = os.path.join(dst_split_path, class_name)
            os.makedirs(class_dst, exist_ok=True)

            valid_images = sorted([f for f in os.listdir(class_src) if f.endswith(".png") and not f.startswith("._")])
            selected_images = valid_images[:limit]

            for img in selected_images:
                shutil.copy(os.path.join(class_src, img), os.path.join(class_dst, img))

create_debug_subset_sequential("/content/data_trimmed", "/content/data_trimmed_subset", train_limit=6000, test_limit=2000)

# paths
train_dir = "/content/data_trimmed_subset/Train"
test_dir = "/content/data_trimmed_subset/Test"

## Restructure Dataset

In [4]:
import os, shutil
from collections import defaultdict

def extract_video_id(filename):
    parts = filename.rsplit("_", 1)
    return parts[0] if len(parts) == 2 else filename.split("_frame")[0]

def restructure_dataset(src_dir, dst_dir):
    os.makedirs(dst_dir, exist_ok=True)
    for class_name in os.listdir(src_dir):
        class_path = os.path.join(src_dir, class_name)
        if not os.path.isdir(class_path): continue
        video_frame_dict = defaultdict(list)
        for fname in os.listdir(class_path):
            if fname.endswith(".png"):
                vid = extract_video_id(fname)
                video_frame_dict[vid].append(fname)
        for vid, frames in video_frame_dict.items():
            out_folder = os.path.join(dst_dir, class_name, vid)
            os.makedirs(out_folder, exist_ok=True)
            for f in frames:
                shutil.copy2(os.path.join(class_path, f), os.path.join(out_folder, f))

# Run restructure
# full dataset:
restructure_dataset("/content/data_trimmed/Train", "/content/data_trimmed_restructured/Train")
restructure_dataset("/content/data_trimmed/Test", "/content/data_trimmed_restructured/Test")

# subsetted dataset
restructure_dataset("/content/data_trimmed_subset/Train", "/content/data_trimmed_subset_restructured/Train")
restructure_dataset("/content/data_trimmed_subset/Test", "/content/data_trimmed_subset_restructured/Test")

## Data Loading & Pre-processing + Configs

In [5]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoImageProcessor
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from glob import glob
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
import random
import wandb

CLASS_NAMES = ['Arrest','Arson','Assault','Burglary','Explosion','Fighting','NormalVideos','Shooting']
label_encoder = LabelEncoder()
label_encoder.fit(CLASS_NAMES)

from torch.utils.data import Dataset
from transformers import AutoImageProcessor
from glob import glob
from PIL import Image
import torch
import os

class VideoDatasetSingle(Dataset):
    """
    One sequence per video – takes first `sequence_length` frames.
    """
    def __init__(self, base_dir, label_encoder, sequence_length=16, image_size=224):
        self.sequence_length = sequence_length
        self.image_size = image_size
        self.samples = []
        self.processor = AutoImageProcessor.from_pretrained('facebook/dinov2-base')

        for class_name in os.listdir(base_dir):
            class_path = os.path.join(base_dir, class_name)
            if not os.path.isdir(class_path): continue
            for video_folder in os.listdir(class_path):
                video_path = os.path.join(class_path, video_folder)
                frame_paths = sorted(glob(os.path.join(video_path, '*.png')))
                if len(frame_paths) >= sequence_length:
                    label = label_encoder.transform([class_name])[0]
                    clip = frame_paths[:sequence_length]
                    self.samples.append((clip, label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        paths, label = self.samples[idx]
        imgs = [Image.open(p).convert("RGB").resize((224, 224)) for p in paths]
        processed = self.processor(images=imgs, return_tensors="pt")
        return processed['pixel_values'].squeeze(0).permute(1, 0, 2, 3), torch.tensor(label)


class VideoDatasetMulti(Dataset):
    """
    Multiple sequences per video – extracted with stride.
    """
    def __init__(self, base_dir, label_encoder, sequence_length=16, image_size=224, stride=8):
        self.sequence_length = sequence_length
        self.image_size = image_size
        self.samples = []
        self.processor = AutoImageProcessor.from_pretrained('facebook/dinov2-base')

        for class_name in os.listdir(base_dir):
            class_path = os.path.join(base_dir, class_name)
            if not os.path.isdir(class_path): continue
            for video_folder in os.listdir(class_path):
                video_path = os.path.join(class_path, video_folder)
                frame_paths = sorted(glob(os.path.join(video_path, '*.png')))
                label = label_encoder.transform([class_name])[0]
                for i in range(0, len(frame_paths) - sequence_length + 1, stride):
                    clip = frame_paths[i:i+sequence_length]
                    if len(clip) == sequence_length:
                        self.samples.append((clip, label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        paths, label = self.samples[idx]
        imgs = [Image.open(p).convert("RGB").resize((224, 224)) for p in paths]
        processed = self.processor(images=imgs, return_tensors="pt")
        return processed['pixel_values'].squeeze(0).permute(1, 0, 2, 3), torch.tensor(label)

class VideoDataset3D(Dataset):
    """
    Wrapper for any video dataset to convert shape to (C, T, H, W) for 3D CNNs.
    """
    def __init__(self, base_dataset):
        self.base_dataset = base_dataset

    def __len__(self):
        return len(self.base_dataset)

    def __getitem__(self, idx):
        pixel_values, label = self.base_dataset[idx]  # (T, C, H, W)
        pixel_values = pixel_values.permute(1, 0, 2, 3)  # (C, T, H, W)
        return pixel_values, label

In [14]:
from sklearn.preprocessing import LabelEncoder

SEQUENCE_LENGTH = 16
IMG_SIZE = 224
BATCH_SIZE = 4
NUM_CLASSES = 8
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
use_multi_sequence = False  # change to 'True' this to switch dataset type

def create_dataset(path, multi=True):
    base_class = VideoDatasetMulti if multi else VideoDatasetSingle
    base_dataset = base_class(path, label_encoder, sequence_length=SEQUENCE_LENGTH, image_size=IMG_SIZE)
    return VideoDataset3D(base_dataset)

# full dataset
train_path = "/content/data_trimmed_restructured/Train"
test_path = "/content/data_trimmed_restructured/Test"

train_dataset = create_dataset(train_path, multi=use_multi_sequence)
test_dataset = create_dataset(test_path, multi=use_multi_sequence)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# subsetted dataset
train_subset_path = "/content/data_trimmed_subset_restructured/Train"
test_subset_path = "/content/data_trimmed_subset_restructured/Test"

train_subset_dataset = create_dataset(train_subset_path, multi=use_multi_sequence)
test_subset_dataset = create_dataset(test_subset_path, multi=use_multi_sequence)

train_subset_loader = DataLoader(train_subset_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
test_subset_loader = DataLoader(test_subset_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

## Phase 1: Initial Model Setup

* Simple 3D CNN (3 conv blocks + global avg pool + FC)
* Optimizer: Adam
* Loss: CrossEntropy
* Dataset: Full, single sequence per video (for faster iteration)
* 15 epochs


In [15]:
# RUN JUST ONCE ON COLAB
!pip install -q wandb

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, roc_auc_score
import wandb

class Simple3DCNN(nn.Module):
    def __init__(self, num_classes=8):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv3d(3, 32, kernel_size=(3, 5, 5), stride=1, padding=(1, 2, 2)),
            nn.BatchNorm3d(32),
            nn.ReLU(),
            nn.MaxPool3d((1, 2, 2)),

            nn.Conv3d(32, 64, kernel_size=(3, 3, 3), stride=1, padding=1),
            nn.BatchNorm3d(64),
            nn.ReLU(),
            nn.MaxPool3d((2, 2, 2)),

            nn.Conv3d(64, 128, kernel_size=(3, 3, 3), stride=1, padding=1),
            nn.BatchNorm3d(128),
            nn.ReLU(),
            nn.AdaptiveAvgPool3d((1, 1, 1)),
        )
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.model(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

In [7]:
# evaluation function
def evaluate(model, loader, criterion, split='Test', return_metrics=False):
    model.eval()
    total_loss = 0.0
    total_preds, total_probs, total_labels = [], [], []

    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(DEVICE), y.to(DEVICE)

            # Fix: permute input to match model's expected shape
            x = x.permute(0, 2, 1, 3, 4)  # [B, T, 3, H, W] → [B, 3, T, H, W]

            logits = model(x)
            probs = F.softmax(logits, dim=1)
            loss = criterion(logits, y)

            total_loss += loss.item()
            total_probs.append(probs.cpu())
            total_preds.append(torch.argmax(probs, dim=1).cpu())
            total_labels.append(y.cpu())

    y_true = torch.cat(total_labels).numpy()
    y_pred = torch.cat(total_preds).numpy()
    y_probs = torch.cat(total_probs).numpy()

    acc = accuracy_score(y_true, y_pred)
    try:
        auc = roc_auc_score(y_true, y_probs, multi_class='ovr', average='macro')
    except ValueError:
        auc = 0.0

    avg_loss = total_loss / len(loader)

    if return_metrics:
        return avg_loss, acc, auc
    else:
        print(f"{split} Loss: {avg_loss:.4f} | Accuracy: {acc:.4f} | AUC: {auc:.4f}")

In [16]:
model = Simple3DCNN(num_classes=NUM_CLASSES).to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# init wandb
wandb.init(project="3dcnn-phase1", name="simple3dcnn-baseline")
wandb.config.update({
    "architecture": "Simple3DCNN",
    "optimizer": "Adam",
    "learning_rate": 1e-4,
    "loss": "CrossEntropyLoss",
    "batch_size": train_loader.batch_size,
    "device": DEVICE
})

# training loop
EPOCHS = 15
for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0

    for x, y in train_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        logits = model(x.permute(0, 2, 1, 3, 4))
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    avg_train_loss = running_loss / len(train_loader)
    train_loss, train_acc, train_auc = evaluate(model, train_loader, criterion, split='Train', return_metrics=True)
    test_loss, test_acc, test_auc = evaluate(model, test_loader, criterion, split='Test', return_metrics=True)

    # log to wandb
    wandb.log({
        "epoch": epoch + 1,
        "train_loss": train_loss,
        "train_accuracy": train_acc,
        "train_auc": train_auc,
        "test_loss": test_loss,
        "test_accuracy": test_acc,
        "test_auc": test_auc
    })

    # print summary
    print(f"\nEpoch {epoch+1}")
    print(f"Train → Loss: {train_loss:.4f} | Acc: {train_acc:.4f} | AUC: {train_auc:.4f}")
    print(f"Test  → Loss: {test_loss:.4f} | Acc: {test_acc:.4f} | AUC: {test_auc:.4f}")


Epoch 1
Train → Loss: 1.8922 | Acc: 0.3462 | AUC: 0.6682
Test  → Loss: 2.0284 | Acc: 0.2308 | AUC: 0.5935

Epoch 2
Train → Loss: 1.8660 | Acc: 0.3531 | AUC: 0.6644
Test  → Loss: 1.9466 | Acc: 0.3077 | AUC: 0.6314

Epoch 3
Train → Loss: 1.8110 | Acc: 0.3741 | AUC: 0.6994
Test  → Loss: 1.9394 | Acc: 0.2923 | AUC: 0.6284

Epoch 4
Train → Loss: 1.7728 | Acc: 0.3636 | AUC: 0.7073
Test  → Loss: 1.9418 | Acc: 0.3077 | AUC: 0.6330

Epoch 5
Train → Loss: 1.7466 | Acc: 0.3951 | AUC: 0.7257
Test  → Loss: 1.9630 | Acc: 0.3231 | AUC: 0.6305

Epoch 6
Train → Loss: 1.8084 | Acc: 0.3881 | AUC: 0.7267
Test  → Loss: 1.9369 | Acc: 0.2769 | AUC: 0.6290

Epoch 7
Train → Loss: 1.7893 | Acc: 0.3741 | AUC: 0.7261
Test  → Loss: 1.9841 | Acc: 0.2923 | AUC: 0.6241

Epoch 8
Train → Loss: 1.7259 | Acc: 0.4126 | AUC: 0.7191
Test  → Loss: 1.9767 | Acc: 0.3077 | AUC: 0.6367

Epoch 9
Train → Loss: 1.7306 | Acc: 0.4021 | AUC: 0.7359
Test  → Loss: 1.9961 | Acc: 0.2462 | AUC: 0.6324

Epoch 10
Train → Loss: 1.7139 | Acc:

## Phase 2: Base model with AdamW, weight decay, and learning rate scheduler

* Same model
* Optimizer: AdamW with weight decay
* Scheduler: StepLR
* Dataset: Full, single sequence per video (for faster iteration)
* 15 epochs

In [18]:
from torch.optim import AdamW
from torch.optim.lr_scheduler import StepLR

model = Simple3DCNN(num_classes=NUM_CLASSES).to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)

# init wandb
wandb.init(project="3d-cnn-phase2", name="phase2-single-seq-full", config={
    "model": "Simple3DCNN",
    "optimizer": "AdamW",
    "lr": 1e-4,
    "weight_decay": 1e-4,
    "scheduler": "StepLR (step=10, gamma=0.5)",
    "epochs": 15,
    "batch_size": BATCH_SIZE,
    "sequence_type": "single",
    "dataset": "full"
})

# training loop
EPOCHS = 15
for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for x, y in train_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        logits = model(x.permute(0, 2, 1, 3, 4))
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    avg_train_loss = running_loss / len(train_loader)
    train_loss, train_acc, train_auc = evaluate(model, train_loader, criterion, split='Train', return_metrics=True)
    test_loss, test_acc, test_auc = evaluate(model, test_loader, criterion, split='Test', return_metrics=True)

    # log
    wandb.log({
        "epoch": epoch + 1,
        "train_loss": train_loss,
        "train_accuracy": train_acc,
        "train_auc": train_auc,
        "test_loss": test_loss,
        "test_accuracy": test_acc,
        "test_auc": test_auc,
        "lr": scheduler.get_last_lr()[0]
    })

    print(f"\nEpoch {epoch+1}")
    print(f"Train → Loss: {train_loss:.4f} | Acc: {train_acc:.4f} | AUC: {train_auc:.4f}")
    print(f"Test  → Loss: {test_loss:.4f} | Acc: {test_acc:.4f} | AUC: {test_auc:.4f}")

    scheduler.step()


Epoch 1
Train → Loss: 1.9199 | Acc: 0.3252 | AUC: 0.6700
Test  → Loss: 2.0497 | Acc: 0.2308 | AUC: 0.5822

Epoch 2
Train → Loss: 1.8026 | Acc: 0.3566 | AUC: 0.7111
Test  → Loss: 1.9851 | Acc: 0.3077 | AUC: 0.6112

Epoch 3
Train → Loss: 1.7898 | Acc: 0.3636 | AUC: 0.7132
Test  → Loss: 1.9625 | Acc: 0.3385 | AUC: 0.6063

Epoch 4
Train → Loss: 1.8295 | Acc: 0.3357 | AUC: 0.6746
Test  → Loss: 2.0562 | Acc: 0.2769 | AUC: 0.6092

Epoch 5
Train → Loss: 1.7592 | Acc: 0.3741 | AUC: 0.7212
Test  → Loss: 2.0010 | Acc: 0.3231 | AUC: 0.6084

Epoch 6
Train → Loss: 1.7527 | Acc: 0.3776 | AUC: 0.7295
Test  → Loss: 2.0291 | Acc: 0.2769 | AUC: 0.6338

Epoch 7
Train → Loss: 1.7258 | Acc: 0.3811 | AUC: 0.7445
Test  → Loss: 1.9944 | Acc: 0.3077 | AUC: 0.6323

Epoch 8
Train → Loss: 1.6967 | Acc: 0.3951 | AUC: 0.7475
Test  → Loss: 1.9895 | Acc: 0.3077 | AUC: 0.6278

Epoch 9
Train → Loss: 1.7317 | Acc: 0.3776 | AUC: 0.7315
Test  → Loss: 1.9815 | Acc: 0.3077 | AUC: 0.6444

Epoch 10
Train → Loss: 1.6719 | Acc:

## Phase 3: Added Conv layer

In [9]:
import torch
import torch.nn as nn

class Simple3DCNNPhase3(nn.Module):
    def __init__(self, num_classes=8, dropout=None):
        super().__init__()
        layers = [
            nn.Conv3d(3, 32, kernel_size=(3, 5, 5), stride=1, padding=(1, 2, 2)),
            nn.BatchNorm3d(32),
            nn.ReLU(),
            nn.MaxPool3d((1, 2, 2)),

            nn.Conv3d(32, 64, kernel_size=(3, 3, 3), stride=1, padding=1),
            nn.BatchNorm3d(64),
            nn.ReLU(),
            nn.MaxPool3d((2, 2, 2)),

            nn.Conv3d(64, 128, kernel_size=(3, 3, 3), stride=1, padding=1),
            nn.BatchNorm3d(128),
            nn.ReLU(),
            nn.MaxPool3d((2, 2, 2)),

            # Extra Conv Block (Phase 3 addition)
            nn.Conv3d(128, 256, kernel_size=(3, 3, 3), stride=1, padding=1),
            nn.BatchNorm3d(256),
            nn.ReLU(),
        ]

        if dropout:
            layers.append(nn.Dropout3d(p=dropout))

        layers.append(nn.AdaptiveAvgPool3d((1, 1, 1)))
        self.model = nn.Sequential(*layers)
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.model(x)  # (B, 256, 1, 1, 1)
        x = x.view(x.size(0), -1)  # (B, 256)
        return self.fc(x)

In [20]:
model = Simple3DCNNPhase3(num_classes=NUM_CLASSES, dropout=None).to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)

wandb.init(project="3dcnn-phase3", name="phase3-extra-conv", config={
    "architecture": "Simple3DCNNPhase3",
    "dropout": None,
    "optimizer": "AdamW",
    "learning_rate": 1e-4,
    "weight_decay": 1e-4,
    "scheduler": "StepLR (step=10, gamma=0.5)",
    "epochs": 15,
    "batch_size": train_loader.batch_size,
    "dataset": "full-single-sequence"
})

EPOCHS = 15
for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0

    for x, y in train_loader:  # full dataset, single sequence per video
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        logits = model(x.permute(0, 2, 1, 3, 4))  # (B, 3, T, H, W)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    avg_train_loss = running_loss / len(train_loader)
    train_loss, train_acc, train_auc = evaluate(model, train_loader, criterion, split='Train', return_metrics=True)
    test_loss, test_acc, test_auc = evaluate(model, test_loader, criterion, split='Test', return_metrics=True)

    wandb.log({
        "epoch": epoch + 1,
        "train_loss": train_loss,
        "train_accuracy": train_acc,
        "train_auc": train_auc,
        "test_loss": test_loss,
        "test_accuracy": test_acc,
        "test_auc": test_auc,
        "lr": scheduler.get_last_lr()[0]
    })

    print(f"\nEpoch {epoch+1}")
    print(f"Train → Loss: {train_loss:.4f} | Acc: {train_acc:.4f} | AUC: {train_auc:.4f}")
    print(f"Test  → Loss: {test_loss:.4f} | Acc: {test_acc:.4f} | AUC: {test_auc:.4f}")
    scheduler.step()

0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
lr,██████████▁▁▁▁▁
test_accuracy,▁▆█▄▇▄▆▆▆▇▆▇▇█▅
test_auc,▁▄▄▄▄▇▇▆█▆▆▅▆▇▆
test_loss,█▃▂█▄▆▄▃▃▂▃▂▂▁▃
train_accuracy,▁▃▃▂▄▄▅▅▄▆▆████
train_auc,▁▄▄▁▄▅▆▆▅▇▇▇▇██
train_loss,█▅▅▆▄▄▃▃▄▂▂▂▁▂▁

0,1
epoch,15.0
lr,5e-05
test_accuracy,0.29231
test_auc,0.63017
test_loss,1.98735
train_accuracy,0.43706
train_auc,0.7806
train_loss,1.62363



Epoch 1
Train → Loss: 1.8836 | Acc: 0.3462 | AUC: 0.6775
Test  → Loss: 2.0515 | Acc: 0.2308 | AUC: 0.6006

Epoch 2
Train → Loss: 1.8030 | Acc: 0.3287 | AUC: 0.6928
Test  → Loss: 2.0253 | Acc: 0.2154 | AUC: 0.5900

Epoch 3
Train → Loss: 1.7203 | Acc: 0.3846 | AUC: 0.7339
Test  → Loss: 2.0430 | Acc: 0.3231 | AUC: 0.6309

Epoch 4
Train → Loss: 1.7407 | Acc: 0.3427 | AUC: 0.7345
Test  → Loss: 2.1026 | Acc: 0.2923 | AUC: 0.6167

Epoch 5
Train → Loss: 1.7221 | Acc: 0.3846 | AUC: 0.7324
Test  → Loss: 2.0567 | Acc: 0.3385 | AUC: 0.6316

Epoch 6
Train → Loss: 1.6295 | Acc: 0.4371 | AUC: 0.7612
Test  → Loss: 2.0172 | Acc: 0.2769 | AUC: 0.6341

Epoch 7
Train → Loss: 1.6201 | Acc: 0.4336 | AUC: 0.7786
Test  → Loss: 1.9558 | Acc: 0.2769 | AUC: 0.6390

Epoch 8
Train → Loss: 1.6247 | Acc: 0.4231 | AUC: 0.7723
Test  → Loss: 2.0037 | Acc: 0.2769 | AUC: 0.6430

Epoch 9
Train → Loss: 1.5841 | Acc: 0.4161 | AUC: 0.7846
Test  → Loss: 2.1543 | Acc: 0.2769 | AUC: 0.6257

Epoch 10
Train → Loss: 1.5689 | Acc:

## Phase 4: Class-balanced Loss

In [12]:
from collections import Counter
import torch

labels = [int(label) for _, label in train_dataset]
label_counts = Counter(labels)

class_counts = torch.tensor([label_counts.get(i, 0) for i in range(NUM_CLASSES)], dtype=torch.float)
print("Class Counts:", class_counts)

class_weights = 1.0 / (class_counts + 1e-6)
class_weights = class_weights / class_weights.sum() * NUM_CLASSES
print("Class Weights:", class_weights)

Class Counts: tensor([30., 20., 34., 40., 20., 35., 80., 27.])
Class Weights: tensor([1.0031, 1.5046, 0.8851, 0.7523, 1.5046, 0.8598, 0.3761, 1.1145])


In [16]:
import wandb
from torch.optim import AdamW
from torch.optim.lr_scheduler import StepLR

model = Simple3DCNNPhase3(num_classes=NUM_CLASSES, dropout=None).to(DEVICE)
criterion = nn.CrossEntropyLoss(weight=class_weights.to(DEVICE)) # change added here!!!
optimizer = AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = StepLR(optimizer, step_size=10, gamma=0.5)

# Init wandb
wandb.init(project="3dcnn-phase4", config={
    "model": "Simple3DCNNPhase3",
    "optimizer": "AdamW",
    "lr": 1e-4,
    "weight_decay": 1e-4,
    "scheduler": "StepLR (step=10, gamma=0.5)",
    "epochs": 15,
    "loss": "CrossEntropyLoss (class-weighted)",
    "dropout": None
})

EPOCHS = 15
for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for x, y in train_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        logits = model(x.permute(0, 2, 1, 3, 4))  # [B, C, T, H, W]
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    avg_train_loss = running_loss / len(train_loader)
    train_loss, train_acc, train_auc = evaluate(model, train_loader, criterion, split='Train', return_metrics=True)
    test_loss, test_acc, test_auc = evaluate(model, test_loader, criterion, split='Test', return_metrics=True)

    wandb.log({
        "epoch": epoch + 1,
        "train_loss": train_loss,
        "train_accuracy": train_acc,
        "train_auc": train_auc,
        "test_loss": test_loss,
        "test_accuracy": test_acc,
        "test_auc": test_auc,
        "lr": scheduler.get_last_lr()[0]
    })

    print(f"\nEpoch {epoch+1}")
    print(f"Train → Loss: {train_loss:.4f} | Acc: {train_acc:.4f} | AUC: {train_auc:.4f}")
    print(f"Test  → Loss: {test_loss:.4f} | Acc: {test_acc:.4f} | AUC: {test_auc:.4f}")

    scheduler.step()


Epoch 1
Train → Loss: 1.9277 | Acc: 0.2972 | AUC: 0.6831
Test  → Loss: 2.1697 | Acc: 0.1077 | AUC: 0.5405

Epoch 2
Train → Loss: 1.9445 | Acc: 0.2308 | AUC: 0.6737
Test  → Loss: 2.2439 | Acc: 0.0769 | AUC: 0.5222

Epoch 3
Train → Loss: 1.8460 | Acc: 0.3916 | AUC: 0.7201
Test  → Loss: 2.0864 | Acc: 0.2154 | AUC: 0.5837

Epoch 4
Train → Loss: 1.8391 | Acc: 0.2902 | AUC: 0.7431
Test  → Loss: 2.1505 | Acc: 0.1077 | AUC: 0.5338

Epoch 5
Train → Loss: 1.8435 | Acc: 0.4091 | AUC: 0.7465
Test  → Loss: 2.2093 | Acc: 0.2615 | AUC: 0.5896

Epoch 6
Train → Loss: 1.7581 | Acc: 0.4056 | AUC: 0.7803
Test  → Loss: 2.1125 | Acc: 0.1692 | AUC: 0.5701

Epoch 7
Train → Loss: 1.7196 | Acc: 0.4406 | AUC: 0.7878
Test  → Loss: 2.1110 | Acc: 0.1846 | AUC: 0.5706

Epoch 8
Train → Loss: 1.7861 | Acc: 0.2797 | AUC: 0.7812
Test  → Loss: 2.1983 | Acc: 0.1692 | AUC: 0.6088

Epoch 9
Train → Loss: 1.6733 | Acc: 0.4720 | AUC: 0.8008
Test  → Loss: 2.0676 | Acc: 0.2308 | AUC: 0.5948

Epoch 10
Train → Loss: 1.6778 | Acc:

## Phase 5: Grid Search

In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW
from torch.optim.lr_scheduler import StepLR
from sklearn.metrics import accuracy_score, roc_auc_score
import wandb
from copy import deepcopy

class Simple3DCNNGrid(nn.Module):
    def __init__(self, num_classes=8, dropout=None):
        super().__init__()
        layers = [
            nn.Conv3d(3, 32, kernel_size=(3, 5, 5), stride=1, padding=(1, 2, 2)),
            nn.BatchNorm3d(32),
            nn.ReLU(),
            nn.MaxPool3d((1, 2, 2)),

            nn.Conv3d(32, 64, kernel_size=(3, 3, 3), stride=1, padding=1),
            nn.BatchNorm3d(64),
            nn.ReLU(),
            nn.MaxPool3d((2, 2, 2)),

            nn.Conv3d(64, 128, kernel_size=(3, 3, 3), stride=1, padding=1),
            nn.BatchNorm3d(128),
            nn.ReLU(),
            nn.MaxPool3d((2, 2, 2)),

            nn.Conv3d(128, 256, kernel_size=(3, 3, 3), stride=1, padding=1),
            nn.BatchNorm3d(256),
            nn.ReLU()
        ]
        if dropout:
            layers.append(nn.Dropout3d(p=dropout))
        layers.append(nn.AdaptiveAvgPool3d((1, 1, 1)))

        self.model = nn.Sequential(*layers)
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.model(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

In [None]:
import pandas as pd

# define grid
dropouts = [None, 0.2, 0.5]
weight_decays = [0.0, 1e-4]
learning_rates = [1e-4, 5e-5]

EPOCHS = 15
results = []

# grid search loop
sweep_idx = 0
for dropout in dropouts:
    for wd in weight_decays:
        for lr in learning_rates:
            sweep_id = f"d{dropout}_wd{wd}_lr{lr}"

            print(f"\nStarting Sweep: {sweep_id}")

            # Set up model, loss, optimizer
            model = Simple3DCNNGrid(num_classes=NUM_CLASSES, dropout=dropout).to(DEVICE)
            criterion = nn.CrossEntropyLoss(weight=class_weights.to(DEVICE))  # no label smoothing
            optimizer = AdamW(model.parameters(), lr=lr, weight_decay=wd)
            scheduler = StepLR(optimizer, step_size=10, gamma=0.5)

            wandb.init(project="3dcnn-gridsearch", name=sweep_id)
            wandb.config.update({
                "dropout": dropout,
                "weight_decay": wd,
                "lr": lr,
                "epochs": EPOCHS,
                "batch_size": train_loader.batch_size,
            })

            for epoch in range(EPOCHS):
                model.train()
                running_loss = 0.0

                for x, y in train_loader:
                    x, y = x.to(DEVICE), y.to(DEVICE)
                    optimizer.zero_grad()
                    logits = model(x.permute(0, 2, 1, 3, 4))  # (B, 3, T, H, W)
                    loss = criterion(logits, y)
                    loss.backward()
                    optimizer.step()
                    running_loss += loss.item()

                avg_train_loss = running_loss / len(train_loader)

                train_loss, train_acc, train_auc = evaluate(model, train_loader, criterion, return_metrics=True)
                test_loss, test_acc, test_auc = evaluate(model, test_loader, criterion, return_metrics=True)

                # Log to wandb
                wandb.log({
                    "epoch": epoch + 1,
                    "train_loss": train_loss,
                    "train_accuracy": train_acc,
                    "train_auc": train_auc,
                    "test_loss": test_loss,
                    "test_accuracy": test_acc,
                    "test_auc": test_auc,
                    "lr": scheduler.get_last_lr()[0],
                })

                # Log to results df
                results.append({
                    "sweep_id": sweep_id,
                    "epoch": epoch + 1,
                    "dropout": dropout,
                    "weight_decay": wd,
                    "lr": lr,
                    "train_loss": train_loss,
                    "train_acc": train_acc,
                    "train_auc": train_auc,
                    "test_loss": test_loss,
                    "test_acc": test_acc,
                    "test_auc": test_auc,
                })

                scheduler.step()

            wandb.finish()
            # print last epoch summary for this sweep
            last_epoch_metrics = results[-1]
            print(f"\nSweep {sweep_id} complete.")
            print(f"Last Epoch Summary:")
            print(f"  Train → Loss: {last_epoch_metrics['train_loss']:.4f} | "
                  f"Acc: {last_epoch_metrics['train_acc']:.4f} | "
                  f"AUC: {last_epoch_metrics['train_auc']:.4f}")
            print(f"  Test  → Loss: {last_epoch_metrics['test_loss']:.4f} | "
                  f"Acc: {last_epoch_metrics['test_acc']:.4f} | "
                  f"AUC: {last_epoch_metrics['test_auc']:.4f}\n")
            sweep_idx += 1

# Save all sweep results
df_grid_search = pd.DataFrame(results)


Starting Sweep: dNone_wd0.0_lr0.0001


0,1
epoch,▁▂▃▄▅▅▆▇█
lr,▁▁▁▁▁▁▁▁▁
test_accuracy,▂▅▁█▁▆▄▄▅
test_auc,▁▇▄▇█▇▄▇█
test_loss,▄▁▄▅█▁▄▅▁
train_accuracy,▁▄▄▇▅▄██▇
train_auc,▁▃▅▅▆▆▇██
train_loss,█▅▄▄▃▄▁▁▂

0,1
epoch,9.0
lr,0.0001
test_accuracy,0.26154
test_auc,0.59529
test_loss,2.07944
train_accuracy,0.40559
train_auc,0.79879
train_loss,1.69904


0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
lr,██████████▁▁▁▁▁
test_accuracy,▁▂▃▅█▁▂▅▂▅▅▂▅▅▅
test_auc,▁▄▆█▅▃▅▆▄▃▅▅▆▅▆
test_loss,▇▇▄▄▄▄█▁▄▆▄█▄▅▆
train_accuracy,▁▄▄▄▆▃▃▅▃▆█▇▇▇█
train_auc,▁▂▃▃▅▄▅▄▅▅▇▇▇▇█
train_loss,█▆▆▆▅▅▅▅▄▄▂▂▂▁▁

0,1
epoch,15.0
lr,5e-05
test_accuracy,0.27692
test_auc,0.59259
test_loss,2.16671
train_accuracy,0.48951
train_auc,0.84887
train_loss,1.55051



Sweep dNone_wd0.0_lr0.0001 complete.
Last Epoch Summary:
  Train → Loss: 1.5505 | Acc: 0.4895 | AUC: 0.8489
  Test  → Loss: 2.1667 | Acc: 0.2769 | AUC: 0.5926


Starting Sweep: dNone_wd0.0_lr5e-05
