<a href="https://colab.research.google.com/github/bejaeger/ProScanNet/blob/main/ProScanNet_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive; the dataset used later in this notebook is read from /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Fetch the project sources into the current working directory.
!git clone https://github.com/bejaeger/ProScanNet

# Work from inside the checkout so that requirements.txt (and presumably the
# local modules imported further down) resolve relative to it.
%cd ProScanNet
!git pull
# Tell git to merge (not rebase) when pulling, then pull once more.
!git config pull.rebase false
!git pull

!pip install -r requirements.txt  # Assuming you have a requirements.txt file

Mounted at /content/drive
Cloning into 'ProScanNet'...
remote: Enumerating objects: 86, done.[K
remote: Counting objects: 100% (86/86), done.[K
remote: Compressing objects: 100% (58/58), done.[K
remote: Total 86 (delta 48), reused 57 (delta 21), pack-reused 0[K
Receiving objects: 100% (86/86), 25.64 KiB | 2.85 MiB/s, done.
Resolving deltas: 100% (48/48), done.
/content/ProScanNet
Already up to date.
Already up to date.


In [2]:
import os
import argparse
import logging
from typing import *

import torch
from torch.nn import CrossEntropyLoss,  Module
from torch.optim import AdamW
from torch.utils.data import DataLoader

from dataset import CustomDataset
from data_utils import TorchDataset, preprocess
from model import *

In [3]:
from dataset import CustomDataset

# Folder holding the training data on the mounted Google Drive.
dataset_folder = "/content/drive/MyDrive/MLProjects/chaimeleon/data/train"
# Build the dataset from that folder; later cells read `dataset.image_data` and `dataset.labels`.
dataset = CustomDataset(dataset_folder)

100%|██████████| 295/295 [00:32<00:00,  9.15it/s]

loaded Dataset with 295 images, 85 of which are high grade. Average age: 68





In [9]:
# Training hyperparameters. These must be defined before the cells that build
# the data loaders, the optimizer and the training loop, which all read them.
num_epochs = 50  # number of passes over the training loader
batch_size = 32  # samples per DataLoader batch (train and val)
learning_rate = 1e-4  # passed to AdamW as `lr`
train_portion = 0.8  # leading fraction of the data used for training; the rest is validation

In [5]:
# Pad the images to a common depth of 40 (found to be the max depth).
images = preprocess(images=dataset.image_data)
labels = dataset.labels

# TODO: Improve split to ensure equal amounts of positive labels in train and val
# Sequential split: everything before the cut is training data, everything
# from the cut onwards is validation data.
image_cut = int(len(images) * train_portion)
label_cut = int(len(labels) * train_portion)
train_images, val_images = images[:image_cut], images[image_cut:]
train_labels, val_labels = labels[:label_cut], labels[label_cut:]

# Report the class balance of both subsets.
print(f"[train set] num positive/negative labels: {sum(train_labels)} / {len(train_labels) - sum(train_labels)}")
print(f"[val set] num positive/negative labels: {sum(val_labels)} / {len(val_labels) - sum(val_labels)}")

# Only the training loader shuffles; validation keeps a fixed order.
train_dataset = TorchDataset(images=train_images, labels=train_labels)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_dataset = TorchDataset(images=val_images, labels=val_labels)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

[train set] num positive/negative labels: 63 / 173
[val set] num positive/negative labels: 22 / 37


In [6]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Network, optimizer over its parameters, and the classification loss.
model = AlexNet3D()
optimizer = AdamW(params=model.parameters(), lr=learning_rate)
loss_fn = CrossEntropyLoss()

# Place the network on the selected device.
model = model.to(device)

In [7]:
# Print the layer structure of the network for inspection.
print(model)

AlexNet3D(
  (layer1): Sequential(
    (0): Conv3d(1, 8, kernel_size=(11, 11, 11), stride=(4, 4, 4))
    (1): BatchNorm3d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool3d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv3d(8, 12, kernel_size=(5, 5, 5), stride=(1, 1, 1), padding=(2, 2, 2))
    (1): BatchNorm3d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool3d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv3d(12, 4, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (1): BatchNorm3d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (fc): Sequential(
    (0): Dropout(p=0.7, inplace=False)
    (1): Linear(in_features=784, out_features=1024, bias=True)
    (2): ReLU()
  )
  (fc1): Sequential(
    (0): Dropout(p=0.7, inpla

In [10]:
def evaluate(model: Module, loader: DataLoader, loss_fn: Callable) -> Tuple[float, float]:
    """Score `model` on every batch of `loader` without tracking gradients.

    Args:
        model: Network to score. It is switched to eval mode for the duration
            of the call and put back into whatever mode it was in before.
        loader: Iterable of `(images, labels)` batches.
        loss_fn: Called as `loss_fn(outputs, labels)`; must return a scalar
            (a 0-dim tensor or a plain number).

    Returns:
        `(accuracy, loss)` as plain Python floats: accuracy is the percentage
        (0-100) of samples whose arg-max prediction equals the label, loss is
        the mean of the per-batch loss values.

    Raises:
        ZeroDivisionError: If `loader` yields no samples.
    """
    # Run on the device the model lives on instead of relying on a notebook
    # global; a model without parameters leaves the batches where they are.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    loss_sum = 0.0
    try:
        with torch.no_grad():
            for images, labels in loader:
                if target_device is not None:
                    images = images.to(target_device)
                    labels = labels.to(target_device)
                outputs = model(images)
                # float(...) keeps the accumulator a Python number so the
                # returned loss matches the annotated return type.
                loss_sum += float(loss_fn(outputs, labels))
                predicted = outputs.argmax(dim=1)
                total += labels.shape[0]
                correct += (predicted == labels).sum().item()
    finally:
        # Restore the caller's mode rather than forcing train mode.
        model.train(was_training)

    accuracy = 100 * correct / total
    loss = loss_sum / len(loader)
    return accuracy, loss

print("Begin training with model")
print(model)

model.train()
iteration_index = 0  # running count of optimizer steps across all epochs
for epoch in range(num_epochs):
    train_losses = []
    # The batch variables get their own names so this loop does not overwrite
    # the notebook-level `images` / `labels` that hold the full dataset.
    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        outputs = model(batch_images)
        loss = loss_fn(outputs, batch_labels)
        loss.backward()
        optimizer.step()
        # Clear gradients right after the step so the next batch starts from zero.
        optimizer.zero_grad()
        train_losses.append(loss.item())

        iteration_index += 1

    # Score both splits once per epoch; only the validation loss is reported
    # from evaluate(), the train loss is the mean over this epoch's batches.
    train_accuracy, _ = evaluate(model=model, loader=train_loader, loss_fn=loss_fn)
    val_accuracy, val_loss = evaluate(model=model, loader=val_loader, loss_fn=loss_fn)

    print(
        f"[Epoch {epoch+1}/{num_epochs}] "
        f"Train loss: {torch.mean(torch.tensor(train_losses)):.3f}, Val loss: {val_loss:.3f}, "
        f"Train accuracy: {train_accuracy:.3f}, "
        f"Val accuracy: {val_accuracy:.3f}")

# Persist the trained weights. The path is defined once and reused so the
# created directory, the log message and the save target always agree.
output_path = "checkpoints/model.pth"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
print(f"Saving model to `{output_path}`...")
torch.save(model.state_dict(), output_path)


Begin training with model
AlexNet3D(
  (layer1): Sequential(
    (0): Conv3d(1, 8, kernel_size=(11, 11, 11), stride=(4, 4, 4))
    (1): BatchNorm3d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool3d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv3d(8, 12, kernel_size=(5, 5, 5), stride=(1, 1, 1), padding=(2, 2, 2))
    (1): BatchNorm3d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool3d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv3d(12, 4, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (1): BatchNorm3d(4, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (fc): Sequential(
    (0): Dropout(p=0.7, inplace=False)
    (1): Linear(in_features=784, out_features=1024, bias=True)
    (2): ReLU()
  )
  (fc1): Sequential(
   