In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader, random_split, WeightedRandomSampler
from torchvision import transforms
from dataset import MelanomaDataset

In [2]:
# DataLoader settings shared by the training and validation loaders
batch_size = 256
n_workers = 8

# Input locations: label CSV and the directory holding the training JPEGs
annotations_csv = "dataset/siim-isic-melanoma-classification/train.csv"
root_dir = "dataset/siim-isic-melanoma-classification/jpeg/train/"


## 1. Prepare Data

In [3]:
train_transforms = transforms.Compose([

    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.5, 0.5, 0.5],
        std=[0.24, 0.25, 0.25]
    )
])

val_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.Normalize(
        mean=[0.5, 0.5, 0.5],
        std=[0.24, 0.25, 0.25])
])

In [4]:
train_dataset = MelanomaDataset(
    annotations_csv,
    root_dir=root_dir,
    transform=train_transforms,
    test=False,
    test_size=0.2,
    seed=42)

test_dataset = MelanomaDataset(
    annotations_csv,
    root_dir=root_dir,
    transform=train_transforms,
    test=True,
    test_size=0.2,
    seed=42)


In [5]:
import pandas as pd
import numpy as np

# Weight class imbalance
def get_sampler(labels_df):
    """Build a sampler that draws every class with equal total probability.

    Each sample is weighted by ``n_samples / n_samples_in_its_class``, so rare
    classes are over-sampled. Labels may use any encoding (0/1, 1/2, strings,
    ...); weights are looked up by class identity, not by label value.

    Args:
        labels_df: 1-D sequence of class labels (list, NumPy array, pandas
            Series), one entry per dataset sample, in dataset order.

    Returns:
        A ``WeightedRandomSampler`` that draws ``len(labels_df)`` indices per
        pass, with replacement.

    Raises:
        ValueError: if ``labels_df`` is empty.
    """
    labels = np.asarray(labels_df).ravel()
    if labels.size == 0:
        raise ValueError("get_sampler() needs at least one label")

    # `inverse[i]` is the position of labels[i]'s class in the sorted unique
    # array, which lets us index the per-class weights safely.
    _, inverse, counts = np.unique(
        labels, return_inverse=True, return_counts=True
    )
    class_weights = labels.size / counts
    weights = class_weights[np.ravel(inverse)].tolist()
    return WeightedRandomSampler(weights, len(weights), replacement=True)

# Class-balancing sampler built from the training labels
sampler = get_sampler(labels_df=train_dataset.y_train)

In [6]:
# Number of indices the weighted sampler draws in one pass
sampler.num_samples

26500

In [7]:
# Size of the training dataset, for comparison with the sampler's draw count
len(train_dataset)

26500

In [8]:
# Training batches are drawn by the class-balancing sampler
# (a custom sampler and shuffle=True are mutually exclusive).
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    sampler=sampler,
    num_workers=n_workers,
    pin_memory=True,
)

# Validation is read in a fixed order: shuffling adds nothing to evaluation
# and would change batch composition (and the summed loss) between epochs.
val_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=n_workers,
    pin_memory=True,
)

## 2. Prepare Model

In [9]:
# `is_available` has to be *called*: the bare function object is always
# truthy, which would select "cuda" even on machines without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [10]:
# Pretrained ResNet-34 backbone loaded through torch.hub
model = torch.hub.load("pytorch/vision:v0.10.0", "resnet34", pretrained=True)
# Swap the classification head for a single-output layer (one logit for the
# binary target). The input width is read from the existing head instead of
# hard-coding 512, so a different backbone can be dropped in.
model.fc = nn.Linear(model.fc.in_features, 1)
model.to(device)

Using cache found in /home/marcelo/.cache/torch/hub/pytorch_vision_v0.10.0


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [11]:
# Optimisation hyperparameters
epochs = 10
learning_rate = 1e-3
lambda_l2 = 1e-5  # handed to Adam as weight_decay

# Binary cross-entropy on probabilities: the training loop applies the
# sigmoid itself before calling this criterion.
criterion = nn.BCELoss()

optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=learning_rate,
    weight_decay=lambda_l2,
)

In [12]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and report its metrics.

    Args:
        model: network producing one logit per sample.
        loader: iterable yielding ``(images, labels)`` batches.
        criterion: loss taking ``(probabilities, float_labels)``.
        device: device the batches are moved to.
        optimizer: if given, the pass is a training pass (train mode,
            gradients enabled, one optimizer step per batch). If ``None``,
            the pass is an evaluation pass (eval mode, gradients disabled).

    Returns:
        ``(running_loss, accuracy)`` where ``running_loss`` is the *sum* of
        the per-batch losses (so it scales with the number of batches) and
        ``accuracy`` is the fraction of samples whose rounded probability
        equals the label. ``accuracy`` is NaN when the loader is empty.
    """
    is_training = optimizer is not None
    # Set the mode once per pass rather than once per batch.
    model.train(is_training)

    running_loss = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(is_training):
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device).float()

            # 1. Forward pass: logits -> probabilities, flattened to match labels
            y_pred = torch.sigmoid(model(images)).view(-1)

            # 2. Loss on probabilities
            loss = criterion(y_pred, labels)

            if is_training:
                # 3. Clear stale gradients, backpropagate, update parameters
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            running_loss += loss.item()
            n_correct += (labels == torch.round(y_pred)).sum().item()
            n_seen += labels.numel()

    # Guard against an empty loader instead of dividing by zero.
    accuracy = n_correct / n_seen if n_seen else float("nan")
    return running_loss, accuracy


for epoch in range(epochs):

    # Training
    running_loss, acc = _run_epoch(model, train_loader, criterion, device, optimizer)
    print(f"Train - Epoch: {epoch}, Loss: {running_loss: .2f}, Accuracy: {acc: .4f}")

    # Validation
    running_loss, acc = _run_epoch(model, val_loader, criterion, device)
    print(f"Eval - Epoch: {epoch}, Loss: {running_loss: .2f}, Accuracy: {acc: .4f}")