In [1]:
import os
from pathlib import Path

from google.colab import drive

# Google Drive is mounted at a fixed absolute location.
DRIVE_MOUNT_POINT = Path('/content/drive')
drive.mount(str(DRIVE_MOUNT_POINT))

# Anchor the repository path on the absolute mount point rather than on
# Path.cwd(): this cell changes the working directory below, so a cwd-relative
# path would resolve to a non-existent nested directory (and os.chdir would
# fail) if the cell were executed a second time in the same kernel.
repo_path = DRIVE_MOUNT_POINT/'MyDrive/calcification-detection-project/calcification_detecion/calc-det/notebooks/'
os.chdir(str(repo_path))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
from pathlib import Path
thispath = Path.cwd().resolve()
import sys; sys.path.insert(0, str(thispath.parent))

from deep_learning.dataset.dataset import INBreast_Dataset_pytorch

import copy
import torch
import time
import random

import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as T

from torch.optim import lr_scheduler
from torchvision import models
from torch.utils.data import DataLoader
from tqdm import tqdm

INFO:numexpr.utils:NumExpr defaulting to 2 threads.


### Transformations and dataloader

In [3]:
import numpy as np
from sklearn.metrics import roc_curve, f1_score, roc_auc_score, accuracy_score, precision_score, confusion_matrix


def sensivity_specifity_cutoff(y_true: np.ndarray, y_score: np.ndarray):
    '''Select the ROC threshold that maximises Youden's index.

    Youden's index is sensitivity + specificity - 1, which is the same
    quantity as TPR - FPR evaluated at each point of the ROC curve.

    Args:
      y_true (np.ndarray): True binary labels.
      y_score (np.ndarray): Target scores.

    Returns:
      The element of the threshold array produced by ``roc_curve`` at which
      TPR - FPR is largest (the first such element when there are ties).
    '''
    false_pos_rate, true_pos_rate, cutoffs = roc_curve(y_true, y_score)
    youden_index = true_pos_rate - false_pos_rate
    best_position = int(np.argmax(youden_index))
    return cutoffs[best_position]


def _safe_ratio(numerator, denominator):
    '''Return numerator / denominator, or 0.0 when the denominator is zero.'''
    return numerator / denominator if denominator else 0.0


def get_metrics(labels, preds):
    '''Compute binary classification metrics at a data-driven threshold.

    The threshold is the one returned by ``sensivity_specifity_cutoff``
    (Youden's index on the ROC curve of ``labels`` vs ``preds``).

    Args:
      labels: True binary labels encoded as 0/1.
      preds: One continuous score per sample (higher means more positive).

    Returns:
      dict with keys 'auroc', 'f1_score', 'accuracy', 'precision',
      'sensitivity', 'specificity' and 'threshold'. Ratios whose denominator
      is zero are reported as 0.0.
    '''
    labels = np.asarray(labels)
    preds = np.asarray(preds)

    th = sensivity_specifity_cutoff(labels, preds)
    # roc_curve treats a sample as positive when score >= threshold, so the
    # comparison has to be inclusive; a strict '>' would classify the very
    # sample that defines the selected cut-off as negative.
    bin_preds = (preds >= th).astype(int)

    # Force a 2x2 matrix even if only one class appears in labels/predictions,
    # otherwise the four-way unpacking below fails.
    tn, fp, fn, tp = confusion_matrix(labels, bin_preds, labels=[0, 1]).ravel()
    return {'auroc': roc_auc_score(labels, preds),
            'f1_score': f1_score(labels, bin_preds),
            'accuracy': _safe_ratio(tp + tn, tp + tn + fp + fn),
            'precision': _safe_ratio(tp, tp + fp),
            'sensitivity': _safe_ratio(tp, tp + fn),
            'specificity': _safe_ratio(tn, tn + fp),
            'threshold': th
            }

In [4]:
# Channel-wise mean / std used for normalisation in both phases (the standard
# ImageNet statistics used with torchvision's pretrained models).
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# Augmentation pipeline. The individual transforms are chained in this order
# whenever the pipeline is applied.
transforms = nn.Sequential(
    T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0, hue=0),
    T.RandomAffine(
        degrees=(0, 20), translate=None, scale=None, shear=(1, 10, 1, 10),
        interpolation=T.InterpolationMode.BILINEAR, fill=0
    ),
    T.RandomPerspective(distortion_scale=0.2),
    T.RandomRotation(degrees=(0, 20)),
    T.RandomRotation(degrees=(90, 110)),
    T.RandomResizedCrop(size=(224, 224), scale=(0.9,1), ratio=(1,1)),
    T.RandomAutocontrast(),
    T.RandomHorizontalFlip(),
    T.RandomVerticalFlip()
)

# Per-phase preprocessing: training applies the augmentation pipeline with
# probability 0.5 and then normalises; validation only normalises.
data_transforms = {
    'train': nn.Sequential(
        T.RandomApply(transforms=transforms, p=0.5),
        T.Normalize(IMAGENET_MEAN, IMAGENET_STD)
    ),
    'val': T.Normalize(IMAGENET_MEAN, IMAGENET_STD)
}

# Keyword arguments shared by every INBreast_Dataset_pytorch instantiation.
dataset_arguments = {
    'extract_patches': False, 'delete_previous': False,
    'extract_patches_method': 'all', 'patch_size': 224, 'stride': 100,
    'min_breast_fraction_roi': 0.5, 'n_jobs': -1, 'cropped_imgs': True,
    'ignore_diameter_px': 15
}

val_dataset = INBreast_Dataset_pytorch(
    partitions=['validation'], neg_to_pos_ratio=None, **dataset_arguments)
# Validation data is not shuffled: the epoch metrics do not depend on sample
# order, and a fixed order keeps evaluation deterministic between runs.
val_dataloader = DataLoader(
    val_dataset, batch_size=32, shuffle=False, sampler=None,
    batch_sampler=None, num_workers=2, pin_memory=True, drop_last=False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

  return_lesions_mask=False, max_lesion_diam_mm=None, use_muscle_mask=False


In [20]:
def _positive_class_scores(outputs):
    '''Turn raw model outputs into one positive-class score per sample.

    Args:
      outputs (torch.Tensor): Logits of shape (batch, 2), (batch, 1) or (batch,).

    Returns:
      np.ndarray of shape (batch,): softmax probability of class 1 for
      two-logit outputs, sigmoid of the logit for single-logit outputs.

    Raises:
      ValueError: If the output shape is none of the supported ones.
    '''
    logits = outputs.detach().float()
    if logits.ndim == 2 and logits.shape[1] == 2:
        scores = torch.softmax(logits, dim=1)[:, 1]
    elif logits.ndim == 2 and logits.shape[1] == 1:
        scores = torch.sigmoid(logits[:, 0])
    elif logits.ndim == 1:
        scores = torch.sigmoid(logits)
    else:
        raise ValueError(
            f'Cannot derive binary scores from outputs of shape {tuple(logits.shape)}')
    return scores.cpu().numpy()


def train_model(model, criterion, optimizer, scheduler, num_epochs=30):
    '''Train `model`, validating every epoch, and keep the best weights.

    Each epoch a new training dataset is built with `balancing_seed=epoch`,
    the model is trained on it and then evaluated on the validation loader.
    The weights of the epoch with the highest validation F1 score are
    restored into the model before returning.

    Relies on the notebook-level names `dataset_arguments`, `val_dataset`,
    `val_dataloader`, `data_transforms` and `get_metrics`.

    Args:
      model: Network to train; it is expected to already be on the device
        selected here (CUDA when available, otherwise CPU).
      criterion: Loss called as `criterion(outputs, labels)`.
      optimizer: Optimizer updating the trainable parameters.
      scheduler: Learning-rate scheduler, stepped once at the start of every
        epoch except the first.
      num_epochs (int): Number of epochs to run.

    Returns:
      The same `model` object, loaded with the best validation weights.
    '''
    since = time.time()

    # Seed the Python, PyTorch and NumPy random number generators.
    random.seed(0)
    torch.manual_seed(1442)
    np.random.seed(0)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Holders for best model. best_threshold starts as None so the final
    # summary still works when no epoch ever improves on an F1 of 0.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_f1 = 0.0
    best_threshold = None

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Re-draw the training set every epoch with an epoch-dependent seed.
        train_dataset = INBreast_Dataset_pytorch(
            partitions=['train'], neg_to_pos_ratio=5,
            balancing_seed=epoch, **dataset_arguments)

        image_datasets = {'train': train_dataset, 'val': val_dataset}

        dataloaders = {
            'val': val_dataloader,
            'train': DataLoader(
                train_dataset, batch_size=64, shuffle=True, sampler=None,
                batch_sampler=None, num_workers=2, pin_memory=True, drop_last=False)
        }

        dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

        for phase in ['train', 'val']:
            if phase == 'train':
                if epoch != 0:
                    scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            # Holders for losses, preds and labels
            running_loss = 0.0
            epoch_preds = []
            epoch_labels = []

            # Iterate over data.
            for sample in tqdm(dataloaders[phase], total=len(dataloaders[phase])):
                # Apply transformations and send to device
                sample['img'] = data_transforms[phase](sample['img'])
                inputs = sample['img'].to(device)
                labels = sample['label'].to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward pass
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    # The metrics need a single score per sample, not the raw
                    # (batch, n_outputs) logits, so reduce them here.
                    epoch_preds.append(_positive_class_scores(outputs))
                    epoch_labels.append(labels.detach().cpu().numpy())

                    loss = criterion(outputs, labels)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Accumulate the loss weighted by the batch size
                running_loss += loss.item() * inputs.size(0)

            # Compute the metrics for the epoch
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_preds = np.concatenate(epoch_preds)
            epoch_labels = np.concatenate(epoch_labels)
            metrics = get_metrics(epoch_labels, epoch_preds)
            epoch_acc = metrics['accuracy']
            epoch_f1 = metrics['f1_score']
            epoch_auroc = metrics['auroc']
            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} '
                  f'F1: {epoch_f1:.4f} AUROC: {epoch_auroc:.4f}')

            # deep copy the model
            if phase == 'val' and epoch_f1 > best_f1:
                best_f1 = epoch_f1
                best_threshold = metrics['threshold']
                best_model_wts = copy.deepcopy(model.state_dict())
        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {(time_elapsed // 60):.0f}m '
          f'{(time_elapsed % 60):.0f}s')
    print(f'Best val F1 score: {best_f1:.4f}, threshold {best_threshold}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
# Feature-extraction setup: load the pretrained ResNet-18 and freeze every
# parameter it currently has.
model_conv = models.resnet18(pretrained=True)
model_conv.requires_grad_(False)

# The replacement head is created after the freeze; parameters of newly
# constructed modules have requires_grad=True by default.
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(in_features=num_ftrs, out_features=2)
model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

# Only the parameters of the final layer are handed to the optimizer.
optimizer_conv = optim.SGD(
    params=model_conv.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer=optimizer_conv, step_size=7, gamma=0.1)

model_ft = train_model(
    model=model_conv, criterion=criterion, optimizer=optimizer_conv,
    scheduler=exp_lr_scheduler, num_epochs=30)

Epoch 1/30
----------


  return_lesions_mask=False, max_lesion_diam_mm=None, use_muscle_mask=False
  2%|▏         | 11/544 [01:10<1:03:55,  7.20s/it]

In [None]:
# Fine-tuning setup: start from the pretrained ResNet-18 and leave every
# parameter trainable.
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
# nn.CrossEntropyLoss needs one logit per class. With a single output the
# softmax runs over one class only, so the loss is identically zero and a
# class-index target of 1 is out of range; the head therefore has two outputs,
# matching the feature-extraction model.
model_ft.fc = nn.Linear(num_ftrs, 2)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [None]:
# Launch the fine-tuning run; train_model returns the model it was given,
# loaded with the weights of the best validation epoch.
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=30,
)