In [None]:
from google.colab import drive
from pathlib import Path
import os

# Mount Google Drive so the project repository stored there is reachable.
drive.mount('/content/drive')

# Build the repository path from the absolute mount point rather than from
# Path.cwd(): this cell changes the working directory, so a cwd-relative path
# would no longer resolve if the cell were executed a second time.
repo_path = Path('/content/drive')/'MyDrive/calcification-detection-project/calcification_detecion/calc-det/notebooks/'
os.chdir(str(repo_path))

In [None]:
# Copy the zipped ROI data from Drive to the local disk and unpack it under /home.
!cp -r /content/drive/MyDrive/calcification-detection-project/calcification_detecion/data_rois.zip /home/
!unzip /home/data_rois.zip -d /home
# The unpacked files end up under /home/home/vzalevskyi/projects/; move them to
# /home/data_rois and remove the leftover directory tree.
!mv /home/home/vzalevskyi/projects/data_rois /home/data_rois
!rm -r /home/home

In [1]:
# Put the parent of the current working directory first on sys.path;
# presumably this is what makes the project packages imported below
# (deep_learning, general_utils) resolvable - confirm against the repo layout.
from pathlib import Path
thispath = Path.cwd().resolve()
import sys; sys.path.insert(0, str(thispath.parent))

from deep_learning.dataset.dataset import INBreast_Dataset_pytorch

import copy
import torch
import time
import random
import pickle
from general_utils.plots import simple_im_show, simple_im_show2
from collections import Counter

import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as T

from torch.optim import lr_scheduler
from torchvision import models
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

In [2]:
import numpy as np
from sklearn.metrics import roc_curve, f1_score, roc_auc_score, accuracy_score, precision_score, confusion_matrix


def sensivity_specifity_cutoff(y_true: np.ndarray, y_score: np.ndarray):
    '''Pick a data-driven classification cut-off from the ROC curve.

    The returned threshold is the one that maximises Youden's index,
    J = sensitivity + specificity - 1, which is the same as TPR - FPR.

    Args:
      y_true (np.ndarray): True binary labels.
      y_score (np.ndarray): Target scores.
    Returns:
      The ROC threshold at which TPR - FPR is largest.
    '''
    false_pos_rate, true_pos_rate, cutoffs = roc_curve(y_true, y_score)
    youden_j = true_pos_rate - false_pos_rate
    return cutoffs[youden_j.argmax()]


def get_metrics(labels, preds):
    '''Compute binary classification metrics at the Youden-optimal threshold.

    Args:
        labels: true binary labels (0/1).
        preds: predicted scores, one per label.

    Returns:
        dict with the keys 'auroc', 'f1_score', 'accuracy', 'precision',
        'sensitivity', 'specificity' and 'threshold'. Ratios whose
        denominator is zero are reported as 0.0.
    '''
    labels = np.asarray(labels)
    preds = np.asarray(preds)

    def _ratio(num, den):
        # Guard against empty denominators (e.g. no positive predictions)
        return float(num) / float(den) if den else 0.0

    th = sensivity_specifity_cutoff(labels, preds)
    # roc_curve treats a sample as positive when score >= threshold, so the
    # comparison must be inclusive to reproduce the selected operating point.
    bin_preds = (preds >= th).astype(int)
    # labels=[0, 1] keeps the matrix 2x2 even when a class is missing, so the
    # four-way unpacking below cannot fail.
    tn, fp, fn, tp = confusion_matrix(labels, bin_preds, labels=[0, 1]).ravel()

    return {'auroc': roc_auc_score(labels, preds),
            'f1_score': f1_score(labels, bin_preds),
            'accuracy': _ratio(tp + tn, tp + tn + fp + fn),
            'precision': _ratio(tp, tp + fp),
            'sensitivity': _ratio(tp, tp + fn),
            'specificity': _ratio(tn, tn + fp),
            'threshold': th
            }


def tensorboard_logs(writer, epoch_loss, epoch, metrics, phase):
    '''Write the epoch loss and the classification metrics to TensorBoard.

    Args:
        writer: object exposing ``add_scalar(tag, value, step)``.
        epoch_loss: loss value logged under ``Loss/<phase>``.
        epoch: step index used for every scalar.
        metrics (dict): must contain 'accuracy', 'f1_score', 'auroc',
            'sensitivity', 'specificity' and 'precision'.
        phase (str): suffix appended to every tag.
    '''
    writer.add_scalar(f"Loss/{phase}", epoch_loss, epoch)

    # (tag prefix, key in ``metrics``) pairs, in logging order
    tag_to_key = (
        ('Accuracy', 'accuracy'),
        ('F1_score', 'f1_score'),
        ('Auroc', 'auroc'),
        ('Sensitivity', 'sensitivity'),
        ('Specificity', 'specificity'),
        ('Precision', 'precision'),
    )
    for tag, key in tag_to_key:
        writer.add_scalar(f"{tag}/{phase}", metrics[key], epoch)

In [3]:
# Experiment configuration
settings = {
    'activation': 'leaky_relu',
    'dropout': 0.2,
    'fc_dims': (512, 512),
    'freeze_weights': False,
    'backbone': 'resnet18',
    'pretrained': True,
    'criterion': 'bce_with_logits',
    'lr': 0.0001,
    'optim': 'sgd',
    'momentum': 0.9,
    'lr_scheduler': 'steplr_sz_10_g_0.1',
    'n_epochs': 3,
    'experiment_name': 'resnet18',
    'transforms': False
}


# Location of the ROI patches. The workstation path is preferred and the
# Google Drive / Colab copy ('/home/data_rois/') is the fallback, so the path
# no longer has to be edited by hand when switching environments. If neither
# directory exists the workstation path is kept, as in the original config.
_roi_dir_candidates = (
    Path('/home/vzalevskyi/projects/data_rois/'),
    Path('/home/data_rois/'),
)
patch_images_path = next(
    (candidate for candidate in _roi_dir_candidates if candidate.exists()),
    _roi_dir_candidates[0]
)

dataset_arguments = {
    'extract_patches': False, 'delete_previous': False,
    'extract_patches_method': 'all', 'patch_size': 224, 'stride': 100,
    'min_breast_fraction_roi': 0.5, 'n_jobs': -1, 'cropped_imgs': True,
    'ignore_diameter_px': 15, 'patch_images_path': patch_images_path
}

settings['dataset_arguments'] = dataset_arguments
settings['neg_to_pos_ratio'] = 1

# Datasets for the two phases. The training split is built with the configured
# negative-to-positive ratio and a fixed balancing seed; the validation split
# passes neg_to_pos_ratio=None.
image_datasets = dict(
    train=INBreast_Dataset_pytorch(
        partitions=['train'],
        neg_to_pos_ratio=settings['neg_to_pos_ratio'],
        balancing_seed=0,
        **dataset_arguments
    ),
    val=INBreast_Dataset_pytorch(
        partitions=['validation'],
        neg_to_pos_ratio=None,
        **dataset_arguments
    ),
)

# Loader options shared by both phases; only the training loader shuffles
_loader_kwargs = dict(batch_size=128, num_workers=4, drop_last=False)
dataloaders = dict(
    val=DataLoader(image_datasets['val'], **_loader_kwargs),
    train=DataLoader(image_datasets['train'], shuffle=True, **_loader_kwargs),
)

# No transformations are configured for either phase
data_transforms = dict.fromkeys(('train', 'val'))

  super(INBreast_Dataset_pytorch, self).__init__(


In [4]:
def train_model(model, criterion, optimizer, scheduler, experiment_name, num_epochs=30):
    '''Train and validate ``model``, keeping the weights with the best validation F1.

    Every epoch runs a training pass followed by a validation pass. Loss and
    metrics are printed and written to TensorBoard under
    ``<cwd>/../../data/deepl_runs/<experiment_name>/tensorboard``. The state
    dict of the epoch with the highest validation F1 is pickled to
    ``<experiment_name>.p`` in the run folder and loaded back into ``model``.

    Uses the notebook-level ``image_datasets`` and ``dataloaders`` dicts and
    replaces ``dataloaders['train']`` at the start of every epoch.

    Args:
        model: network whose flattened output is compared with the labels.
        criterion: loss called as ``criterion(logits, float_labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler, stepped once per epoch after the first.
        experiment_name (str): name of the run folder and of the weights file.
        num_epochs (int): number of epochs to run.

    Returns:
        ``model`` with the best validation weights loaded.
    '''
    since = time.time()

    # Guarantee reproducibility
    random.seed(0)
    torch.manual_seed(1442)
    np.random.seed(0)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Holders for best model
    best_model_wts = copy.deepcopy(model.state_dict())
    best_f1 = 0.0
    # Stays None when no epoch improves on the initial F1 of 0.0
    best_threshold = None

    # Tensorboard logs
    run_dir = Path.cwd().parent.parent/f'data/deepl_runs/{experiment_name}'
    log_dir = run_dir/'tensorboard'
    log_dir.mkdir(exist_ok=True, parents=True)
    writer = SummaryWriter(log_dir=log_dir)

    # The writer is closed in ``finally`` so that an interrupted run
    # (e.g. KeyboardInterrupt) still flushes what was logged so far.
    try:
        for epoch in range(num_epochs):
            print(f'Epoch {epoch+1}/{num_epochs}')
            print('-' * 10)

            # Update the dataset for this epoch and rebuild the train loader
            # over it (presumably this re-draws the samples used - confirm
            # in the dataset class).
            image_datasets['train'].update_sample_used(epoch)
            dataloaders['train'] = DataLoader(
                image_datasets['train'],
                batch_size=16,
                shuffle=True,
                num_workers=4,
                drop_last=False
            )

            for phase in ['train', 'val']:
                if phase == 'train':
                    if epoch != 0:
                        scheduler.step()
                    writer.add_scalar(
                        f"LearningRate/{phase}", scheduler.get_last_lr()[0], epoch)
                    model.train()  # Set model to training mode
                else:
                    model.eval()   # Set model to evaluate mode

                # Holders for losses, preds and labels
                running_loss = 0.0
                epoch_preds = []
                epoch_labels = []

                # Iterate over data.
                for it, sample in tqdm(
                        enumerate(dataloaders[phase]), total=len(dataloaders[phase])):

                    # TODO: apply the phase transformations before sending to device
                    # sample['img'] = data_transforms[phase](sample['img'])

                    inputs = sample['img'].to(device)
                    labels = sample['label'].to(device)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward pass
                    # track history only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        # Store probabilities (sigmoid of the logits) for the metrics
                        epoch_preds.append(np.asarray(
                            torch.sigmoid(outputs.detach()).flatten().cpu()))
                        epoch_labels.append(np.asarray(labels.detach().cpu()))

                        loss = criterion(outputs.flatten(), labels.float())
                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()
                    # Log a few example batches per epoch
                    if it in [25, 50, 100]:
                        writer.add_images(
                            f'Images/{phase}', sample['img'].cpu(), epoch)
                    # Accumulate the loss weighted by the batch size
                    running_loss += loss.item() * inputs.size(0)

                # Compute the metrics for the epoch
                epoch_preds = np.concatenate(epoch_preds)
                epoch_labels = np.concatenate(epoch_labels)
                epoch_loss = running_loss / len(epoch_preds)

                metrics = get_metrics(epoch_labels, epoch_preds)
                tensorboard_logs(writer, epoch_loss, epoch, metrics, phase)

                epoch_acc = metrics['accuracy']
                epoch_f1 = metrics['f1_score']
                epoch_auroc = metrics['auroc']
                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}'
                      f' F1: {epoch_f1:.4f} AUROC: {epoch_auroc:.4f}')

                # deep copy the model
                if phase == 'val' and epoch_f1 > best_f1:
                    best_f1 = epoch_f1
                    best_threshold = metrics['threshold']
                    best_model_wts = copy.deepcopy(model.state_dict())
            print()
    finally:
        writer.flush()
        writer.close()

    time_elapsed = time.time() - since
    print(f'Training complete in {(time_elapsed // 60):.0f}m '
          f'{(time_elapsed % 60):.0f}s')
    print(f'Best val F1 score: {best_f1:.4f}, threshold {best_threshold}')

    # Store the best model weights
    run_dir.mkdir(exist_ok=True, parents=True)
    with open(str(run_dir/f'{experiment_name}.p'), 'wb') as f:
        pickle.dump(best_model_wts, f)

    # Load the best model weights before returning
    model.load_state_dict(best_model_wts)
    return model

In [9]:
# hyperparameters
import torch.nn as nn
from collections import OrderedDict

# Parameters of newly constructed modules have requires_grad=True by default
class CNNClasssifier:
    '''Torchvision backbone whose classification head is replaced by a
    three-layer fully connected head with a single output logit.

    The resulting network is available as ``self.model``.

    Args:
        activation (nn.Module): activation used between the fully connected
            layers. ``None`` (default) means ``nn.LeakyReLU()``.
        dropout (float): dropout probability after each hidden layer.
        fc_dims (tuple): sizes of the two hidden layers.
        freeze_weights (bool): if True, backbone parameters get
            ``requires_grad = False``; the new head stays trainable.
        backbone (str): name of a constructor in ``torchvision.models``.
        pretrained (bool): forwarded to the backbone constructor when True.
    '''
    def __init__(
        self,
        activation: nn.Module = None,
        dropout: float = 0.5,
        fc_dims: tuple = (512, 512),
        freeze_weights: bool = False,
        backbone: str = 'resnet18',
        pretrained: bool = True,
    ):
        # Create the default here instead of in the signature so that no
        # module instance is shared between classifier objects.
        if activation is None:
            activation = nn.LeakyReLU()

        self.model = getattr(models, backbone)
        if pretrained:
            self.model = self.model(pretrained=pretrained)
        else:
            self.model = self.model()

        if freeze_weights:
            for param in self.model.parameters():
                param.requires_grad = False

        # The head is called 'fc' in some backbones and 'classifier' in others
        head_name = 'fc' if hasattr(self.model, 'fc') else 'classifier'
        n_inputs = self._first_linear_in_features(
            getattr(self.model, head_name))

        classifier = nn.Sequential(OrderedDict([
            ('fc1', nn.Linear(n_inputs, fc_dims[0])),
            ('act1', activation),
            ('do1', nn.Dropout(dropout)),
            ('fc2', nn.Linear(fc_dims[0], fc_dims[1])),
            # Separate copy so a parametrised activation is not shared
            ('act2', copy.deepcopy(activation)),
            ('do2', nn.Dropout(dropout)),
            ('fc3', nn.Linear(fc_dims[1], 1))
        ]))
        # Initialise only the new head, leaving any (pretrained) linear
        # layers inside the backbone untouched.
        classifier.apply(self.initialize_weights)
        setattr(self.model, head_name, classifier)

    @staticmethod
    def _first_linear_in_features(head):
        '''Return ``in_features`` of the first ``nn.Linear`` found in ``head``.

        Works for a bare ``nn.Linear`` head as well as for sequential heads
        that start with another layer such as dropout.

        Raises:
            ValueError: if ``head`` contains no ``nn.Linear``.
        '''
        for module in head.modules():
            if isinstance(module, nn.Linear):
                return module.in_features
        raise ValueError('No nn.Linear layer found in the backbone head')

    @staticmethod
    def initialize_weights(m):
        '''Kaiming-uniform init for ``nn.Linear`` weights and zero biases;
        other module types are left unchanged.'''
        if isinstance(m, nn.Linear):
            nn.init.kaiming_uniform_(m.weight.data)
            if m.bias is not None:
                nn.init.constant_(m.bias.data, 0)

In [None]:
# Rename the run; the experiment name is used to build the output folder for
# the TensorBoard logs, the model weights and the stored config.
settings['experiment_name'] = 'resnet_overfit_02'

In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Only 'leaky_relu' is wired up; fail early with a clear message instead of
# handing None to the classifier head.
if settings['activation'] != 'leaky_relu':
    raise ValueError(f"Unsupported activation: {settings['activation']!r}")
activation = nn.LeakyReLU()

cnn = CNNClasssifier(
    activation=activation,
    dropout=settings['dropout'],
    fc_dims=settings['fc_dims'],
    freeze_weights=settings['freeze_weights'],
    backbone=settings['backbone'],
    pretrained=settings['pretrained'],
)

model = cnn.model.to(device)

criterion = nn.BCEWithLogitsLoss()

# All model parameters are handed to the optimizer; which of them are actually
# updated depends on settings['freeze_weights'].
optimizer_conv = optim.SGD(
    model.parameters(), lr=settings['lr'], momentum=settings['momentum'])
# Decay LR by a factor of 0.1 every 10 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=10, gamma=0.1)

# Store the configuration before training so it is kept even if the run is
# interrupted.
# NOTE(review): the file name 'cofig.p' looks like a typo of 'config.p'; it is
# kept unchanged because other code may read it under this name.
store_path = \
    Path.cwd().parent.parent/f'data/deepl_runs/{settings["experiment_name"]}'
store_path.mkdir(exist_ok=True, parents=True)
with open(store_path/'cofig.p', 'wb') as f:
    pickle.dump(settings, f)

model_ft = train_model(model, criterion, optimizer_conv, exp_lr_scheduler,
                       settings['experiment_name'], settings['n_epochs'])

Epoch 1/3
----------


  1%|          | 3/547 [00:05<15:13,  1.68s/it]


KeyboardInterrupt: 

In [7]:
# Walk once over the training loader, counting the positive labels and the
# total number of samples it serves.
pos_imgs = tot_imgs = 0
for it, sample in tqdm(enumerate(dataloaders['train']), total=len(dataloaders['train'])):
    pos_imgs = pos_imgs + sample['label'].sum()
    tot_imgs = tot_imgs + len(sample['label'])

100%|██████████| 137/137 [00:05<00:00, 25.18it/s]


In [None]:
# Launch TensorBoard inside the notebook.
# NOTE(review): --logdir points at the 'resnet18' run, while an earlier cell
# sets the experiment name to 'resnet_overfit_02' - confirm this is the run
# that should be displayed.
%load_ext tensorboard
%tensorboard --logdir /content/drive/MyDrive/calcification-detection-project/calcification_detecion/data/deepl_runs/resnet18/tensorboard 