In [1]:
%%capture
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1X5liSPWmNcPKbAvQGlrpA8aBOjJV8dY6' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1X5liSPWmNcPKbAvQGlrpA8aBOjJV8dY6" -O paper_data.zip && rm -rf /tmp/cookies.txt
!unzip paper_data.zip
!rm -rf paper_data.zip

In [2]:
!pip install --upgrade mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/index.html
!pip install transformers torchinfo

Looking in links: https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/index.html
Collecting mmcv-full
  Downloading https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/mmcv_full-1.4.0-cp37-cp37m-manylinux1_x86_64.whl (58.0 MB)
[K     |████████████████████████████████| 58.0 MB 11.3 MB/s 
[?25hCollecting addict
  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)
Collecting yapf
  Downloading yapf-0.31.0-py2.py3-none-any.whl (185 kB)
[K     |████████████████████████████████| 185 kB 5.0 MB/s 
Installing collected packages: yapf, addict, mmcv-full
Successfully installed addict-2.4.0 mmcv-full-1.4.0 yapf-0.31.0
Collecting transformers
  Downloading transformers-4.13.0-py3-none-any.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 5.0 MB/s 
[?25hCollecting torchinfo
  Downloading torchinfo-1.5.4-py3-none-any.whl (19 kB)
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 57.2 MB/s 
[

In [3]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import cv2
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torch.nn import functional as F
import sys
from upper_model import UpperModel
import warnings
warnings.filterwarnings('ignore')

# Transformers
from transformers import AdamW, get_linear_schedule_with_warmup, get_cosine_with_hard_restarts_schedule_with_warmup

# Pytorch
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from torchinfo import summary

# Others
import os
import glob
import math
import random
from tqdm.notebook import tqdm
import albumentations as A
from PIL import Image
import cv2
# Seed every random number generator used in this notebook (Python, NumPy,
# PyTorch CPU and all CUDA devices) with the same value for repeatable runs.
RANDOM_SEED = 42
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_rng(RANDOM_SEED)
del _seed_rng  # keep the notebook namespace free of the loop helper

No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'


In [4]:
def dice_score(y_true, y_pred):
    """Compute the Dice coefficient between a target and a prediction.

    Args:
        y_true: Tensor of ground-truth labels; elements equal to 1 are
            treated as foreground.
        y_pred: Tensor of predictions with the same shape as ``y_true``.

    Returns:
        Scalar tensor ``2 * sum(y_pred[y_true == 1]) / (sum(y_pred) + sum(y_true))``.
        If both tensors sum to zero the score is defined as 1.0 (nothing to
        find, nothing predicted) instead of the NaN produced by 0 / 0, so a
        single empty batch cannot turn an averaged metric into NaN.
    """
    intersection = torch.sum(y_pred[y_true == 1])
    denominator = torch.sum(y_pred) + torch.sum(y_true)
    if denominator == 0:
        return torch.tensor(1.0, device=denominator.device)
    return intersection * 2.0 / denominator

class PaperDataset(Dataset):
    """Dataset of images under ``data/train/images/`` paired with binary masks.

    Each item is read from disk, down-scaled by ``f``, blurred with a 5x5 box
    filter, zero-padded on the bottom/right to ``input_size`` x ``input_size``
    and normalised with the mean/std given below.

    Args:
        masks: Path to an ``.npz`` archive loaded with ``np.load``; it is
            indexed by image file name and mask pixels equal to 1 are
            treated as foreground.
        f: Scale factor applied to both image and mask along each axis.
        input_size: Side length of the padded square output. The scaled
            image must not be larger than this, otherwise the padding
            amounts passed to OpenCV become negative.
    """

    def __init__(self, masks, f=0.25, input_size=1024):
        self.path = 'data/train/images/'
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> file mapping (and therefore the seeded train/val split)
        # stable between runs and machines.
        self.images = sorted(os.listdir(self.path))
        self.masks = np.load(masks)
        self.data_len = len(self.images)
        self.f = f
        self.input_size = input_size
        self.to_tensor = transforms.ToTensor()
        self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    def __len__(self):
        """Return the number of image files found in ``self.path``."""
        return self.data_len

    def __getitem__(self, index):
        """Load, preprocess and return the sample at ``index``.

        Returns:
            dict with ``'pixel_values'`` (normalised image tensor) and
            ``'segmentations'`` (float mask tensor holding 0.0 / 1.0).
        """
        file_name = self.images[index]
        image = cv2.cvtColor(cv2.imread(os.path.join(self.path, file_name)), cv2.COLOR_BGR2RGB)
        # np.float was removed from NumPy; use an explicit floating dtype.
        mask = self.masks[file_name].astype(np.float32)

        image = cv2.resize(image, (0, 0), fx=self.f, fy=self.f)
        # Nearest-neighbour keeps mask values exactly 0/1. Interpolating
        # would create fractional border values that the `== 1` test below
        # silently discards, eroding the foreground.
        mask = cv2.resize(mask, (0, 0), fx=self.f, fy=self.f, interpolation=cv2.INTER_NEAREST)

        image = cv2.blur(image, (5, 5))

        # Pad bottom/right with zeros up to input_size x input_size.
        image = cv2.copyMakeBorder(image, top=0, bottom=self.input_size - image.shape[0], left=0,
                                   right=self.input_size - image.shape[1], borderType=cv2.BORDER_CONSTANT)
        mask = cv2.copyMakeBorder(mask, top=0, bottom=self.input_size - mask.shape[0], left=0,
                                  right=self.input_size - mask.shape[1], borderType=cv2.BORDER_CONSTANT)

        image = self.normalize(self.to_tensor(image))
        mask = (self.to_tensor(mask) == 1).float()
        return {
            'pixel_values': torch.squeeze(image),
            'segmentations': mask
        }

# Mini-batch size and number of DataLoader worker processes.
BATCH_SIZE = 2
NUM_WORKERS = 1

# Create and split dataset to train and val (80% train, remainder validation)
dataset = PaperDataset('data/train/binary.npz', f=0.25, input_size=1024)
train_size = int(len(dataset) * 0.8)
val_size = len(dataset) - train_size
# random_split draws from the global torch RNG seeded in the imports cell.
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create dataloaders (only the training loader is shuffled)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)
# Smoke test: fetch one batch to verify the pipeline end to end.
# NOTE(review): this prints whole tensors; printing shapes/dtypes would be more readable.
print(next(iter(train_dataloader)))

{'pixel_values': tensor([[[[ 0.9646,  0.9646,  0.9646,  ..., -2.1179, -2.1179, -2.1179],
          [ 0.9646,  0.9646,  0.9646,  ..., -2.1179, -2.1179, -2.1179],
          [ 0.9646,  0.9817,  0.9646,  ..., -2.1179, -2.1179, -2.1179],
          ...,
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
          [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179]],

         [[ 1.1155,  1.1155,  1.1155,  ..., -2.0357, -2.0357, -2.0357],
          [ 1.1155,  1.1155,  1.1155,  ..., -2.0357, -2.0357, -2.0357],
          [ 1.1155,  1.1331,  1.1155,  ..., -2.0357, -2.0357, -2.0357],
          ...,
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
          [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357]],

         [[ 1.1237,  1.1237,  1.1237,  ..., -1.8044, -1.8044, -1.8044],
          [ 1

In [5]:
# Create loss classes
class DiceLoss(nn.Module):
    """Soft Dice loss computed on raw logits.

    Args:
        weight: Unused; kept so existing constructor calls keep working.
        size_average: Unused; kept so existing constructor calls keep working.
    """

    def __init__(self, weight=None, size_average=True):
        super(DiceLoss, self).__init__()

    def forward(self, inputs, targets, smooth=1):
        """Return ``1 - dice`` for the whole batch.

        Args:
            inputs: Logits; a sigmoid is applied here.
            targets: Tensor with the same number of elements as ``inputs``.
            smooth: Additive constant in numerator and denominator that
                keeps the ratio defined when both tensors sum to zero.

        Returns:
            Scalar tensor ``1 - (2*|X∩Y| + smooth) / (|X| + |Y| + smooth)``.
        """
        # torch.sigmoid is the non-deprecated spelling and matches val_epoch.
        probabilities = torch.sigmoid(inputs)

        # reshape (unlike view) also accepts non-contiguous tensors.
        probabilities = probabilities.reshape(-1)
        targets = targets.reshape(-1)

        intersection = (probabilities * targets).sum()
        dice = (2. * intersection + smooth) / (probabilities.sum() + targets.sum() + smooth)

        return 1 - dice

class BCEDiceLoss(nn.Module):
    """Sum of two loss callables evaluated on the same inputs and targets.

    Args:
        bce: Callable ``(inputs, targets) -> loss`` used as the first term.
        dice: Callable ``(inputs, targets) -> loss`` used as the second term.
    """

    def __init__(self, bce, dice):
        super().__init__()
        self.bce, self.dice = bce, dice

    def forward(self, inputs, targets):
        """Return ``bce(inputs, targets) + dice(inputs, targets)``."""
        bce_term = self.bce(inputs, targets)
        dice_term = self.dice(inputs, targets)
        return bce_term + dice_term

In [6]:
def train_epoch(model, dataloader, loss_fn, optimizer, scheduler, device, writer=None, epoch_index=0):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Module called on ``batch['pixel_values']``.
        dataloader: Iterable of dict batches with keys ``'pixel_values'``
            and ``'segmentations'``.
        loss_fn: Callable ``(outputs, segmentations) -> loss tensor``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler stepped once per batch.
        device: Device the batch tensors are moved to.
        writer: Optional object with ``add_scalar``; receives the batch loss.
        epoch_index: Zero-based epoch number, used for the global step.

    Returns:
        Mean of the per-batch loss values.
    """
    model.train()

    num_batches = len(dataloader)
    step_offset = epoch_index * num_batches  # global step of this epoch's first batch
    batch_losses = []

    progress = tqdm(dataloader, total=num_batches, desc="Training on batches")
    for step, batch in enumerate(progress):
        images = batch['pixel_values'].to(device)
        targets = batch['segmentations'].to(device)

        # Forward pass
        loss = loss_fn(model(images), targets)
        loss_value = loss.item()
        batch_losses.append(loss_value)

        # Backward pass; gradients are clipped to norm 1.0 to prevent
        # gradient explosion before the weights are updated.
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

        # Per-batch loss curve in tensorboard
        if writer is not None:
            writer.add_scalar('Loss/train (per batch)', loss_value, step_offset + step)

    return np.mean(batch_losses)


def val_epoch(model, dataloader, loss_fn, device, writer=None, epoch_index=0):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Args:
        model: Module called on ``batch['pixel_values']``; its output is
            treated as logits.
        dataloader: Iterable of dict batches with keys ``'pixel_values'``
            and ``'segmentations'``.
        loss_fn: Callable ``(outputs, segmentations) -> loss tensor``.
        device: Device the batch tensors are moved to.
        writer: Optional object with ``add_scalar``; receives the batch
            loss and Dice score.
        epoch_index: Zero-based epoch number, used for the global step.

    Returns:
        Tuple ``(mean batch loss, mean batch Dice score)``.
    """
    model.eval()

    num_batches = len(dataloader)
    step_offset = epoch_index * num_batches  # global step of this epoch's first batch
    batch_losses, batch_dice = [], []

    with torch.no_grad():
        progress = tqdm(dataloader, total=num_batches, desc="Validation on batches")
        # For each batch of validation data...
        for step, batch in enumerate(progress):
            images = batch['pixel_values'].to(device)
            targets = batch['segmentations'].to(device)

            # Forward pass
            logits = model(images)
            loss_value = loss_fn(logits, targets).item()

            # Binarise sigmoid probabilities at 0.5 and score against the target
            predictions = (torch.sigmoid(logits) > 0.5).float()
            dice_value = dice_score(targets, predictions).item()

            batch_losses.append(loss_value)
            batch_dice.append(dice_value)

            # Per-batch curves in tensorboard
            if writer is not None:
                global_step = step_offset + step
                writer.add_scalar('Loss/val (per batch)', loss_value, global_step)
                writer.add_scalar('Dice/val (per batch)', dice_value, global_step)

    return np.mean(batch_losses), np.mean(batch_dice)

In [7]:
from resnet_backbone import ResNet
from uper_head import UPerHead
from mmcv_custom import load_checkpoint

# Set params
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
EPOCHS = 10
LEARNING_RATE = 1e-4

# Create main model
# UpperModel is a project-local container; backbone and head are attached below.
model = UpperModel()

# Create backbone
# ResNet with depth 50 that returns the feature maps of all four stages.
resnet_model = ResNet(depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 1, 1),
        strides=(1, 2, 2, 2),
        norm_eval=False,
        contract_dilation=True)
# Initialise the backbone from the torchvision ResNet-50 checkpoint. The
# recorded output reports fc.weight / fc.bias as unexpected keys.
load_checkpoint(resnet_model, 'https://download.pytorch.org/models/resnet50-0676ba61.pth')

# Set backbone to main model
model.backbone = resnet_model

# Create head
# in_channels lists one width per backbone stage selected by in_index;
# num_classes=1 matches the single-channel masks and BCE/Dice losses used here.
uper_head = UPerHead(in_channels=[256, 512, 1024, 2048],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=1,
        align_corners=False,)

# Set head to main model
model.head = uper_head

model.to(device)
# train_epoch also calls model.train() at the start of every epoch.
model.train()

# Loss and optimizer
# The training criterion is BCE-with-logits plus soft Dice on the same logits.
bce_loss = nn.BCEWithLogitsLoss()
dice_loss = DiceLoss()
criterion = BCEDiceLoss(bce_loss, dice_loss)
# NOTE(review): AdamW is imported from transformers here; newer transformers
# releases deprecate it in favour of torch.optim.AdamW — confirm before upgrading.
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)
# The scheduler is stepped once per batch in train_epoch, so the schedule
# length is batches-per-epoch times the number of epochs.
total_steps = len(train_dataloader) * EPOCHS

# Warm up over one epoch's worth of steps, then follow the cosine schedule.
scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
    optimizer, 
    num_warmup_steps = len(train_dataloader),
    num_training_steps = total_steps
)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

The model and loaded state dict do not match exactly

unexpected key in source state_dict: fc.weight, fc.bias



In [8]:
# Display the layer tree and parameter counts of the assembled model (torchinfo).
summary(model)

Layer (type:depth-idx)                        Param #
UpperModel                                    --
├─UPerHead: 1-1                               --
│    └─Conv2d: 2-1                            513
│    └─Dropout2d: 2-2                         --
│    └─PPM: 2-3                               --
│    │    └─Sequential: 3-1                   1,049,088
│    │    └─Sequential: 3-2                   1,049,088
│    │    └─Sequential: 3-3                   1,049,088
│    │    └─Sequential: 3-4                   1,049,088
│    └─ConvModule: 2-4                        --
│    │    └─Conv2d: 3-5                       18,874,880
│    │    └─ReLU: 3-6                         --
│    └─ModuleList: 2-5                        --
│    │    └─ConvModule: 3-7                   131,584
│    │    └─ConvModule: 3-8                   262,656
│    │    └─ConvModule: 3-9                   524,800
│    └─ModuleList: 2-6                        --
│    │    └─ConvModule: 3-10                  2,359,808
│    

In [9]:
# Best validation Dice seen so far; a checkpoint is written whenever it improves.
max_dice = 0

# exist_ok=True keeps the cell re-runnable: os.mkdir raised FileExistsError
# as soon as the directory was left over from a previous run.
os.makedirs('tensorboard', exist_ok=True)
writer = SummaryWriter(log_dir='tensorboard')

# Draw a fresh train/val split (same sizes as before, so the scheduler's
# total step count computed from the earlier dataloader stays valid).
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Create dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)

try:
    # Loop through each epoch.
    for epoch in tqdm(range(EPOCHS), desc="Epoch"):
        # Perform one full pass over the training and validation sets
        train_loss = train_epoch(model, train_dataloader, criterion, optimizer, scheduler, device, writer, epoch)
        val_loss, val_metric = val_epoch(model, val_dataloader, criterion, device, writer, epoch)

        # Populate tensorboard
        writer.add_scalar('Loss/train (per epoch)', train_loss, epoch)
        writer.add_scalar('Loss/val (per epoch)', val_loss, epoch)
        writer.add_scalar('Dice/val (per epoch)', val_metric, epoch)

        # Print loss and accuracy values to see how training evolves.
        print(f'epoch: {epoch} - train_loss: {train_loss:.5f} - val_loss: {val_loss:.5f} - dice: {val_metric:.5f}\n')

        # Save checkpoint when the validation Dice improves
        if val_metric > max_dice:
            torch.save(model, "upper_model_blur.pt")
            max_dice = val_metric
finally:
    # Make sure buffered scalars reach disk even if training is interrupted.
    writer.flush()

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Training on batches:   0%|          | 0/523 [00:00<?, ?it/s]

KeyboardInterrupt: ignored