In [None]:
import random
import math
import os
from pathlib import Path

import pandas as pd
import numpy as np

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# Image augmentation
from albumentations import Compose, Normalize, HorizontalFlip, VerticalFlip
from albumentations.pytorch import ToTensorV2

from skimage import io
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import train_test_split

from tqdm import tqdm_notebook


In [None]:
# remove this cell if run locally
# This takes the pretrained model into a location pytorch checks when we call torch.hub.load(...)
!mkdir 'cache'
!mkdir 'cache/torch'
!mkdir 'cache/torch/checkpoints'
!cp '../input/pytorch-pretrained-models/semi_supervised_resnext50_32x4-ddb3e555.pth' 'cache/torch/checkpoints/'
torch.hub.DEFAULT_CACHE_DIR = '/cache'

In [None]:
torch.hub.DEFAULT_CACHE_DIR

<h3>Pick the device once so that models and their inputs can call <code>.to(device)</code> and work on either CPU or GPU</h3>

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# The bare `device` on the last line displays the selected device as the cell output.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

In [None]:
def debug_memory():
    """Print the process's peak resident set size and a tally of live tensors.

    The first line printed is ``maxrss = <value>`` as reported by
    ``resource.getrusage``. Each following line is a
    ``(device, dtype, shape)`` signature, a tab, and the number of tensors
    currently reachable by the garbage collector that share that signature.
    """
    import collections, gc, resource, torch

    peak_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    print('maxrss = {}'.format(peak_rss))

    # Group every tensor the garbage collector knows about by its signature.
    tally = collections.Counter()
    for obj in gc.get_objects():
        if torch.is_tensor(obj):
            tally[(str(obj.device), obj.dtype, tuple(obj.shape))] += 1

    for signature, count in tally.items():
        print('{}\t{}'.format(signature, count))

<h3>Seen a lot of people on Kaggle set all seeds in one place </h3>

In [None]:
def seed_torch(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and CUDA), exports
    ``PYTHONHASHSEED`` into the environment, and switches cuDNN to
    deterministic mode.

    Args:
        seed: integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # All four seeders take the seed as their single argument.
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True

In [None]:
seed_torch(9)

In [None]:
img_size = 128  # tile edge length in pixels (tiles are 128x128); not referenced by the cells shown here
batch_size = 16  # number of images per batch; each image contributes N tiles
N = 10  # number of tiles read per image: files <image_id>_0.png .. <image_id>_{N-1}.png

# Kaggle input locations; adjust these when running outside a Kaggle kernel.
TILES = '/kaggle/input/panda-16x128x128-tiles-data/train/'
LABELS = '/kaggle/input/prostate-cancer-grade-assessment/train.csv'

In [None]:
data = pd.read_csv(LABELS).set_index('image_id')

In [None]:
data.shape

In [None]:
data.head()

<font size="3">We should check at some point whether the test set has a similar distribution of target labels. I recall the paper saying that the test set was more heavily biased towards Grade 5 images...</font>

In [None]:
data.isup_grade.value_counts()

## Dataset Construction

Only consider images that we have already processed and stored as tiles in the `TILES` folder.

In [None]:
# Tile files are named "<image_id>_<tile index>.png"; the first 32 characters of each
# filename are taken as the image id (assumes ids are exactly 32 characters — TODO confirm).
# A set collapses the several tiles of one image into a single id.
image_ids = {filepath[:32] for filepath in os.listdir(TILES)}

In [None]:
# Keep only the labelled images that actually have tiles on disk.
#
# A boolean mask is used instead of `data.loc[image_ids]` because
#   * recent pandas versions reject a `set` as a `.loc` indexer, and
#   * a set has no stable iteration order across kernel sessions, so the row order
#     (and therefore the seeded train/valid split further down) would change from run to run.
# The mask keeps the CSV's own row order, which is deterministic.
#
# Re-index by image_id first when this cell has already been run once, so that
# re-running it is harmless instead of failing.
labelled = data.set_index('image_id') if 'image_id' in data.columns else data
data = labelled[labelled.index.isin(image_ids)].reset_index()

Lose about 100 images (from IAFoss pre-processing, he only used images with masks)

In [None]:
data.shape

In [None]:
# 70/30 train/validation split with a fixed random_state.
# NOTE(review): the split is not stratified on `isup_grade`; consider `stratify=` if the
# grade distribution should match between the two partitions.
train, valid = train_test_split(data, test_size=0.3, random_state=9)

In [None]:
# The split keeps the original row labels. TileTrainDataSet looks rows up with
# `self.X[idx]` / `self.Y[idx]` for idx in 0..len-1, so both frames need a fresh
# 0..n-1 index (the old labels are dropped rather than kept as a column).
train.reset_index(inplace=True, drop=True)
valid.reset_index(inplace=True, drop=True)

In [None]:
train.head()

In [None]:
class TileTrainDataSet(Dataset):
    """Dataset yielding the first ``N`` tiles of an image together with its ISUP grade.

    Tiles are read from the ``TILES`` directory as ``<image_id>_<i>.png`` for
    ``i`` in ``range(N)``; ``TILES`` and ``N`` are notebook-level settings.

    Args:
        df: DataFrame with ``image_id`` and ``isup_grade`` columns. Rows are
            addressed by position, so the frame's index labels do not matter.
        transform_fn: optional callable invoked as ``transform_fn(image=img)``
            that returns a mapping holding the result under ``'image'``
            (the albumentations calling convention). When omitted, or when it
            returns an array rather than a tensor, the tile is converted to a
            channel-first tensor without any further processing.
    """

    def __init__(self, df, transform_fn=None):
        self.X = df['image_id']
        self.Y = df['isup_grade']
        self.transform = transform_fn

    def __getitem__(self, idx):
        """Return ``(tiles, label)`` for the ``idx``-th row.

        ``tiles`` stacks the ``N`` tiles along a new leading dimension
        (N x 3 x 128 x 128 for the tiles and transforms used in this notebook).
        """
        # Positional lookup: a DataLoader hands out 0..len-1, which only matches the
        # Series labels if the frame happened to have a fresh RangeIndex.
        img_id = self.X.iloc[idx]
        imgs = []
        for i in range(N):
            img = io.imread(os.path.join(TILES, img_id + f"_{i}.png"))

            if self.transform is not None:
                img = self.transform(image=img)['image']

            if not torch.is_tensor(img):
                # torch.stack only accepts tensors, so a pipeline without ToTensorV2
                # (including transform_fn=None) needs an explicit conversion.
                img = torch.from_numpy(np.ascontiguousarray(img))
                if img.ndim == 3:
                    # height x width x channels -> channels x height x width
                    img = img.permute(2, 0, 1)
            imgs.append(img)

        # All N tiles of one image share the same target label.
        x = torch.stack(imgs)
        return x, self.Y.iloc[idx]

    def __len__(self):
        """Number of images (not tiles) in the dataset."""
        return len(self.Y)

In [None]:
def img_transforms(*, partition):
    """Build the albumentations pipeline for one data partition.

    Args:
        partition: ``'train'`` or ``'valid'`` (keyword-only).

    Returns:
        A ``Compose`` that normalises the image and converts it with
        ``ToTensorV2``. For ``'train'`` a horizontal and a vertical flip, each
        applied with probability 0.5, come first; validation data is never
        flipped.
    """
    assert partition in ('train', 'valid')

    if partition == 'train':
        augmentations = [
            HorizontalFlip(p=0.5),  # 50/50 chance of performing horizontal flip
            VerticalFlip(p=0.5),
        ]
    elif partition == 'valid':
        augmentations = []
    else:
        # Only reachable when assertions are stripped; nothing is built in that case.
        return None

    # Steps shared by both partitions: normalise images according to ResNext
    # specifications, then convert to a tensor.
    shared_tail = [
        Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ]
    return Compose(augmentations + shared_tail)

In [None]:
train_ds = TileTrainDataSet(train, transform_fn=img_transforms(partition='train'))

For a set of tiles we have just one label (as the tiles will be concatenated in later layers of the network)

In [None]:
train_ds[0][1]

Each tile is a 128x128 rgb (3-channel) image

In [None]:
train_ds[0][0].shape

In [None]:
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, drop_last=True)

In [None]:
batch = next(iter(train_dl))

In [None]:
batch[1].shape

In [None]:
batch[1].unsqueeze(1).shape

In [None]:
shape = batch[0].shape

In [None]:
shape

Our strategy is to treat each tile as independent. Pass it into the network individually (not as a single composite image of tiles). <br>So we reshape from batch_size x N to (batch_size*N)

In [None]:
batch[0].view(-1, shape[2], shape[3], shape[4]).shape

In [None]:
valid_ds = TileTrainDataSet(valid, transform_fn=img_transforms(partition='valid'))

In [None]:
len(valid_ds)

In [None]:
# Validation metrics do not depend on sample order, so the loader is not shuffled:
# evaluation order stays fixed and no draws are taken from the seeded RNG.
valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False)

## Model

In [None]:
class Model(nn.Module):
    """Encoder applied to every tile, pooled across an image's tiles, with a single-output head.

    Args:
        arch: entry-point name passed to ``torch.hub.load`` for the
            ``facebookresearch/semi-supervised-ImageNet1K-models`` repository.
        n: intended number of label classes; not used by this class, whose head
            emits one value per image.
        pre: not used by this class.

    ``forward`` reads the notebook-level ``N`` (tiles per image), so the input's
    second dimension must equal ``N``.
    """
    # n=6 represents number of label classes, give better name. 
    # Except for now doing regression instead of classification
    def __init__(self, arch='resnext50_32x4d_ssl', n=6, pre=True):
        super().__init__()
        m = torch.hub.load('facebookresearch/semi-supervised-ImageNet1K-models', arch)
        # Keep every child module except the last two as the encoder
        # (presumably the global pool and the classifier — TODO confirm).
        self.enc = nn.Sequential(*list(m.children())[:-2])  # Remove last two layers from ResNext
        # Input width of the hub model's last child; this is the encoder's channel count.
        nc = list(m.children())[-1].in_features  # 2048 (last linear layer of resnext50)
        self.pool = nn.AdaptiveAvgPool2d(1)
        # Head: linear -> batch norm -> dropout -> linear, ending in one regression output.
        # NOTE(review): there is no activation between linear1 and linear2 — confirm intended.
        self.linear1 = nn.Linear(nc,512)
        self.bn = nn.BatchNorm1d(512)
        self.dropout = nn.Dropout(0.5)
        self.linear2 = nn.Linear(512,1)
                                 
    def forward(self, x):
        """Map a batch of tile stacks (bs x N x 3 x H x W) to one prediction per image (bs x 1)."""
        # Original shape: bs x N x 3 x 128 x 128
        shape = x.shape
        # Fold the tile dimension into the batch dimension so each tile is encoded independently.
        x = x.view(-1,shape[2],shape[3],shape[4])  # bs*N x 3 x 128 x 128
        # C represents output_size from ResNext
        x = self.enc(x)  # bs*N x C x 4 x 4
        
        shape = x.shape
        # concatenate the output for tiles into a single map
        # Need to do in two steps to 1) Separate batch_size and N, 2) Combine N into outer dimensions 
        # Result: bs x C x N*4 x 4
        # The permute moves channels ahead of the tile axis; .contiguous() is required
        # before the second view because permute only changes strides.
        x = x.view(-1,N,shape[1],shape[2],shape[3]).permute(0,2,1,3,4).contiguous()\
          .view(-1,shape[1],shape[2]*N,shape[3])  
        
        # With 2-D pooling over size 1, reduces last two dimensions to 1 
        # (i.e. each channel is averaged over all tiles and spatial positions of an image)
        x = self.pool(x)  # bs x C x 1 x 1
        # Flatten last three dimensions (result: bs x C)
        x = self.linear1(torch.flatten(x, start_dim=1))  # bs x 512
        x = self.bn(x)
        x = self.dropout(x)
        x = self.linear2(x)
        # Look at other pre-trained models intended for regression?
        return x


## Train

In [None]:
def train_model(model, optimizer, scheduler, train_dl, epochs, save_path="/kaggle/working/best_model.pth"):
    """Train ``model`` with an MSE regression loss, validating after every epoch.

    After each epoch the model is evaluated with ``valid_metrics`` on the
    notebook-level ``valid_dl`` loader, the scheduler is stepped with the
    validation loss, and the weights are saved whenever the validation kappa
    improves on the best value seen so far.

    Args:
        model: module mapping a batch of tile stacks to a ``bs x 1`` prediction.
        optimizer: optimizer over ``model``'s parameters.
        scheduler: object whose ``step(val_loss)`` is called once per epoch.
        train_dl: iterable of ``(img, label)`` batches that supports ``len()``.
        epochs: number of passes over ``train_dl``.
        save_path: where the best weights are written.

    Returns:
        The best validation kappa observed; ``0.0`` if no epoch scored above zero
        (in which case nothing was saved).
    """
    iterations = epochs * len(train_dl)
    pbar = tqdm_notebook(total=iterations)
    best_kappa = 0.0
    try:
        for _ in range(epochs):
            model.train()  # valid_metrics switches to eval mode, so re-enable every epoch
            total_loss = 0.0
            total = 0

            for img, label in train_dl:
                img = img.to(device)
                # Labels arrive as a 1-D integer batch; the model emits bs x 1 floats.
                label = label.to(device).float().unsqueeze(1)
                out = model(img)
                # some suggest since kappa is a quasi-measure of "distance" from true label,
                # better to calculate MSE regression loss than classification loss
                loss = F.mse_loss(out, label)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # Weight by batch size so the epoch loss is a per-sample mean.
                total_loss += label.size(0) * loss.item()
                total += label.size(0)
                pbar.update()

            # An empty loader (e.g. drop_last with fewer samples than one batch)
            # would otherwise divide by zero.
            train_loss = total_loss / total if total else float('nan')

            val_loss, val_kappa = valid_metrics(model, valid_dl)
            scheduler.step(val_loss)
            print(f"\tTrain loss: {train_loss:.3f} \t Valid loss: {val_loss:.3f} \t Valid Kapp:  {val_kappa:.3f}")

            if val_kappa > best_kappa:
                best_kappa = val_kappa
                save_model(model, save_path)

                print(f"Best kappa: {best_kappa:.3f}")
    finally:
        # Release the progress bar even if training is interrupted or raises.
        pbar.close()
    return best_kappa

In [None]:
def valid_metrics(model, valid_dl, weights='quadratic'):
    """Evaluate ``model`` on ``valid_dl`` and return ``(mean MSE loss, kappa)``.

    The model is switched to eval mode and run without gradient tracking.
    Regression outputs are rounded to the nearest integer and clamped to the
    label range 0..5 before being compared with the true labels.

    Args:
        model: module mapping a batch of tile stacks to a ``bs x 1`` prediction.
        valid_dl: iterable of ``(img, label)`` batches; labels are expected on the CPU.
        weights: weighting passed to ``cohen_kappa_score``. Defaults to
            ``'quadratic'``, so larger grade errors are penalised more and the
            score matches the quadratic weighted kappa the PANDA challenge is
            scored with; pass ``None`` for plain (unweighted) kappa.

    Returns:
        ``(val_loss, val_kappa)`` where ``val_loss`` is the per-sample mean MSE.
    """
    model.eval()
    total = 0
    total_loss = 0.0
    preds = []
    labels = []
    # No gradients are needed for evaluation; skipping the autograd graph saves memory.
    with torch.no_grad():
        for img, label in valid_dl:
            img = img.to(device)
            batch = label.shape[0]
            out = model(img)
            loss = F.mse_loss(out, label.to(device).float().unsqueeze(1))
            # Weight by batch size so the final value is a per-sample mean.
            total_loss += batch * loss.item()
            total += batch

            # Vectorised "nearest valid grade": round (half to even, like Python's
            # round) and clamp to 0..5.
            preds.append(out.round().clamp(0, 5).long().cpu().numpy())
            labels.append(label.long().unsqueeze(1).numpy())

    # Flatten the stacked bs x 1 columns into 1-D label vectors for scikit-learn.
    preds, labels = np.vstack(preds).ravel(), np.vstack(labels).ravel()
    val_loss = total_loss / total
    val_kappa = cohen_kappa_score(preds, labels, weights=weights)
    return val_loss, val_kappa

In [None]:
def threshold(x):
    """
    Map a regression output to the nearest classification label.

    The value is rounded to the nearest integer and then limited to the
    valid label range (0,1,2,3,4,5).
    """
    nearest = round(x)
    if nearest > 5:
        return 5
    if nearest < 0:
        return 0
    return nearest

In [None]:
def save_model(m, p):
    """Write the state dict of module ``m`` to path ``p``."""
    torch.save(m.state_dict(), p)


def load_model(m, p):
    """Load the state dict stored at ``p`` into module ``m`` in place.

    The checkpoint is deserialised onto the CPU so that weights saved on a GPU
    can also be restored on a machine without one; ``load_state_dict`` then
    copies the values into ``m``'s existing parameters.
    """
    m.load_state_dict(torch.load(p, map_location='cpu'))

In [None]:
# Build the network with its default arguments and move its parameters to the selected device.
# The trailing semicolon suppresses the (long) module repr in the cell output.
model = Model()
model.to(device);

In [None]:
# Resume from a previously saved checkpoint when one exists. On a fresh session the
# file has not been written yet, so training simply starts from the hub weights
# instead of the cell raising.
if os.path.exists('/kaggle/working/best_model.pth'):
    load_model(model, '/kaggle/working/best_model.pth')

In [None]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [None]:
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=2, verbose=True, eps=1e-6)

In [None]:
train_model(model, optimizer, scheduler, train_dl, epochs=8)