In [1]:
# code based on https://towardsdatascience.com/transfer-learning-with-convolutional-neural-networks-in-pytorch-dd09190245ce
# and https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
# and https://github.com/yanneta/deep-learning-with-pytorch/blob/master/lesson7-transfer-learning-v0.ipynb

In [2]:
import skimage.io
from tqdm.notebook import tqdm
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from pathlib import Path
from PIL import Image 

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision import models

In [3]:
# Root data directory: the CSV files and the train/ and masks/ image folders are read from here.
PATH = Path("..") / "data"

In [None]:
# Read test.csv and the cleaned training labels, then preview the first label rows.
labels = pd.read_csv(PATH.joinpath('train_labels_clean.csv'))
test = pd.read_csv(PATH.joinpath('test.csv'))
labels.head(3)

In [None]:
# Hold out 20% of the labelled rows for validation; the fixed seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    labels['image_id'].values,
    labels['isup_grade'].values,
    random_state=42,
    test_size=0.2,
)

In [None]:
def read_file(filename):
    """Load the image ``filename`` from the ``train`` folder under ``PATH`` as an array."""
    image_path = PATH / f'train/{filename}'
    return skimage.io.imread(image_path)

In [None]:
def get_mask(filename):
    """Load the mask ``filename`` from the ``masks`` folder under ``PATH`` as an array."""
    mask_path = PATH / f'masks/{filename}'
    return skimage.io.imread(mask_path)

In [None]:
class PANDADataset(Dataset):
    """In-memory dataset of tiled images, tiled masks and per-image labels.

    For every id in ``X`` the files ``{id}_{j}.png`` with ``j`` in
    ``range(n_tiles)`` are read through ``read_file`` (images) and
    ``get_mask`` (masks). The tiles of one id are joined along axis 0 into a
    single array. Everything is loaded eagerly in ``__init__``.

    Args:
        X: sequence of image ids (strings).
        y: sequence of labels, aligned with ``X``.
        n_tiles: number of tile files per id (defaults to 16).

    Raises:
        ValueError: if ``X`` and ``y`` differ in length.
    """

    def __init__(self, X, y, n_tiles=16):
        if len(X) != len(y):
            raise ValueError(
                f'X and y must have the same length, got {len(X)} and {len(y)}')
        # All images are loaded first, then all masks.
        self.x = [self._stack_tiles(read_file, image_id, n_tiles) for image_id in X]
        self.y = [self._stack_tiles(get_mask, image_id, n_tiles) for image_id in X]
        self.labels = y

    @staticmethod
    def _stack_tiles(loader, image_id, n_tiles):
        """Read the ``n_tiles`` tiles of ``image_id`` with ``loader`` and join them along axis 0."""
        tiles = [loader(f'{image_id}_{j}.png') for j in range(n_tiles)]
        # np.concatenate takes the list directly; wrapping it in np.array only adds a copy.
        return np.concatenate(tiles)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``(stacked image tiles, stacked mask tiles, label)`` for item ``idx``."""
        return self.x[idx], self.y[idx], self.labels[idx]
    
# Only the first 50 ids of each split are loaded into the datasets.
train_ds = PANDADataset(X=X_train[:50], y=y_train[:50])
valid_ds = PANDADataset(X=X_val[:50], y=y_val[:50])

In [None]:
# One item per batch; only the training loader shuffles.
train_dl = DataLoader(dataset=train_ds, shuffle=True, batch_size=1)
valid_dl = DataLoader(dataset=valid_ds, batch_size=1)

In [None]:
# Peek at one training batch to check tensor shapes.
# Distinct names are used so the `labels` DataFrame loaded from the CSV is not
# overwritten; otherwise re-running the split cell after this one would fail.
x_batch, mask_batch, label_batch = next(iter(train_dl))
x_batch.shape, mask_batch.shape, label_batch.shape

In [None]:
# Pretrained VGG16 from torchvision's `models` module, placed on the GPU when
# one is available and on the CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = models.vgg16(pretrained=True).to(device)

In [None]:
# Freeze every existing weight: no gradients are computed for these parameters.
for weight in model.parameters():
    weight.requires_grad_(False)

In [None]:
# Swap the last classifier layer for a small trainable head.
final_layer = model.classifier[6]
# On a re-run the slot already holds the new head; its first layer has the same input width.
n_inputs = (final_layer.in_features if isinstance(final_layer, nn.Linear)
            else final_layer[0].in_features)
# One output per grade; assumes grades are integer-coded starting at 0 — TODO confirm.
n_classes = int(max(y_train.max(), y_val.max())) + 1

head = nn.Sequential(
    nn.Linear(n_inputs, 256),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(256, n_classes),
    nn.LogSoftmax(dim=1))

# Newly built layers start on the CPU; move them to wherever the rest of the model lives.
model.classifier[6] = head.to(next(model.parameters()).device)

In [None]:
# Pass the loss function itself; it is called later as criterion(out, labels).
# cross_entropy applies log-softmax internally, which leaves the head's
# already log-normalised outputs unchanged.
criterion = F.cross_entropy
# `torch.optim` is reachable through the existing `import torch`.
# Only parameters that still require gradients are handed to the optimizer.
optimizer = torch.optim.Adam(
    [p for p in model.parameters() if p.requires_grad], lr=0.01)

In [None]:
def _prepare_batch(x, labels, device):
    """Move one batch to ``device`` in the dtype and layout a conv net consumes."""
    x = x.to(device).float()
    # Assumes image batches arrive channels-last (N, H, W, 3), as arrays read from
    # image files usually are — TODO confirm. Conv layers need (N, C, H, W).
    if x.dim() == 4 and x.shape[-1] == 3 and x.shape[1] != 3:
        x = x.permute(0, 3, 1, 2).contiguous()
    # NOTE(review): no pixel scaling/normalisation is applied here; confirm what
    # the pretrained weights expect.
    return x, labels.to(device).long()


def train(model, criterion, optimizer, n_epochs, train_loader=None, valid_loader=None):
    """Train ``model`` and report validation loss and accuracy after each epoch.

    Args:
        model: network mapping an image batch to per-class scores.
        criterion: callable ``criterion(scores, labels)`` returning a scalar loss tensor.
        optimizer: optimizer built over the parameters to update.
        n_epochs: number of passes over the training loader.
        train_loader: iterable of ``(image, mask, label)`` batches; defaults to
            the notebook-level ``train_dl``.
        valid_loader: iterable of ``(image, mask, label)`` batches; defaults to
            the notebook-level ``valid_dl``.

    Returns:
        A list with one dict per epoch, holding ``'val_loss'`` (mean loss per
        validation batch) and ``'val_acc'`` (fraction of correct predictions).
    """
    if train_loader is None:
        train_loader = train_dl
    if valid_loader is None:
        valid_loader = valid_dl

    # Run on whatever device the model already lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    history = []
    for epoch in range(n_epochs):
        model.train()  # enable dropout for the training pass
        for x, _, labels in train_loader:
            x, labels = _prepare_batch(x, labels, device)
            # Clear gradients left over from the previous step; backward() accumulates.
            optimizer.zero_grad()
            loss = criterion(model(x), labels)
            loss.backward()
            optimizer.step()

        model.eval()  # disable dropout for evaluation
        val_loss, correct, total, n_batches = 0.0, 0, 0, 0
        with torch.no_grad():  # no graph needed while evaluating
            for x, _, labels in valid_loader:
                x, labels = _prepare_batch(x, labels, device)
                out = model(x)
                # .item() keeps a plain float instead of a tensor chain.
                val_loss += criterion(out, labels).item()
                _, pred = torch.max(out, 1)
                correct += pred.eq(labels).sum().item()
                total += labels.size(0)
                n_batches += 1

        # Average over the validation batches actually seen.
        val_loss = val_loss / max(n_batches, 1)
        val_acc = correct / max(total, 1)
        print('val_loss:', val_loss, 'val_acc:', val_acc)
        history.append({'val_loss': val_loss, 'val_acc': val_acc})
    return history

In [None]:
# Run the training loop for ten epochs.
train(model, criterion, optimizer, n_epochs=10)