In [1]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import pandas as pd


# import torchvision
from torchvision import datasets, models, transforms
from albumentations import Compose, ShiftScaleRotate, Resize, Normalize, CenterCrop, HorizontalFlip
from albumentations import Rotate, RandomBrightness, RandomContrast
from albumentations.pytorch import ToTensor

import xception
import util

import time
import os
import copy

In [2]:
# Project folders, all relative to the notebook's working directory.
# -- inputs: image folders and the folder holding the label CSV files
dir_train_img, dir_test_img = (
    'data/images/stage_1_train_images_jpg',
    'data/images/stage_1_test_images_jpg',
)
dir_labels = 'data/labels'
# -- outputs: saved model checkpoints and prediction files
dir_model = 'models'
dir_submission = 'submissions'

In [4]:
# Create the train and validation label sets from the filtered label table.
# The CSV is located through `dir_labels`, like the other label files of the
# notebook, instead of repeating the folder name as a literal.
# NOTE(review): random_state=None presumably makes the split differ on every
# run, which would mix train/val images when resuming from an older
# checkpoint -- confirm in util.train_val_files and consider a fixed seed.
util.train_val_files(pd.read_csv(os.path.join(dir_labels, 'train_filtered.csv')),
                     positive_rate=None, test_size=.25, random_state=None)

Successfully created train (len: 476840) and val (len: 158947) sets containing around 15% positives.


In [3]:
# Custom data loader
class IntracranialDataset(Dataset):
    """
    Dataset of '.jpg' images listed in a CSV file, optionally with their labels.

    csv_file:  path of a CSV file with an 'ID' column (image file name without
               the '.jpg' extension) and, when `labels` is true, one column
               per entry of LABEL_COLUMNS.
    path:      folder containing the '<ID>.jpg' files.
    labels:    when true, each sample also carries its label vector.
    transform: optional callable invoked as transform(image=img); the 'image'
               entry of its result replaces the image.

    Samples are dicts: {'image': ...} or {'image': ..., 'labels': ...}.
    """

    # Column order of the label vector returned with each sample.
    LABEL_COLUMNS = ['epidural', 'intraparenchymal', 'intraventricular',
                     'subarachnoid', 'subdural', 'any']

    def __init__(self, csv_file, path, labels, transform=None):
        self.path = path
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.labels = labels

    def __len__(self):
        """Number of rows (images) listed in the CSV file."""
        return len(self.data)

    def _read_labels(self, idx):
        """Returns the label values of row `idx` as a 1-D tensor (LABEL_COLUMNS order)."""
        row = self.data.loc[idx, self.LABEL_COLUMNS]
        # Convert to a plain list first: handing the Series itself to
        # torch.tensor makes torch index it with integer positions, which
        # pandas does not guarantee for a Series indexed by column names.
        return torch.tensor(row.tolist())

    def __getitem__(self, idx):
        """Loads image `idx`, applies the transform and expands it to 3 channels."""
        img_name = os.path.join(self.path, self.data.loc[idx, 'ID'] + '.jpg')
        img = mpimg.imread(img_name)

        if self.transform:
            img = self.transform(image=img)['image']

        # Assumes a single-channel 2-D image: add a leading channel axis,
        # then copy the grayscale plane to the 3 RGB channels.
        img = img[None, :, :]
        img = np.repeat(img, 3, 0)

        sample = {'image': img}
        if self.labels:
            sample['labels'] = self._read_labels(idx)
        return sample

In [4]:
# Parameters
n_classes = 6
n_labels = 6
batch_size = 16

# Side length the images are resized to before the 299x299 centre crop.
# 320 is the value used for ResNeXt; 333 was the value used for Xception.
resize = 320

# One pipeline per split. Only the training pipeline contains a random step
# (ShiftScaleRotate); 'val' and 'test' use the same deterministic steps.
# Currently disabled options: HorizontalFlip(p=.2), Rotate(p=0.5, limit=20),
# RandomBrightness / RandomContrast (p=0.5, limit=0.2) and
# Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)).
data_transforms = {
    'train': Compose([
        Resize(resize, resize),
        CenterCrop(299, 299),
        ShiftScaleRotate(p=.5, rotate_limit=20),
        ToTensor(),
    ]),
}
data_transforms.update({
    split: Compose([Resize(resize, resize), CenterCrop(299, 299), ToTensor()])
    for split in ('val', 'test')
})

# Both labelled splits read their images from the training image folder.
train_dataset = IntracranialDataset(
    os.path.join(dir_labels, 'train.csv'), dir_train_img,
    labels=True, transform=data_transforms['train'])

val_dataset = IntracranialDataset(
    os.path.join(dir_labels, 'val.csv'), dir_train_img,
    labels=True, transform=data_transforms['val'])

dataloaders = {
    split: torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    for split, dataset in (('train', train_dataset), ('val', val_dataset))
}

dataset_sizes = {
    split: len(dataset)
    for split, dataset in (('train', train_dataset), ('val', val_dataset))
}

# First GPU when CUDA is available, CPU otherwise.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [6]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=10, image_max=1000):
    """
    Trains `model` and returns it loaded with the weights of its best validation epoch.

    Every epoch runs a 'train' phase followed by a 'val' phase over the
    module-level `dataloaders`; batches are moved to the module-level `device`.

    model:      network returning one logit per label.
    criterion:  element-wise loss (built with reduction='none'); the per-label
                weighting and the averaging are done here.
    optimizer:  optimizer stepped after every training batch.
    scheduler:  learning-rate scheduler stepped once per epoch, after the
                validation phase. A ReduceLROnPlateau receives the validation
                loss; any other scheduler is stepped without argument.
                None disables scheduling.
    num_epochs: number of train + val cycles.
    image_max:  Image max is an optional parameter that can be used to
                prematurely finish the epoch training phase and start the
                validation phase when the number of images processed is
                reached. The validation phase stops at 40% of that number.
                None processes the whole datasets.

    Returns (model, best_loss), best_loss being the lowest validation loss.
    """

    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    # Start from +inf so that the first validation epoch always becomes the
    # reference, whatever the scale of the loss.
    best_loss = float('inf')

    # We give more weight to the error on the "any" label like in the evaluation method for this Kaggle
    label_weights = torch.tensor([0.1, 0.1, 0.1, 0.1, 0.1, 0.2]).to(device)
    sum_weights = torch.sum(label_weights)

    # Below x% error in predicted probability we consider the output correct for statistical purposes
    dist_from_target_threshold = .05

    # Progress display: statistics are refreshed every few batches and a new
    # console line (with fresh running statistics) is started periodically.
    refresh_every_n_batches = 5
    newline_every_n_refreshes = 100
    batches_per_line = refresh_every_n_batches * newline_every_n_refreshes

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            print('{} phase :'.format(phase))
            is_training = phase == 'train'
            if is_training:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0.0
            running_loss_batch = 0.0
            time_start_batch = time.time()
            img_processed_batch, img_processed_total = 0, 0

            # Iterate over data
            for i, sample_batch in enumerate(dataloaders[phase]):
                if i % batches_per_line == batches_per_line - 1:
                    print()
                    running_loss_batch = 0.0
                    img_processed_batch = 0
                    time_start_batch = time.time()

                inputs = sample_batch['image'].to(device)
                labels = sample_batch['labels'].float().to(device)
                # Real size of this batch: the last one may be smaller than
                # the loader's batch size.
                n_images = inputs.size(0)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history only if in train
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    loss = (loss * label_weights * n_labels / sum_weights).mean()

                    # backward + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # statistics
                batch_loss = loss.item() * n_images
                running_loss += batch_loss
                running_loss_batch += batch_loss

                pred_error = torch.abs(torch.sigmoid(outputs.detach()) - labels)
                # Fraction of labels within the threshold, summed over the batch images
                n_close = (pred_error < dist_from_target_threshold).sum().item()
                running_corrects += n_close / float(n_labels)

                img_processed_total += n_images
                img_processed_batch += n_images

                if i % refresh_every_n_batches == (refresh_every_n_batches - 1):
                    time_elapsed_batch = time.time() - time_start_batch
                    # A coarse clock can report a zero interval right after a reset
                    if time_elapsed_batch > 0:
                        images_per_sec = img_processed_batch / time_elapsed_batch
                    else:
                        images_per_sec = float('inf')

                    print('Image {}/{} ({:.1f}%). Loss: {:.4f}  Time: {:.0f}m {:.0f}s ({:.1f} images/sec)'.format(
                        img_processed_total, dataset_sizes[phase],
                        100*img_processed_total / dataset_sizes[phase],
                        running_loss_batch / img_processed_batch,
                        time_elapsed_batch // 60, time_elapsed_batch % 60,
                        images_per_sec), end='\r')
                else:
                    print('Image {}/{}'.format(img_processed_total, dataset_sizes[phase]), end='\r')

                if image_max is not None:
                    if img_processed_total >= image_max:
                        break
                    if phase == 'val':
                        if img_processed_total >= image_max * .4:
                            break

            epoch_loss = running_loss / img_processed_total
            epoch_acc = running_corrects / img_processed_total

            print()
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val':
                # The scheduler is stepped after validation. Only
                # ReduceLROnPlateau takes the monitored metric; for the other
                # schedulers the positional argument of step() is an epoch
                # index, so the loss must not be passed to them.
                if scheduler is not None:
                    if isinstance(scheduler, lr_scheduler.ReduceLROnPlateau):
                        scheduler.step(epoch_loss)
                    else:
                        scheduler.step()

                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_model_wts = copy.deepcopy(model.state_dict())

        time_elapsed = time.time() - since
        print('Time elapsed {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))

        print()

    print('Best val loss: {:.4f}'.format(best_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, best_loss

In [8]:
# Choosing a model
# The torchvision constructor is looked up from `model_name`, so the name used
# for checkpoint / submission files and the network actually built stay in sync.
model_name = 'resnext50_32x4d'

model = getattr(models, model_name)(pretrained=True, progress=True)
# Swap the final fully connected layer for one with `n_classes` outputs.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=n_classes)
model = model.to(device)

# Xception alternative (its final layer is named `last_linear`):
# model_name = 'xception'
#
# model = xception.xception()
# num_ftrs = model.last_linear.in_features
# model.last_linear = nn.Linear(num_ftrs, n_classes)
# model = model.to(device)

In [10]:
# Element-wise loss (no reduction): train_model() applies the per-label
# weights and takes the mean itself.
criterion = nn.BCEWithLogitsLoss(reduction='none')

# A single parameter group covering the whole network.
plist = [dict(params=model.parameters(), lr=2e-5)]
optimizer = optim.Adam(params=plist)

# Multiplies the learning rate by `gamma` at each scheduler step.
exp_lr_scheduler = lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.5)

In [11]:
# Optional: resume from a previously saved checkpoint.
# Checkpoint files are named '<model_name>_<score>.tar'; the score of the
# checkpoint to load is picked by hand.
score = '0.0855'
model_path = '{}_{}.tar'.format(model_name, score)
model_path = os.path.join(dir_model, model_path)
util.load_model(model_path, model, optim=None)

In [12]:
# Make every parameter of the CNN trainable
for param in model.parameters():
    param.requires_grad_(True)

# 3 epochs, each capped at 60000 training images
model, loss = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=exp_lr_scheduler,
    num_epochs=3,
    image_max=60000)

# Save model with loss information
util.save_model(model, optimizer, loss, model_name, dir_model)

Epoch 1/3
----------
train phase :
Image 7984/476840 (1.7%). Loss: 0.0801  Time: 8m 26s (15.7 images/sec)
Image 15984/476840 (3.3%). Loss: 0.0788  Time: 8m 23s (15.8 images/sec)
Image 23984/476840 (5.0%). Loss: 0.0781  Time: 8m 22s (15.8 images/sec)
Image 31984/476840 (6.7%). Loss: 0.0758  Time: 8m 21s (15.8 images/sec)
Image 39984/476840 (8.4%). Loss: 0.0775  Time: 8m 20s (15.9 images/sec)
Image 47984/476840 (10.0%). Loss: 0.0738  Time: 8m 20s (15.9 images/sec)
Image 55984/476840 (11.7%). Loss: 0.0778  Time: 8m 20s (15.9 images/sec)
Image 60000/476840 (12.6%). Loss: 0.0788  Time: 4m 13s (15.9 images/sec)
train Loss: 0.0776 Acc: 0.8615
val phase :
Image 7984/158947 (5.0%). Loss: 0.0773  Time: 2m 40s (49.6 images/sec)
Image 15984/158947 (10.0%). Loss: 0.0736  Time: 2m 40s (49.7 images/sec)
Image 23984/158947 (15.0%). Loss: 0.0792  Time: 2m 39s (49.8 images/sec)
Image 24000/158947 (15.1%). Loss: 0.1430  Time: 0m 0s (82.7 images/sec)
val Loss: 0.0766 Acc: 0.8653
Time elapsed 71m 17s

Epoc

In [21]:
def model_inference(model, tta_size=1):
    """
    Predicts the label probabilities of every test image.

    tta_size is a parameter used to apply test time augmentation on images.
    The value of the parameter (when > 1) is the number of augmentations applied on
    each image; the predictions of the different passes are averaged.
    With tta_size == 1 the deterministic 'test' transform is used, otherwise
    the 'train' transform.

    Returns a 1-D numpy array holding `n_classes` consecutive probabilities
    per image, in dataset order.

    Raises ValueError when tta_size is smaller than 1.

    Side effects: registers the test loader / size in the module-level
    `dataloaders` and `dataset_sizes`, switches the model to eval mode and
    leaves all its parameters with requires_grad = False (they must be
    re-enabled before training again).
    """

    if tta_size == 1:
        test_transform = data_transforms['test']
    elif tta_size > 1:
        test_transform = data_transforms['train']
    else:
        raise ValueError('Number of folds for test-time augmentation must be >= 1.')

    test_dataset = IntracranialDataset(
        csv_file='test.csv', path=dir_test_img, transform=test_transform, labels=False)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    n_images = len(test_dataset)
    dataloaders['test'] = test_loader
    dataset_sizes['test'] = n_images

    for param in model.parameters():
        param.requires_grad = False

    model.eval()

    # One column per augmentation pass
    y_pred = np.zeros((n_images * n_classes, tta_size))

    with torch.no_grad():
        for k in range(tta_size):
            # Running write position: advanced by the real size of each batch
            # instead of being derived from the nominal batch size.
            offset = 0
            for sample_batch in test_loader:
                inputs = sample_batch['image'].to(device)
                probs = torch.sigmoid(model(inputs)).cpu().numpy()
                probs = probs.reshape(len(inputs) * n_classes)

                y_pred[offset:offset + probs.size, k] = probs
                offset += probs.size

                # Number of images done so far, so that the counter ends on the total
                print('TTA {}: Image {}/{}'.format(k+1, offset // n_classes, n_images), end='\r')

    print()

    # Averaging the predictions for the different augmentations of the image tested
    return y_pred.mean(axis=1)

In [25]:
# Test-set predictions averaged over 5 test-time-augmentation passes
test_pred = model_inference(model=model, tta_size=5)

TTA 5: Image 78544/78545


In [26]:
# Puts the predictions in a file
# The file name combines the model name with `loss` (the best validation loss
# returned by train_model), formatted with 5 decimals.
file_name = '{}_{:.5f}.csv'.format(model_name, loss)
# NOTE(review): presumably writes `file_name` inside `dir_submission` -- confirm
# in util.create_prediction_csv.
util.create_prediction_csv(test_pred, file_name, dir_submission)