In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import numexpr as ne
import time
import copy
import sys

from tqdm import tqdm
from sklearn.metrics import accuracy_score, log_loss

import torch
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torch.autograd import Variable

import matplotlib.pyplot as plt
%matplotlib inline

sys.path.append('../pytorch_utils/')
from utils import evaluate, get_data, top5_accuracy, per_class_accuracy, count_params

In [None]:
# Report whether PyTorch can see a CUDA device (shown as the cell output).
import torch.cuda
torch.cuda.is_available()

In [None]:
# Enable the cuDNN benchmark flag; per the PyTorch docs this lets cuDNN time the
# available convolution algorithms and keep the fastest one.
torch.backends.cudnn.benchmark = True

# Load data

In [None]:
%%time
X_train, X_val, Y_train, Y_val = get_data()
train_size = len(X_train)
val_size = len(X_val)
print(train_size, val_size)

# Load logits

In [None]:
# The .npy file holds a pickled 0-d object array; `[()]` unwraps the object stored in it
# (iterated below as a mapping from model key to that model's training-set logits).
# NumPy >= 1.16.3 refuses to unpickle unless allow_pickle=True is passed explicitly.
# NOTE(review): unpickling can execute arbitrary code - only load files you produced yourself.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
train_logits = np.load('/home/ubuntu/data/train_logits.npy', allow_pickle=True)[()]

In [None]:
# Softmax temperature: logits are divided by this value before the softmax,
# both when building the soft targets and inside the soft-target loss term.
temperature = 20.0

In [None]:
# Number of entries in train_logits; used to average the per-model soft targets.
n_models = len(train_logits)
n_models

In [None]:
# Soft targets = mean over all models of softmax(logits / temperature).
# NOTE(review): the 256 columns presumably equal the number of classes - confirm.
soft_targets = torch.zeros(train_size, 256)
for k in train_logits:
    # dim=1 normalises each row; spelled out because the implicit-dim form of
    # F.softmax is deprecated and emits a warning.
    soft_targets += F.softmax(torch.FloatTensor(train_logits[k])/temperature, dim=1)
soft_targets /= n_models

# Create iterators

In [None]:
from torch.utils.data.dataset import Dataset

In [None]:
class TripleDataset(Dataset):
    """Dataset that yields (data, target, third) triples taken from three aligned tensors.

    Arguments:
        data_tensor: samples, indexed along the first dimension.
        target_tensor: one target per sample.
        third_tensor: one extra entry per sample (the soft targets in this notebook).

    Raises:
        AssertionError: if the three tensors differ in size along the first dimension.
    """

    def __init__(self, data_tensor, target_tensor, third_tensor):
        assert data_tensor.size(0) == target_tensor.size(0)
        # The third tensor is indexed with the same index in __getitem__, so it has to be
        # just as long; otherwise the mismatch would only surface as an IndexError
        # in the middle of an epoch.
        assert data_tensor.size(0) == third_tensor.size(0)
        self.data_tensor = data_tensor
        self.target_tensor = target_tensor
        self.third_tensor = third_tensor

    def __getitem__(self, index):
        """Return the `index`-th entry of each of the three tensors."""
        return self.data_tensor[index], self.target_tensor[index], self.third_tensor[index]

    def __len__(self):
        """Number of samples (size of the data tensor along the first dimension)."""
        return self.data_tensor.size(0)

In [None]:
# Validation set: inputs as float tensors paired with integer (long) labels.
val_data = TensorDataset(torch.FloatTensor(X_val), torch.LongTensor(Y_val))

# Batches of 64, reshuffled on every pass, served from pinned host memory.
val_iterator = DataLoader(val_data, shuffle=True, pin_memory=True, batch_size=64)

In [None]:
# Mini-batch size of the training DataLoader; also used to compute n_batches.
batch_size = 32

In [None]:
# Training set: every sample carries its hard label and its averaged soft target.
train_data = TripleDataset(torch.FloatTensor(X_train), torch.LongTensor(Y_train), soft_targets)

# Shuffled mini-batches of `batch_size` samples, served from pinned host memory.
train_iterator = DataLoader(train_data, shuffle=True, pin_memory=True, batch_size=batch_size)

# Model

In [None]:
# Make the model definition in the sibling `squeezenet` directory importable.
sys.path.append('../squeezenet/')
from model_squeezenet import make_model

In [None]:
# make_model() hands back the network together with its loss criterion and optimizer.
model, criterion, optimizer = make_model()

In [None]:
# Parameter count as reported by the project helper (shown as the cell output).
count_params(model)

# Train

In [None]:
class soft_targets_logloss(torch.nn.Module):
    """Cross-entropy between predicted logits and soft (probability) targets.

    The base class is spelled `torch.nn.Module` because this notebook imports
    `torch` and `torch.nn.functional as F` but never binds the name `nn`.
    """

    def __init__(self):
        super(soft_targets_logloss, self).__init__()

    def forward(self, logits, targets):
        """Return the batch mean of -sum_c targets[:, c] * log_softmax(logits)[:, c].

        Arguments:
            logits: 2-D tensor of unnormalised scores, one row per sample.
            targets: tensor of the same shape holding the target probabilities.
        """
        # dim=1: normalise over the columns of each row. Stated explicitly because
        # the implicit-dim form of F.log_softmax is deprecated.
        x = F.log_softmax(logits, dim=1)
        return -(targets*x).sum(1).mean(0)

In [None]:
# Loss term that matches the network output to the averaged soft targets.
soft_criterion = soft_targets_logloss()
# Weight of the hard-label loss when the two terms are summed inside train().
logloss_weight = 0.1

In [None]:
def train(model, criterion, optimizer, x_batch, y_batch, soft_y_batch):
    """Run one optimisation step on a single mini-batch.

    The optimised loss is
        logloss_weight * criterion(logits, y_batch)
        + soft_criterion(logits / temperature, soft_y_batch)
    where `logloss_weight`, `soft_criterion` and `temperature` are notebook-level globals.

    Written against the PyTorch >= 0.4 API:
      * `async` became a reserved word in Python 3.7, so `.cuda(async=True)` no longer
        parses; the keyword is now `non_blocking`.
      * Tensors and Variables are merged, so no `Variable(...)` wrapper is needed.
      * Scalar results are 0-dim tensors and are read with `.item()`, not `.data[0]`.

    Arguments:
        model: network being trained; expected to live on the GPU already.
        criterion: loss applied to (logits, hard labels).
        optimizer: optimizer that updates the model parameters.
        x_batch, y_batch, soft_y_batch: CPU tensors holding inputs, hard labels and
            soft targets for the batch.

    Returns:
        (batch_loss, batch_accuracy) as Python floats: the hard-label loss and the
        top-1 accuracy on this batch, both measured before the parameter update.
    """
    x_batch = x_batch.cuda()
    y_batch = y_batch.cuda(non_blocking=True)
    soft_y_batch = soft_y_batch.cuda(non_blocking=True)

    logits = model(x_batch)

    # hard-label loss, also reported back to the caller
    logloss = criterion(logits, y_batch)
    batch_loss = logloss.item()

    # top-1 accuracy; softmax is monotonic, so the arg-max of the logits is the
    # predicted class and no softmax is needed here
    pred = logits.max(1)[1]
    batch_accuracy = pred.eq(y_batch).float().mean().item()

    # total loss: weighted hard-label term plus the soft-target term
    loss = logloss_weight*logloss + soft_criterion(logits/temperature, soft_y_batch)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return batch_loss, batch_accuracy

In [None]:
# Number of passes over the training set.
n_epochs = 6
# Evaluate on the validation set every `validation_step` training steps.
validation_step = 200
# Batches per epoch (the last one may be smaller than batch_size).
n_batches = int(np.ceil(train_size/batch_size))
# Number of learning-rate cycles; a model snapshot is stored at the end of each one.
M = 3
# Total number of training steps.
T = n_batches*n_epochs
# Peak learning rate of the first cycle; lr_scheduler shrinks it on every restart.
initial = 0.01
n_batches

In [None]:
def lr_scheduler(optimizer, step):
    """Set the learning rate for training step `step` (1-based) and return the optimizer.

    The schedule is split into cycles of `T // M` steps. Inside a cycle the rate follows
    half a cosine wave, starting at the current peak `initial` and falling towards zero.
    When a new cycle starts (any cycle but the first) the peak is multiplied by 0.9.

    The peak is shrunk BEFORE the rate is written to the optimizer, so the first step
    of a new cycle already runs at the reduced peak that the printed message reports.
    Previously that step was still executed with the old, larger peak.

    Reads the notebook-level globals `T` and `M`; reads and updates `initial`.

    Arguments:
        optimizer: object with a `param_groups` list of dicts that have an 'lr' entry.
        step: global training step, starting at 1.

    Returns:
        The same optimizer, with 'lr' updated in every parameter group.
    """
    global initial
    cycle_length = T // M
    position = (step - 1) % cycle_length

    # restart: lower the peak for the cycle that begins with this very step
    if position == 0 and step != 1:
        initial *= 0.9
        print('lr is reset:', initial)

    # cos(...) + 1 goes from 2 down towards 0 over one cycle
    decay = np.cos(np.pi*position/cycle_length) + 1.0

    for param_group in optimizer.param_groups:
        param_group['lr'] = initial*decay/2.0

    return optimizer

In [None]:
# One tuple per validation checkpoint:
# (epoch fraction, running train loss, validation loss, running train accuracy, validation accuracy).
all_losses = []
# Model state_dict snapshots taken on the CPU, one appended at the end of each learning-rate cycle.
all_models = []

In [None]:
%%time
running_loss = 0.0
running_accuracy = 0.0 
start = time.time()
model.train()

for epoch in range(0, n_epochs):
    for step, (x_batch, y_batch, soft_y_batch) in enumerate(train_iterator, 1 + epoch*n_batches):
        
        optimizer = lr_scheduler(optimizer, step)
        batch_loss, batch_accuracy = train(
            model, criterion, optimizer, 
            x_batch, y_batch, soft_y_batch
        )
        running_loss += batch_loss
        running_accuracy += batch_accuracy
        
        if step % validation_step == 0:
            model.eval()
            test_loss, test_accuracy = evaluate(
                model, criterion, val_iterator
            )
            end = time.time()
            
            print('{0:.2f}  {1:.3f} {2:.3f}  {3:.3f} {4:.3f}  {5:.3f}'.format(
                step/n_batches, running_loss/validation_step, test_loss, 
                running_accuracy/validation_step, test_accuracy, end - start
            ))
            all_losses += [(
                step/n_batches,
                running_loss/validation_step, test_loss, 
                running_accuracy/validation_step, test_accuracy
            )] 
            
            running_loss = 0.0
            running_accuracy = 0.0 
            start = time.time()
            model.train()
            
        if step % (T // M) == 0:
            
            print('saving')
            model.cpu()
            clone = copy.deepcopy(model)
            all_models += [clone.state_dict()]
            model.cuda()

# Loss/epoch

In [None]:
# Train vs. validation loss at every validation checkpoint.
epochs = [record[0] for record in all_losses]
for column, label in ((1, 'train'), (2, 'test')):
    plt.plot(epochs, [record[column] for record in all_losses], label=label)
plt.legend()
plt.xlabel('epoch')
plt.ylabel('loss');

In [None]:
# Train vs. validation accuracy at every validation checkpoint
# (`epochs` comes from the loss-plot cell).
for column, label in ((3, 'train'), (4, 'test')):
    plt.plot(epochs, [record[column] for record in all_losses], label=label)
plt.legend()
plt.xlabel('epoch')
plt.ylabel('accuracy');

# Predict on the validation set

In [None]:
# Per-batch probability arrays for the validation set, filled by the prediction loop.
val_predictions = []

In [None]:
# Unshuffled loader, so the predictions come out in the same order as Y_val.
val_iterator_no_shuffle = DataLoader(
    val_data, batch_size=32, shuffle=False, pin_memory=True
)

In [None]:
%%time
# model.load_state_dict(all_models[-1])
model.eval()

for x_batch, _ in val_iterator_no_shuffle:

    x_batch = Variable(x_batch.cuda(), volatile=True)
    logits = model(x_batch)

    # compute probabilities
    probs = F.softmax(logits) 
    val_predictions += [probs.cpu().data.numpy()]

In [None]:
# Join the per-batch arrays into a single array along the sample axis.
val_predictions = np.concatenate(val_predictions, axis=0)

In [None]:
# Top-1 accuracy of the single model on the validation set.
accuracy_score(Y_val, val_predictions.argmax(1))

In [None]:
# Log loss of the predicted probabilities against the true labels.
log_loss(Y_val, val_predictions)

# Evaluate the snapshot ensemble

In [None]:
# Snapshot index -> validation-set probabilities predicted by that snapshot.
ensemble_predictions = {}

In [None]:
%%time
for i, state in enumerate(all_models):
    
    model.load_state_dict(state)
    ensemble_predictions[i] = []
    model.eval()
    
    for x_batch, _ in val_iterator_no_shuffle:

        x_batch = Variable(x_batch.cuda(), volatile=True)
        logits = model(x_batch)

        # compute probabilities
        probs = F.softmax(logits) 
        ensemble_predictions[i] += [probs.cpu().data.numpy()]

In [None]:
# Merge each snapshot's list of per-batch arrays into one array along the sample axis.
ensemble_predictions = {
    snapshot: np.concatenate(batches, axis=0)
    for snapshot, batches in ensemble_predictions.items()
}

In [None]:
# Ensemble prediction: element-wise mean of the snapshots' probability arrays.
# np.stack needs a real sequence; a dict_values view is rejected by current NumPy
# (deprecated since 1.16), so it is materialised as a list first.
predictions = np.stack(list(ensemble_predictions.values())).mean(0)

In [None]:
# Top-1 accuracy of the averaged ensemble prediction.
accuracy_score(Y_val, predictions.argmax(1))

In [None]:
# Log loss of the averaged ensemble prediction.
log_loss(Y_val, predictions)

In [None]:
# Top-5 accuracy, computed by the project helper.
top5_accuracy(Y_val, predictions)

In [None]:
# Accuracy broken down by class (project helper); shown as the cell output.
per_class_acc = per_class_accuracy(Y_val, predictions)
per_class_acc

# Error analysis

In [None]:
# Boolean mask of validation samples whose top-1 ensemble prediction is wrong.
erroneous = Y_val != predictions.argmax(1)
# Number of misclassified samples (shown as the cell output).
n_errors = len(Y_val[erroneous])
n_errors

In [None]:
# Pick up to 30 misclassified samples at random, without repetition.
# min() keeps the draw valid when fewer than 30 errors exist: with replace=False,
# np.random.choice raises ValueError if `size` exceeds the population.
to_show = np.random.choice(np.arange(0, n_errors), size=min(30, n_errors), replace=False)

In [None]:
# Inputs of the sampled errors, as a working copy that is de-normalised further down.
pictures = X_val[erroneous][to_show].copy()
# Predicted class, its probability, and the true class for the same samples.
pictures_predictions = predictions.argmax(1)[erroneous][to_show]
pictures_probs = predictions.max(1)[erroneous][to_show]
pictures_true = Y_val[erroneous][to_show]

In [None]:
# Per-channel statistics used to undo the input normalisation below.
# NOTE(review): these match the commonly used ImageNet RGB mean/std; presumably the same
# values the data pipeline normalised with - confirm against get_data().
mean = np.array([0.485, 0.456, 0.406], dtype='float32')
std = np.array([0.229, 0.224, 0.225], dtype='float32')
# Lookup table used for the plot titles, indexed with (class index + 1).
# The file stores a pickled object; NumPy >= 1.16.3 requires allow_pickle=True for that.
# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
decode = np.load('../utils/decode.npy', allow_pickle=True)[()]

In [None]:
# Move axis 1 to the end (presumably channels-first -> channels-last, as imshow expects).
pictures = np.transpose(pictures, axes=(0, 2, 3, 1))
# Undo the normalisation in place: x*std + mean. numexpr resolves `pictures`, `std` and
# `mean` by name from the calling scope, so these variables must keep their names.
ne.evaluate('pictures*std', out=pictures);
ne.evaluate('pictures + mean', out=pictures);

In [None]:
# show pictures, predicted classes and probabilities
_, axes = plt.subplots(nrows=6, ncols=5, figsize=(14, 16))
axes = axes.flatten()
for i, pic in enumerate(pictures):
    axes[i].set_axis_off();
    axes[i].imshow(pic);
    title = decode[pictures_predictions[i] + 1] + ' ' +\
        str(pictures_probs[i]) + '\ntrue: ' + decode[pictures_true[i] + 1]
    axes[i].set_title(title);
plt.tight_layout()

# Save the model

In [None]:
# Write every stored snapshot to its own file: model_state0.pytorch, model_state1.pytorch, ...
for i, m in enumerate(all_models):
    snapshot_path = 'model_state{}.pytorch'.format(i)
    torch.save(m, snapshot_path)