In [1]:
# Re-import edited project modules automatically before each cell executes, so
# changes to the local helper modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import numexpr as ne
import time
import copy
import sys

from tqdm import tqdm
from sklearn.metrics import accuracy_score, log_loss

import torch
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from torch.autograd import Variable

import matplotlib.pyplot as plt
%matplotlib inline

sys.path.append('../pytorch_utils/')
from utils import train, evaluate, get_data

In [3]:
import torch.cuda
# Sanity check: later cells move batches to the GPU with `.cuda()`, so this
# should display True.
torch.cuda.is_available()

True

In [4]:
# Enable cuDNN benchmark mode. Per the PyTorch docs this lets cuDNN auto-tune
# its convolution algorithms; NOTE(review): it only pays off when input shapes
# stay fixed between batches — confirm that holds for this data.
torch.backends.cudnn.benchmark = True

# Load data

In [5]:
%%time
X_train, X_val, Y_train, Y_val = get_data()
train_size = len(X_train)
val_size = len(X_val)
print(train_size, val_size)

51200 5120
CPU times: user 1min 32s, sys: 10.3 s, total: 1min 42s
Wall time: 40.8 s


# Create iterators

In [6]:
# Validation split wrapped as (float inputs, integer labels) pairs.
val_inputs = torch.FloatTensor(X_val)
val_labels = torch.LongTensor(Y_val)
val_data = TensorDataset(val_inputs, val_labels)

# Loader used for the periodic evaluation during training.
val_iterator = DataLoader(
    val_data,
    batch_size=64,
    shuffle=True,
    pin_memory=True,
)

In [7]:
# Mini-batch size of the training loader; also used to derive `n_batches`.
batch_size = 32

In [8]:
# Training split wrapped as (float inputs, integer labels) pairs.
train_inputs = torch.FloatTensor(X_train)
train_labels = torch.LongTensor(Y_train)
train_data = TensorDataset(train_inputs, train_labels)

# Reshuffled loader that feeds the training loop `batch_size` samples at a time.
train_iterator = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    pin_memory=True,
)

# Model

In [4]:
from model_resnet import make_model

In [29]:
# Build the network together with its loss function and optimizer.
# NOTE(review): the long "Parameter containing" printout below appears to come
# from inside make_model() — confirm, and consider silencing it there.
model, criterion, optimizer = make_model()

[Parameter containing:
-0.2148
-0.1636
-0.2749
-0.1467
-0.1608
-0.1995
-0.2596
-0.1743
-0.1222
-0.2430
-0.1613
-0.1688
-0.2409
-0.2399
-0.1097
-0.1969
-0.1877
-0.2348
-0.2558
-0.1767
-0.2420
-0.1562
-0.2178
-0.1377
-0.2279
-0.1621
-0.2826
-0.2409
-0.2471
-0.1553
-0.2104
-0.2859
-0.1964
-0.0832
-0.2393
-0.2351
-0.1945
 0.0154
-0.1195
-0.2926
-0.2291
-0.1674
-0.2343
-0.0912
-0.0635
-0.2006
-0.1643
-0.1642
-0.2303
-0.1797
-0.2385
-0.2117
-0.1462
-0.0478
-0.2154
-0.1721
-0.2460
-0.1025
-0.1626
-0.2145
-0.1511
-0.1798
-0.1978
-0.1921
-0.0616
-0.0715
-0.1550
-0.2173
-0.1934
 0.0743
-0.1892
-0.2030
-0.2502
-0.2361
-0.1666
-0.1925
-0.1652
-0.1988
-0.1764
-0.2844
-0.2176
-0.2331
-0.3477
-0.2777
-0.0526
-0.2451
-0.2385
-0.1878
-0.1923
-0.0891
-0.2415
-0.2228
-0.1493
-0.2207
-0.1265
-0.1885
-0.3001
-0.2414
-0.1787
-0.2106
-0.1632
-0.1710
-0.1726
-0.1987
-0.2172
-0.3946
-0.2690
-0.1780
-0.2089
-0.2232
-0.2667
-0.1740
-0.0994
-0.2388
-0.2100
-0.1970
-0.2012
-0.1395
-0.1594
-0.1694
-0.1571
-0.2167
-

# Train

In [13]:
# --- training schedule -------------------------------------------------------
n_epochs = 6            # passes over the training set
validation_step = 200   # evaluate on the validation set every this many steps
M = 3                   # number of learning-rate cycles (one snapshot per cycle)
initial = 0.01          # peak learning rate of the first cycle

# Optimisation steps per epoch and in the whole run.
n_batches = int(np.ceil(train_size / batch_size))
T = n_epochs * n_batches

n_batches

1600

In [14]:
def lr_scheduler(optimizer, step):
    """Set the learning rate for `step`: cosine annealing with restarts.

    The run of `T` steps is split into `M` cycles of `T // M` steps each.
    Inside a cycle the rate follows half a cosine wave that starts at
    `initial` and decays towards zero. At the first step of every cycle
    except the very first one, `initial` is halved, so each cycle starts
    from half the previous peak.

    Reads the module-level globals `T` and `M`, and reads and updates the
    module-level global `initial`.

    Args:
        optimizer: object exposing `param_groups`, a list of dicts whose
            'lr' entry is overwritten.
        step: 1-based global training step.

    Returns:
        The same `optimizer` object that was passed in.
    """
    global initial
    cycle_length = T // M
    position = (step - 1) % cycle_length

    # Halve the peak BEFORE computing the rate. Previously the halving happened
    # after the rate was written, so the first step of every restarted cycle
    # still ran at the old peak although 'lr is reset' reported the new one.
    if position == 0 and step != 1:
        initial *= 0.5
        print('lr is reset:', initial)

    # decay goes from 2.0 at the start of a cycle towards 0.0 at its end
    decay = np.cos(np.pi*position/cycle_length) + 1.0
    lr = initial*decay/2.0

    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    return optimizer

In [15]:
# One tuple per validation checkpoint:
# (epoch, train loss, val. loss, train accuracy, val. accuracy).
all_losses = []
# State dict of the model captured at the end of each learning-rate cycle.
all_models = []

In [16]:
%%time
running_loss = 0.0
running_accuracy = 0.0 
start = time.time()
model.train()

for epoch in range(0, n_epochs):
    for step, (x_batch, y_batch) in enumerate(train_iterator, 1 + epoch*n_batches):
        
        optimizer = lr_scheduler(optimizer, step)
        batch_loss, batch_accuracy = train(
            model, criterion, optimizer, 
            x_batch, y_batch
        )
        running_loss += batch_loss
        running_accuracy += batch_accuracy
        
        if step % validation_step == 0:
            model.eval()
            test_loss, test_accuracy = evaluate(
                model, criterion, val_iterator
            )
            end = time.time()
            
            print('{0:.2f}  {1:.3f} {2:.3f}  {3:.3f} {4:.3f}  {5:.3f}'.format(
                step/n_batches, running_loss/validation_step, test_loss, 
                running_accuracy/validation_step, test_accuracy, end - start
            ))
            all_losses += [(
                step/n_batches,
                running_loss/validation_step, test_loss, 
                running_accuracy/validation_step, test_accuracy
            )] 
            
            running_loss = 0.0
            running_accuracy = 0.0 
            start = time.time()
            model.train()
            
        if step % (T // M) == 0:
            
            print('saving')
            model.cpu()
            clone = copy.deepcopy(model)
            all_models += [clone.state_dict()]
            model.cuda()

0.12  4.340 3.049  0.281 0.561  63.932
0.25  2.476 1.885  0.662 0.702  60.687
0.38  1.654 1.436  0.783 0.772  60.336
0.50  1.325 1.209  0.815 0.791  60.031
0.62  1.120 1.168  0.841 0.792  60.172
0.75  1.017 1.047  0.857 0.812  60.272
0.88  0.953 1.025  0.862 0.806  60.225
1.00  0.899 0.963  0.872 0.818  60.218
1.12  0.795 0.981  0.895 0.822  60.253
1.25  0.806 0.923  0.897 0.833  60.296
1.38  0.763 0.932  0.900 0.828  60.262
1.50  0.765 0.934  0.905 0.825  60.152
1.62  0.756 0.906  0.904 0.831  60.323
1.75  0.745 0.869  0.910 0.847  59.993
1.88  0.762 0.916  0.902 0.834  59.966
2.00  0.771 0.899  0.896 0.839  60.261
saving
lr is reset: 0.005
2.12  0.748 0.920  0.905 0.832  60.254
2.25  0.737 0.897  0.908 0.833  60.255


KeyboardInterrupt: 

# Loss/epoch

In [None]:
# Column 0 of every logged record is the (fractional) epoch; columns 1 and 2
# hold the training and validation loss.
epochs = [record[0] for record in all_losses]
for column, label in ((1, 'train'), (2, 'test')):
    plt.plot(epochs, [record[column] for record in all_losses], label=label)
plt.legend();
plt.xlabel('epoch');
plt.ylabel('loss');

In [None]:
# Columns 3 and 4 of every logged record hold the training and validation
# accuracy; `epochs` comes from the loss plot cell.
for column, label in ((3, 'train'), (4, 'test')):
    plt.plot(epochs, [record[column] for record in all_losses], label=label)
plt.legend();
plt.xlabel('epoch');
plt.ylabel('accuracy');

# Predict val. set

In [None]:
# Per-batch probability arrays for the validation set; concatenated later.
val_predictions = []

In [None]:
# Unshuffled pass over the validation set, so that row k of the collected
# predictions corresponds to Y_val[k] when they are scored below.
val_iterator_no_shuffle = DataLoader(
    val_data, batch_size=32, shuffle=False, pin_memory=True
)

In [None]:
%%time
# model.load_state_dict(all_models[-1])
model.eval()

for x_batch, _ in val_iterator_no_shuffle:

    x_batch = Variable(x_batch.cuda(), volatile=True)
    logits = model(x_batch)

    # compute probabilities
    probs = F.softmax(logits) 
    val_predictions += [probs.cpu().data.numpy()]

In [None]:
# Merge the per-batch arrays into one array with a row per validation sample.
# NOTE: rebinding the name makes this cell non-idempotent — re-run the
# prediction cells above before running it a second time.
val_predictions = np.concatenate(val_predictions, axis=0)

In [None]:
# Top-1 accuracy on the validation set (argmax over the class axis).
accuracy_score(Y_val, val_predictions.argmax(1))

In [None]:
# Log loss of the predicted class probabilities on the validation set.
log_loss(Y_val, val_predictions)

# Try ensemble

In [None]:
# Maps snapshot index -> validation predictions made with that snapshot.
ensemble_predictions = {}

In [None]:
%%time
for i, state in enumerate(all_models):
    
    model.load_state_dict(state)
    ensemble_predictions[i] = []
    model.eval()
    
    for x_batch, _ in val_iterator_no_shuffle:

        x_batch = Variable(x_batch.cuda(), volatile=True)
        logits = model(x_batch)

        # compute probabilities
        probs = F.softmax(logits) 
        ensemble_predictions[i] += [probs.cpu().data.numpy()]

In [None]:
# Merge each snapshot's per-batch arrays into a single array per snapshot.
ensemble_predictions = {
    index: np.concatenate(batches, axis=0)
    for index, batches in ensemble_predictions.items()
}

In [None]:
# Average the class probabilities over all snapshots.
# np.stack needs a real sequence: in Python 3 `dict.values()` is a view, which
# recent NumPy versions reject, hence the explicit list().
predictions = np.stack(list(ensemble_predictions.values())).mean(0)

In [None]:
# Top-1 accuracy of the averaged (ensemble) predictions on the validation set.
accuracy_score(Y_val, predictions.argmax(1))

In [None]:
# Log loss of the averaged (ensemble) probabilities on the validation set.
log_loss(Y_val, predictions)

# Error analysis

In [None]:
# Boolean mask over the validation set: True where the ensemble's most
# probable class differs from the true label.
predicted_classes = predictions.argmax(1)
erroneous = Y_val != predicted_classes

# number of misclassified validation samples
n_errors = int(np.count_nonzero(erroneous))
n_errors

In [None]:
# Pick up to 30 distinct misclassified samples to display (the grid below is
# 6x5). Capping at n_errors avoids the ValueError that sampling without
# replacement raises when fewer than 30 errors exist.
to_show = np.random.choice(
    np.arange(0, n_errors), size=min(30, n_errors), replace=False
)

In [None]:
# Positions (within the validation set) of the sampled misclassified items.
shown_idx = np.flatnonzero(erroneous)[to_show]

# Ensemble's most probable class and its probability for every sample.
top_class = predictions.argmax(1)
top_prob = predictions.max(1)

pictures = X_val[shown_idx].copy()  # copied: modified in place further down
pictures_predictions = top_class[shown_idx]
pictures_probs = top_prob[shown_idx]
pictures_true = Y_val[shown_idx]

In [None]:
# Per-channel mean and std used below to undo the input normalisation (and to
# apply it to the test images).
# NOTE(review): these look like the usual ImageNet statistics — confirm they
# match what get_data() applies.
mean = np.array([0.485, 0.456, 0.406], dtype='float32')
std = np.array([0.229, 0.224, 0.225], dtype='float32')

# Lookup from class index to a printable class name, stored as a pickled
# object inside a 0-d array (hence the `[()]` unwrap). Recent NumPy versions
# refuse to unpickle unless allow_pickle=True is passed explicitly.
# SECURITY: unpickling executes arbitrary code — only load this file from a
# trusted source.
decode = np.load('../utils/decode.npy', allow_pickle=True)[()]

In [None]:
# Move the channel axis last (axes 0, 2, 3, 1) so imshow can draw the pictures,
# then undo the normalisation in place: x * std + mean.
# numexpr looks up `pictures`, `std` and `mean` by name from the expression
# strings, so these variable names must not be changed.
# NOTE: this cell is not idempotent — re-running it transposes and rescales
# the already converted pictures again.
pictures = np.transpose(pictures, axes=(0, 2, 3, 1))
ne.evaluate('pictures*std', out=pictures);
ne.evaluate('pictures + mean', out=pictures);

In [None]:
# Show each sampled misclassified picture with its predicted class, the
# ensemble probability of that class, and the true class.
_, axes = plt.subplots(nrows=6, ncols=5, figsize=(14, 16))
axes = axes.flatten()

# hide every frame up front so unused grid cells stay blank
for ax in axes:
    ax.set_axis_off();

for i, pic in enumerate(pictures):
    axes[i].imshow(pic);
    # the arrays are named pictures_* where they are built; the previous
    # picture_* spelling raised NameError
    title = decode[pictures_predictions[i]] + ' ' +\
        str(pictures_probs[i]) + '\ntrue: ' + decode[pictures_true[i]]
    axes[i].set_title(title);
plt.tight_layout()

# Test set predictions

In [None]:
# Load the test images, convert them to float32 and divide by 255 in place.
# numexpr looks up `test_images` and `f255` by name from the expression string,
# so these variable names must not be changed.
test_images = np.load('test_images.npy')
test_images = test_images.astype('float32')
f255 = np.array([255.0], dtype='float32')
ne.evaluate('test_images/f255', out=test_images);

In [None]:
# Normalise in place with the `mean` / `std` arrays defined in the error
# analysis section: (x - mean) / std.
ne.evaluate('test_images - mean', out=test_images);
ne.evaluate('test_images/std', out=test_images);

# Move the last axis to position 1 (axes 0, 3, 1, 2) before building tensors.
# NOTE: not idempotent — re-running this cell normalises and transposes again.
test_images = np.transpose(test_images, axes=(0, 3, 1, 2))

In [None]:
# Number of test images.
test_size = len(test_images)

In [None]:
# TensorDataset needs a target tensor, but the test set has no labels:
# pair every image with a dummy label of 0.
filler_labels = torch.LongTensor(np.zeros((test_size,), dtype='int64'))
test_data = TensorDataset(torch.FloatTensor(test_images), filler_labels)

# Unshuffled, so predictions keep the order of `test_images`.
test_loader = DataLoader(test_data, batch_size=512, shuffle=False)

In [None]:
%%time
test_ensemble_predictions = {}

for i, state in enumerate(all_models):
    
    model.load_state_dict(state)
    test_ensemble_predictions[i] = []
    
    for x_batch, _ in test_loader:

        x_batch = Variable(x_batch.cuda(), volatile=True)
        logits = model(x_batch)

        # compute probabilities
        probs = F.softmax(logits) 
        test_ensemble_predictions[i] += [probs.cpu().data.numpy()]

In [None]:
# Merge each snapshot's per-batch arrays into a single array per snapshot.
test_ensemble_predictions = {
    p: np.concatenate(test_ensemble_predictions[p], axis=0)
    for p in test_ensemble_predictions
}

# Average the class probabilities over all snapshots.
# np.stack needs a real sequence: in Python 3 `dict.values()` is a view, which
# recent NumPy versions reject, hence the explicit list().
test_predictions = np.stack(list(test_ensemble_predictions.values())).mean(0)

In [None]:
# Store the most probable class index of every test image.
np.save('test_classes.npy', test_predictions.argmax(1))

# Save the model

In [None]:
# Write every stored snapshot (a state dict) to its own numbered file.
for index, snapshot in enumerate(all_models):
    filename = 'model_state' + str(index) + '.pytorch'
    torch.save(snapshot, filename)