In [1]:
# autoreload mode 2: re-import edited modules automatically before each cell is executed
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from math import ceil
from sklearn.metrics import accuracy_score, log_loss
import torch
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
%matplotlib inline

import sys
sys.path.append('../training_utils/')
from data_utils import get_folders, get_class_weights
from train_utils import train, predict
from diagnostic_tools import top_k_accuracy, per_class_accuracy,\
    count_params, entropy, model_calibration, show_errors, most_confused_classes,\
    most_inaccurate_k_classes
    
torch.cuda.is_available()

True

In [3]:
# let cuDNN benchmark the available convolution algorithms and keep the fastest one
torch.backends.cudnn.benchmark = True

# Create data iterators

In [4]:
# mini-batch size of the training loader; n_batches is derived from it further down
batch_size = 32

In [5]:
train_folder, val_folder = get_folders()

# training batches are drawn in a new random order on every pass
train_iterator = DataLoader(
    train_folder,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)

# validation keeps the dataset order and uses a fixed batch size of 64
val_iterator = DataLoader(
    val_folder,
    batch_size=64,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

# number of training samples
train_size = len(train_folder.imgs)
train_size

16980

# Model

In [6]:
from get_resnet18_with_sd import get_model

In [7]:
# w[j]: 1/number_of_samples_in_class_j (inverse class frequency)
# decode: maps a folder name to its class name (in human readable format)
w, decode = get_class_weights(val_folder.class_to_idx)

In [8]:
# the class weights are rescaled to sum to 1 and passed to get_model as a float tensor;
# presumably they weight the loss returned as `criterion` -- TODO confirm in get_model
model, criterion, optimizer = get_model(class_weights=torch.FloatTensor(w/w.sum()))

In [9]:
# number of params in the model, as reported by the count_params helper
count_params(model)

21416000

# Train

In [10]:
# n_epochs = 20
# n_batches = ceil(train_size/batch_size)
# # number of cycles
# M = 1 
# # total number of optimization steps
# T = n_batches*n_epochs 
# # initial learning rates
# initial1 = 1e-2
# initial2 = 1e-3
# n_batches

In [11]:
# # cyclical cosine annealing
# # it changes the learning rate on every optimization step
# # 1e-6 is the minimal learning rate
# def lr_scheduler(optimizer, step):
    
#     global initial1
#     global initial2
#     decay = np.cos(np.pi*((step - 1) % (T // M))/(T // M)) + 1.0
    
#     # params of the last fc layer
#     for param_group in optimizer.param_groups[:2]:
#         param_group['lr'] = ((initial1 - 1e-6)*decay/2.0) + 1e-6
    
#     # params of the last two resnet blocks
#     for param_group in optimizer.param_groups[2:]:
#         param_group['lr'] = ((initial2 - 1e-6)*decay/2.0) + 1e-6
    
#     if (step - 1) % (T // M) == 0 and step != 1:
#         print('lr is reset')
        
#     return optimizer

In [12]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

n_epochs = 50
# optimisation steps per pass over the training set (the last batch may be partial)
n_batches = ceil(train_size / batch_size)

# multiply the learning rate by 0.1 once the monitored value (higher is better,
# mode='max') has failed to improve by at least 0.01 in absolute terms for
# longer than `patience` consecutive checks
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.1,
    patience=6,
    threshold=0.01,
    threshold_mode='abs',
    verbose=True,
)

n_batches

531

In [13]:
%%time
# Validate every n_batches optimisation steps (i.e. once per pass over the
# training data) using every batch of the validation loader. Both values are
# derived rather than hard-coded (previously 531 and 80) so they stay correct
# when batch_size or the datasets change.
all_losses, _ = train(
    model, criterion, optimizer,
    train_iterator, n_epochs, n_batches,
    val_iterator, validation_step=n_batches,
    n_validation_batches=len(val_iterator),
    saving_step=None, lr_scheduler=scheduler
)

1.00  4.941 2.761  0.139 0.469  0.300 0.727  100.975
2.00  3.492 1.808  0.339 0.595  0.599 0.838  99.572
3.00  2.932 1.769  0.424 0.658  0.685 0.864  100.329
4.00  2.593 1.631  0.471 0.664  0.730 0.874  99.729
5.00  2.348 1.550  0.517 0.679  0.764 0.882  100.449
6.00  2.205 1.509  0.541 0.707  0.781 0.899  99.999
7.00  2.065 1.502  0.564 0.713  0.799 0.891  99.814
8.00  1.909 1.342  0.579 0.701  0.817 0.903  100.091
9.00  1.789 1.301  0.606 0.733  0.831 0.909  99.909
10.00  1.773 1.214  0.613 0.741  0.837 0.909  100.111
11.00  1.657 1.223  0.635 0.744  0.848 0.909  99.998
12.00  1.622 1.255  0.639 0.726  0.853 0.899  100.094
13.00  1.541 1.132  0.655 0.749  0.864 0.917  100.279
14.00  1.454 1.220  0.669 0.742  0.874 0.908  101.205
15.00  1.434 1.085  0.675 0.747  0.875 0.915  100.064
16.00  1.461 1.116  0.677 0.754  0.876 0.918  99.613
17.00  1.356 1.088  0.687 0.762  0.884 0.920  99.560
18.00  1.325 1.083  0.694 0.755  0.886 0.920  100.385
19.00  1.230 1.034  0.708 0.756  0.899 0.920 

Process Process-369:
Process Process-372:
Process Process-370:
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
Process Process-371:
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.5/dist-packages/torch/utils/data/dataloader.py", line 40, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.5/dist-packages/torchvision/datasets/folder.py", line 116, in __getitem__
    img = self.loader(path)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self

KeyboardInterrupt: 

In [None]:
# 1e-4_1e-4 0.25 -> 38.00  1.044 0.923  0.757 0.793  0.916 0.930  99.657
# 1e-5_1e-5 0.15 -> 46.00  0.759 0.881  0.813 0.795  0.942 0.931  100.128


In [None]:
# use all blocks, drop 3
# 1e-3_1e-3 1e-7_1e-7 + adam + drop_0.1 -> 24.00  2.621 2.872  0.429 0.386  0.675 0.640  93.682
# 1e-3_1e-3 1e-7_1e-7 + RMSprop + drop_0.1 -> 4.00  5.084 5.034  0.048 0.040  0.152 0.135  93.993
# 1e-2_1e-2 1e-7_1e-7 + RMSprop + drop_0.1 -> 2.00  5.518 5.468  0.008 0.009  0.039 0.040  93.607

In [None]:
# 1e-2_1e-3 no drop 1e-3_1e-3 + cosine -> 20.00  2.303 2.124  0.540 0.641  0.754 0.849  93.557
# 1e-2_1e-2 no drop 1e-3_1e-3 + on_plateau -> 29.00  1.738 1.990  0.615 0.565  0.816 0.792  93.393
# 1e-1_1e-1 no drop 1e-5_1e-3 + on_plateau -> 5.00  5.763 5.700  0.003 0.004  0.016 0.020  93.239
# 1e-3_1e-2 no drop 1e-5_1e-5 + on_plateau -> 28.00  2.043 2.832  0.589 0.552  0.790 0.785  93.372
# use fewer blocks
# 1e-3_1e-2 no drop 1e-5_1e-5 + on_plateau -> 26.00  1.775 1.972  0.633 0.665  0.826 0.863  92.524

# 1e-3_1e-2 no drop 1e-7_1e-7 + no nesterov mom 0.95 -> 34.00  1.441 1.743  0.655 0.607  0.859 0.829  92.509
# 1e-3_1e-2 no drop 1e-5_1e-5 + no nesterov mom 0.95 + more blocks -> 9.00  5.264 5.359  0.023 0.020  0.086 0.086  92.917
# 2e-3_2e-2 no drop 1e-7_1e-7 + no nesterov mom 0.99 -> 9.00  5.528 5.539  0.004 0.006  0.019 0.026  93.480
# 1e-3_1e-2 no drop 1e-7_1e-7 + no nesterov mom 0.9 + all blocks but not all drop -> 30.00  1.555 1.487  0.672 0.699  0.855 0.885  93.359
# 1e-3_1e-2 no drop 1e-7_1e-7 + no nesterov mom 0.6 + drop_0.25 -> 28.00  3.010 3.794  0.469 0.602  0.694 0.819  93.513
# 1e-4_1e-2 no drop 1e-7_1e-7 + no nesterov mom 0.97 + drop_0.25 -> 6.00  5.986 5.649  0.005 0.005  0.024 0.025  93.167
# 1e-4_1e-2 no drop 1e-7_1e-7 + no nesterov mom 0.0 + drop_0.25 -> 6.00  4.453 5.431  0.227 0.043  0.421 0.098  93.215

# Loss/epoch plots

In [None]:
# each record of all_losses starts with the epoch, followed by train / val loss
epochs = [record[0] for record in all_losses]
for column, label in ((1, 'train'), (2, 'val')):
    plt.plot(epochs, [record[column] for record in all_losses], label=label)
plt.legend()
plt.xlabel('epoch')
plt.ylabel('loss');

In [None]:
# columns 3 and 4 of each record hold train / val accuracy
for column, label in ((3, 'train'), (4, 'val')):
    plt.plot(epochs, [record[column] for record in all_losses], label=label)
plt.legend()
plt.xlabel('epoch')
plt.ylabel('accuracy');

In [None]:
# columns 5 and 6 of each record hold train / val top-5 accuracy
for column, label in ((5, 'train'), (6, 'val')):
    plt.plot(epochs, [record[column] for record in all_losses], label=label)
plt.legend()
plt.xlabel('epoch')
plt.ylabel('top5_accuracy');

# Error analysis

### get human readable class names

In [None]:
# index to class name
# The folder -> class-name mapping is fetched again instead of being read from
# `decode`, because this cell rebinds `decode`: building the new dict from the
# old `decode` would re-index an already converted mapping if the cell is run
# twice. Assumes get_class_weights is cheap and side-effect free -- TODO confirm.
folder_to_name = get_class_weights(val_folder.class_to_idx)[1]
decode = {
    class_index: folder_to_name[int(folder)]
    for folder, class_index in val_folder.class_to_idx.items()
}

### get all predictions and all misclassified images 

In [None]:
# sequential pass over the validation set, with no worker or pin_memory options
val_iterator_no_shuffle = DataLoader(val_folder, shuffle=False, batch_size=64)

In [None]:
# predict returns five values when return_erroneous=True:
#   val_predictions, val_true_targets: predictions and true labels for the whole set
#   erroneous_samples: images that were misclassified
#   erroneous_targets: their true labels
#   erroneous_predictions: predictions for them
(
    val_predictions,
    val_true_targets,
    erroneous_samples,
    erroneous_targets,
    erroneous_predictions,
) = predict(model, val_iterator_no_shuffle, return_erroneous=True)

### number of misclassified images (there are overall 5120 images in the val dataset)

In [None]:
# erroneous_targets holds one true label per misclassified image
n_errors = len(erroneous_targets)
n_errors

### logloss and accuracies

In [None]:
# multi-class log loss of the predicted probabilities on the validation set
log_loss(val_true_targets, val_predictions)

In [None]:
# top-1 accuracy: the predicted class is the argmax over axis 1 of val_predictions
accuracy_score(val_true_targets, val_predictions.argmax(1))

In [None]:
# top-k accuracy for k = 2, 3, 4, 5 and 10
print(top_k_accuracy(val_true_targets, val_predictions, k=(2, 3, 4, 5, 10)))

### entropy of predictions

In [None]:
# boolean mask: True where the top-1 prediction equals the true label
hits = val_predictions.argmax(1) == val_true_targets

In [None]:
# Normalised histograms of prediction entropy for correct vs. wrong predictions.
# `density=True` replaces `normed=True`, which matplotlib deprecated in 2.1
# and removed in 3.1.
plt.hist(
    entropy(val_predictions[hits]), bins=30,
    density=True, alpha=0.7, label='correct prediction'
)
plt.hist(
    entropy(val_predictions[~hits]), bins=30,
    density=True, alpha=0.5, label='misclassification'
)
plt.legend()
plt.xlabel('entropy of predictions');
### confidence of predictions

In [None]:
# Normalised histograms of the largest predicted probability (the confidence)
# for correct vs. wrong predictions.
# `density=True` replaces `normed=True`, which matplotlib deprecated in 2.1
# and removed in 3.1.
plt.hist(
    val_predictions[hits].max(1), bins=30,
    density=True, alpha=0.7, label='correct prediction'
)
plt.hist(
    val_predictions[~hits].max(1), bins=30,
    density=True, alpha=0.5, label='misclassification'
)
plt.legend()
plt.xlabel('confidence of predictions');

### difference between biggest and second biggest probability

In [None]:
# sort each row ascending so the last two columns are the two largest probabilities
sorted_correct = np.sort(val_predictions[hits], 1)
sorted_incorrect = np.sort(val_predictions[~hits], 1)

# Normalised histograms of the margin between the best and second-best class.
# `density=True` replaces `normed=True`, which matplotlib deprecated in 2.1
# and removed in 3.1.
plt.hist(
    sorted_correct[:, -1] - sorted_correct[:, -2], bins=30,
    density=True, alpha=0.7, label='correct prediction'
)
plt.hist(
    sorted_incorrect[:, -1] - sorted_incorrect[:, -2], bins=30,
    density=True, alpha=0.5, label='misclassification'
)
plt.legend()
plt.xlabel('difference');

### probabilistic calibration of the model

In [None]:
# calibration diagnostic from diagnostic_tools, computed with 10 bins
model_calibration(val_true_targets, val_predictions, n_bins=10)

### per class accuracies

In [None]:
# per_class_acc is reused by the cells below; here only its distribution is shown
per_class_acc = per_class_accuracy(val_true_targets, val_predictions)
plt.hist(per_class_acc)
plt.xlabel('accuracy');

In [None]:
# the 15 classes with the worst accuracy, named through `decode`
most_inaccurate_k_classes(per_class_acc, 15, decode)

### class accuracy vs. number of samples in the class

In [None]:
# w[j] is 1/number_of_samples_in_class_j, so its reciprocal is the class size
plt.scatter(1.0 / w, per_class_acc)
plt.xlabel('number of available samples')
plt.ylabel('class accuracy');

### most confused pairs of classes

In [None]:
# pairs of classes that get confused with each other; min_n_confusions=4 is
# presumably the minimum number of confusions for a pair to be listed -- TODO confirm
confused_pairs = most_confused_classes(
    val_true_targets, val_predictions, decode, min_n_confusions=4
)
confused_pairs

### show some low entropy errors

In [None]:
# split the misclassified samples by the entropy of their predicted distribution
erroneous_entropy = entropy(erroneous_predictions)
mean_entropy = erroneous_entropy.mean()
# True for errors whose entropy is BELOW the mean. The original comparison was
# inverted (mean_entropy < erroneous_entropy), which flagged the high-entropy
# errors as "low" and swapped the two groups shown by the following cells.
low_entropy = erroneous_entropy < mean_entropy
mean_entropy

In [None]:
# misclassified images selected by the `low_entropy` mask, with their
# predictions and true labels; `decode` maps class index to class name
show_errors(
    erroneous_samples[low_entropy], 
    erroneous_predictions[low_entropy], 
    erroneous_targets[low_entropy], 
    decode
)

### show some high entropy errors

In [None]:
# the misclassified images NOT selected by the `low_entropy` mask, with their
# predictions and true labels; `decode` maps class index to class name
show_errors(
    erroneous_samples[~low_entropy], 
    erroneous_predictions[~low_entropy], 
    erroneous_targets[~low_entropy], 
    decode
)

# Save

In [None]:
# move the model to the CPU (Module.cpu() returns the module itself) and
# write only its state dict to disk
torch.save(model.cpu().state_dict(), 'resnet18_with_sd.pytorch_state')