# Training With Transfer Learning
It's time to train the CNN. We will be using transfer learning - we'll take an existing, pretrained model and replace the fully connected layer. The model we'll be using is resnet50 model

We will train this model using mini-batch gradient descent and allow fine-tuning (allowing the bottleneck layers to be updated with each iteration).

In this notebook, we use PyTorch to train the model. Let's import the necessary modules:

In [1]:
# NumPy and pandas for manipulating data
import numpy as np
import pandas as pd

# To make validation and training set
from sklearn.model_selection import train_test_split

# For training diagnostics later
import matplotlib as mpl
from matplotlib import pyplot as plt

# Necessary torch modules
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable

from torchvision import models

# Used to copy weights later
import copy

# Custom DataLoader class and transforms from dataloader.py
import dataloader
from dataloader import DataLoader

Let's also define some constants to use throughout the training process:

In [2]:
# Define run constants

# csv created in preprocessing that where all the images are
PATHS_FILE = '../database/cropped/path_labels.csv' 
# file from raw data that tells all the class names (alphabetized)
ITEM_NAMES_FILE = '../database/raw/food-items.txt'

SEED = 17               # Seed for train_test_split 

IMAGE_SIZE = 224        # Size of input images expected by base model
BATCH_SIZE = 12         # Size of each batch 
N_EPOCHS = 80           # Number of epochs to train for
LEARNING_RATE = 1e-4    # Initial learning rate
STEP_SIZE = 8           # Number of epochs before one step for exponential decay
GAMMA = 0.1             # Amount to reduce learning rate by 

RUN_NAME = "batch_size-{}n_epochs-{}learning_rate-{}step_size-{}gamma-{}"\
    .format(BATCH_SIZE, N_EPOCHS, LEARNING_RATE, STEP_SIZE, GAMMA)

The data is located in `../database/cropped`. We'll load it using our `DataLoader` class. 
The `DataLoader` class has a function `DataLoader.get_data()` which returns a generator that returns data in batches.

In [3]:
# Load data...
# Read in item names 
with open(ITEM_NAMES_FILE) as f:
    item_names = f.read().splitlines()

# Count the number of items
n_classes = len(item_names)

# Make dictionaries to turn string labels into indicies and back
label_dict_itos = dict(zip(range(0, n_classes), item_names))
label_dict_stoi = dict(zip(item_names, range(0, n_classes)))

# Read csv (we made this in the preprocessing step).
df = pd.read_csv(PATHS_FILE)

# Get file paths from DataFrame.
file_paths = df['cropped_path'].values

# Get labels as integer indicies
labels = df['label'].map(label_dict_stoi).values

# Split into test/validation sets 
(file_paths_train, file_paths_valid, 
    labels_train, labels_valid)  = train_test_split(
                                    file_paths,
                                    labels,
                                    stratify=labels,
                                    test_size=0.2,
                                    random_state=SEED)

# List transformations (these are defined in dataloader.py)
transforms = [
    (lambda x: x,                          {}),
    (dataloader.apply_blur,                {}),
    (dataloader.apply_brightness,          {}),
    (dataloader.apply_color_jitter,        {}),
    (dataloader.apply_sp_noise,            {}),
    (dataloader.apply_gauss_noise,         {}),
    (dataloader.apply_affine,              {}),
    (lambda img: dataloader.apply_color_jitter(dataloader.apply_affine(img)), {})
]

# Create data loader (once again, defined in dataloader.py)
data_loader_train = DataLoader(file_paths_train, labels_train, 
                            batch_size=BATCH_SIZE, 
                            image_size=(IMAGE_SIZE, IMAGE_SIZE), 
                            transforms=transforms)

data_loader_valid = DataLoader(file_paths_valid, labels_valid, 
                            batch_size=BATCH_SIZE, 
                            image_size=(IMAGE_SIZE, IMAGE_SIZE), 
                            transforms=[])

dataloaders = {'train': data_loader_train, 'valid': data_loader_valid}
dataset_sizes = {phase: dataloaders[phase].shape[0] for phase in dataloaders}

Let's create a function to train our model using PyTorch:

In [4]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=30):
    """ Train a model and return training history and the trained model.
    model               (torch.nn.Model): PyTorch model to train.
    
    criterion    (torch.nn.modules.loss): PyTorch loss function to optimize for.
    
    optimizer              (torch.optim): PyTorch optimizer to use when optimizing loss.
    
    scheduler (torch.optim.lr_scheduler): PyTorch scheduler to schedule the learning rate.
    
    num_epochs                     (int): Number of epochs to train for.
    """
    # Initialize the best weights.
    # We will be keeping track of the best model throughout training
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # Lists to keep trach of the changes in loss and accuracy
    train_loss_record = []
    valid_loss_record = []

    train_acc_record = []
    valid_acc_record = []

    # Current epoch loss and accuracy
    epoch_loss = 0
    epoch_acc = 0 

    # Run for num_epochs epochs
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Do training phase and testing phase
        for phase in ['train', 'valid']:
            # Take a step for the scheduler if we're training
            if phase == 'train':
                if scheduler is not None:
                    scheduler.step(epoch_loss)
                model.train(True)
            else:
                model.train(False)

            # Keep track of the loss and accuracy across batches for this epoch
            running_loss = 0.0
            running_corrects = 0

            # Get the data from our DataLoader class
            for data in dataloaders[phase].get_data():
                inputs, labels = data

                # Use PyTorch standard [batch_size, channel, height, width] to make tensors
                inputs = torch.tensor([[inp[:, :, 0], 
                                        inp[:, :, 1], 
                                        inp[:, :, 2]] for inp in inputs])\
                    .type_as(torch.FloatTensor())
                
                # Make tensors from labels too
                labels = torch.tensor(labels).type_as(torch.LongTensor())
                labels = labels.view(-1)

                # Wrap the Tensors in Variables
                inputs, labels = Variable(inputs), Variable(labels)

                # Reset the optimizer's gradient (some optimizers use previous gradients)
                optimizer.zero_grad()

                # Get the outputs (logits) from the model
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1) 
                loss = criterion(outputs, labels)
                
                # Calculate gradient and perform backpropagation if training
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # Keep track of loss and accuracy for this epoch
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            # Calculate total loss and accuracy over the epoch
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = float(running_corrects) / dataset_sizes[phase]

            # Print and record the running loss and accuracy
            print('{} Loss : {:.4f} Acc : {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            if phase == 'train':
                train_loss_record.append(epoch_loss)
                train_acc_record.append(epoch_acc)
            else:
                valid_loss_record.append(epoch_loss)
                valid_acc_record.append(epoch_acc)

            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        # Every 10 epochs, save a checkpoint model
        if (epoch % 10 == 0):
            checkpoint_path = './checkpoints/checkpoint' + RUN_NAME + str(epoch) + '.pt'
            torch.save(model, checkpoint_path)
            print("Saved checkpoint: {}".format(checkpoint_path))
    
    # Load the best model
    model.load_state_dict(best_model_wts)
    
    # Return the model along with the training and validation history
    return model, train_loss_record, valid_loss_record, train_acc_record, valid_acc_record

We need to define the architecture of our model. We take a base model and add our own fully connected layer:

In [5]:
class TransferModel(nn.Module):
    def __init__(self, base_model, n_classes):
        super(TransferModel, self).__init__()
        # Remove the fc layer
        self.base_layer = nn.Sequential(*list(base_model.children())[:-1])
    
        # Create our own fully connected layer
        self.fc = nn.Linear(base_model.fc.in_features, n_classes)
    
    def forward(self, inputs):
        # Connect the bottleneck layers with our fully connected layer 
        x = self.base_layer(inputs)
        x = x.view(x.size(0), -1)

        x = self.fc(x)
        return x

Now, we create and train the model. 

In [None]:
## Create a transfer learning model with resnet as the base
resnet_model = models.resnet50(pretrained=True)
transfer_model = TransferModel(resnet_model, n_classes)

# Use cross entropy loss
criterion = nn.CrossEntropyLoss()

# SGD optimizer; initialize learning rate to LEARNING_RATE
optimizer_conv = optim.SGD(transfer_model.parameters(), lr=LEARNING_RATE, 
                                momentum=0.9, weight_decay=0.001)

# Decrease learning rate by GAMMA for every STEP_SIZE steps
scheduler =  lr_scheduler.StepLR(optimizer_conv, step_size=STEP_SIZE, gamma=GAMMA)

# Call the training function
(transfer_model, train_loss_record, valid_loss_record, 
    train_acc_record, valid_acc_record) = train_model(transfer_model, 
                        criterion, optimizer_conv, scheduler, num_epochs=N_EPOCHS)

Epoch 0/79
----------
train Loss : 3.9719 Acc : 0.0269
valid Loss : 3.8400 Acc : 0.0868


  "type " + obj.__name__ + ". It won't be checked "


Saved checkpoint: ./checkpoints/checkpointbatch_size-12n_epochs-80learning_rate-0.0001step_size-8gamma-0.10.pt
Epoch 1/79
----------
train Loss : 3.8254 Acc : 0.0795
valid Loss : 3.6788 Acc : 0.1074
Epoch 2/79
----------
train Loss : 3.6905 Acc : 0.1136
valid Loss : 3.5089 Acc : 0.1653
Epoch 3/79
----------
train Loss : 3.5686 Acc : 0.1674
valid Loss : 3.3297 Acc : 0.2438
Epoch 4/79
----------
train Loss : 3.4326 Acc : 0.2366
valid Loss : 3.1378 Acc : 0.2975
Epoch 5/79
----------
train Loss : 3.2828 Acc : 0.2800
valid Loss : 2.9411 Acc : 0.4050
Epoch 6/79
----------
train Loss : 3.1364 Acc : 0.3512
valid Loss : 2.7463 Acc : 0.4463
Epoch 7/79
----------
train Loss : 2.9803 Acc : 0.4339
valid Loss : 2.5575 Acc : 0.5207
Epoch 8/79
----------
train Loss : 2.8286 Acc : 0.4576
valid Loss : 2.3493 Acc : 0.5744
Epoch 9/79
----------
train Loss : 2.6879 Acc : 0.5155
valid Loss : 2.1900 Acc : 0.6116
Epoch 10/79
----------
train Loss : 2.5798 Acc : 0.5341
valid Loss : 2.0248 Acc : 0.6488
Saved ch

Our model is finally done training. Let's graph the training record:

In [None]:
# Plot the loss history
x = range(N_EPOCHS)

plt.figure(figsize=(12, 8))
plt.plot(x, valid_loss_record, label='valid')
plt.plot(x, train_loss_record, label='train')

plt.title("Training History", fontsize=24)
plt.ylabel("Loss", fontsize=16)
plt.xlabel("Epoch", fontsize=16)
plt.legend()
plt.grid(axis='both')
plt.show()

In [None]:
# Plot the accuracy history
x = range(N_EPOCHS)

plt.figure(figsize=(12, 8))
plt.plot(x, valid_acc_record, label='valid')
plt.plot(x, train_acc_record, label='train')

plt.title("Training History", fontsize=24)
plt.ylabel("Accuracy", fontsize=16)
plt.xlabel("Epoch", fontsize=16)
plt.legend()
plt.grid(axis='both')
plt.show()

We can see that the best validation accuracy goes up to around ~90.1%. Although it may seem strange that the training loss is often higher than the validation loss, it makes sense considering the transformations we apply on the training set.

Let's look at which classes we did well on:

In [None]:
# Make results DataFrame so that we can figure out where we did well
truth_hist = []
preds_hist = []
inputs_hist = []
for inputs, labels in dataloaders['valid'].get_data():
    inputs_hist.extend(inputs)
    inputs = torch.tensor([[inp[:, :, 0], 
                            inp[:, :, 1], 
                            inp[:, :, 2]] 
                            for inp in inputs])\
                            .type_as(torch.FloatTensor())

    probs = transfer_model(inputs)
    preds = np.argmax(probs.data.numpy(), axis=1)
    preds_hist.extend(preds)
    truth_hist.extend(labels)


result_df = pd.DataFrame()
result_df['truth_code'] = truth_hist
result_df['preds_code'] = preds_hist
result_df['image'] = inputs_hist
result_df['correct'] = result_df['truth_code'] == result_df['preds_code']
result_df['label'] = result_df['truth_code'].map(label_dict_itos)
result_df['guessed'] = result_df['preds_code'].map(label_dict_itos)

accuracy = result_df['correct'].mean()
group_accuracy = result_df.groupby('label')['correct'].mean().sort_values()

print("Accuracy: {}".format(accuracy))
group_accuracy

Let's look at some of the misclassified images, and what the model mistakened them for:

In [None]:
plt.figure(figsize=(15, 20))

incorrect_df = result_df[result_df['correct'] == False].sample(15).reset_index()
for i, row in incorrect_df.iterrows():
    plt.subplot(5, 3, i + 1)
    plt.imshow(row['image'])
    plt.title("Guessed: {} | Actually: {}".format(row['guessed'], row['label']))
    plt.axis('off')

In [None]:

# result_df[result_df['label'] == 'fish'][['label', 'guessed']]
# result_df[result_df['label'] == 'pinto_beans'][['label', 'guessed']]
# result_df[result_df['label'] == 'parmesan_cheese'][['label', 'guessed']]

# def show_result(result_df, item):
#     for i, row in result_df[result_df['label'] == item].iterrows():
#         imshow(row['image'], title="Label: {}, Guessed: {}".format(row['label'], row['guessed']), pause=2.5)

# for item in group_accuracy.index[:8]:
#     show_result(result_df, item)


# for _, row in result_df.iterrows():
#     if not row['correct']:
#         imshow(row['image'], 
#             title="Label: {}, Guessed: {}".format(row['label'], row['guessed']), pause=2.5) 


# show_result(result_df, 'beef')
# show_result(result_df, 'pork')
# show_result(result_df, 'brown_onion')
# show_result(result_df, 'chicken_leg')
# show_result(result_df, 'mushroom')
# show_result(result_df, 'cilantro')

