In [None]:
# Banner naming the experiment this notebook runs.
print("Experiment 1 with resnet18 transfer learning.")

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import csv

import torchnet as tnt

# Switch matplotlib to interactive mode.
plt.ion()

# Progress marker: the import cell ran to completion.
print("imports 1 complete")

In [None]:
# Data-augmentation presets.
#
# Two presets are defined:
#   - all_transforms:      the broad set (horizontal + vertical flips, rotation up to
#                          90 degrees, hue jitter, occasional grayscale). Random crops
#                          and similar are left out because they may remove handwriting.
#   - selected_transforms: a milder subset (horizontal flip, rotation up to 45 degrees,
#                          hue jitter, occasional grayscale).
#
# Every preset maps 'train' / 'val' / 'test' to its own pipeline so the splits can
# differ. Random augmentation is applied to 'train' only: 'val' and 'test' just resize
# and convert to a tensor, so evaluation metrics are deterministic and are measured on
# undistorted images.

# Fixed spatial size every image is resized to before being converted to a tensor.
_IMAGE_SIZE = (224, 224)


def _build_transform_set(train_augmentations):
    """Return a {'train', 'val', 'test'} dict of transform pipelines.

    Args:
        train_augmentations: list of random transforms applied, in order, to
            training images before the common resize + ToTensor steps.

    Returns:
        dict whose 'train' pipeline is the augmentations followed by the common
        steps, and whose 'val' / 'test' pipelines contain the common steps only.
    """
    def _pipeline(leading_steps):
        # Fresh Resize/ToTensor instances per pipeline; nothing is shared between splits.
        return transforms.Compose(list(leading_steps) + [
            transforms.Resize(_IMAGE_SIZE),
            transforms.ToTensor(),
        ])

    return {
        'train': _pipeline(train_augmentations),
        'val': _pipeline([]),
        'test': _pipeline([]),
    }


all_transforms = _build_transform_set([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(90),

    transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5),
    transforms.RandomGrayscale(p=0.1),
])

selected_transforms = _build_transform_set([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(45),

    transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5),
    transforms.RandomGrayscale(p=0.1),
])

print("Set up data transforms.")


In [None]:
# Data configuration: image directory, metadata CSV, and the augmentation preset.
#
# The locations default to the paths used on the original author's machine. To run
# the notebook elsewhere without editing it, set the BUILDUCLA_DATA_DIR and
# BUILDUCLA_META_CSV environment variables before starting the kernel.

# Earlier dataset location, kept for reference:
# data_dir = "C:\\Users\\rahul\\Documents\\work\\BuildUCLA\\data\\printed_with_ids\\images"
data_dir = os.environ.get(
    "BUILDUCLA_DATA_DIR",
    "C:\\Users\\rahul\\Documents\\work\\BuildUCLA\\data\\printed_with_ids_harsh_filter\\preprocessed-images",
)
meta_data_loc = os.environ.get(
    "BUILDUCLA_META_CSV",
    "C:\\Users\\rahul\\Documents\\work\\BuildUCLA\\data\\printed_with_ids_harsh_filter\\book_number_mapping.csv",
)

print(data_dir)
print(meta_data_loc)

# Augmentation preset used by the dataset cell further down.
data_transforms = selected_transforms

In [None]:
# Read the image metadata and build the label -> book -> ids mapping.
#
# The metadata file is a CSV whose first row holds the field names; the columns
# "id", "book" and "label" are read by name.
#
# book_mapping is a dict of dicts:
#   - outer keys are the values found in "label"
#   - inner keys are the book names found in "book"
#   - each inner value is the list of "id" values belonging to that book


def _load_book_mapping(csv_path):
    """Return {label: {book: [id, ...]}} built from the metadata CSV at csv_path.

    Args:
        csv_path: path of a CSV file with a header row containing the columns
            "id", "book" and "label".

    Returns:
        Nested dict; ids are kept as the strings read from the file, in file order.

    Raises:
        KeyError: if a row lacks one of the three expected columns.
    """
    mapping = {}
    # newline='' lets the csv module do its own newline handling, as its docs require.
    with open(csv_path, mode='r', newline='') as infile:
        for row in csv.DictReader(infile):
            books_for_label = mapping.setdefault(row["label"], {})
            books_for_label.setdefault(row["book"], []).append(row["id"])
    return mapping


book_mapping = _load_book_mapping(meta_data_loc)

# Total number of image ids across all labels and books.
count = sum(len(ids) for books in book_mapping.values() for ids in books.values())

print("read meta data for " + str(count) + " images from " + str(meta_data_loc))


In [None]:

set_types = ['train', 'val', 'test']

# One ImageFolder per split over the same directory, so that each split applies
# its own transform pipeline; the actual partition is done by index below.
data_sets = {t : datasets.ImageFolder(data_dir, transform=data_transforms[t])
             for t in set_types}

# Fractions of the full dataset assigned to each split.
train_size = 0.65     # 65% of all data is training
val_size = 0.15       # 15% of all data is validation
test_size = (1 - train_size - val_size)    # Remaining data (20%) is testing

# All three ImageFolders wrap the same directory, so any of them gives the total.
num_images = len(data_sets["train"])

# Shuffle the indices once with a fixed seed so the partition is reproducible.
all_ind = list(range(num_images))
random_seed = 11
np.random.seed(random_seed)
np.random.shuffle(all_ind)

train_split = int(num_images * train_size)
val_split = int(num_images * val_size)
# The test split takes whatever is left, so the three splits always cover every image.
test_split = num_images - train_split - val_split

# Index lists that define which images belong to which split.
data_samplers = {}
data_samplers["train"] = all_ind[:train_split]
data_samplers["val"] = all_ind[train_split : train_split+val_split]
data_samplers["test"] = all_ind[train_split+val_split:]


def _sampler_for(split_name):
    """Return the sampler object handed to the DataLoader of split_name.

    The training split is wrapped in a SubsetRandomSampler so its indices are
    re-shuffled every epoch; passing the bare index list would replay the exact
    same batch order each epoch. Validation and test keep the plain index list
    (fixed order).
    """
    indices = data_samplers[split_name]
    if split_name == 'train':
        return torch.utils.data.sampler.SubsetRandomSampler(indices)
    return indices


dataloaders = {t : torch.utils.data.DataLoader(data_sets[t],
                                              sampler=_sampler_for(t),
                                              batch_size=4,
                                              num_workers=4)
              for t in set_types}

# Number of images per split; used to average loss and accuracy.
dataset_sizes = {t : len(data_samplers[t]) for t in set_types}

class_names = data_sets["train"].classes
use_gpu = torch.cuda.is_available()

print("data loaded from " + str(data_dir))
print("read classes: " + str(class_names))
print("use_gpu is true" if use_gpu else "use_gpu is false")

In [None]:
def imshow(inp, title=None, mean=None, std=None):
    """Display a channel-first image tensor with matplotlib.

    Args:
        inp: image tensor laid out as (channels, height, width); it is converted
            to a numpy array and transposed to (height, width, channels).
        title: optional title passed to ``plt.title``.
        mean: optional per-channel means used to undo a ``Normalize`` transform.
        std: optional per-channel standard deviations used to undo a
            ``Normalize`` transform.

    De-normalisation (``std * inp + mean``) is only applied when both ``mean`` and
    ``std`` are given. The transform pipelines defined in this notebook do not
    normalise, so the default shows the tensor values as they are; unconditionally
    applying ImageNet statistics would distort the displayed images.
    """
    inp = inp.numpy().transpose((1, 2, 0))
    if mean is not None and std is not None:
        inp = np.asarray(std) * inp + np.asarray(mean)
    # Keep values inside the [0, 1] range before plotting.
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated

# Pull one batch out of the training loader.
train_batches = iter(dataloaders['train'])
inputs, classes = next(train_batches)

# Tile the images of that batch into a single grid image.
out = torchvision.utils.make_grid(inputs)

# Caption the grid with the class name of each image in the batch.
batch_titles = [class_names[label] for label in classes]
imshow(out, title=batch_titles)

print("visualized a few images")

In [None]:
# Early-stopping helper.
# Important: the tracked quantity is minimised (lower is better).
class EarlyStopping:
    """Signal when a monitored loss has stopped improving.

    A value counts as an improvement when it is lower than the best value seen so
    far by more than ``min_delta``. Once ``patience`` consecutive checks pass
    without an improvement, ``checkStoppingCriteria`` returns True.
    """

    def __init__(self, min_delta=0, patience=5):
        self.min_delta = min_delta  # smallest decrease that counts as an improvement
        self.patience = patience    # checks to wait for an improvement before stopping
        self.wait = 0               # checks counted since the last improvement
        self.best_loss = 1e15       # start from a very large "best" value

    def checkStoppingCriteria(self, curr_loss):
        """Record ``curr_loss`` and return True when training should stop."""
        improved = (curr_loss - self.best_loss) < -self.min_delta
        if improved:
            # New best value: remember it and restart the counter at 1.
            self.best_loss = curr_loss
            self.wait = 1
            return False

        if self.wait < self.patience:
            # No improvement, but still within the allowed waiting period.
            self.wait += 1
            return False

        # Waiting period used up without an improvement.
        return True
        

In [None]:
print("This 'train_model' function is a generic routine that can be used to train various models.")

def train_model(model, criterion, optimizer, scheduler, num_epochs=25, earlyStopping = None):
    """Train ``model`` with a validation pass after every epoch and return it with its best weights.

    Reads the notebook-level globals ``dataloaders`` and ``dataset_sizes`` (both
    keyed by 'train' / 'val') and ``use_gpu``.

    Args:
        model: network to optimise; it is updated in place.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler, stepped once at the start of every epoch.
        num_epochs: maximum number of epochs to run.
        earlyStopping: optional object exposing ``checkStoppingCriteria(value)``.
            After each epoch it receives ``1 - validation accuracy``; training
            stops as soon as it returns True.

    Returns:
        ``model`` after loading the weights of the epoch that reached the highest
        validation accuracy (the initial weights if no epoch scored above 0).
    """
    def _as_number(value):
        """Return a plain Python number for a one-element tensor, legacy Variable, or number."""
        if hasattr(value, "item"):
            # Tensor API (PyTorch >= 0.4): indexing a 0-dim tensor with [0] is an
            # error on newer releases, .item() is the supported accessor.
            return value.item()
        if hasattr(value, "data"):
            # Legacy Variable wrapping a 1-element tensor.
            return value.data[0]
        # Already a Python number (legacy reductions return one).
        return value

    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # Most recent validation error (1 - accuracy); this is what early stopping monitors.
    val_acc_loss = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                # NOTE(review): stepping the scheduler before the optimizer follows the
                # older PyTorch convention; newer releases expect it after
                # optimizer.step() - confirm against the targeted version.
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Confusion matrix for the two output classes, rebuilt for every phase
            confusion_matrix = tnt.meter.ConfusionMeter(2)

            # Iterate over data.
            for data in dataloaders[phase]:
                # get the inputs
                inputs, labels = data

                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                # Add to confusion matrix
                confusion_matrix.add(outputs.data, labels.data)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics: the criterion's value is weighted by the batch size so
                # that dividing by the dataset size below yields a per-image average;
                # both accumulators stay plain Python numbers.
                running_loss += _as_number(loss) * inputs.size(0)
                running_corrects += _as_number(torch.sum(preds == labels.data))

            epoch_loss = running_loss / dataset_sizes[phase]
            # float() keeps this a true division even if the count is an integer type.
            epoch_acc = float(running_corrects) / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Print confusion matrix
            print(confusion_matrix.conf)
            print()

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

            # store most recent val_loss
            if phase == 'val':
                val_acc_loss = (1.0 - epoch_acc)
                print("val_acc_loss = " + str(val_acc_loss))


        # Extra spacing
        print()
        print()

        # Include early stopping criteria check at end of epoch
        if (earlyStopping is not None) and earlyStopping.checkStoppingCriteria(val_acc_loss):
            print("Stopping after epoch " + str(epoch) + " due to early stopping criteria.")
            break


    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # '{:.4f}' (four decimals) matches the per-epoch report; the previous '{:4f}'
    # only set a minimum field width.
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [None]:
print("Transferring resnet18 and retraining with annotations dataset.")

model_resnet18 = models.resnet18(pretrained=True)
# Number of parameter tensors in the model (not the number of scalar weights).
num_params = len(list(model_resnet18.parameters()))

# The original note counts 10 top-level children in resnet18; the first
# `max_layer` of them (i.e. the first half) are frozen so they are not trained.
max_layer = 5
curr_layer = 1
last_layer = None  # ends up holding the last child that was frozen
for child in model_resnet18.children():
    if curr_layer > max_layer:
        break
    for param in child.parameters():
        param.requires_grad = False
    last_layer = child
    curr_layer += 1

# To inspect the architecture, print each entry of model_resnet18.children().

# Swap the final fully connected layer for a 2-output layer (binary classification).
num_ftrs = model_resnet18.fc.in_features
model_resnet18.fc = nn.Linear(num_ftrs, 2)

if use_gpu:
    model_resnet18 = model_resnet18.cuda()

criterion = nn.CrossEntropyLoss()

# Only parameters that still require gradients are handed to the optimizer,
# because part of the network is frozen.
trainable_params = [p for p in model_resnet18.parameters() if p.requires_grad]
optimizer = optim.SGD(trainable_params, lr=0.001, momentum=0.9)

# Multiply the learning rate by 0.1 every 7 epochs (taken from the tutorial, might need tweaking).
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

print("Created new model to train")

In [None]:
print("perform training")

# Upper bound on the number of epochs; early stopping may end the run sooner.
num_training_epochs = 50

# Stop once the monitored value has gone 5 checks without dropping by more than 1e-4.
earlyStoppingCriteria = EarlyStopping(patience=5, min_delta=1e-4)

model_resnet18 = train_model(
    model_resnet18,
    criterion,
    optimizer,
    exp_lr_scheduler,
    num_epochs=num_training_epochs,
    earlyStopping=earlyStoppingCriteria,
)

print("training complete")

In [None]:
# Persist the trained weights (state dict only, not the whole model object).
# NOTE(review): the file name says "5epochs" while the number of epochs actually
# run is decided by the training cell - confirm the name is still accurate.
weights_path = "resnet18_half_frozen_5epochs_transfer-state.pt"
trained_state = model_resnet18.state_dict()
torch.save(trained_state, weights_path)
print("saved model state (weights) to {}".format(weights_path))

In [None]:
def visualize_model(model, num_images=6):
    """Plot predictions of ``model`` for the first ``num_images`` test images.

    Reads the notebook-level globals ``dataloaders['test']``, ``use_gpu`` and
    ``class_names`` and draws each image with ``imshow``. Images are laid out in
    a two-column grid; each subplot is titled with the predicted class name.

    Args:
        model: network to run; it is put in eval mode for the duration of the
            call and restored to its previous mode afterwards, even if plotting
            raises.
        num_images: number of images to show. Nothing is drawn for values <= 0.
    """
    if num_images <= 0:
        # Nothing to draw; also avoids walking the whole test loader for no output.
        return

    was_training = model.training
    model.eval()
    images_so_far = 0
    plt.figure()

    # Round up so an odd num_images still fits in the two-column grid
    # (num_images // 2 rows would be one slot short).
    num_rows = (num_images + 1) // 2

    try:
        for inputs, _ in dataloaders['test']:
            if use_gpu:
                inputs = Variable(inputs.cuda())
            else:
                inputs = Variable(inputs)

            outputs = model(inputs)
            _, preds = torch.max(outputs.data, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_rows, 2, images_so_far)
                ax.axis('off')
                # int() makes the index a plain integer whether preds[j] is a number or a tensor.
                ax.set_title('predicted: {}'.format(class_names[int(preds[j])]))
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    return
    finally:
        # Put the model back into whichever mode it was in before the call.
        model.train(mode=was_training)

# Plot predictions of the trained network for a few test images (default count).
visualize_model(model_resnet18)
print("visualizing model")

In [None]:
# Evaluate the trained network on the held-out test split and report the
# average loss and accuracy over dataset_sizes["test"] images.
print("running on testing dataset")
model_resnet18.train(False)  # Set model to evaluate mode

running_loss = 0.0
running_corrects = 0

criterion = nn.CrossEntropyLoss()

# Iterate over data.
for data in dataloaders["test"]:
    # get the inputs
    inputs, labels = data

    # wrap them in Variable
    if use_gpu:
        inputs = Variable(inputs.cuda())
        labels = Variable(labels.cuda())
    else:
        inputs, labels = Variable(inputs), Variable(labels)

    # forward only: no gradients are zeroed and no optimizer step is taken here
    outputs = model_resnet18(inputs)
    _, preds = torch.max(outputs.data, 1)
    loss = criterion(outputs, labels)

    # Extract plain Python numbers. Tensors expose .item() (indexing a 0-dim tensor
    # with [0] is an error on newer PyTorch); legacy Variables need .data[0], and
    # legacy reductions already return a Python number.
    loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]
    batch_corrects = torch.sum(preds == labels.data)
    if hasattr(batch_corrects, "item"):
        batch_corrects = batch_corrects.item()

    # statistics: weight the criterion's value by the batch size so the division
    # below yields a per-image average
    running_loss += loss_value * inputs.size(0)
    running_corrects += batch_corrects

epoch_loss = running_loss / dataset_sizes["test"]
# float() keeps this a true division even if the count is an integer type.
epoch_acc = float(running_corrects) / dataset_sizes["test"]

print('{} Loss: {:.4f} Acc: {:.4f}'.format(
    "test", epoch_loss, epoch_acc))