In [1]:
print("Experiment 1 with resnet18 transfer learning.")

Experiment 1 with resnet18 transfer learning.


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import csv
import gc
import torchnet as tnt

plt.ion()

use_gpu = torch.cuda.is_available()
# Disabled for memory issues
# use_gpu = False

print("imports 1 complete")

imports 1 complete


In [3]:
def imshow(inp, title=None):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated

In [4]:
# Creating a class to deal with early stopping criteria
# Important: minimizes a loss value
class EarlyStopping:
    def __init__(self, min_delta=0, patience=5):
        # The minimum delta in loss to be considered a change in loss
        self.min_delta = min_delta
        
        # number of epochs to wait for improvement before terminating
        self.patience = patience
        
        # number of epochs waited
        self.wait = 0
        
        # Set "best loss" to some large number
        self.best_loss = 1e15
        
    def checkStoppingCriteria(self, curr_loss):
        """ Returns whether the stopping criteria has been met. """
        if (curr_loss - self.best_loss) < -self.min_delta:
            self.best_loss = curr_loss
            self.wait = 1
        elif self.wait < self.patience:
            self.wait += 1
        else:
            return True
        return False
        

In [5]:
print("This 'train_model' function is a generic routine that can be used to train various models.")

def train_model(model, criterion, optimizer, scheduler, data_loaders, num_epochs=25, early_stopping = None):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    val_acc_loss = 0.0
    
#     epoch_val_accs = {}
#     epoch_train_accs = {}
    epoch_acc_dict = {"train": {}, "val" : {}}

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Confusion matrix
            confusion_matrix = tnt.meter.ConfusionMeter(2)
            
            count = 0
            
            # Iterate over each book
            for book in data_loaders[phase]:
                
                # Iterate over data.
                for data in data_loaders[phase][book]:
                    # get the inputs
                    inputs, labels = data
                    
                    count += len(inputs)
                    
                    # wrap them in Variable
                    if use_gpu:
                        inputs = Variable(inputs.cuda())
                        labels = Variable(labels.cuda())
#                         inputs = Variable(inputs, volatile=True).cuda()
#                         labels = Variable(labels, volatile=True).cuda()
                    else:
                        inputs, labels = Variable(inputs), Variable(labels)

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    outputs = model(inputs)
                    _, preds = torch.max(outputs.data, 1)
                    loss = criterion(outputs, labels)

                    # Add to confusion matrix
                    confusion_matrix.add(outputs.data, labels.data)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                    # statistics
                    running_loss += loss.data[0] * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)

#             epoch_loss = running_loss / dataset_sizes[phase]
#             epoch_acc = running_corrects / dataset_sizes[phase]

            epoch_loss = running_loss / count
            epoch_acc = running_corrects / count
        
            epoch_acc_dict[phase][epoch] = epoch_acc

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Print confusion matrix
            print(confusion_matrix.conf)
            print()

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

            # store most recent val_loss
            if phase == 'val':
                val_acc_loss = (1.0 - epoch_acc)
                print("val_acc_loss = " + str(val_acc_loss))

                        # save each epoch's model
#             weights_path = "resnet18_half_frozen_" + str(epoch + 1) + "epochs_transfer-state.pt"
#             torch.save(model_resnet18.state_dict(), weights_path)
#             print("saved epoch " + str(epoch + 1) + " model state (weights) to " + weights_path)
#             print("ran epoch " + str(epoch + 1))
        
        
        # Extra spacing
        print()
        print()
        
        # Include early stopping criteria check at end of epoch
        if (early_stopping is not None) and early_stopping.checkStoppingCriteria(val_acc_loss):
            print("Stopping after epoch " + str(epoch) + " due to early stopping criteria.")
            break
        

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, epoch_acc_dict

This 'train_model' function is a generic routine that can be used to train various models.


In [6]:
# Including different forms of data augmentation
# One will include nearly all types (excluding random crops, etc. that may remove handwriting.)
# The other will include a selected set of augmentations

# Keeping 'train', 'val', and 'test' transforms just in case we want to include different functionalities

all_transforms = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(90),
        
        transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5),
        transforms.RandomGrayscale(p=0.1),
        
        transforms.Resize((224,224)),
        transforms.ToTensor(),
    ]),
    'val': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(90),
        
        transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5),
        transforms.RandomGrayscale(p=0.1),
        
        transforms.Resize((224,224)),
        transforms.ToTensor(),
    ]),
    'test': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(90),
        
        transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5),
        transforms.RandomGrayscale(p=0.1),
        
        transforms.Resize((224,224)),
        transforms.ToTensor(),
    ]),
}

selected_transforms = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(45),
        
        transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5),
        transforms.RandomGrayscale(p=0.1),
        
        transforms.Resize((224,224)),
        transforms.ToTensor(),
    ]),
    'val': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(45),
        
        transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5),
        transforms.RandomGrayscale(p=0.1),
        
        transforms.Resize((224,224)),
        transforms.ToTensor(),
    ]),
    'test': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(45),
        
        transforms.ColorJitter(brightness=0, contrast=0, saturation=0, hue=0.5),
        transforms.RandomGrayscale(p=0.1),
        
        transforms.Resize((224,224)),
        transforms.ToTensor(),
    ]),
}

print("Set up data transforms.")


Set up data transforms.


In [7]:
# Some data configuration, like data directory and transforms

# data_dir = "C:\\Users\\rahul\\Documents\\work\\BuildUCLA\\data\\printed_with_ids\\images"
data_dir = "C:\\Users\\rahul\\Documents\\work\\BuildUCLA\\data\\printed_with_ids_harsh_filter\\preprocessed-images"
meta_data_loc = "C:\\Users\\rahul\\Documents\\work\\BuildUCLA\\data\\printed_with_ids_harsh_filter\\book_number_mapping.csv"
book_data_dir = "C:\\Users\\rahul\\Documents\\work\\BuildUCLA\\data\\printed_with_ids_harsh_filter\\books-preprocessed-images"

print(data_dir)
print(meta_data_loc)
print(book_data_dir)

data_transforms = selected_transforms

C:\Users\rahul\Documents\work\BuildUCLA\data\printed_with_ids_harsh_filter\preprocessed-images
C:\Users\rahul\Documents\work\BuildUCLA\data\printed_with_ids_harsh_filter\book_number_mapping.csv
C:\Users\rahul\Documents\work\BuildUCLA\data\printed_with_ids_harsh_filter\books-preprocessed-images


In [8]:
# Create data sets/loaders for each book

set_types = ['train', 'val', 'test']

# test books are currently arbitrarily set
test_books = set(["Albin", "Dryden"])
# test_books = ["Dryden"]

# Get the list of all books in the data set
books_in_data = set([b for b in os.listdir(book_data_dir)
                 if os.path.isdir(os.path.join(book_data_dir, b))])


# test_transform = transforms.Compose([
#     transforms.Resize((1000,1000)),
#     transforms.ToTensor()
# ])

# Create a dict of datasets for each book
book_data_sets = {b : {t : datasets.ImageFolder(os.path.join(book_data_dir, b), transform = data_transforms[t])#, transform=test_transform)
                      for t in set_types}
                 for b in books_in_data}

book_data_loaders = {b : {t : torch.utils.data.DataLoader(book_data_sets[b][t],
                                                          batch_size=4,
                                                          num_workers=4)
                          for t in set_types}
                     for b in books_in_data}

print("loaded data")

# book_data_loaders = {b : torch.utils.data.DataLoader(book_data_sets[b]["train"],
#                                                                   batch_size=4,
#                                                                   num_workers=1)
#                                   for b in train_books}

# Create the test data loader, which will only have the books in test_books
# book_data_loaders = {}
# book_data_loaders["test"] = {b : torch.utils.data.DataLoader(book_data_sets[b]["test"],
#                                                         batch_size=4,
#                                                         num_workers=4)
#                         for b in test_books}

# for each cross-validation, the "train" and "val" parts of the data_loaders dict will be modified accordingly



# class_names = book_data_sets["Albin"]["train"].classes
# use_gpu = torch.cuda.is_available()
# i = 0
# for data in book_data_loaders["test"]["Dryden"]:
#     print(i)
#     inputs, classes = data
#     if i == 10:
#         for ind in range(len(inputs)):
#             img = inputs[ind]
#             nimg = img.numpy().T
#             imgplot = plt.imshow(nimg)
#             print(classes[ind])
#         break
#     else:
#         i += 1

# print("visualized a few images")

######



loaded data


In [None]:
# Read meta data of the images, and make book-index mapping dictionary

# Assumes meta data file:
#   - has field names in first row
#   - the field names are in this order: ["id", "book", "label"]

# Create a dict of dicts:
#   - keys are based on the value in "label"
#   - values are the "inner" dicts described below
# Each "inner" dict has:
#   - keys are the book names, specified in "book"
#   - each value is a list of all the indices denoted by "id"

# Below is commented temporarily
# book_mapping = {}

# with open(meta_data_loc, mode='r') as infile:
#     reader = csv.DictReader(infile)
#     for row in reader:
#         if row["label"] not in book_mapping:
#             book_mapping[row["label"]] = {}
#         bm = book_mapping[row["label"]]
        
#         if row["book"] not in bm:
#             bm[row["book"]] = []
#         bm[row["book"]].append(int(row["id"]))

# count = 0
# for l in book_mapping:
#     for b in book_mapping[l]:
#         count += len(book_mapping[l][b])

# print("read meta data for " + str(count) + " images from " + str(meta_data_loc))


In [9]:
from __future__ import print_function
import numpy as np

def valid_imshow_data(data):
    data = np.asarray(data)
    if data.ndim == 2:
        return True
    elif data.ndim == 3:
        if 3 <= data.shape[2] <= 4:
            return True
        else:
            print('The "data" has 3 dimensions but the last dimension '
                  'must have a length of 3 (RGB) or 4 (RGBA), not "{}".'
                  ''.format(data.shape[2]))
            return False
    else:
        print('To visualize an image the data must be 2 dimensional or '
              '3 dimensional, not "{}".'
              ''.format(data.ndim))
        return False

In [None]:
# Following is commented out temporarily

# set_types = ['train', 'val', 'test']
# # test books are currently arbitrarily set
# # test_books = ["Albin", "Dryden"]
# test_books = ["Dryden"]


# test_transform = transforms.Compose([
#     transforms.Resize((1000,1000)),
#     transforms.ToTensor()
# ])
# # Need to split data sets into training and testing sets
# data_sets = {t : datasets.ImageFolder(data_dir, transform = test_transform)#, transform=data_transforms[t])
#              for t in set_types}


# data_samplers = {}
# data_samplers["test"] = sum([bm[b] for bm in book_mapping.values() for b in bm if b in test_books], [])
# data_samplers["train"] = []
# data_samplers["val"] = []


# dataloaders = {t : torch.utils.data.DataLoader(data_sets[t],
#                                               sampler=data_samplers[t],
#                                               batch_size=4,
#                                               num_workers=4)
#               for t in set_types}

# class_names = data_sets["train"].classes
# use_gpu = torch.cuda.is_available()

# print(sorted(data_samplers["test"]))

# # inputs, classes = next(iter(dataloaders["test"]))
# i = 0
# for inputs, classes in iter(dataloaders["test"]):
#     print(i)
#     if i == 0:
#         for img in inputs:
#             nimg = img.numpy().T
#             imgplot = plt.imshow(nimg)
#         break
#     else:
#         i += 1

# # inputs, classes = next(iter(dataloaders["test"]))
# # out = torchvision.utils.make_grid(inputs)
# # imshow(out, title=[class_names[x] for x in classes])


# # print(data_samplers)

# # Get a batch of training data
# # inputs, classes = next(iter(dataloaders['train']))

# # # Make a grid from batch
# # out = torchvision.utils.make_grid(inputs)

# # imshow(out, title=[class_names[x] for x in classes])

# print("visualized a few images")

In [None]:
# Following is commented out temporarily

# set_types = ['train', 'val', 'test']

# # Need to split data sets into training and testing sets
# data_sets = {t : datasets.ImageFolder(data_dir, transform=data_transforms[t])
#              for t in set_types}

# # Need to split data into training set, validation set, and testing set
# train_size = 0.65     # 65% of all data is training
# val_size = 0.15       # 15% of all data is validation
# test_size = (1 - train_size - val_size)    # Remaining data (20%) is testing

# num_images = len(data_sets["train"]) # length of both sets should be the same
# all_ind = list(range(num_images))
# random_seed = 11
# np.random.seed(random_seed)
# np.random.shuffle(all_ind)

# train_split = int(num_images * train_size)
# val_split = int(num_images * val_size)
# test_split = int(num_images * test_size)

# data_samplers = {}
# data_samplers["train"] = all_ind[:train_split]
# data_samplers["val"] = all_ind[train_split : train_split+val_split]
# data_samplers["test"] = all_ind[train_split+val_split:]

# dataloaders = {t : torch.utils.data.DataLoader(data_sets[t],
#                                               sampler=data_samplers[t],
#                                               batch_size=4,
#                                               num_workers=4)
#               for t in set_types}

# dataset_sizes = {t : len(data_samplers[t]) for t in set_types}

# class_names = data_sets["train"].classes
# use_gpu = torch.cuda.is_available()

# print("data loaded from " + str(data_dir))
# print("read classes: " + str(class_names))
# if use_gpu:
#     print("use_gpu is true")
# else:
#     print("use_gpu is false")

In [None]:
# Following is commented out temporarily

# # Get a batch of training data
# inputs, classes = next(iter(dataloaders['train']))

# # Make a grid from batch
# out = torchvision.utils.make_grid(inputs)

# imshow(out, title=[class_names[x] for x in classes])

# print("visualized a few images")

In [10]:
def create_model_architecture():
    print("Transferring resnet18 and retraining with annotations dataset.")

    model_resnet18 = models.resnet18(pretrained=True)
    num_params = sum(1 for i in model_resnet18.parameters())

    # There are 10 layers (model_ft.children()) in resnet18
    # Freezing the first half of resnet18, freezing all params for layers 1-5
    max_layer = 5
    curr_layer = 1
    last_layer = None
    for child in model_resnet18.children():
        if curr_layer <= max_layer:
            for param in child.parameters():
                param.requires_grad = False
            last_layer = child
            curr_layer = curr_layer + 1
        else:
            break

    # for child in model.children():
    #     print("")
    #     print(child)

    # Replace the final fully connected layer to perform binary classification
    num_ftrs = model_resnet18.fc.in_features
    model_resnet18.fc = nn.Linear(num_ftrs, 2)

    if use_gpu:
        model_resnet18 = model_resnet18.cuda()

    criterion = nn.CrossEntropyLoss()

    # Need to create slightly custom optimizer since half of the layers are frozen
    optimizer = optim.SGD(list(filter(lambda p: p.requires_grad, model_resnet18.parameters())), lr=0.001, momentum=0.9)

    # Create LR scheduler that decays LR by a factor of 0.1 for every 7 epochs (this is from tutorial, might need tweaking)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    
    return model_resnet18

print("Defined function to build model architecture.")

Defined function to build model architecture.


In [11]:
print("perform training")

# Validation scores for determining the best number of epochs
#    - The keys are the book names, and point to dicts that are indexed by epoch number
all_epoch_scores = {"train" : {}, "val" : {}}

num_training_epochs = 50

cross_val_loaders = {}
cross_val_loaders["test"] = {b : book_data_loaders[b]["test"] for b in test_books}

for val_book in books_in_data:
    
    # setup this cross val's data loaders
    train_books = books_in_data - set([val_book])
    
    cross_val_loaders["train"] = {b : book_data_loaders[b]["train"] for b in train_books}
    cross_val_loaders["val"] = {b : book_data_loaders[b]["val"] for b in [val_book]}
    
    # can customize this later to transfer from different models, freezing different numbers of layers, etc.
    model_architecture = create_model_architecture()
    
    criterion = nn.CrossEntropyLoss()

    # Need to create slightly custom optimizer since half of the layers are frozen
    optimizer = optim.SGD(list(filter(lambda p:
                                      p.requires_grad, model_architecture.parameters())),
                          lr=0.001, momentum=0.9)

    # Create LR scheduler that decays LR by a factor of 0.1 for every 7 epochs (this is from tutorial, might need tweaking)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    
    earlyStoppingCriteria = EarlyStopping(min_delta = 1e-4, patience=5)
    
    print(cross_val_loaders.keys())

    trained_model_weights, epoch_scores = train_model(model_architecture,
                                                      criterion,
                                                      optimizer,
                                                      exp_lr_scheduler,
                                                      cross_val_loaders,
                                                      num_epochs=num_training_epochs,
                                                      early_stopping = earlyStoppingCriteria)
    # Attempt to resolve memory issue
    gc.collect()
    # testing for memory management
    torch.cuda.empty_cache()
    
    for t in epoch_scores:
        all_epoch_scores[t][val_book] = epoch_scores[t]

print("training complete")

perform training
Transferring resnet18 and retraining with annotations dataset.
dict_keys(['test', 'val', 'train'])
Epoch 0/49
----------
train Loss: 1.7240 Acc: 0.7644
[[ 296  305]
 [ 274 1583]]

val Loss: 5.5554 Acc: 0.5556
[[  0 160]
 [  0 200]]

val_acc_loss = 0.4444444444444444


Epoch 1/49
----------
train Loss: 1.1598 Acc: 0.7661
[[ 262  339]
 [ 236 1621]]

val Loss: 5.0704 Acc: 0.5556
[[  0 160]
 [  0 200]]

val_acc_loss = 0.4444444444444444


Epoch 2/49
----------
train Loss: 0.9628 Acc: 0.7600
[[ 250  351]
 [ 239 1618]]

val Loss: 3.6160 Acc: 0.5556
[[  0 160]
 [  0 200]]

val_acc_loss = 0.4444444444444444


Epoch 3/49
----------
train Loss: 0.8782 Acc: 0.7543
[[ 226  375]
 [ 229 1628]]

val Loss: 3.1870 Acc: 0.5556
[[  0 160]
 [  0 200]]

val_acc_loss = 0.4444444444444444


Epoch 4/49
----------
train Loss: 0.8382 Acc: 0.7535
[[ 215  386]
 [ 220 1637]]

val Loss: 2.9672 Acc: 0.5556
[[  0 160]
 [  0 200]]

val_acc_loss = 0.4444444444444444


Epoch 5/49
----------
train Loss: 

MemoryError: Traceback (most recent call last):
  File "C:\Users\rahul\AppData\Local\conda\conda\envs\py35\lib\site-packages\torch\utils\data\dataloader.py", line 42, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "C:\Users\rahul\AppData\Local\conda\conda\envs\py35\lib\site-packages\torch\utils\data\dataloader.py", line 42, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "C:\Users\rahul\AppData\Local\conda\conda\envs\py35\lib\site-packages\torchvision\datasets\folder.py", line 124, in __getitem__
    img = self.transform(img)
  File "C:\Users\rahul\AppData\Local\conda\conda\envs\py35\lib\site-packages\torchvision\transforms\transforms.py", line 42, in __call__
    img = t(img)
  File "C:\Users\rahul\AppData\Local\conda\conda\envs\py35\lib\site-packages\torchvision\transforms\transforms.py", line 579, in __call__
    return transform(img)
  File "C:\Users\rahul\AppData\Local\conda\conda\envs\py35\lib\site-packages\torchvision\transforms\transforms.py", line 42, in __call__
    img = t(img)
  File "C:\Users\rahul\AppData\Local\conda\conda\envs\py35\lib\site-packages\torchvision\transforms\transforms.py", line 232, in __call__
    return self.lambd(img)
  File "C:\Users\rahul\AppData\Local\conda\conda\envs\py35\lib\site-packages\torchvision\transforms\transforms.py", line 562, in <lambda>
    transforms.append(Lambda(lambda img: F.adjust_hue(img, hue_factor)))
  File "C:\Users\rahul\AppData\Local\conda\conda\envs\py35\lib\site-packages\torchvision\transforms\functional.py", line 485, in adjust_hue
    np_h = np.array(h, dtype=np.uint8)
MemoryError


In [None]:
# Average scores over the books for each epoch
ave_val_scores = {t : {} for t in all_epoch_scores}
for t in all_epoch_scores:
    for epoch in range(num_training_epochs):
        count = 0
        cum = 0
        for book in all_epoch_scores[t]:
            if epoch in all_epoch_scores[t][book]:
                cum += all_epoch_scores[t][book][epoch]
                count += 1
        if count != 0:
            ave_val_scores[t][epoch] = (cum/count)

print(ave_val_scores)

best_epochs = {t : max(ave_val_scores[t], key=ave_val_scores[t].get) for t in ave_val_scores}
print(best_epochs)

In [None]:
weights_path = "resnet18_half_frozen_5epochs_transfer-state.pt"
torch.save(model_resnet18.state_dict(), weights_path)
print("saved model state (weights) to " + weights_path)

In [None]:
def visualize_model(model, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    for i, data in enumerate(dataloaders['test']):
        inputs, labels = data
        if use_gpu:
            inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
        else:
            inputs, labels = Variable(inputs), Variable(labels)

        outputs = model(inputs)
        _, preds = torch.max(outputs.data, 1)

        for j in range(inputs.size()[0]):
            images_so_far += 1
            ax = plt.subplot(num_images//2, 2, images_so_far)
            ax.axis('off')
            ax.set_title('predicted: {}'.format(class_names[preds[j]]))
            imshow(inputs.cpu().data[j])

            if images_so_far == num_images:
                model.train(mode=was_training)
                return
    model.train(mode=was_training)

visualize_model(model_resnet18)
print("visualizing model")

In [None]:
print("running on testing dataset")
model_resnet18.train(False)  # Set model to evaluate mode

running_loss = 0.0
running_corrects = 0

criterion = nn.CrossEntropyLoss()

# Iterate over data.
for data in dataloaders["test"]:
    # get the inputs
    inputs, labels = data

    # wrap them in Variable
    if use_gpu:
        inputs = Variable(inputs.cuda())
        labels = Variable(labels.cuda())
    else:
        inputs, labels = Variable(inputs), Variable(labels)

    # zero the parameter gradients
#     optimizer.zero_grad()

    # forward
    outputs = model_resnet18(inputs)
    _, preds = torch.max(outputs.data, 1)
    loss = criterion(outputs, labels)

    # backward + optimize only if in training phase
#     if phase == 'train':
#         loss.backward()
#         optimizer.step()

    # statistics
    running_loss += loss.data[0] * inputs.size(0)
    running_corrects += torch.sum(preds == labels.data)

epoch_loss = running_loss / dataset_sizes["test"]
epoch_acc = running_corrects / dataset_sizes["test"]

print('{} Loss: {:.4f} Acc: {:.4f}'.format(
    "test", epoch_loss, epoch_acc))