# Main ML code below

In [1]:
import numpy as np

from IPython.display import Image
from IPython.display import Markdown

import matplotlib
import matplotlib.pyplot as plt

import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

import torchvision.models as models
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

import torch.optim as optim

from torch.autograd import Variable

In [2]:
# License: BSD
# Author: Sasank Chilamkurthy

# material from notebook at: https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

plt.ion()   # interactive mode

# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = '../../stanford_dogs_new/'

image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                              shuffle=True, num_workers=4)
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def imshow(inp, title=None):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated
        
# # Get a batch of training data
# inputs, classes = next(iter(dataloaders['train']))
# 
# # Make a grid from batch
# out = torchvision.utils.make_grid(inputs)
# 
# imshow(out, title=[class_names[x] for x in classes])

def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

def visualize_model(model, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()
    
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            
            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                imshow(inputs.cpu().data[j])
                
                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)

In [3]:
vgg19 = models.vgg19(pretrained=True)

for param in vgg19.parameters():
    param.requires_grad = False

print(vgg19)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padd

In [6]:
# ref: https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html

vgg19.classifier[6] = nn.Linear(in_features=4096, out_features=120, bias=True)

vgg19 = vgg19.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that only parameters of final layer are being optimized as
# opposed to before.
optimizer_vgg19 = optim.SGD(vgg19.parameters(), lr=0.001, momentum=0.9)
# optimizer_vgg19 = optim.Adamax(vgg19.parameters())

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_vgg19, step_size=7, gamma=0.1)

In [7]:
vgg19_30 = train_model(vgg19, criterion, optimizer_vgg19, exp_lr_scheduler, num_epochs=30)

# %


Epoch 0/59
----------
train Loss: 1.7241 Acc: 0.5777
val Loss: 0.7040 Acc: 0.7997

Epoch 1/59
----------
train Loss: 1.5910 Acc: 0.6375
val Loss: 0.5949 Acc: 0.8358

Epoch 2/59
----------
train Loss: 1.6388 Acc: 0.6451
val Loss: 0.6576 Acc: 0.8392

Epoch 3/59
----------
train Loss: 1.6604 Acc: 0.6571
val Loss: 0.7583 Acc: 0.8270

Epoch 4/59
----------
train Loss: 1.7007 Acc: 0.6543
val Loss: 0.6774 Acc: 0.8406

Epoch 5/59
----------
train Loss: 1.7150 Acc: 0.6654
val Loss: 0.7652 Acc: 0.8304

Epoch 6/59
----------
train Loss: 1.7341 Acc: 0.6670
val Loss: 0.7492 Acc: 0.8377

Epoch 7/59
----------
train Loss: 1.4793 Acc: 0.6950
val Loss: 0.5792 Acc: 0.8553

Epoch 8/59
----------
train Loss: 1.4142 Acc: 0.7054
val Loss: 0.5701 Acc: 0.8611

Epoch 9/59
----------
train Loss: 1.3626 Acc: 0.7068
val Loss: 0.5571 Acc: 0.8582

Epoch 10/59
----------
train Loss: 1.3323 Acc: 0.7080
val Loss: 0.5375 Acc: 0.8621

Epoch 11/59
----------
train Loss: 1.3263 Acc: 0.7065
val Loss: 0.5335 Acc: 0.8635

Ep

In [8]:
torch.save(vgg19_30.state_dict(), 'vgg19_based_model2.pt')

## Load the Model and test it

In [9]:
vgg19_loaded = models.vgg19(pretrained=True)

for param in vgg19_loaded.parameters():
    param.requires_grad = False

vgg19_loaded.fc = nn.Linear(in_features=4096, out_features=120, bias=True)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
vgg19_loaded = vgg19_60_loaded.to(device)



vgg19_loaded.load_state_dict(torch.load('vgg19_based_model2.pt'), strict=False)
vgg19_loaded.eval()

RuntimeError: Error(s) in loading state_dict for VGG:
	size mismatch for classifier.6.weight: copying a param with shape torch.Size([120, 4096]) from checkpoint, the shape in current model is torch.Size([1000, 4096]).
	size mismatch for classifier.6.bias: copying a param with shape torch.Size([120]) from checkpoint, the shape in current model is torch.Size([1000]).

## train vgg19 with Adamax optimizer

In [None]:
# ref: https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html

vgg19.classifier[6] = nn.Linear(in_features=4096, out_features=120, bias=True)

vgg19 = vgg19.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that only parameters of final layer are being optimized as
# opposed to before.
# optimizer_vgg19 = optim.SGD(vgg19.parameters(), lr=0.001, momentum=0.9)
optimizer_vgg19_adamax = optim.Adamax(vgg19.parameters())

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_vgg19_adamax, step_size=7, gamma=0.1)

In [None]:
vgg19_adamax_30ep = train_model(vgg19, criterion, optimizer_vgg19_adamax, exp_lr_scheduler, num_epochs=30)

# %


In [None]:
torch.save(vgg19_adamax_30ep.state_dict(), 'vgg19_adamax_30ep_based_model2.pt')

## Load the Adamax optimizer based vgg19 model and test it

In [None]:
vgg19_adamax_30ep_loaded = models.vgg19(pretrained=True)

for param in vgg19_adamax_30ep_loaded.parameters():
    param.requires_grad = False

vgg19_adamax_30ep_loaded.fc = nn.Linear(in_features=4096, out_features=120, bias=True)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
vgg19_adamax_30ep_loaded = vgg19_adamax_30ep_loaded.to(device)



vgg19_adamax_30ep_loaded.load_state_dict(torch.load('vgg19_adamax_30ep_based_model2.pt'), strcit=False)
vgg19_adamax_30ep_loaded.eval()