**Intel Image Classification With Pytorch and Transfer Learning**

In this notebook, we will preprocess the Intel images and create augmented images to enlarge the training dataset.

Then we will take a pre-trained ResNet model from torchvision and fine-tune it on our dataset.





# Introducing adversarial attack to the original notebook

The first half of the code is from https://www.kaggle.com/asollie/intel-image-multiclass-pytorch-94-test-acc

You can refer to my GitHub repository for the full explanation of the code:
https://github.com/andylow1704/Adversarial_Attack

# Load Packages

In [None]:
from collections import defaultdict
from glob import glob
from pathlib import Path
import random
import shutil

import numpy as np
import cv2
import pandas as pd
from tqdm import tqdm
import PIL.Image as Image
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from matplotlib.ticker import MaxNLocator
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

import torch, torchvision
from torch import nn, optim #Torch NN(Conv, Pooling, etc.), Optimization.
from torch.optim import lr_scheduler #Optimization LR Scheduler.
import torch.nn.functional as F #NN Functional.
import torchvision.transforms as T #Torchvision Transforms.
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import models #Torchvistion Models.

%matplotlib inline

sns.set(style='whitegrid', palette='muted', font_scale=1.2)

HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"]

sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))

rcParams['figure.figsize'] = 15, 10

RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

# Train Folders

**We will get each label folder and we can see that we have 6 folders**

* buildings = 0 
* forest = 1
* glacier = 2
* mountain = 3
* sea = 4
* street = 5 

In [None]:
# One sub-directory per class; sorting fixes the order that later cells index into.
train_folders = glob('../input/intel-image-classification/seg_train/seg_train/*')
train_folders.sort()
len(train_folders)

# Load & View Images

**Here we build 3 helpers to load and view images**

In [None]:
def load_image(img_path, resize=True):
    """Read an image file and return it as an RGB array.

    Args:
        img_path: Path of the image file (a ``str`` or ``pathlib.Path``).
        resize: When true, shrink the image to 64x64 with area interpolation.

    Returns:
        The image as returned by OpenCV, converted from BGR to RGB.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    # cv2.imread signals failure by returning None instead of raising; check here
    # so the caller sees the offending path rather than an opaque cvtColor error.
    bgr = cv2.imread(str(img_path))
    if bgr is None:
        raise FileNotFoundError(f'Could not read image: {img_path}')

    img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    if resize:
        img = cv2.resize(img, (64,64), interpolation = cv2.INTER_AREA)

    return img

def show_image(img_path):
    """Load the image at ``img_path`` (resized by ``load_image``) and draw it without axes."""
    plt.imshow(load_image(img_path))
    plt.axis('off')
    
def show_sign_grid(image_paths):
    """Draw the images at ``image_paths`` as one tiled grid, 11 images per row.

    Args:
        image_paths: Iterable of image file paths; each is loaded (and resized)
            with ``load_image``.
    """
    # Stack into a single (N, H, W, C) array first: handing torch a Python list
    # of ndarrays goes through a very slow element-wise conversion path.
    images = np.stack([load_image(img) for img in image_paths])
    images = torch.from_numpy(images)
    # make_grid expects channels-first (N, C, H, W).
    images = images.permute(0, 3, 1, 2)
    grid_img = torchvision.utils.make_grid(images, nrow = 11)
    plt.figure(figsize = (24, 12))
    # Back to channels-last for Matplotlib.
    plt.imshow(grid_img.permute(1, 2, 0))
    plt.axis('off')

**Sample of images for all classes we have**

In [None]:
#All Classes.
# Draw one random JPEG from each class folder, in folder order.
sample_images = list(map(lambda folder: np.random.choice(glob(f'{folder}/*jpg')), train_folders))
show_sign_grid(sample_images)

In [None]:
#Buildings Class.
# Second file that glob returns for the first (buildings) folder.
building_images = glob(f'{train_folders[0]}/*jpg')
img_path = building_images[1]
show_image(img_path)

# Train set images class distribution.

* buildings = 0 
* forest = 1
* glacier = 2
* mountain = 3
* sea = 4
* street = 5 

In [None]:
#Class Classification.
# Label names, listed in the same order as the sorted class folders.
class_names = [
    'buildings',
    'forest',
    'glacier',
    'mountain',
    'sea',
    'street',
]

# Integer label of each class: its position in class_names.
class_indices = list(range(len(class_names)))

**We will copy all the images to a new directory so that the torchvision dataset helpers can consume them easily**

In [None]:
#Train & Valid Directory.
DATA_DIR = Path('data')

# Start from an empty working directory on every run. shutil.rmtree replaces the
# shell-only `!rm -rf data`, so the cell also works where `rm` is unavailable;
# ignore_errors=True keeps the "missing directory is fine" behaviour.
shutil.rmtree(DATA_DIR, ignore_errors=True)

DATASETS = ['train', 'val']

# Create data/<split>/<class>/ for every split and class (the layout ImageFolder reads).
for ds in DATASETS:
    for cls in class_names:
        (DATA_DIR / ds / cls).mkdir(parents=True, exist_ok=True)

**We are going to reserve 80% for train and 20% for validation for each class, then copy them to the correct folder**

In [None]:
#Class Distribution & Split.
for i, cls_index in enumerate(class_indices):
    # Every JPEG of this class. Sorted first because glob order depends on the
    # filesystem, which would make the seeded shuffle below irreproducible.
    image_paths = np.array(sorted(glob(f'{train_folders[cls_index]}/*jpg')))
    class_name = class_names[i]
    print(f'{class_name}: {len(image_paths)}')
    np.random.shuffle(image_paths)

    # A single cut point yields exactly two parts: the first 80% for training
    # and the remaining 20% for validation. With two cut points (80% / 90%)
    # the last 10% was produced but never copied, because zip() stops after
    # the two names in DATASETS.
    ds_split = np.split(
        image_paths,
        indices_or_sections = [int(.8 * len(image_paths))]
    )

    dataset_data = zip(DATASETS, ds_split)
    for ds, images in dataset_data:
        for img_path in images:
            shutil.copy(img_path, f'{DATA_DIR}/{ds}/{class_name}/')

# Requirement of ResNet

**The class distribution is good: the ratio between the class sizes is not large**

**We will apply some image augmentation techniques to artificially increase the size of the dataset: random resized crops, rotations and horizontal flips. Then we normalize the tensors using preset mean and standard-deviation values for each channel, which is a requirement of ResNet**

**Transforming the data to meet the requirements of ResNet.**

In [None]:
#Transforms.
# Per-channel mean / std used to normalise the input tensors.
mean_nums = [0.485, 0.456, 0.406]
std_nums = [0.229, 0.224, 0.225]

# Training pipeline: random crop / rotation / flip augmentation, then normalisation.
train_transform = T.Compose([
    T.RandomResizedCrop(size = 256),
    T.RandomRotation(degrees = 15),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
    T.Normalize(mean_nums, std_nums),
])

# Validation pipeline: deterministic resize + centre crop, then normalisation.
val_transform = T.Compose([
    T.Resize(size = 256),
    T.CenterCrop(size = 224),
    T.ToTensor(),
    T.Normalize(mean_nums, std_nums),
])

transforms = {'train': train_transform, 'val': val_transform}

**Now we will create pytorch dataset for image dataset and dataloaders**

In [None]:
#Image Datasets.
# One ImageFolder per split, reading data/<split>/<class>/ with that split's transform.
image_datasets = dict(
    (split, ImageFolder(f'{DATA_DIR}/{split}', transforms[split])) for split in DATASETS
)

#Data Loaders.
# Shuffled mini-batches of 4 images, loaded by 4 worker processes.
data_loaders = dict(
    (split, DataLoader(image_datasets[split], batch_size=4, shuffle=True, num_workers=4))
    for split in DATASETS
)

In [None]:
# Number of mini-batches in one training epoch.
n_train_batches = len(data_loaders['train'])
print(n_train_batches)

**We will store each class total images for later use**

In [None]:
#Dataset Sizes.
# Image count per split; used later as the denominator of the accuracies.
dataset_sizes = dict((split, len(image_datasets[split])) for split in DATASETS)

# From here on class_names is the class list that ImageFolder discovered on disk.
class_names = image_datasets['train'].classes

dataset_sizes

**Lets have a look at some sample images with all the transformations**

In [None]:
#Transformations Sample Images.
def imshow(inp, title=None):
    """Display a normalised channels-first image tensor.

    The normalisation is undone with the notebook-level ``mean_nums`` /
    ``std_nums``, the result is clipped to [0, 1], and the image is drawn
    without axes. ``title`` is set as the plot title when it is not None.
    """
    # Channels-first tensor -> channels-last array for Matplotlib.
    pixels = np.transpose(inp.numpy(), (1, 2, 0))
    channel_mean = np.array([mean_nums])
    channel_std = np.array([std_nums])
    pixels = np.clip(channel_std * pixels + channel_mean, 0, 1)

    plt.imshow(pixels)
    if title is not None:
        plt.title(title)
    plt.axis('off')
    
# Take one shuffled training batch and tile it into a single image grid.
inputs, classes = next(iter(data_loaders['train']))
out = torchvision.utils.make_grid(inputs)

# Title the grid with the class name of every image in the batch.
batch_titles = [class_names[x] for x in classes]
imshow(out, title=batch_titles)

> # Using pretrained model to classify the images

We will use the pre-trained ResNet to classify these images
1. We will import the model (import all weights and arch except we will change the output layer as number of output class is different from ResNet dataset)
2. Convert it into training mode
3. Train the model on new data
4. Evaluate
5. Hopefully celebrate :)

In [None]:
#Create Model
def create_model(n_classes):
    """Build a pretrained ResNeXt-101 (32x8d) with an ``n_classes``-way output layer.

    The final fully connected layer is replaced by a fresh ``nn.Linear`` with
    ``n_classes`` outputs, and the model is moved to the notebook-level ``device``.
    """
    #resnet34, resnet152, wide_resnet101_2, resnext101_32x8d
    backbone = models.resnext101_32x8d(pretrained = True)

    # Keep the head's input width, change only its number of outputs.
    backbone.fc = nn.Linear(backbone.fc.in_features, n_classes)

    backbone = backbone.to(device)
    return backbone

In [None]:
# One output unit per image class.
base_model = create_model(n_classes=len(class_names))

In [None]:
#Print Model Layer.
# Bare expression: the notebook shows the module's repr as the cell output.
base_model

**We will create three helper functions to encapsulate the training and evaluation logic**

In [None]:
def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
    """Train ``model`` for one pass over ``data_loader``.

    Each batch is moved to ``device``, scored with ``loss_fn`` and used for one
    optimizer step; the scheduler is stepped once after the last batch.

    Returns:
        ``(accuracy, mean_loss)``: the number of correct predictions divided by
        ``n_examples`` (a double tensor) and the mean of the per-batch losses.
    """
    model = model.train() #Convert to train mode
    batch_losses = []
    correct_predictions = 0

    for batch_inputs, batch_labels in data_loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        logits = model(batch_inputs)
        batch_loss = loss_fn(logits, batch_labels)

        # Predicted class = index of the largest output per sample.
        predicted = torch.max(logits, dim=1).indices
        correct_predictions += (predicted == batch_labels).sum()

        batch_losses.append(batch_loss.item())

        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    scheduler.step()

    accuracy = correct_predictions.double() / n_examples
    return accuracy, np.mean(batch_losses)

def eval_model(model, data_loader, loss_fn, device, n_examples):
    """Score ``model`` on ``data_loader`` without updating it.

    Runs in evaluation mode with gradient tracking disabled.

    Returns:
        ``(accuracy, mean_loss)``: the number of correct predictions divided by
        ``n_examples`` (a double tensor) and the mean of the per-batch losses.
    """
    model = model.eval() #Evaluation mode

    batch_losses = []
    correct_predictions = 0

    with torch.no_grad():
        for batch_inputs, batch_labels in data_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)

            # Predicted class = index of the largest output per sample.
            predicted = torch.max(logits, dim=1).indices
            correct_predictions += (predicted == batch_labels).sum()

            batch_losses.append(loss_fn(logits, batch_labels).item())

    accuracy = correct_predictions.double() / n_examples
    return accuracy, np.mean(batch_losses)

**Evaluation is simple, we don't even do gradient calculations**

In [None]:
def train_model(model, data_loaders, dataset_sizes, device, n_epochs=5):
    """Train ``model`` for ``n_epochs`` and return it with its best validation weights.

    Uses SGD (lr 0.001, momentum 0.9), a StepLR schedule (x0.1 every 7 epochs)
    and cross-entropy loss. After each epoch the model is validated, and whenever
    the validation accuracy improves the weights are written to
    ``best_model_state.bin``.

    Args:
        model: Network to train (updated in place).
        data_loaders: Mapping with ``'train'`` and ``'val'`` data loaders.
        dataset_sizes: Mapping with the number of ``'train'`` / ``'val'`` examples.
        device: Device the batches are moved to.
        n_epochs: Number of epochs to run.

    Returns:
        ``(model, history)`` where ``history`` maps ``train_acc``, ``train_loss``,
        ``val_acc`` and ``val_loss`` to lists of plain Python floats, one per epoch.
    """
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    loss_fn = nn.CrossEntropyLoss().to(device)

    history = defaultdict(list)
    best_accuracy = 0
    checkpoint_saved = False

    for epoch in range(n_epochs):
        print(f'Epoch {epoch + 1}/{n_epochs}')
        print('-' * 10)

        train_acc, train_loss = train_epoch(model, data_loaders['train'], loss_fn,
                                            optimizer, device, scheduler, dataset_sizes['train'])

        print(f'Train loss {train_loss} accuracy {train_acc}')

        val_acc, val_loss = eval_model(model, data_loaders['val'], loss_fn, device, dataset_sizes['val'])

        print(f'Val loss {val_loss} accuracy {val_acc}')
        print()

        # The accuracies come back as 0-d tensors living on `device`. Store host
        # floats so the history can be plotted (Matplotlib cannot convert CUDA
        # tensors) and does not keep device memory alive.
        history['train_acc'].append(float(train_acc))
        history['train_loss'].append(float(train_loss))
        history['val_acc'].append(float(val_acc))
        history['val_loss'].append(float(val_loss))

        if val_acc > best_accuracy:
            torch.save(model.state_dict(), 'best_model_state.bin')
            best_accuracy = val_acc
            checkpoint_saved = True

    print(f'Best val accuracy: {best_accuracy}')

    # Only restore a checkpoint written by this call; otherwise a file left over
    # from an earlier run would silently replace the current weights.
    if checkpoint_saved:
        model.load_state_dict(torch.load('best_model_state.bin'))

    return model, history

In [None]:
%%time

base_model, history = train_model(base_model, data_loaders, dataset_sizes, device, n_epochs=10)

In [None]:
#Visualize Training History.
def plot_training_history(history):
    """Plot loss and accuracy curves for training and validation side by side.

    Args:
        history: Mapping with ``train_loss``, ``val_loss``, ``train_acc`` and
            ``val_acc`` sequences, one value per epoch. Values may be numbers
            or 0-d tensors on any device.
    """
    def as_floats(key):
        # The accuracies may be 0-d CUDA tensors, which Matplotlib cannot
        # convert to NumPy; float() copies each value to the host.
        return [float(value) for value in history[key]]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 6))
    ax1.plot(as_floats('train_loss'), label='train loss')
    ax1.plot(as_floats('val_loss'), label='validation loss')

    ax1.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend()
    ax1.set_ylabel('Loss')
    ax1.set_xlabel('Epoch')

    ax2.plot(as_floats('train_acc'), label='train accuracy')
    ax2.plot(as_floats('val_acc'), label='validation accuracy')

    ax2.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax2.set_ylim([-0.05, 1.05])
    ax2.legend()
    ax2.set_ylabel('Accuracy')
    ax2.set_xlabel('Epoch')

    fig.suptitle('Training History')
    
# Curves for the training run above.
plot_training_history(history=history)

# **Get test data from dataset folder for evaluation**

In [None]:
# One sub-directory per class in the held-out test set, in sorted order.
test_folders = glob('../input/intel-image-classification/seg_test/seg_test/*')
test_folders.sort()
len(test_folders)

In [None]:
#Show Random Sample Image.
# Draw one random JPEG from each test class folder, in folder order.
sample_images = list(map(lambda folder: np.random.choice(glob(f'{folder}/*jpg')), test_folders))
show_sign_grid(sample_images)

**Here I repeat the same data preprocessing steps as for the train dataset**

In [None]:
#Class Classification.
class_names = ['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']

class_indices = [0, 1, 2, 3, 4, 5]

#Test Directory.
# The test split gets its own name instead of rebinding DATASETS, and the
# dictionaries below are extended rather than replaced: the adversarial
# re-training further down still needs data_loaders['train'] / ['val'] and the
# matching dataset_sizes, which used to be overwritten here (KeyError: 'train').
TEST_SET = 'test'

for cls in class_names:
    (DATA_DIR / TEST_SET / cls).mkdir(parents=True, exist_ok=True)

#Class Distribution.
for i, cls_index in enumerate(class_indices):
    image_paths = sorted(glob(f'{test_folders[cls_index]}/*jpg'))
    class_name = class_names[i]
    print(f'{class_name}: {len(image_paths)}')

    # Copy every test image. The earlier 80/10/10 split zipped against a single
    # dataset name, so only the first 80% of each class was ever evaluated.
    for img_path in image_paths:
        shutil.copy(img_path, f'{DATA_DIR}/{TEST_SET}/{class_name}/')

#Transforms.
mean_nums = [0.485, 0.456, 0.406]
std_nums = [0.229, 0.224, 0.225]

# Same deterministic preprocessing as the validation split.
transforms[TEST_SET] = T.Compose([
    T.Resize(size = 256),
    T.CenterCrop(size= 224),
    T.ToTensor(),
    T.Normalize(mean_nums, std_nums)])

#Image Datasets.
image_datasets[TEST_SET] = ImageFolder(f'{DATA_DIR}/{TEST_SET}', transforms[TEST_SET])

#Data Loaders.
data_loaders[TEST_SET] = DataLoader(image_datasets[TEST_SET], batch_size=4, shuffle=True, num_workers=4)

#Dataset Sizes.
dataset_sizes[TEST_SET] = len(image_datasets[TEST_SET])
class_names = image_datasets[TEST_SET].classes

dataset_sizes

# Evaluation

In [None]:
#Show Predictions.
def show_predictions(model, class_names, n_images=6):
    """Show the first ``n_images`` test images with the class the model predicts.

    Images are read from the notebook-level ``data_loaders['test']`` and laid
    out on a two-row grid.

    Args:
        model: Classifier to run (switched to evaluation mode).
        class_names: Sequence mapping a predicted class index to its label.
        n_images: How many images to display.
    """
    if n_images <= 0:
        return

    model = model.eval()
    images_handeled = 0
    # ceil(n_images / 2) columns: with n_images // 2 an odd count does not fit on
    # the two-row grid (and n_images == 1 would ask for zero columns).
    n_cols = (n_images + 1) // 2
    plt.figure()

    with torch.no_grad():
        for inputs, labels in data_loaders['test']:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)

            _, preds = torch.max(outputs, 1)

            for j in range(inputs.shape[0]):
                images_handeled += 1
                ax = plt.subplot(2, n_cols, images_handeled)
                ax.set_title(f'predicted: {class_names[preds[j]]}')
                imshow(inputs.cpu().data[j])
                ax.axis('off')

                # Stop as soon as the requested number of images is drawn.
                if images_handeled == n_images:
                    return
                
# Eight test images on a 2 x 4 grid.
show_predictions(model=base_model, class_names=class_names, n_images=8)

**So based on the above figure, it seems like there is 0 wrong prediction,and 8 is correct, that is not bad, now lets see the classification report to understand the bigger view of model performance**

In [None]:
#Classification Report.
def get_predictions(model, data_loader):
    """Collect predicted and true labels for every sample in ``data_loader``.

    Batches are moved to the notebook-level ``device``; gradients are disabled.

    Returns:
        ``(predictions, real_values)``: two 1-D CPU tensors of class indices,
        in the order the loader produced the samples.
    """
    model = model.eval()
    predictions = []
    real_values = []
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)

            _, preds = torch.max(outputs, 1)
            # Keep one CPU tensor per batch. Extending a Python list with 0-d
            # device tensors forces a device sync per element when it is later
            # turned back into a tensor.
            predictions.append(preds.cpu())
            real_values.append(labels.cpu())

    # torch.cat rejects an empty list, so an empty loader keeps the old result.
    predictions = torch.cat(predictions) if predictions else torch.as_tensor(predictions)
    real_values = torch.cat(real_values) if real_values else torch.as_tensor(real_values)

    return predictions, real_values

In [None]:
# Testing on unmodified images

y_pred, y_test = get_predictions(base_model, data_loaders['test'])

# Per-class precision / recall / F1 on the clean test images.
clean_report = classification_report(y_test, y_pred, target_names=class_names)
print(clean_report)

# Introducing Adversarial attack on test dataset 

In [None]:
# FGSM attack code
def fgsm_attack(image, epsilon, data_grad, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Apply one Fast Gradient Sign Method step to ``image``.

    Every element is moved by ``epsilon`` in the direction of the sign of the
    loss gradient, then clipped so that the image still corresponds to pixel
    values in [0, 1].

    The images in this notebook are normalised with per-channel ``mean`` /
    ``std`` before they reach the model, so the valid range per channel is
    ``[(0 - mean) / std, (1 - mean) / std]``, not ``[0, 1]``. Clamping
    normalised data to ``[0, 1]`` changes the image even for ``epsilon == 0``.

    Args:
        image: Input tensor, ``(C, H, W)`` or ``(N, C, H, W)``.
        epsilon: Step size, measured in the same (normalised) units as ``image``.
        data_grad: Gradient of the loss with respect to ``image``.
        mean: Per-channel mean used to normalise ``image``; defaults to the
            values used throughout this notebook. Pass ``None`` for raw
            [0, 1] images.
        std: Per-channel standard deviation used to normalise ``image``; pass
            ``None`` for raw [0, 1] images.

    Returns:
        The perturbed image, same shape as ``image``.
    """
    # Collect the element-wise sign of the data gradient
    sign_data_grad = data_grad.sign()
    # Create the perturbed image by adjusting each pixel of the input image
    perturbed_image = image + epsilon*sign_data_grad

    if mean is None or std is None:
        # Un-normalised input: pixel values live in [0, 1] directly.
        return torch.clamp(perturbed_image, 0, 1)

    # Shape (C, 1, 1) broadcasts over both (C, H, W) and (N, C, H, W).
    mean_t = torch.as_tensor(mean, dtype=image.dtype, device=image.device).view(-1, 1, 1)
    std_t = torch.as_tensor(std, dtype=image.dtype, device=image.device).view(-1, 1, 1)
    lower = (0.0 - mean_t) / std_t
    upper = (1.0 - mean_t) / std_t

    # Clip to the normalised image of the [0, 1] pixel range.
    perturbed_image = torch.max(torch.min(perturbed_image, upper), lower)
    # Return the perturbed image
    return perturbed_image

In [None]:
## Full modified images 

def get_predictions_adv(model, data_loader,epsilon):
    """Predict on FGSM-perturbed versions of every batch in ``data_loader``.

    For each batch the loss gradient with respect to the inputs is computed,
    ``fgsm_attack`` perturbs the inputs with step size ``epsilon``, and the
    perturbed batch is classified.

    Returns:
        ``(predictions, real_values)``: two 1-D CPU tensors of class indices.
    """
    model = model.eval()
    predictions = []
    real_values = []
    # Loss whose gradient with respect to the inputs drives the attack.
    loss_fn = nn.CrossEntropyLoss().to(device)

    for inputs, labels in data_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Set requires_grad attribute of tensor. Important for Attack
        inputs.requires_grad = True

        # Forward pass the data through the model
        outputs = model(inputs)

        # Calculate the loss
        loss = loss_fn(outputs, labels)

        # Zero all existing gradients
        model.zero_grad()

        # Calculate gradients of model in backward pass
        loss.backward()

        # Collect datagrad
        data_grad = inputs.grad.data

        # Call FGSM Attack; detach so the result is a plain input tensor rather
        # than a node that keeps the attack's autograd graph alive.
        perturbed_data = fgsm_attack(inputs, epsilon, data_grad).detach()

        # Re-classify the perturbed image; no gradients are needed for this pass.
        with torch.no_grad():
            outputs = model(perturbed_data)

        _, preds = torch.max(outputs, dim=1) # get max of pred

        predictions.append(preds.cpu())
        real_values.append(labels.cpu())

    # Do not leave attack gradients on the parameters: a later training step
    # that calls backward() before zero_grad() would add them to its update.
    model.zero_grad()

    # torch.cat rejects an empty list, so an empty loader keeps the old result.
    predictions = torch.cat(predictions) if predictions else torch.as_tensor(predictions)
    real_values = torch.cat(real_values) if real_values else torch.as_tensor(real_values)

    return predictions, real_values

In [None]:
## Testing model on full modified images

from sklearn.metrics import accuracy_score

# Attack strengths to sweep; 0 is the unperturbed baseline.
epsilons = [0, .05, .1, .15, .2, .25, .3]
accuracy = []

# Run test for each epsilon
for eps in epsilons:
    print("For Epsilon =" + f'{eps}')
    y_pred, y_test = get_predictions_adv(base_model, data_loaders['test'], eps)

    # Overall accuracy at this epsilon, kept for the accuracy-vs-epsilon plot.
    accuracy.append(accuracy_score(y_pred, y_test))

    eps_report = classification_report(y_test, y_pred, target_names=class_names)
    print(eps_report)


In [None]:
## Plot epsilon accuracy graph

fig, ax = plt.subplots(figsize=(5, 5))
ax.plot(epsilons, accuracy, "*-")
# Fixed tick grids: accuracy 0..1 in steps of 0.1, epsilon 0..0.3 in steps of 0.05.
ax.set_yticks(np.arange(0, 1.1, step=0.1))
ax.set_xticks(np.arange(0, .35, step=0.05))
ax.set_title("Accuracy vs Epsilon")
ax.set_xlabel("Epsilon")
ax.set_ylabel("Accuracy")
plt.show()

# Testing on mixture of modified and unmodified images

In [None]:
## Test sets with combination of modified images and unmodified images 
import random

def get_predictions_random(model, data_loader):
    """Predict on a random mixture of clean and FGSM-perturbed batches.

    Each batch is attacked with probability 0.5, using an epsilon drawn
    uniformly from [0, 0.3]; otherwise it is classified unchanged.

    Returns:
        ``(predictions, real_values)``: two 1-D CPU tensors of class indices.
    """
    model = model.eval()
    predictions = []
    real_values = []

    # Loss whose gradient with respect to the inputs drives the attack.
    loss_fn = nn.CrossEntropyLoss().to(device)

    for inputs, labels in data_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Get probability
        prob = random.uniform(0,1)

        if prob <0.5:

            ##Generate random epsilon
            epsilon = random.uniform(0,0.3)

            # Set requires_grad attribute of tensor. Important for Attack
            inputs.requires_grad = True

            # Forward pass the data through the model and calculate the loss
            loss = loss_fn(model(inputs), labels)

            # Zero all existing gradients
            model.zero_grad()

            # Calculate gradients of model in backward pass
            loss.backward()

            # Collect datagrad
            data_grad = inputs.grad.data

            # Call FGSM Attack; detach so the result is a plain input tensor
            # rather than a node that keeps the attack's autograd graph alive.
            inputs = fgsm_attack(inputs, epsilon, data_grad).detach()

        # Classify the (possibly perturbed) batch; no gradients needed here.
        with torch.no_grad():
            outputs = model(inputs)

        _, preds = torch.max(outputs, 1)
        predictions.append(preds.cpu())
        real_values.append(labels.cpu())

    # Do not leave attack gradients on the parameters: a later training step
    # that calls backward() before zero_grad() would add them to its update.
    model.zero_grad()

    # torch.cat rejects an empty list, so an empty loader keeps the old result.
    predictions = torch.cat(predictions) if predictions else torch.as_tensor(predictions)
    real_values = torch.cat(real_values) if real_values else torch.as_tensor(real_values)

    return predictions, real_values


In [None]:
# Report using combination of modified and unmodified images

y_pred, y_test = get_predictions_random(base_model, data_loaders['test'])

mixed_report = classification_report(y_test, y_pred, target_names=class_names)
print(mixed_report)

# Testing on full randomly modified images

In [None]:
## Test sets with full modified images

def get_predictions_adv2(model, data_loader):
    """Predict on FGSM-perturbed batches, each with its own random epsilon.

    Every batch is attacked; epsilon is drawn uniformly from [0, 0.3] per batch.

    Returns:
        ``(predictions, real_values)``: two 1-D CPU tensors of class indices.
    """
    model = model.eval()
    predictions = []
    real_values = []

    # Loss whose gradient with respect to the inputs drives the attack.
    loss_fn = nn.CrossEntropyLoss().to(device)

    for inputs, labels in data_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        ##Generate random epsilon
        epsilon = random.uniform(0,0.3)

        # Set requires_grad attribute of tensor. Important for Attack
        inputs.requires_grad = True

        # Forward pass the data through the model
        outputs = model(inputs)

        # Calculate the loss
        loss = loss_fn(outputs, labels)

        # Zero all existing gradients
        model.zero_grad()

        # Calculate gradients of model in backward pass
        loss.backward()

        # Collect datagrad
        data_grad = inputs.grad.data

        # Call FGSM Attack; detach so the result is a plain input tensor rather
        # than a node that keeps the attack's autograd graph alive.
        perturbed_data = fgsm_attack(inputs, epsilon, data_grad).detach()

        # Re-classify the perturbed image; no gradients are needed for this pass.
        with torch.no_grad():
            outputs = model(perturbed_data)

        ## New output
        _, preds = torch.max(outputs, 1)

        predictions.append(preds.cpu())
        real_values.append(labels.cpu())

    # Do not leave attack gradients on the parameters: a later training step
    # that calls backward() before zero_grad() would add them to its update.
    model.zero_grad()

    # torch.cat rejects an empty list, so an empty loader keeps the old result.
    predictions = torch.cat(predictions) if predictions else torch.as_tensor(predictions)
    real_values = torch.cat(real_values) if real_values else torch.as_tensor(real_values)

    return predictions, real_values

In [None]:
# Report for the full modified images 

y_pred, y_test = get_predictions_adv2(base_model, data_loaders['test'])

adversarial_report = classification_report(y_test, y_pred, target_names=class_names)
print(adversarial_report)

# Training a model with mixture of modified and unmodified images

In [None]:
def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
    """Train for one epoch on a random mixture of clean and FGSM-perturbed batches.

    Each batch is replaced by its FGSM-perturbed version with probability 0.5
    (epsilon drawn uniformly from [0, 0.3]); the model is then trained on the
    resulting batch. The scheduler is stepped once after the last batch.

    Returns:
        ``(accuracy, mean_loss)``: the number of correct predictions divided by
        ``n_examples`` (a double tensor) and the NaN-ignoring mean of the
        per-batch losses.
    """
    model = model.train() #Convert to train mode
    losses = []
    correct_predictions = 0

    for inputs, labels in data_loader:

        prob = random.uniform(0,1) ## generating probability

        inputs = inputs.to(device) #Push array to gpu
        labels = labels.to(device)

        if prob < 0.5:

            epsilon = random.uniform(0,0.3) ## generating random epsilon

            # Set requires_grad attribute of tensor. Important for Attack
            inputs.requires_grad = True

            # Forward/backward pass whose only purpose is the input gradient
            attack_loss = loss_fn(model(inputs), labels)
            attack_loss.backward()

            # Collect datagrad
            data_grad = inputs.grad.data

            # Call FGSM Attack; detach so the training backward pass below
            # stops at the perturbed batch instead of re-entering the attack.
            inputs = fgsm_attack(inputs, epsilon, data_grad).detach()

        # Clear gradients BEFORE the training backward pass. The attack above
        # (and any earlier evaluation/attack code) leaves gradients on the
        # parameters; zeroing only after optimizer.step() summed those into the
        # update, so attacked batches stepped on clean + adversarial gradients.
        optimizer.zero_grad()

        outputs = model(inputs) #get prob of output per class

        loss = loss_fn(outputs, labels) # get loss
        losses.append(loss.item())

        _, preds = torch.max(outputs, dim=1) # get max of pred
        correct_predictions += torch.sum(preds==labels)

        loss.backward()
        optimizer.step()

    scheduler.step()

    return correct_predictions.double() / n_examples, np.nanmean(losses)



def eval_model(model, data_loader, loss_fn, device, n_examples):
    """Evaluate on a random mixture of clean and FGSM-perturbed batches.

    Each batch is replaced by its FGSM-perturbed version with probability 0.5
    (epsilon drawn uniformly from [0, 0.3]) and then scored. Gradients are used
    only to build the attack; the model's parameters are not updated and no
    gradients are left on them.

    Returns:
        ``(accuracy, mean_loss)``: the number of correct predictions divided by
        ``n_examples`` (a double tensor) and the NaN-ignoring mean of the
        per-batch losses.
    """
    model = model.eval() #Evaluation mode
    losses = []
    correct_predictions = 0

    for inputs, labels in data_loader:

        prob = random.uniform(0,1) ## generating probability

        inputs = inputs.to(device)
        labels = labels.to(device)

        if prob <0.5:

            epsilon = random.uniform(0,0.3) ## generating random epsilon

            # Set requires_grad attribute of tensor. Important for Attack
            inputs.requires_grad = True

            # Forward pass the data through the model and calculate the loss
            attack_loss = loss_fn(model(inputs), labels)

            # Zero all existing gradients
            model.zero_grad()

            # Calculate gradients of model in backward pass
            attack_loss.backward()

            # Collect datagrad
            data_grad = inputs.grad.data

            # Call FGSM Attack; detach so the result is a plain input tensor.
            inputs = fgsm_attack(inputs, epsilon, data_grad).detach()

            # The backward pass also filled the parameter gradients. Clear them
            # so they cannot be added to the next training step.
            model.zero_grad()

        # Score the (possibly perturbed) batch; no gradients needed here.
        with torch.no_grad():
            outputs = model(inputs)
            loss = loss_fn(outputs,labels)

        losses.append(loss.item())

        _, preds = torch.max(outputs, dim=1)

        correct_predictions += torch.sum(preds==labels)

    return correct_predictions.double() / n_examples, np.nanmean(losses)

In [None]:
def train_model(model, data_loaders, dataset_sizes, device, n_epochs=5):
    """Train ``model`` for ``n_epochs`` and return it with its best validation weights.

    Uses SGD (lr 0.001, momentum 0.9), a StepLR schedule (x0.1 every 7 epochs)
    and cross-entropy loss. After each epoch the model is validated, and whenever
    the validation accuracy improves the weights are written to
    ``best_model_state.bin``.

    Args:
        model: Network to train (updated in place).
        data_loaders: Mapping with ``'train'`` and ``'val'`` data loaders.
        dataset_sizes: Mapping with the number of ``'train'`` / ``'val'`` examples.
        device: Device the batches are moved to.
        n_epochs: Number of epochs to run.

    Returns:
        ``(model, history)`` where ``history`` maps ``train_acc``, ``train_loss``,
        ``val_acc`` and ``val_loss`` to lists of plain Python floats, one per epoch.
    """
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    loss_fn = nn.CrossEntropyLoss().to(device)
    history = defaultdict(list)
    best_accuracy = 0
    checkpoint_saved = False

    for epoch in range(n_epochs):
        print(f'Epoch {epoch + 1}/{n_epochs}')
        print('-' * 10)

        train_acc, train_loss = train_epoch(model, data_loaders['train'], loss_fn,
                                            optimizer, device, scheduler, dataset_sizes['train'])

        print(f'Train loss {train_loss} accuracy {train_acc}')

        val_acc, val_loss = eval_model(model, data_loaders['val'], loss_fn, device, dataset_sizes['val'])

        print(f'Val loss {val_loss} accuracy {val_acc}')
        print()

        # The accuracies come back as 0-d tensors living on `device`. Store host
        # floats so the history can be plotted (Matplotlib cannot convert CUDA
        # tensors) and does not keep device memory alive.
        history['train_acc'].append(float(train_acc))
        history['train_loss'].append(float(train_loss))
        history['val_acc'].append(float(val_acc))
        history['val_loss'].append(float(val_loss))

        if val_acc > best_accuracy:
            torch.save(model.state_dict(), 'best_model_state.bin')
            best_accuracy = val_acc
            checkpoint_saved = True

    print(f'Best val accuracy: {best_accuracy}')

    # Only restore a checkpoint written by this call; otherwise a file left over
    # from an earlier run would silently replace the current weights.
    if checkpoint_saved:
        model.load_state_dict(torch.load('best_model_state.bin'))

    return model, history

In [None]:
%%time

base_model, history = train_model(base_model, data_loaders, dataset_sizes, device, n_epochs=10)

In [None]:
#Visualize Training History.
def plot_training_history(history):
    """Plot loss and accuracy curves for training and validation side by side.

    Args:
        history: Mapping with ``train_loss``, ``val_loss``, ``train_acc`` and
            ``val_acc`` sequences, one value per epoch. Values may be numbers
            or 0-d tensors on any device.
    """
    def as_floats(key):
        # The accuracies may be 0-d CUDA tensors, which Matplotlib cannot
        # convert to NumPy; float() copies each value to the host.
        return [float(value) for value in history[key]]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 6))
    ax1.plot(as_floats('train_loss'), label='train loss')
    ax1.plot(as_floats('val_loss'), label='validation loss')

    ax1.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax1.set_ylim([-0.05, 1.05])
    ax1.legend()
    ax1.set_ylabel('Loss')
    ax1.set_xlabel('Epoch')

    ax2.plot(as_floats('train_acc'), label='train accuracy')
    ax2.plot(as_floats('val_acc'), label='validation accuracy')

    ax2.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax2.set_ylim([-0.05, 1.05])
    ax2.legend()
    ax2.set_ylabel('Accuracy')
    ax2.set_xlabel('Epoch')

    fig.suptitle('Training History')
    
# Curves for the second training run above.
plot_training_history(history=history)

# Testing model using a combination of unmodified and modified images

In [None]:
## Result(Mixture of modified and unmodified images)

y_pred, y_test = get_predictions_random(base_model, data_loaders['test'])

retrained_mixed_report = classification_report(y_test, y_pred, target_names=class_names)
print(retrained_mixed_report)

# Testing model using full set of randomly modified images

In [None]:
## Result(Full randomly modified images)

y_pred, y_test = get_predictions_adv2(base_model, data_loaders['test'])

retrained_adversarial_report = classification_report(y_test, y_pred, target_names=class_names)
print(retrained_adversarial_report)