# AIM5004__Assignment #2

* 2022 , 

* Training Convolutional Neural Networks (CNN). You are going to write code in Python with whichever deep learning library you prefer to use, e.g. PyTorch, TensorFlow, Keras, JAX, MXNet, and so on. (10 pts)

- - -

## Question - a

(a) Download the CIFAR-10 dataset from https://www.cs.toronto.edu/~kriz/cifar.html
and report the statistics of the dataset, e.g. the number of training (and testing) images,
the size of each image, the number of classes and the number of images per class.
Also show 5 random images from each class. Report the mean and standard deviation
of the training dataset for each color channel (R, G, B). (1 pt)

In [None]:
import os
import time
import random
import collections
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms

In [None]:
from torchvision import models
from efficientnet_pytorch import EfficientNet

### Arguments settings

In [None]:
class Args:
    """Training hyper-parameters and run settings shared by the cells below."""

    # Optimisation settings
    epochs = 5
    bs = 4  # mini-batch size
    lr = 0.001
    momentum = 0.9

    # Dataset description
    num_channels = 3  # RGB input images
    num_classes = 10  # CIFAR-10 has ten classes

    # Miscellaneous
    verbose = 'store_true'
    seed = 710674

# Single settings object read by the cells below.
args = Args()

# Seed the NumPy, Python and PyTorch random number generators with the same value.
np.random.seed(args.seed)
random.seed(args.seed)
torch.manual_seed(args.seed)

In [None]:
## Select the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print('Using PyTorch version:', torch.__version__, ' Device: ', DEVICE)

### Raw Data Loading

In [None]:
# Create the CIFAR-10 train and test datasets rooted at ./data (download=True).
# No transform is passed here, so the raw data arrays are used by the statistics cells below.
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True)

### Mean & Standard Deviation

In [None]:
# Rescale the raw training array by 1/255 (presumably 8-bit pixel values, giving [0, 1] — confirm).
data = train_dataset.data / 255

In [None]:
# Reduce over the first three axes so that one value per entry of the last axis remains.
mean, std = data.mean(axis=(0, 1, 2)), data.std(axis=(0, 1, 2))
print(f"Mean : {mean}   STD: {std}")

In [None]:
# Report the statistics channel by channel; channels are numbered from 1 in the output.
for channel in range(3):
    print(f"The mean value of the Channel {channel + 1} is", mean[channel])
    print(f"and the standard deviation of channel {channel + 1} is", std[channel])

### Prepare Final Dataset

In [None]:
## Data Transforms
# Plain pipeline: only converts the image to a tensor.
data_transforms = transforms.Compose([
    transforms.ToTensor()
])

# Normalising pipeline: tensor conversion followed by per-channel normalisation
# with fixed mean / std constants (presumably the statistics printed by the
# cells above — confirm the values match).
data_transforms_nor = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.49139968, 0.48215841, 0.44653091], [0.24703223, 0.24348513, 0.26158784])
])

In [None]:
## Re-create the datasets with the ToTensor-only transform (no normalisation).
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=data_transforms)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=data_transforms)

In [None]:
## Re-create the datasets with the normalising transform (ToTensor + Normalize).
train_dataset_nor = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=data_transforms_nor)
test_dataset_nor = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=data_transforms_nor)

In [None]:
## Batch loaders over the un-normalised datasets: the training loader shuffles,
## the test loader keeps a fixed order. Both use 4 worker processes.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.bs, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.bs, shuffle=False, num_workers=4)
# Human-readable class names (presumably ordered by label index — confirm against the dataset).
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
## Display the training dataset object as the cell output.
train_dataset

In [None]:
## Display the test dataset object as the cell output.
test_dataset

### Basic information of CIFAR-10

In [None]:
# Summary statistics of CIFAR-10, read from the arrays and label lists held by the datasets.
train_label_counts = collections.Counter(train_dataset.targets)
test_label_counts = collections.Counter(test_dataset.targets)

print("The number of train dataset image is:", train_dataset.data.shape[0])
print("The number of test dataset image is:", test_dataset.data.shape[0])
print("The number of train dataset label is:", len(train_label_counts))
# Fixed message: this line reports the test split but used to be labelled "train".
print("The number of test dataset label is:", len(test_label_counts))
print("The input image size of CIFAR 10 dataset is", train_dataset.data.shape[1], "x", train_dataset.data.shape[2])
print("The number of dataset in each label for train_dataset is ", train_label_counts)
print("The number of dataset in each label for test_dataset is ", test_label_counts)

### Sample images

- - -

## Question - b

(b) Design a CNN architecture and write the training code with the following hyperparameters. Provide a training loss curve (x-axis: the number of training iterations, y-axis: loss value) and a testing accuracy curve (x-axis: the number of training iterations, y-axis: classification accuracy on the testing dataset). (2 pts)

(1) CNN architecture:
>[conv1] → input channels: 3, output channels: 6, kernel size: 5, padding: 0, stride: 1\
[max pooling] → kernel size: 2, stride: 2\
[conv2] → input channels: 6, output channels: 16, kernel size: 5, padding: 0, stride: 1\
[fully connected layer1] → output channels: 120\
[fully connected layer2] → output channels: 84\
[fully connected layer3] → output channels: 10

(2) activation function: ReLU\
(3) loss function: cross entropy loss\
(4) optimization algorithm: SGD\
(5) learning rate: 0.001\
(6) momentum: 0.9\
(7) batch size: 4\
(8) The number of training epoch: 5

In [None]:
# Pull one mini-batch from the training loader and print its labels as a sanity check.
# The built-in next() is used because the iterator's .next() method is not part
# of the Python iterator protocol and is missing from recent PyTorch releases.
dataiter = iter(train_loader)
images, labels = next(dataiter)
print(labels)

In [None]:
# Echo the optimisation settings that the training cells will use.
for caption, value in (
    ("Batch size is", args.bs),
    ("Learning rate is", args.lr),
    ("Momentum is ", args.momentum),
):
    print(caption, value)

### CNN model architecture

In [None]:
## CIFAR-10 CNN model architecture design
class CIFAR_CNN(nn.Module):
    """Small CNN: conv -> ReLU -> max-pool -> conv -> ReLU -> three linear layers.

    Args:
        num_classes: Width of the output layer. When omitted, the value is read
            from the module-level ``args.num_classes`` (the original behaviour),
            so existing ``CIFAR_CNN()`` calls keep working.

    ``forward`` flattens to 16 * 10 * 10 features, the size produced by 32x32
    inputs (32 -> 28 after conv1, 14 after pooling, 10 after conv2).
    """

    # Features leaving conv2 for a 32x32 input: 16 channels of 10x10.
    _FLAT_FEATURES = 10 * 10 * 16

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Fall back to the notebook-wide settings object.
            num_classes = args.num_classes

        ## convolution layers
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1, padding=0)
        ##### 28x28
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        ##### 14x14
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
        ##### 10x10

        ## fully connected layers
        self.fc1 = nn.Linear(self._FLAT_FEATURES, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the raw class scores (no softmax) for a batch of images."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = F.relu(self.conv2(x))

        # Flatten the feature maps into one vector per sample.
        x = x.view(-1, self._FLAT_FEATURES)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [None]:
# Baseline setup: CNN moved to DEVICE, cross-entropy loss, SGD with lr / momentum from args.
# `criterion` is a module-level name that train() and evaluate() read.
model = CIFAR_CNN().to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = args.lr, momentum=args.momentum)
print(model)

In [None]:
## Batch loaders over the un-normalised datasets (same settings as the earlier loader cell):
## shuffled training loader, fixed-order test loader, 4 worker processes each.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.bs, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.bs, shuffle=False, num_workers=4)
# Human-readable class names (presumably ordered by label index — confirm against the dataset).
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

### Training procedures

In [None]:
# Run one training epoch, log progress and report the mean training loss.

def train(model, train_loader, optimizer, log_interval, epoch=None):
    """Train ``model`` for one pass over ``train_loader``.

    Reads the module-level ``criterion`` (loss function) and ``DEVICE``.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(image, label)`` batches.
        optimizer: Optimiser bound to ``model``'s parameters.
        log_interval: A progress line is printed every ``log_interval`` batches.
        epoch: Epoch number shown in the progress lines. When omitted, the
            module-level loop variable ``epoch`` is used if it exists, so
            existing call sites keep their output.

    Returns:
        Mean of the per-batch training losses of this epoch as a float
        (``nan`` if the loader yields no batches). Earlier versions returned
        ``None``; callers that ignore the result are unaffected.
    """
    if epoch is None:
        # Backward compatibility: older call sites rely on a notebook-level `epoch`.
        epoch = globals().get('epoch', '?')

    model.train()
    print(optimizer.param_groups[0]['lr'])

    loss_sum = 0.0
    num_batches = 0
    for batch_idx, (image, label) in enumerate(train_loader):
        image = image.to(DEVICE)
        label = label.to(DEVICE)
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        # Keep the running sum as a tensor; it is converted to a float once at the end.
        loss_sum = loss_sum + loss.detach()
        num_batches += 1

        if batch_idx % log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}".format(
                epoch, batch_idx * len(image),
                len(train_loader.dataset), 100. * batch_idx / len(train_loader),
                loss.item()))

    if num_batches == 0:
        return float('nan')
    return float(loss_sum) / num_batches

In [None]:
# Measure loss and accuracy of a model on a held-out loader.

def evaluate(model, test_loader):
    """Return ``(mean per-batch loss, accuracy in percent)`` of ``model`` on ``test_loader``.

    Reads the module-level ``criterion`` and ``DEVICE``; gradients are disabled.
    """
    model.eval()
    loss_sum, num_hits = 0, 0

    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            batch_images = batch_images.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)
            scores = model(batch_images)
            loss_sum += criterion(scores, batch_labels).item()
            # Index of the highest score per sample, kept as a column vector.
            predicted = scores.max(1, keepdim=True)[1]
            num_hits += predicted.eq(batch_labels.view_as(predicted)).sum().item()

    mean_loss = loss_sum / len(test_loader)
    accuracy = 100. * num_hits / len(test_loader.dataset)
    return mean_loss, accuracy

In [None]:
# Train the baseline CNN and record the test loss / accuracy after every epoch.
los_total = []
acc_total = []


# range(1, args.epochs) yields args.epochs - 1 iterations, so each history
# list ends up with args.epochs - 1 entries.
# NOTE(review): the task statement asks for 5 training epochs — confirm whether
# the missing epoch is intended.
# `epoch` is a module-level name; train() reads it for its log lines.
for epoch in range(1, args.epochs):
    train(model, train_loader, optimizer, log_interval = 200)
    val_loss, validation_accuracy = evaluate(model, test_loader)
    print("\n[EPOCH: {}], \tTest Loss: {:.4f}, \tTest Accuracy: {:.2f} % \n".format(
        epoch, val_loss, validation_accuracy))
    
    los_total.append(val_loss)
    acc_total.append(validation_accuracy)

### Accuracy and Loss plots

In [None]:
## Accuracy graph: test accuracy of the baseline CNN after each recorded epoch.
plt.rc('font', family='Times New Roman', serif='Times')
plt.rc('xtick', labelsize=16)
plt.rc('ytick', labelsize=16)
plt.rc('axes', labelsize=20)

fig, ax = plt.subplots()
fig.subplots_adjust(left=.15, bottom=-1.16, right=1.99, top=.97)

## edit the plot list here
# The x-axis is derived from the recorded history instead of args.epochs, so
# the cell still works after args is redefined or the history length changes.
plt.plot(range(len(acc_total)), acc_total)


ax.set_ylabel('Accuracy')
ax.set_xlabel('Number of Epochs')
ax.legend(['CNN Accuracy'],fontsize=15)
sns.set_style('whitegrid')

In [None]:
## Loss graph: test loss of the baseline CNN after each recorded epoch.
plt.rc('font', family='Times New Roman', serif='Times')
plt.rc('xtick', labelsize=16)
plt.rc('ytick', labelsize=16)
plt.rc('axes', labelsize=20)

fig, ax = plt.subplots()
fig.subplots_adjust(left=.15, bottom=-1.16, right=1.99, top=.97)

## edit the plot list here
# The x-axis is derived from the recorded history instead of args.epochs, so
# the cell still works after args is redefined or the history length changes.
plt.plot(range(len(los_total)), los_total, '-r')


ax.set_ylabel('Loss')
ax.set_xlabel('Number of Epochs')
ax.legend(['CNN Loss'],fontsize=15)
sns.set_style('whitegrid')

- - - 

## Question - c

(c) Normalize the inputs with the mean and standard deviation computed in (a). Compare the training loss and testing accuracy curves by drawing the two curves in the same plot. (1 pt)

### Normalize

In [None]:
## Re-create the datasets with the normalising transform (ToTensor + Normalize).
train_dataset_nor = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=data_transforms_nor)
test_dataset_nor = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=data_transforms_nor)

In [None]:
## Batch loaders over the normalised datasets: shuffled for training, fixed order for testing.
train_loader_nor = torch.utils.data.DataLoader(train_dataset_nor, batch_size=args.bs, shuffle=True, num_workers=4)
test_loader_nor = torch.utils.data.DataLoader(test_dataset_nor, batch_size=args.bs, shuffle=False, num_workers=4)

In [None]:
# Fresh CNN, loss and SGD optimiser for the normalised-input run.
# This rebinds the module-level `model`, `criterion` and `optimizer`.
model = CIFAR_CNN().to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = args.lr, momentum=args.momentum)
print(model)

### Training procedures

In [None]:
# Run one training epoch, log progress and report the mean training loss.

def train(model, train_loader_nor, optimizer, log_interval, epoch=None):
    """Train ``model`` for one pass over ``train_loader_nor``.

    Reads the module-level ``criterion`` (loss function) and ``DEVICE``.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader_nor: Iterable yielding ``(image, label)`` batches.
        optimizer: Optimiser bound to ``model``'s parameters.
        log_interval: A progress line is printed every ``log_interval`` batches.
        epoch: Epoch number shown in the progress lines. When omitted, the
            module-level loop variable ``epoch`` is used if it exists, so
            existing call sites keep their output.

    Returns:
        Mean of the per-batch training losses of this epoch as a float
        (``nan`` if the loader yields no batches). Earlier versions returned
        ``None``; callers that ignore the result are unaffected.
    """
    if epoch is None:
        # Backward compatibility: older call sites rely on a notebook-level `epoch`.
        epoch = globals().get('epoch', '?')

    model.train()
    print(optimizer.param_groups[0]['lr'])

    loss_sum = 0.0
    num_batches = 0
    for batch_idx, (image, label) in enumerate(train_loader_nor):
        image = image.to(DEVICE)
        label = label.to(DEVICE)
        optimizer.zero_grad()
        output = model(image)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        # Keep the running sum as a tensor; it is converted to a float once at the end.
        loss_sum = loss_sum + loss.detach()
        num_batches += 1

        if batch_idx % log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}".format(
                epoch, batch_idx * len(image),
                len(train_loader_nor.dataset), 100. * batch_idx / len(train_loader_nor),
                loss.item()))

    if num_batches == 0:
        return float('nan')
    return float(loss_sum) / num_batches

In [None]:
# Measure loss and accuracy of a model on a held-out loader.

def evaluate(model, test_loader_nor):
    """Return ``(mean per-batch loss, accuracy in percent)`` of ``model`` on ``test_loader_nor``.

    Reads the module-level ``criterion`` and ``DEVICE``; gradients are disabled.
    """
    model.eval()
    loss_sum, num_hits = 0, 0

    with torch.no_grad():
        for batch_images, batch_labels in test_loader_nor:
            batch_images = batch_images.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)
            scores = model(batch_images)
            loss_sum += criterion(scores, batch_labels).item()
            # Index of the highest score per sample, kept as a column vector.
            predicted = scores.max(1, keepdim=True)[1]
            num_hits += predicted.eq(batch_labels.view_as(predicted)).sum().item()

    mean_loss = loss_sum / len(test_loader_nor)
    accuracy = 100. * num_hits / len(test_loader_nor.dataset)
    return mean_loss, accuracy

In [None]:
## Train the CNN on normalised inputs and record the test loss / accuracy after every epoch.
los_total_nor = []
acc_total_nor = []


# range(1, args.epochs) yields args.epochs - 1 iterations, so each history
# list ends up with args.epochs - 1 entries.
# NOTE(review): the task statement asks for 5 training epochs — confirm whether
# the missing epoch is intended.
# `epoch` is a module-level name; train() reads it for its log lines.
for epoch in range(1, args.epochs):
    train(model, train_loader_nor, optimizer, log_interval = 200)
    val_loss, validation_accuracy = evaluate(model, test_loader_nor)
    print("\n[EPOCH: {}], \tTest Loss: {:.4f}, \tTest Accuracy: {:.2f} % \n".format(
        epoch, val_loss, validation_accuracy))
    
    los_total_nor.append(val_loss)
    acc_total_nor.append(validation_accuracy)

### Accuracy and Loss plots

In [None]:
## Accuracy graph: baseline versus normalised-input run.
plt.rc('font', family='Times New Roman', serif='Times')
plt.rc('xtick', labelsize=16)
plt.rc('ytick', labelsize=16)
plt.rc('axes', labelsize=20)

fig, ax = plt.subplots()
fig.subplots_adjust(left=.15, bottom=-1.16, right=1.99, top=.97)

## edit the plot list here
# Each curve gets an x-axis derived from its own history instead of
# args.epochs, so the cell still works after args is redefined or when the
# histories have a different number of entries.
for history in (acc_total, acc_total_nor):
    plt.plot(range(len(history)), history)


ax.set_ylabel('Accuracy')
ax.set_xlabel('Number of Epochs')
ax.legend(['Basic Accuracy', 'Normalize Accuracy'],fontsize=15)
sns.set_style('whitegrid')

In [None]:
## Loss graph: baseline versus normalised-input run.
plt.rc('font', family='Times New Roman', serif='Times')
plt.rc('xtick', labelsize=16)
plt.rc('ytick', labelsize=16)
plt.rc('axes', labelsize=20)

fig, ax = plt.subplots()
fig.subplots_adjust(left=.15, bottom=-1.16, right=1.99, top=.97)

## edit the plot list here
# Each curve gets an x-axis derived from its own history instead of
# args.epochs, so the cell still works after args is redefined or when the
# histories have a different number of entries.
for history in (los_total, los_total_nor):
    plt.plot(range(len(history)), history)


ax.set_ylabel('Loss')
ax.set_xlabel('Number of Epochs')
ax.legend(['Basic Loss', 'Normalize Loss'],fontsize=15)
sns.set_style('whitegrid')

 - - -

## Question - d

(d) Train an MLP architecture that has the same number of layers (4 layers, each layer has 128 hidden units, ReLU activation function). Compare the training loss and testing accuracy by drawing the two curves in the same plot. (2 pts)

In [None]:
## Batch loaders over the un-normalised datasets for the MLP run:
## shuffled training loader, fixed-order test loader, 4 worker processes each.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.bs, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.bs, shuffle=False, num_workers=4)

### MLP model architecture

In [None]:
class CIFAR_MLP(nn.Module):
    """Fully connected classifier: 3072 -> 128 -> 128 -> 128 -> 10 with ReLU in between.

    ``forward`` returns ``log_softmax`` over the class axis.
    """

    def __init__(self):
        super().__init__()
        in_features = 32 * 32 * 3
        hidden = 128
        self.layer1 = nn.Linear(in_features, hidden)
        self.layer2 = nn.Linear(hidden, hidden)
        self.layer3 = nn.Linear(hidden, hidden)
        self.layer4 = nn.Linear(hidden, 10)

    def forward(self, x):
        # Flatten every sample into a single 3072-long feature vector.
        x = x.view(-1, 32 * 32 * 3)
        for hidden_layer in (self.layer1, self.layer2, self.layer3):
            x = F.relu(hidden_layer(x))
        return F.log_softmax(self.layer4(x), dim=1)

In [None]:
# Build the MLP and move it to DEVICE; `model_MLP` is the name used for training.
model_MNIST = CIFAR_MLP()
model_MLP = model_MNIST.to(DEVICE)

# Rebinds the module-level `criterion` and `optimizer`; the optimiser is bound to the MLP parameters.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_MLP.parameters(), lr = args.lr, momentum=args.momentum)
print(model_MLP)

### Training procedure

In [None]:
# Train the MLP and record its test loss / accuracy after every epoch.
los_total_mlp = []
acc_total_mlp = []

# range(1, args.epochs) yields args.epochs - 1 iterations.
# `epoch` is a module-level name; train() reads it for its log lines.
for epoch in range(1, args.epochs):
    train(model_MLP, train_loader, optimizer, log_interval = 200)
    # Fixed: evaluate the MLP that was just trained. The previous code passed
    # `model`, i.e. the CNN left over from the preceding section.
    val_loss, validation_accuracy = evaluate(model_MLP, test_loader)
    print("\n[EPOCH: {}], \tTest Loss: {:.4f}, \tTest Accuracy: {:.2f} % \n".format(
        epoch, val_loss, validation_accuracy))
    
    los_total_mlp.append(val_loss)
    acc_total_mlp.append(validation_accuracy)

### Accuracy and Loss plots

In [None]:
## Accuracy graph: baseline CNN versus MLP.
plt.rc('font', family='Times New Roman', serif='Times')
plt.rc('xtick', labelsize=16)
plt.rc('ytick', labelsize=16)
plt.rc('axes', labelsize=20)

fig, ax = plt.subplots()
fig.subplots_adjust(left=.15, bottom=-1.16, right=1.99, top=.97)

## edit the plot list here
# Each curve gets an x-axis derived from its own history instead of
# args.epochs, so the cell still works after args is redefined or when the
# histories have a different number of entries.
for history in (acc_total, acc_total_mlp):
    plt.plot(range(len(history)), history)


ax.set_ylabel('Accuracy')
ax.set_xlabel('Number of Epochs')
ax.legend(['Basic Accuracy', 'MLP Accuracy'],fontsize=15)
sns.set_style('whitegrid')

In [None]:
## Loss graph: baseline CNN versus MLP.
plt.rc('font', family='Times New Roman', serif='Times')
plt.rc('xtick', labelsize=16)
plt.rc('ytick', labelsize=16)
plt.rc('axes', labelsize=20)

fig, ax = plt.subplots()
fig.subplots_adjust(left=.15, bottom=-1.16, right=1.99, top=.97)

## edit the plot list here
# Each curve gets an x-axis derived from its own history instead of
# args.epochs, so the cell still works after args is redefined or when the
# histories have a different number of entries.
for history in (los_total, los_total_mlp):
    plt.plot(range(len(history)), history)


ax.set_ylabel('Loss')
ax.set_xlabel('Number of Epochs')
ax.legend(['Basic Loss', 'MLP Loss'],fontsize=15)
sns.set_style('whitegrid')

 - - -

## Question - e

(e) Train with the Adam optimizer and compare the training loss and testing accuracy by drawing the two curves in the same plot. (2 pts)

### Adam Optimizer 

In [None]:
# Fresh CNN trained with Adam (learning rate from args) instead of SGD.
# This rebinds the module-level `model`, `criterion` and `optimizer`.
model = CIFAR_CNN().to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = args.lr)
print(model)

### Training procedures

In [None]:
# Train the CNN with Adam and record the test loss / accuracy after every epoch.
los_total_adam = []
acc_total_adam = []


# range(1, args.epochs) yields args.epochs - 1 iterations, so each history
# list ends up with args.epochs - 1 entries.
# NOTE(review): the task statement asks for 5 training epochs — confirm whether
# the missing epoch is intended.
# `epoch` is a module-level name; train() reads it for its log lines.
for epoch in range(1, args.epochs):
    train(model, train_loader, optimizer, log_interval = 200)
    val_loss, validation_accuracy = evaluate(model, test_loader)
    print("\n[EPOCH: {}], \tTest Loss: {:.4f}, \tTest Accuracy: {:.2f} % \n".format(
        epoch, val_loss, validation_accuracy))
    
    los_total_adam.append(val_loss)
    acc_total_adam.append(validation_accuracy)

### Accuracy and Loss plots

In [None]:
## Accuracy graph: SGD versus Adam.
plt.rc('font', family='Times New Roman', serif='Times')
plt.rc('xtick', labelsize=16)
plt.rc('ytick', labelsize=16)
plt.rc('axes', labelsize=20)

fig, ax = plt.subplots()
fig.subplots_adjust(left=.15, bottom=-1.16, right=1.99, top=.97)

## edit the plot list here
# Each curve gets an x-axis derived from its own history instead of
# args.epochs, so the cell still works after args is redefined or when the
# histories have a different number of entries.
for history in (acc_total, acc_total_adam):
    plt.plot(range(len(history)), history)


ax.set_ylabel('Accuracy')
ax.set_xlabel('Number of Epochs')
ax.legend(['SGD Accuracy', 'ADAM Accuracy'],fontsize=15)
sns.set_style('whitegrid')

In [None]:
## Loss graph: SGD versus Adam.
plt.rc('font', family='Times New Roman', serif='Times')
plt.rc('xtick', labelsize=16)
plt.rc('ytick', labelsize=16)
plt.rc('axes', labelsize=20)

fig, ax = plt.subplots()
fig.subplots_adjust(left=.15, bottom=-1.16, right=1.99, top=.97)

## edit the plot list here
# Each curve gets an x-axis derived from its own history instead of
# args.epochs, so the cell still works after args is redefined or when the
# histories have a different number of entries.
for history in (los_total, los_total_adam):
    plt.plot(range(len(history)), history)


ax.set_ylabel('Loss')
ax.set_xlabel('Number of Epochs')
ax.legend(['SGD Loss', 'ADAM Loss'],fontsize=15)
sns.set_style('whitegrid')

- - -

## Question - f

(f) Change the hyperparameters and network architectures to achieve better training loss and testing accuracy curves. Provide the final architecture and hyperparameters that you used. (2 pts)

### Arguments settings

In [None]:
class Args:
    """Training hyper-parameters and run settings for the tuned experiment."""

    # Optimisation settings
    epochs = 10
    bs = 16  # mini-batch size
    lr = 0.001
    momentum = 0.9

    # Dataset description
    num_channels = 3  # RGB input images
    num_classes = 10  # CIFAR-10 has ten classes

    # Miscellaneous
    verbose = 'store_true'
    seed = 710674

# Rebind the shared settings object to the new Args class.
args = Args()

# Seed the NumPy, Python and PyTorch random number generators with the same value.
np.random.seed(args.seed)
random.seed(args.seed)
torch.manual_seed(args.seed)

### Data Transforms

In [None]:
# Augmenting pipeline: resize to 32x32, random horizontal flip, contrast and
# saturation jitter, tensor conversion, then the same normalisation constants
# as data_transforms_nor.
data_transforms_my = transforms.Compose([
    transforms.Resize((32,32)),
    transforms.RandomHorizontalFlip(),
#     transforms.RandomVerticalFlip(),
    transforms.ColorJitter(contrast=(0.3,1), saturation=(0.3,1)),
    transforms.ToTensor(),
    transforms.Normalize([0.49139968, 0.48215841, 0.44653091], [0.24703223, 0.24348513, 0.26158784])
])

In [None]:
## Training data gets the augmenting transform. The test split must be
## evaluated deterministically, so it uses data_transforms_nor (tensor
## conversion + the same normalisation constants) rather than the random
## flip / colour jitter that was previously applied to it as well.
train_dataset_my = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=data_transforms_my)
test_dataset_my = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=data_transforms_nor)

In [None]:
## Batch loaders for the tuned experiment: shuffled for training, fixed order for testing.
train_loader_my = torch.utils.data.DataLoader(train_dataset_my, batch_size=args.bs, shuffle=True, num_workers=4)
test_loader_my = torch.utils.data.DataLoader(test_dataset_my, batch_size=args.bs, shuffle=False, num_workers=4)

In [None]:
# Pull one mini-batch from the augmented training loader and print its labels as a sanity check.
# The built-in next() is used because the iterator's .next() method is not part
# of the Python iterator protocol and is missing from recent PyTorch releases.
dataiter = iter(train_loader_my)
images, labels = next(dataiter)
print(labels)

### My model Architecture

In [None]:
## Designing simple CNN model architecture.
class CNN_my(nn.Module):
    """CNN built from one standard conv block and depthwise-separable conv blocks.

    Args:
        in_channels: Number of channels of the input images. It is now passed
            to the first convolution; previously it was ignored and 3 was
            hard-coded.
        num_classes: Width of the final linear layer.

    The feature extractor ends in global average pooling to 1x1, so the
    classifier always receives 1024 features per sample.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()

        def conv_batch(input_size, output_size, stride):
            # 3x3 convolution -> batch norm -> ReLU.
            return nn.Sequential(
                nn.Conv2d(input_size, output_size, 3, stride, 1, bias=False),
                nn.BatchNorm2d(output_size),
                nn.ReLU(inplace=True)
                )

        def conv_depth(input_size, output_size, stride):
            # Depthwise 3x3 convolution (groups == channels) followed by a
            # pointwise 1x1 convolution, each with batch norm and ReLU.
            return nn.Sequential(
                nn.Conv2d(input_size, input_size, 3, stride, 1, groups=input_size, bias=False),
                nn.BatchNorm2d(input_size),
                nn.ReLU(inplace=True),

                nn.Conv2d(input_size, output_size, 1, 1, 0, bias=False),
                nn.BatchNorm2d(output_size),
                nn.ReLU(inplace=True),
                )

        self.model = nn.Sequential(
            conv_batch(in_channels, 32, 2),
            conv_depth(32, 64, 1),
            conv_depth(64, 128, 2),
            conv_depth(128, 128, 1),
            conv_depth(128, 256, 2),
            conv_depth(256, 256, 1),
            conv_depth(256, 512, 2),
            conv_depth(512, 512, 1),
            conv_depth(512, 512, 1),
            conv_depth(512, 1024, 2),
            conv_depth(1024, 1024, 1),
            nn.AdaptiveAvgPool2d(1)
        )
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return the raw class scores for a batch of images."""
        x = self.model(x)
        # The pooled maps are 1x1, so this leaves one 1024-vector per sample.
        x = x.view(-1, 1024)
        # NOTE(review): fc1 and fc2 are applied back to back with no
        # activation in between — confirm this is intended.
        x = self.fc1(x)
        x = self.fc2(x)
        return x


### Pre-trained models

In [None]:
# Candidate pre-trained backbones. EfficientNet-B3 is created with
# num_classes=args.num_classes; the two torchvision models are loaded with
# pretrained=True and get their output layers replaced in a later cell.
# model_res = models.resnet18(num_classes=2, pretrained=True)
model_eff3 = EfficientNet.from_pretrained('efficientnet-b3', num_classes=args.num_classes)
model_resnet18 = models.resnet18(pretrained=True)
model_mobnetv2 = models.mobilenet_v2(pretrained=True)

In [None]:
# Put ResNet-18 into evaluation mode; as the last expression of the cell the
# call's result is also shown as the cell output (presumably the module itself).
model_resnet18.eval()

In [None]:
# Replace the output layers: ResNet-18 now maps 512 features to 256,
# MobileNetV2 maps 1280 features directly to num_classes.
model_resnet18.fc = nn.Linear(in_features = 512, out_features = 256)
model_mobnetv2.classifier = nn.Linear(in_features = 1280, out_features=args.num_classes)

In [None]:
# Extra linear layer mapping 256 features to num_classes.
add_layer = nn.Linear(in_features = 256, out_features = args.num_classes)

In [None]:
# Chain ResNet-18 (whose fc layer outputs 256 features) with the extra
# 256 -> num_classes layer. The previous line assigned to `model_resnet.fc`,
# but `model_resnet` was never defined, so it raised a NameError.
model_resnet = nn.Sequential(model_resnet18, add_layer)

In [None]:
# Display `model_resnet` as the cell output.
model_resnet

### Finalize the training model

In [None]:
# Choose the model to train: exactly one assignment is active (EfficientNet-B3);
# the commented lines are the alternatives.
# model_my = CNN_my(args.num_channels, num_classes = args.num_classes).to(DEVICE)
# model_my = CIFAR_LeNet().to(DEVICE)
model_my = model_eff3.to(DEVICE)
# model_my = model_mobnetv2.to(DEVICE)
# model_my = model_resnet18.to(DEVICE)

### Optimizer and Scheduler

In [None]:
# Setting Optimizer and Objective Function

# Adam over the selected model's parameters.
optimizer = torch.optim.Adam(model_my.parameters(), lr = args.lr)
# One-cycle schedule with cosine annealing. total_steps=args.epochs sizes the
# schedule for one scheduler.step() call per epoch.
# NOTE(review): the schedule only takes effect if scheduler.step() is called
# during training — confirm.
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001, total_steps=args.epochs, anneal_strategy='cos')
criterion = nn.CrossEntropyLoss() ## setup the loss function

### Training procedures

In [None]:
# Run one training epoch, log progress and report the mean training loss.

def train(model_my, train_loader_my, optimizer, log_interval, epoch=None):
    """Train ``model_my`` for one pass over ``train_loader_my``.

    Reads the module-level ``criterion`` (loss function) and ``DEVICE``.

    Args:
        model_my: Network to optimise; it is switched to training mode.
        train_loader_my: Iterable yielding ``(image, label)`` batches.
        optimizer: Optimiser bound to ``model_my``'s parameters.
        log_interval: A progress line is printed every ``log_interval`` batches.
        epoch: Epoch number shown in the progress lines. When omitted, the
            module-level loop variable ``epoch`` is used if it exists, so
            existing call sites keep their output.

    Returns:
        Mean of the per-batch training losses of this epoch as a float
        (``nan`` if the loader yields no batches). Earlier versions returned
        ``None``; callers that ignore the result are unaffected.
    """
    if epoch is None:
        # Backward compatibility: older call sites rely on a notebook-level `epoch`.
        epoch = globals().get('epoch', '?')

    model_my.train()
    print(optimizer.param_groups[0]['lr'])

    loss_sum = 0.0
    num_batches = 0
    for batch_idx, (image, label) in enumerate(train_loader_my):
        image = image.to(DEVICE)
        label = label.to(DEVICE)
        optimizer.zero_grad()
        output = model_my(image)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        # Keep the running sum as a tensor; it is converted to a float once at the end.
        loss_sum = loss_sum + loss.detach()
        num_batches += 1

        if batch_idx % log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tTrain Loss: {:.6f}".format(
                epoch, batch_idx * len(image),
                len(train_loader_my.dataset), 100. * batch_idx / len(train_loader_my),
                loss.item()))

    if num_batches == 0:
        return float('nan')
    return float(loss_sum) / num_batches

In [None]:
# Measure loss and accuracy of a model on a held-out loader.

def evaluate(model_my, test_loader_my):
    """Return ``(mean per-batch loss, accuracy in percent)`` of ``model_my`` on ``test_loader_my``.

    Reads the module-level ``criterion`` and ``DEVICE``; gradients are disabled.
    """
    model_my.eval()
    loss_sum, num_hits = 0, 0

    with torch.no_grad():
        for batch_images, batch_labels in test_loader_my:
            batch_images = batch_images.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)
            scores = model_my(batch_images)
            loss_sum += criterion(scores, batch_labels).item()
            # Index of the highest score per sample, kept as a column vector.
            predicted = scores.max(1, keepdim=True)[1]
            num_hits += predicted.eq(batch_labels.view_as(predicted)).sum().item()

    mean_loss = loss_sum / len(test_loader_my)
    accuracy = 100. * num_hits / len(test_loader_my.dataset)
    return mean_loss, accuracy

In [None]:
# Train the selected model and record the test loss / accuracy after every epoch.
los_total_my = []
acc_total_my = []


# range(1, args.epochs) yields args.epochs - 1 iterations, so each history
# list ends up with args.epochs - 1 entries.
# `epoch` is a module-level name; train() reads it for its log lines.
for epoch in range(1, args.epochs):
    train(model_my, train_loader_my, optimizer, log_interval = 200)
    val_loss, validation_accuracy = evaluate(model_my, test_loader_my)
    print("\n[EPOCH: {}], \tTest Loss: {:.4f}, \tTest Accuracy: {:.2f} % \n".format(
        epoch, val_loss, validation_accuracy))
    
    los_total_my.append(val_loss)
    acc_total_my.append(validation_accuracy)

    # Advance the OneCycleLR schedule once per epoch. The scheduler was created
    # with total_steps=args.epochs but was never stepped, which left the
    # learning rate at the schedule's initial value for the whole run.
    scheduler.step()

### Accuracy and Loss plots

In [None]:
## Accuracy and loss of the tuned model on a shared x-axis (two y-axes).
plt.rc('font', family='Times New Roman', serif='Times')
plt.rc('xtick', labelsize=16)
plt.rc('ytick', labelsize=16)
plt.rc('axes', labelsize=20)

fig, ax = plt.subplots()
fig.subplots_adjust(left=.15, bottom=-1.16, right=1.99, top=.97)

color ='tab:blue'
ax.set_ylabel('Accuracy')
ax.set_xlabel('Number of Epochs')
# The x-axis is derived from the recorded history instead of args.epochs, so
# the cell still works if the number of recorded epochs changes.
plt.plot(range(len(acc_total_my)), acc_total_my)
ax.legend(['My Model Accuracy'],fontsize=15, loc='upper right')
sns.set_style('whitegrid')

# Second y-axis for the loss curve.
ax1 = ax.twinx()
color = 'tab:red'
ax1.set_ylabel('Loss')
ax1.plot(range(len(los_total_my)), los_total_my, color = color)
ax1.legend(['My Model Loss'], fontsize=15, loc='lower right')
sns.set_style('whitegrid')