In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets
# Prefer the first CUDA GPU when one is visible to PyTorch; otherwise run on the CPU.
_cuda_ok = torch.cuda.is_available()
if _cuda_ok:
    device = torch.device("cuda:0")
    _hardware = torch.cuda.get_device_properties(0)
else:
    device = torch.device("cpu")
    _hardware = 'CPU'

# Report the PyTorch version together with the hardware the notebook will use.
print('PyTorch %s %s' % (torch.__version__, _hardware))
print(device)

In [None]:
import os

# Per-channel (R, G, B) constants handed to transforms.Normalize as (mean, std).
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

# Training preprocessing: tensor conversion + normalization only.
# No data augmentation is applied; any augmentation should be added here
# (and only here) so that validation images stay untouched.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# Validation preprocessing: currently identical to the training pipeline.
transform_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# download=True fetches the archive into ./data if it is not already there.
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
# The split selected by train=False is used as the validation set.
valset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_val)

# Every DataLoader worker is a separate process. Size the pool from the CPUs
# actually available instead of a fixed, very large number, which
# oversubscribes the host and can exhaust memory or file descriptors.
NUM_WORKERS = min(4, os.cpu_count() or 1)

# Training batches: 4 images each, reshuffled every epoch.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=4,
    shuffle=True,
    num_workers=NUM_WORKERS,
)
# Validation batches: one image at a time, in dataset order (no shuffling).
valloader = torch.utils.data.DataLoader(
    valset,
    batch_size=1,
    num_workers=NUM_WORKERS,
)
print(len(valset))
print(len(trainset))

In [None]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """LeNet-style CNN producing 10 class logits from 3-channel 32x32 images.

    Layer order:
        conv1 (3->6, 5x5)  -> ReLU -> 2x2 average pool
        conv2 (6->16, 5x5) -> ReLU -> 2x2 average pool
        flatten -> fc1 (400->120) -> ReLU -> fc2 (120->84) -> ReLU -> fc3 (84->10)
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1, padding=0)
        # A single pooling module is shared by both conv stages (it has no parameters).
        self.pool = nn.AvgPool2d(2, 2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
        # 16 channels x 5 x 5 spatial positions remain after the second pool
        # when the input is 32x32.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.ReLU = nn.ReLU()

    def forward(self, x):
        """Return raw (un-normalized) class scores for a batch of images.

        Args:
            x: image tensor whose last three dimensions are (3, 32, 32).

        Returns:
            Tensor with 10 scores per image.

        Raises:
            ValueError: if the feature map after the second pooling stage is
                not 16x5x5, i.e. the input images were not 32x32.
        """
        x = self.pool(self.ReLU(self.conv1(x)))  # 32x32 -> conv 28x28 -> pool 14x14
        x = self.pool(self.ReLU(self.conv2(x)))  # 14x14 -> conv 10x10 -> pool 5x5

        # view(-1, 400) would happily regroup elements across samples if the
        # spatial size were different, silently changing the batch dimension.
        # Fail loudly instead.
        feature_shape = tuple(x.shape[-3:])
        if feature_shape != (16, 5, 5):
            raise ValueError(
                "Net expects 3x32x32 inputs (16x5x5 features after pooling), "
                "got features of shape %s" % (feature_shape,)
            )
        x = x.view(-1, 16 * 5 * 5)

        x = self.ReLU(self.fc1(x))
        x = self.ReLU(self.fc2(x))
        x = self.fc3(x)  # no softmax: the output is raw logits
        return x

# Build the network (parameters start on the CPU).
net = Net()

# Smoke test: push a dummy batch of 16 all-ones 3x32x32 images through the
# freshly built network to confirm the layer shapes line up.
in1 = torch.ones((16, 3, 32, 32))
out = net(in1)

# Move the parameters to the selected device. As the last expression of the
# cell this also displays the module summary.
net.to(device)

In [None]:
import torch.optim as optim

# Classification loss computed from the network outputs and the target labels.
criterion = nn.CrossEntropyLoss()

# AdamW over every parameter of the network, with the optimizer's default
# hyper-parameters.
optimizer = optim.AdamW(params=net.parameters())

In [None]:
!pip install livelossplot==0.1.2 # command to install livelossplot

In [None]:
import torch, time, copy, sys, os
import matplotlib.pyplot as plt
from livelossplot import PlotLosses
import torch.optim.lr_scheduler as lr_scheduler
import math

liveloss = PlotLosses()  # live-updating plot of loss and accuracy

epochs = 10  # number of passes over the training set

# Cosine learning-rate multiplier: 1.0 at epoch 0, decaying to 0.05 at `epochs`.
lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * 0.95 + 0.05
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)


def _run_epoch(loader, train):
    """Run one full pass over `loader`.

    Args:
        loader: iterable yielding (inputs, labels) batches.
        train: if True, back-propagate and step the optimizer after every
            batch; if False, only evaluate, with autograd disabled.

    Returns:
        (mean per-sample loss, accuracy) as Python floats.
    """
    net.train(train)  # training mode when train=True, evaluation mode otherwise
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    # Gradients are only tracked while training; evaluation skips building
    # the autograd graph.
    with torch.set_grad_enabled(train):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            if train:
                optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)

            if train:
                loss.backward()
                optimizer.step()

            n_batch = labels.size(0)
            # `criterion` averages over the batch, so weight it by the batch
            # size before summing; dividing by the sample count below then
            # yields a true per-sample mean for any batch size.
            loss_sum += loss.item() * n_batch
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()
            n_seen += n_batch

    n_seen = max(n_seen, 1)  # an empty loader reports 0.0 instead of dividing by zero
    return loss_sum / n_seen, n_correct / n_seen


print("starting")
best_acc = 0.0  # best validation accuracy seen so far
for epoch in range(epochs):
    print("epoch: ", epoch)

    tr_loss, tr_acc = _run_epoch(trainloader, train=True)
    val_loss, val_acc = _run_epoch(valloader, train=False)

    if val_acc > best_acc:
        best_acc = val_acc
        # To keep the best weights, snapshot them here, e.g.
        # copy.deepcopy(net.state_dict()).

    # Advance the cosine schedule once per epoch (independent of the
    # validation metrics).
    scheduler.step()

    # Plain Python floats are passed so the plot never has to convert
    # (possibly GPU-resident) tensors.
    liveloss.update({
        'loss': tr_loss,
        'val_loss': val_loss,
        'accuracy': tr_acc,
        'val_accuracy': val_acc
    })
    liveloss.draw()

    print("epoch: ", epoch, "train loss: ", tr_loss, "val loss: ", val_loss, "train acc: ", tr_acc, "val acc: ", val_acc, "best accuracy: ", best_acc)
    print()

print('Finished Training')