Importing the packages

In [12]:
import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

Loading the CIFAR-10 dataset

In [13]:
# Device will determine whether to run the training on GPU or CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [36]:
from datasets import load_dataset

dataset_train = load_dataset(
    'cifar10',
    split='train', # training dataset
    ignore_verifications=True  # set to True if seeing splits Error
)

dataset_train

Found cached dataset cifar10 (C:/Users/rajes/.cache/huggingface/datasets/cifar10/plain_text/1.0.0/447d6ec4733dddd1ce3bb577c7166b986eaa4c538dcd9e805ba61f35674a9de4)


Dataset({
    features: ['img', 'label'],
    num_rows: 50000
})

In [37]:
# checking the number of classes
num_classes = len(set(dataset_train['label']))
num_classes

10

In [38]:
#Training the model
dataset_val = load_dataset(
    'cifar10',
    split='test', # test dataset
    ignore_verifications=True  # set to True if seeing splits Error
)

dataset_val

Found cached dataset cifar10 (C:/Users/rajes/.cache/huggingface/datasets/cifar10/plain_text/1.0.0/447d6ec4733dddd1ce3bb577c7166b986eaa4c538dcd9e805ba61f35674a9de4)


Dataset({
    features: ['img', 'label'],
    num_rows: 10000
})

In [39]:
# image size
img_size = 32

# setting the preprocessor variable
preprocess = transforms.Compose([
    transforms.Resize((img_size,img_size)),
    transforms.ToTensor()
])

In [40]:
from tqdm.auto import tqdm

inputs_train = []

for record in tqdm(dataset_train):
    image = record['img']
    label = record['label']

    # convert from grayscale to RGB
    if image.mode == 'L':
        image = image.convert("RGB")
        
    # prepocessing
    input_tensor = preprocess(image)
    
    # append to batch list
    inputs_train.append([input_tensor, label]) 

  0%|          | 0/50000 [00:00<?, ?it/s]

In [41]:
print(len(inputs_train), inputs_train[0][0].shape)

50000 torch.Size([3, 32, 32])


In [42]:
inputs_train[0] #checking whether all values are normalized between 0 and 1

[tensor([[[0.6980, 0.6980, 0.6980,  ..., 0.6667, 0.6588, 0.6471],
          [0.7059, 0.7020, 0.7059,  ..., 0.6784, 0.6706, 0.6588],
          [0.6941, 0.6941, 0.6980,  ..., 0.6706, 0.6627, 0.6549],
          ...,
          [0.4392, 0.4431, 0.4471,  ..., 0.3922, 0.3843, 0.3961],
          [0.4392, 0.4392, 0.4431,  ..., 0.4000, 0.4000, 0.4000],
          [0.4039, 0.3922, 0.4039,  ..., 0.3608, 0.3647, 0.3569]],
 
         [[0.6902, 0.6902, 0.6902,  ..., 0.6588, 0.6510, 0.6392],
          [0.6980, 0.6941, 0.6980,  ..., 0.6706, 0.6627, 0.6510],
          [0.6863, 0.6863, 0.6902,  ..., 0.6627, 0.6549, 0.6471],
          ...,
          [0.4196, 0.4275, 0.4314,  ..., 0.3804, 0.3686, 0.3725],
          [0.4000, 0.4039, 0.4039,  ..., 0.3725, 0.3647, 0.3608],
          [0.3765, 0.3647, 0.3725,  ..., 0.3294, 0.3373, 0.3294]],
 
         [[0.7412, 0.7412, 0.7412,  ..., 0.7059, 0.6941, 0.6824],
          [0.7490, 0.7451, 0.7490,  ..., 0.7137, 0.7059, 0.6941],
          [0.7373, 0.7373, 0.7412,  ...,

In [43]:
import numpy as np

np.random.seed(0)#setting seed to confirm that the validation set is always used

idx = np.random.randint(0, len(inputs_train), 512)
idx.shape

(512,)

In [44]:
# concatenating the image tensors
tensors = torch.concat([inputs_train[i][0] for i in idx], axis=1)
tensors.shape

torch.Size([3, 16384, 32])

In [45]:
# merging values
tensors = tensors.swapaxes(0, 1).reshape(3, -1).T
tensors.shape

torch.Size([524288, 3])

Calculating mean and standard deviation

In [46]:
mean = torch.mean(tensors, axis=0)
mean

tensor([0.4670, 0.4735, 0.4662])

In [47]:
std = torch.std(tensors, axis=0)
std

tensor([0.2496, 0.2489, 0.2521])

In [48]:
del tensors

In [49]:
#Normalizing the tensors
preprocess = transforms.Compose([transforms.Normalize(mean=mean, std=std)])

for i in tqdm(range(len(inputs_train))):
    # prepocessing
    input_tensor = preprocess(inputs_train[i][0])
    inputs_train[i][0] = input_tensor  # replace with normalized tensor

  0%|          | 0/50000 [00:00<?, ?it/s]

In [50]:
# merging the two preprocessing steps from before
preprocess = transforms.Compose([
    transforms.Resize((img_size,img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [51]:
from tqdm.auto import tqdm

inputs_val = []
i = 0
for record in tqdm(dataset_val):
    image = record['img']
    label = record['label']

    # convert from grayscale to RBG
    if image.mode == 'L':
        image = image.convert("RGB")
        
    # prepocessing
    input_tensor = preprocess(image)
    inputs_val.append((input_tensor, label)) # append to batch list

  0%|          | 0/10000 [00:00<?, ?it/s]

To increase efficiency, I'm dividing the images into batches for training

In [52]:
# define batch size
batch_size = 64

In [53]:
dloader_train = torch.utils.data.DataLoader(
    inputs_train, batch_size=batch_size, shuffle=True
)

In [54]:
dloader_val = torch.utils.data.DataLoader(
    inputs_val, batch_size=batch_size, shuffle=False
)

Now to build the Neural Network!

In [55]:
# creating a CNN class
class ConvNeuralNet(nn.Module):
	#  determine what layers and their order in CNN object 
    def __init__(self, num_classes):
        super(ConvNeuralNet, self).__init__()
        self.conv_layer1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=4, padding=1)
        self.relu1 = nn.ReLU()
        self.max_pool1 = nn.MaxPool2d(kernel_size=3, stride=2)

        self.conv_layer2 = nn.Conv2d(in_channels=64, out_channels=192, kernel_size=4, padding=1)
        self.relu2 = nn.ReLU()
        self.max_pool2 = nn.MaxPool2d(kernel_size=3, stride=2)

        self.conv_layer3 = nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3, padding=1)
        self.relu3 = nn.ReLU()
        
        self.conv_layer4 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1)
        self.relu4 = nn.ReLU()

        self.conv_layer5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.relu5 = nn.ReLU()
        self.max_pool5 = nn.MaxPool2d(kernel_size=3, stride=2)
        
        self.dropout6 = nn.Dropout(p=0.5)
        self.fc6 = nn.Linear(1024, 512)
        self.relu6 = nn.ReLU()
        self.dropout7 = nn.Dropout(p=0.5)
        self.fc7 = nn.Linear(512, 256)
        self.relu7 = nn.ReLU()
        self.fc8 = nn.Linear(256, num_classes)
    
    # progresses data across layers    
    def forward(self, x):
        out = self.conv_layer1(x)
        out = self.relu1(out)
        out = self.max_pool1(out)
        
        out = self.conv_layer2(out)
        out = self.relu2(out)
        out = self.max_pool2(out)

        out = self.conv_layer3(out)
        out = self.relu3(out)

        out = self.conv_layer4(out)
        out = self.relu4(out)

        out = self.conv_layer5(out)
        out = self.relu5(out)
        out = self.max_pool5(out)
        
        out = out.reshape(out.size(0), -1)
        
        out = self.dropout6(out)
        out = self.fc6(out)
        out = self.relu6(out)

        out = self.dropout7(out)
        out = self.fc7(out)
        out = self.relu7(out)

        out = self.fc8(out)  # final logits
        return out

In [60]:
# setting up the model for training
model = ConvNeuralNet(num_classes).to(device)

loss_func = nn.CrossEntropyLoss()

# set learning rate 
lr = 0.1

# setting optimizer as SGD 
optimizer = torch.optim.SGD(model.parameters(), lr=lr) 

In [61]:
# training and validating the network
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
	# loading in the data in batches
    for i, (images, labels) in enumerate(dloader_train):  
        # move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)
        
        # forward propagation
        outputs = model(images)
        loss = loss_func(outputs, labels)
        
        # backward propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    # at end of epoch I'm checking validation loss and accuracy on validation set
    with torch.no_grad():
        model.eval()
        correct = 0
        total = 0
        all_val_loss = []
        for images, labels in dloader_val:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            total += labels.size(0)
            # calculate predictions
            predicted = torch.argmax(outputs, dim=1)
            # calculate actual values
            correct += (predicted == labels).sum().item()
            # calculate the loss
            all_val_loss.append(loss_func(outputs, labels).item())
        # calculate val-loss
        mean_val_loss = sum(all_val_loss) / len(all_val_loss)
        # calculate val-accuracy
        mean_val_acc = 100 * (correct / total)

    print(
        'Epoch [{}/{}], train_loss: {:.4f}, val-loss: {:.4f}, val-acc: {:.1f}%'.format(
            epoch+1, num_epochs, loss.item(), mean_val_loss, mean_val_acc
        )
    )

Epoch [1/20], train_loss: 1.8053, val-loss: 1.8845, val-acc: 24.6%
Epoch [2/20], train_loss: 1.5106, val-loss: 1.5195, val-acc: 42.7%
Epoch [3/20], train_loss: 1.0853, val-loss: 2.3137, val-acc: 37.5%
Epoch [4/20], train_loss: 0.9178, val-loss: 1.1954, val-acc: 59.0%
Epoch [5/20], train_loss: 0.7322, val-loss: 0.9406, val-acc: 67.7%
Epoch [6/20], train_loss: 1.3147, val-loss: 1.0267, val-acc: 66.3%
Epoch [7/20], train_loss: 0.3104, val-loss: 0.8386, val-acc: 72.5%
Epoch [8/20], train_loss: 0.3476, val-loss: 0.9990, val-acc: 70.1%
Epoch [9/20], train_loss: 0.5926, val-loss: 1.0962, val-acc: 68.7%
Epoch [10/20], train_loss: 0.3866, val-loss: 0.8438, val-acc: 75.7%
Epoch [11/20], train_loss: 0.3246, val-loss: 0.7067, val-acc: 78.5%
Epoch [12/20], train_loss: 0.3917, val-loss: 1.0653, val-acc: 70.1%
Epoch [13/20], train_loss: 0.1026, val-loss: 0.8092, val-acc: 78.9%
Epoch [14/20], train_loss: 0.0129, val-loss: 0.7691, val-acc: 79.7%
Epoch [15/20], train_loss: 0.2551, val-loss: 0.9009, val-