In [3]:
#importing libraries
import torch
from torch.nn import Module
from torch.nn import Conv2d
from torch.nn import Linear
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import LogSoftmax
from torch import flatten

In [4]:
import numpy as np
import matplotlib.pyplot as plt

In [5]:
print("CUDA Available:", torch.cuda.is_available())
print("Number of GPUs:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("GPU Name:", torch.cuda.get_device_name(0))

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

CUDA Available: True
Number of GPUs: 1
GPU Name: NVIDIA GeForce RTX 4050 Laptop GPU
Using device: cuda


In [25]:
class VGG(Module):
    def __init__(self, input_channels,classes):
        super(VGG, self).__init__()

        self.conv1 = Conv2d(input_channels,64,3,1,1)
        self.conv2 = Conv2d(64,64,3,1,1)
        self.pool1 = MaxPool2d(2,2)

        self.conv3 = Conv2d(64,128,3,1,1)
        self.conv4 = Conv2d(128,128,3,1,1)
        self.pool2 = MaxPool2d(2,2)

        self.conv5 = Conv2d(128,256,3,1,1)
        self.conv6 = Conv2d(256,256,3,1,1)
        self.pool3 = MaxPool2d(2,2)

        self.conv7 = Conv2d(256,512,3,1,1)
        self.conv8 = Conv2d(512,512,3,1,1)
        self.pool4 = MaxPool2d(2,2)

        self.conv9 = Conv2d(512,512,3,1,1)
        self.conv10 = Conv2d(512,512,3,1,1)
        self.pool5 = MaxPool2d(2,2)

        self.flatten = torch.nn.Flatten(start_dim=1)
        self.fc1 = Linear(512*7*7,4096)
        self.fc2 = Linear(4096,4096)
        self.fc3 = Linear(4096,classes)    

        self.relu = ReLU()
        self.logsoftmax = LogSoftmax(dim=1)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.pool1(x)

        x = self.relu(self.conv3(x))
        x = self.relu(self.conv4(x))
        x = self.pool2(x)

        x = self.relu(self.conv5(x))
        x = self.relu(self.conv6(x))
        x = self.pool3(x)

        x = self.relu(self.conv7(x))
        x = self.relu(self.conv8(x))
        x = self.pool4(x)

        x = self.relu(self.conv9(x))
        x = self.relu(self.conv10(x))
        x = self.pool5(x)

        print('dimensions of x before passing in fully connected layers', np.shape(x))

     
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)

        return self.logsoftmax(x)


In [None]:
#checking the output dimensions

model1 = VGG(3, 10) # 3 channels for RGB images
model1 = model1.to(device)
model1.forward(torch.rand(1,3,224,224).to(device))

dimensions of x before passing in fully connected layers torch.Size([1, 512, 7, 7])


tensor([[-2.3060, -2.3073, -2.2922, -2.3110, -2.3107, -2.3073, -2.2857, -2.2972,
         -2.3009, -2.3078]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)

In [None]:
from sklearn.metrics import classification_report
from torch.utils.data import random_split
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor
from torchvision.datasets import KMNIST
from torchvision.datasets import Kitti
from torch.optim import Adam
from torch import nn
import matplotlib.pyplot as plt
import numpy as np
import argparse
import torch
import time

# define training hyperparameters
INIT_LR = 1e-3
BATCH_SIZE = 64
EPOCHS = 10
# define the train and val splits
TRAIN_SPLIT = 0.75
VAL_SPLIT = 1 - TRAIN_SPLIT

trainData = Kitti(root = 'data', train=True, download = True, transform = ToTensor())
testData = Kitti(root = 'data', train=False, download = True, transform = ToTensor())

numTrainSamples = int(TRAIN_SPLIT * len(trainData)) 
numValSamples = int(VAL_SPLIT * len(trainData))

(trainData, valData) = random_split(trainData,
	[numTrainSamples, numValSamples],
	generator=torch.Generator().manual_seed(42))

In [None]:
trainDataLoader = DataLoader(trainData, batch_size = BATCH_SIZE, shuffle = True)
valDataLoader = DataLoader(valData, batch_size = BATCH_SIZE)
testDataLoader = DataLoader(testData, batch_size = BATCH_SIZE)

trainSteps = len(trainDataLoader.dataset) // BATCH_SIZE
valSteps = len(valDataLoader.dataset) // BATCH_SIZE

model = VGG(3, classes=len(trainData.dataset.classes).to(device)) # 3 channels for RGB images

In [None]:
opt = Adam(model.parameters(), lr=INIT_LR)
lossFn = nn.NLLLoss()
H = {
	"train_loss": [],
	"train_acc": [],
	"val_loss": [],
	"val_acc": []
}

In [None]:
#need to understand this part of the code
#training mode
for e in range(0, EPOCHS):
	model.train()
	totalTrainLoss = 0
	totalValLoss = 0
	trainCorrect = 0
	valCorrect = 0
	for (x, y) in trainDataLoader:
		(x, y) = (x.to(device), y.to(device))
		pred = model(x)
		loss = lossFn(pred, y)

		opt.zero_grad()
		loss.backward()
		opt.step()
		
		totalTrainLoss += loss
		trainCorrect += (pred.argmax(1) == y).type(
			torch.float).sum().item()