In [28]:
import numpy as np
import pandas as pd
import os

In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
import cv2

In [30]:
device = torch.device("cuda")

In [31]:
class Dataset(torch.utils.data.Dataset): #Inherits from torch.utils.data.Dataset
    
    def __init__(self):
        #default directory where data is loaded
        self.filepath = '/kaggle/input/car-steering-angle-prediction/driving_dataset/'
        self.filenames = os.listdir(self.filepath)
        angles = []
        with open(self.filepath + 'angles.txt') as f:
                b = []
                for line in f:
                    filename , angle = line.split()
                    angles.append([filename, np.float(angle)])
                    b.append(np.float(angle))
        #Mean and std of angle values for normalization:
        self.avg_angle = np.mean(b)
        self.std_angle = np.std(b)
        self.angles = angles
        
    def __len__(self):
        return len(self.angles)

    def __getitem__(self, index):
        filename, angle = self.angles[index]
        img = cv2.imread(self.filepath + filename)
        #Resizing images to (66, 200)
        resized = cv2.resize(img, (66, 200), interpolation = cv2.INTER_AREA)
        #return the image converted to a numpy array its corresponding steering angle
        return torch.from_numpy(resized.transpose()).float(), torch.tensor((angle-self.avg_angle)/self.std_angle, dtype = torch.float).reshape(-1)

In [32]:
all_data = Dataset()

In [33]:
all_data.__len__()

45406

In [34]:
train_loader = torch.utils.data.DataLoader(all_data, batch_size = 40, shuffle = False, sampler = list(range(0,25570)), num_workers=2, pin_memory=True)
val_loader = torch.utils.data.DataLoader(all_data, batch_size = 20, shuffle = False, sampler = list(range(25570,35406)),num_workers=2,pin_memory=True)
test_loader = torch.utils.data.DataLoader(all_data, batch_size = 20, shuffle = False, sampler = list(range(35406,45406)), num_workers=2,pin_memory=True)

In [35]:
print(train_loader.__len__(), val_loader.__len__(), test_loader.__len__())

640 492 500


In [36]:
class ConvNet(nn.Module):

    def __init__(self):
        super().__init__()
        
        #Convolution Layers:
        self.conv1 = nn.Conv2d(3, 24, 5, stride=2)
        self.conv2 = nn.Conv2d(24, 36, 5, stride=2)
        self.conv3 = nn.Conv2d(36, 48, 5, stride=2)
        self.conv4 = nn.Conv2d(48, 64, 3)
        self.conv5 = nn.Conv2d(64, 64, 3)
        
        #Fully Connected Layers:
        self.fc1 = nn.Linear(64 * 18, 100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 10)
        self.out = nn.Linear(10, 1)

    def forward(self, x):
        
        x = transforms.Normalize(x.mean(), x.std())(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        
        x = torch.flatten(x, 1)
        
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.out(x)
        
        return x

In [37]:
# defining the model
net = ConvNet().to(device)

# defining the optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.001)

# defining the loss function
criterion = torch.nn.MSELoss().to(device)
    
print(net)

ConvNet(
  (conv1): Conv2d(3, 24, kernel_size=(5, 5), stride=(2, 2))
  (conv2): Conv2d(24, 36, kernel_size=(5, 5), stride=(2, 2))
  (conv3): Conv2d(36, 48, kernel_size=(5, 5), stride=(2, 2))
  (conv4): Conv2d(48, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv5): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=1152, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=50, bias=True)
  (fc3): Linear(in_features=50, out_features=10, bias=True)
  (out): Linear(in_features=10, out_features=1, bias=True)
)


In [38]:
for i in range(5):
    
    #Training step:
    
    net.train().to(device)
    
    train_loss = 0
    for data, output in train_loader:
        
        # Clear the gradients
        optimizer.zero_grad()
        
        # Forward Pass
        pred = net(data.to(device))
        
        # Find the Loss
        loss = criterion(pred.to(device),output.to(device))
        
        # Calculate gradients 
        loss.backward()
        
        # Update Weights
        optimizer.step()
        
        # Calculate Loss
        train_loss = train_loss + loss.item()
    
    #Validation step:
    
    net.eval().to(device)
    
    val_loss = 0
    
    for data, output in val_loader:
        
        pred = net(data.to(device))
        loss = criterion(pred.to(device),output.to(device))
        val_loss = val_loss + loss.item()

    print(f'Epoch {i+1} - Training Loss: {train_loss / len(train_loader)} \t Validation Loss: {val_loss / len(val_loader)}')

Epoch 1 - Training Loss: 1.066382220253331 	 Validation Loss: 1.1508616501568751
Epoch 2 - Training Loss: 1.0583330885728173 	 Validation Loss: 1.1438412735738464
Epoch 3 - Training Loss: 1.0507213294128745 	 Validation Loss: 1.1265163017956739
Epoch 4 - Training Loss: 1.0376251713540114 	 Validation Loss: 1.1071344910251206
Epoch 5 - Training Loss: 1.026349757817266 	 Validation Loss: 1.090212330337155


In [39]:
net.eval().to(device)

test_loss = 0
    
for data, output in test_loader:
    pred = net(data.to(device))
    loss = criterion(pred.to(device),output.to(device))
    test_loss = test_loss + loss.item()

print(f'Test Loss: {test_loss / len(test_loader)}')

Test Loss: 0.7164886553969118


# **Bonus Exercise - Part 1:**

Chose learning rate, batch size and l2 regularization term as hyperparameters to be tuned. So, new training method with random search is as follows:

In [40]:
def random_search(n_iter, n_epoch):
    
    best_val_loss = np.inf
    
    for j in range(n_iter):
    
        # defining hyperparameters

        batch = np.int(np.random.randint(10,50,1))
        learning_rate = np.float(np.random.uniform(low=0.0005, high=0.005, size=1))
        reg_term = np.float(np.random.uniform(low=0.000005, high=0.05, size=1))
        
        print(f'Batch Size: {batch} - Learning Rate: {learning_rate} - Regularization Term: {reg_term}')

        train_loader = torch.utils.data.DataLoader(all_data, batch_size = 2*batch, shuffle = False, sampler = list(range(0,25570)), num_workers=2, pin_memory=True)
        val_loader = torch.utils.data.DataLoader(all_data, batch_size = batch, shuffle = False, sampler = list(range(25570,35406)),num_workers=2,pin_memory=True)
        test_loader = torch.utils.data.DataLoader(all_data, batch_size = batch, shuffle = False, sampler = list(range(35406,45406)), num_workers=2,pin_memory=True)

        # defining the model
        net = ConvNet().to(device)

        # defining the optimizer
        optimizer = torch.optim.SGD(net.parameters(), lr = learning_rate, weight_decay = reg_term)

        # defining the loss function
        criterion = torch.nn.MSELoss().to(device)
           

        for i in range(n_epoch):
    
            #Training step:

            net.train().to(device)

            train_loss = 0
            for data, output in train_loader:

                # Clear the gradients
                optimizer.zero_grad()

                # Forward Pass
                pred = net(data.to(device))

                # Find the Loss
                loss = criterion(pred.to(device),output.to(device))

                # Calculate gradients 
                loss.backward()

                # Update Weights
                optimizer.step()

                # Calculate Loss
                train_loss = train_loss + loss.item()

            #Validation step:

            net.eval().to(device)

            val_loss = 0

            for data, output in val_loader:

                pred = net(data.to(device))
                loss = criterion(pred.to(device),output.to(device))
                val_loss = val_loss + loss.item()

            print(f'Iteration {j+1} - Epoch {i+1} - Training Loss: {train_loss / len(train_loader)} \t Validation Loss: {val_loss / len(val_loader)}')

        if (np.isnan(val_loss) == False) and (val_loss / len(val_loader) < best_val_loss):
            best_batch_size = batch
            best_learning_rate = learning_rate
            best_reg_term = reg_term
            best_val_loss = val_loss / len(val_loader)
    
    return best_batch_size, best_learning_rate, best_reg_term, best_val_loss
        

In [41]:
opt_batch_size, opt_learning_rate, opt_reg_term, final_val_loss = random_search(5, 5)

Batch Size: 19 - Learning Rate: 0.0042309220429979835 - Regularization Term: 0.0061103124046041945
Iteration 1 - Epoch 1 - Training Loss: 1.0370597895030058 	 Validation Loss: 1.1112174195010527
Iteration 1 - Epoch 2 - Training Loss: 1.0011677599962783 	 Validation Loss: 1.097430596907673
Iteration 1 - Epoch 3 - Training Loss: 1.0305084532128157 	 Validation Loss: 1.1535406459073516
Iteration 1 - Epoch 4 - Training Loss: 1.0587839913925217 	 Validation Loss: 1.1537386206289908
Iteration 1 - Epoch 5 - Training Loss: 1.0590404319499143 	 Validation Loss: 1.1535680291928276
Batch Size: 33 - Learning Rate: 0.004917055680739997 - Regularization Term: 0.03341269623580725
Iteration 2 - Epoch 1 - Training Loss: 1.052062285079993 	 Validation Loss: 1.1317440553804443
Iteration 2 - Epoch 2 - Training Loss: 1.0501440556451513 	 Validation Loss: 1.1072102683231493
Iteration 2 - Epoch 3 - Training Loss: 1.038136799845721 	 Validation Loss: 1.0927837379167735
Iteration 2 - Epoch 4 - Training Loss: 1

In [42]:
print(opt_batch_size, opt_learning_rate, opt_reg_term, final_val_loss)

21 0.0025298616762345905 0.010866788417482457 1.0502943363561825


In [43]:
# Retraining the model with the best found hyperparameters

train_loader = torch.utils.data.DataLoader(all_data, batch_size = 2*opt_batch_size, shuffle = False, sampler = list(range(0,35406)), num_workers=2, pin_memory=True)
test_loader = torch.utils.data.DataLoader(all_data, batch_size = opt_batch_size, shuffle = False, sampler = list(range(35406,45406)), num_workers=2,pin_memory=True)

# defining the model
net = ConvNet().to(device)

# defining the optimizer
optimizer = torch.optim.SGD(net.parameters(), lr = opt_learning_rate, weight_decay = opt_reg_term)

# defining the loss function
criterion = torch.nn.MSELoss().to(device)


for i in range(5):

    #Training step:

    net.train().to(device)

    train_loss = 0
    for data, output in train_loader:

        # Clear the gradients
        optimizer.zero_grad()

        # Forward Pass
        pred = net(data.to(device))

        # Find the Loss
        loss = criterion(pred.to(device),output.to(device))

        # Calculate gradients 
        loss.backward()

        # Update Weights
        optimizer.step()

        # Calculate Loss
        train_loss = train_loss + loss.item()

    print(f'Epoch {i+1} - Training Loss: {train_loss / len(train_loader)}')

Epoch 1 - Training Loss: 1.0571911428814627
Epoch 2 - Training Loss: 1.0083178714058452
Epoch 3 - Training Loss: 0.9876358370082918
Epoch 4 - Training Loss: 0.9771924934206748
Epoch 5 - Training Loss: 0.9694529743965252


In [44]:
# Testing the final model

net.eval().to(device)

test_loss = 0
    
for data, output in test_loader:
    pred = net(data.to(device))
    loss = criterion(pred.to(device),output.to(device))
    test_loss = test_loss + loss.item()

print(f'Test Loss: {test_loss / len(test_loader)}')

Test Loss: 0.7135797410300251


# **Bonus Exercise - Part 2:**

In [45]:
# defining batches
train_loader = torch.utils.data.DataLoader(all_data, batch_size = 40, shuffle = False, sampler = list(range(0,25570)), num_workers=2, pin_memory=True)
val_loader = torch.utils.data.DataLoader(all_data, batch_size = 20, shuffle = False, sampler = list(range(25570,35406)),num_workers=2,pin_memory=True)
test_loader = torch.utils.data.DataLoader(all_data, batch_size = 20, shuffle = False, sampler = list(range(35406,45406)), num_workers=2,pin_memory=True)

# defining the model
net = ConvNet().to(device)

# defining the optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.001)

# defining the loss function
criterion = torch.nn.MSELoss().to(device)

In [46]:
for i in range(5):
    
    #Training step:
    
    net.train().to(device)
    
    train_loss = 0
    
    for old_data, output in train_loader:
        
        # 5x5 square cutout in the train data
        data = old_data.clone()
        index1 = np.int(np.random.randint(0,66,1))
        index2 = np.int(np.random.randint(0,200,1))
        data[:, :, max(index1-2,0) : min(index1+3,66), max(index2-2,0) : min(index2+3,200)] = torch.zeros(min(old_data.shape[0],40), 3, min(5,index1+3,(65-index1)+3), min(5,index2+3,(199-index2)+3))
        
        # Clear the gradients
        optimizer.zero_grad()
        
        # Forward Pass
        pred = net(data.to(device))
        
        # Find the Loss
        loss = criterion(pred.to(device),output.to(device))
        
        # Calculate gradients 
        loss.backward()
        
        # Update Weights
        optimizer.step()
        
        # Calculate Loss
        train_loss = train_loss + loss.item()
    
    #Validation step:
    
    net.eval().to(device)
    
    val_loss = 0
    
    for data, output in val_loader:

        pred = net(data.to(device))
        loss = criterion(pred.to(device),output.to(device))
        val_loss = val_loss + loss.item()

    print(f'Epoch {i+1} - Training Loss: {train_loss / len(train_loader)} \t Validation Loss: {val_loss / len(val_loader)}')

Epoch 1 - Training Loss: 1.053992056946504 	 Validation Loss: 1.1421603677653462
Epoch 2 - Training Loss: 1.0495558118153667 	 Validation Loss: 1.124252471767974
Epoch 3 - Training Loss: 1.0357076325575576 	 Validation Loss: 1.1064435720897852
Epoch 4 - Training Loss: 1.0227593964508828 	 Validation Loss: 1.0890764149909098
Epoch 5 - Training Loss: 1.0151835768234378 	 Validation Loss: 1.068841765852166


In [47]:
# Testing the final model

net.eval().to(device)

test_loss = 0
    
for data, output in test_loader:
    pred = net(data.to(device))
    loss = criterion(pred.to(device),output.to(device))
    test_loss = test_loss + loss.item()

print(f'Test Loss: {test_loss / len(test_loader)}')

Test Loss: 0.7063281774220741


# **Bonus Exercise - Part 3:**

In [48]:
# defining the model
net = ConvNet().to(device)

# defining the optimizer
optimizer = torch.optim.SGD(net.parameters(), lr=0.001)

# defining the loss function
criterion = torch.nn.MSELoss().to(device)

In [49]:
# defining batches
train_loader1 = torch.utils.data.DataLoader(all_data, batch_size = 40, shuffle = False, sampler = list(range(0,12785)), num_workers=2, pin_memory=True)
train_loader2 = torch.utils.data.DataLoader(all_data, batch_size = 40, shuffle = False, sampler = list(range(12785,25570)), num_workers=2, pin_memory=True)
val_loader = torch.utils.data.DataLoader(all_data, batch_size = 20, shuffle = False, sampler = list(range(25570,35406)),num_workers=2,pin_memory=True)
test_loader = torch.utils.data.DataLoader(all_data, batch_size = 20, shuffle = False, sampler = list(range(35406,45406)), num_workers=2,pin_memory=True)

In [50]:
def cnn_with_mixup(n_epoch, alpha):
    
    for i in range(n_epoch):
    
        #Training step:

        net.train().to(device)

        train_loss = 0

        for (x1, y1), (x2, y2) in zip(train_loader1, train_loader2):

            lam = np.random.beta(alpha, alpha)
            data = lam * x1 + (1. - lam) * x2
            output = lam * y1 + (1. - lam) * y2

            # Clear the gradients
            optimizer.zero_grad()

            # Forward Pass
            pred = net(data.to(device))

            # Find the Loss
            loss = criterion(pred.to(device),output.to(device))

            # Calculate gradients 
            loss.backward()

            # Update Weights
            optimizer.step()

            # Calculate Loss
            train_loss = train_loss + loss.item()

        #Validation step:

        net.eval().to(device)

        val_loss = 0

        for data, output in val_loader:

            pred = net(data.to(device))
            loss = criterion(pred.to(device),output.to(device))
            val_loss = val_loss + loss.item()

        print(f'Epoch {i+1} - Training Loss: {train_loss / len(train_loader1)} \t Validation Loss: {val_loss / len(val_loader)}')
        
    return net, train_loss / len(train_loader1), val_loss / len(val_loader)

In [51]:
net1, final_train_loss, final_val_loss = cnn_with_mixup(5, 0.2)

Epoch 1 - Training Loss: 1.2089003334803237 	 Validation Loss: 1.1546923892642738
Epoch 2 - Training Loss: 0.6386410997775783 	 Validation Loss: 1.1505882738166973
Epoch 3 - Training Loss: 0.7138454163559913 	 Validation Loss: 1.1484468858746597
Epoch 4 - Training Loss: 1.0064464310776657 	 Validation Loss: 1.148122296361047
Epoch 5 - Training Loss: 0.7765527388572877 	 Validation Loss: 1.144294100031698


In [52]:
net2, final_train_loss, final_val_loss = cnn_with_mixup(5, 0.4)

Epoch 1 - Training Loss: 0.5392096665537792 	 Validation Loss: 1.1396616287792112
Epoch 2 - Training Loss: 0.7760286033865669 	 Validation Loss: 1.1357071537585197
Epoch 3 - Training Loss: 0.6529489944172099 	 Validation Loss: 1.1271209725482476
Epoch 4 - Training Loss: 0.5602694084223344 	 Validation Loss: 1.1213893235166679
Epoch 5 - Training Loss: 0.9030237046434195 	 Validation Loss: 1.0945210009631705


In [53]:
# Testing the final model with the second mixup model

net2.eval().to(device)

test_loss = 0
    
for data, output in test_loader:
    pred = net2(data.to(device))
    loss = criterion(pred.to(device),output.to(device))
    test_loss = test_loss + loss.item()

print(f'Test Loss: {test_loss / len(test_loader)}')

Test Loss: 0.6949187648328813


Random search gave the lowest validation error while mixup method gave the fastest result and the lowest test error. Mixup method and cutout method could give better results with better hyperparameter preferences.