**Task 1**

In [1]:
import torch
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
import glob
import PIL.Image
import os
import numpy as np
from google.colab import drive

In [3]:
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
data_path = '/content/drive/MyDrive/dataset/apex'

Feature Extraction

In [5]:
def get_x(path, width):
    "Gets the x value from the image filename"
    return (float(int(path.split("_")[0])) - width / 2) / (width / 2)


def get_y(path, height):
    "Gets the y value from the image filename"
    return (float(int(path.split("_")[1])) - height / 2) / (height / 2)

Data Augmentation

In [6]:
class XYDataset(torch.utils.data.Dataset):

    def __init__(self, directory, random_hflips=False):
        self.directory = directory
        self.random_hflips = random_hflips
        self.image_paths = glob.glob(os.path.join(self.directory, '*.jpg'))
        self.color_jitter = transforms.ColorJitter(0.3, 0.3, 0.3, 0.3)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]

        image = PIL.Image.open(image_path)
        width, height = image.size
        x = float(get_x(os.path.basename(image_path), width))
        y = float(get_y(os.path.basename(image_path), height))

        if float(np.random.rand(1)) > 0.5:
            image = transforms.functional.hflip(image)
            x = -x

        image = self.color_jitter(image)
        image = transforms.functional.resize(image, (224, 224))
        image = transforms.functional.to_tensor(image)
        image = image.numpy()[::-1].copy()
        image = torch.from_numpy(image)
        image = transforms.functional.normalize(image, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

        return image, torch.tensor([x, y]).float()

Train-Test Split

In [7]:
dataset = XYDataset(data_path, random_hflips=False)


test_percent = 0.1
num_test = int(test_percent * len(dataset))
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [len(dataset) - num_test, num_test])

In [8]:
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0
)

Load Base Model

In [9]:
model = models.resnet18(weights='ResNet18_Weights.DEFAULT')

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 114MB/s]


In [10]:
model.fc = torch.nn.Linear(512, 2)
device = torch.device('cuda')
model = model.to(device)

NUM_EPOCHS = 50
BEST_MODEL_PATH = '/best_steering_model_xy.pth'
best_loss = 1e9

In [12]:
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9, weight_decay=0.0005)

for epoch in range(NUM_EPOCHS):

    model.train()
    train_loss = 0.0
    for images, labels in iter(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = F.mse_loss(outputs, labels)
        train_loss += float(loss)
        loss.backward()
        optimizer.step()
    train_loss /= len(train_loader)

    model.eval().to(device)
    test_loss = 0.0
    for images, labels in iter(test_loader):
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        loss = F.mse_loss(outputs, labels)
        test_loss += float(loss)
    test_loss /= len(test_loader)

    print(f'Epoch: {epoch+1}, Train Loss: {train_loss:.6f}, Test Loss: {test_loss:.6f}')
    if test_loss < best_loss:
        torch.save(model.state_dict(), BEST_MODEL_PATH)
        best_loss = test_loss

  if float(np.random.rand(1)) > 0.5:


Epoch: 1, Train Loss: 0.090614, Test Loss: 0.124570
Epoch: 2, Train Loss: 0.063452, Test Loss: 0.046816
Epoch: 3, Train Loss: 0.060435, Test Loss: 0.044821
Epoch: 4, Train Loss: 0.054387, Test Loss: 0.047325
Epoch: 5, Train Loss: 0.050689, Test Loss: 0.022985
Epoch: 6, Train Loss: 0.047439, Test Loss: 0.019307
Epoch: 7, Train Loss: 0.052934, Test Loss: 0.049293
Epoch: 8, Train Loss: 0.043158, Test Loss: 0.040233
Epoch: 9, Train Loss: 0.045555, Test Loss: 0.028758
Epoch: 10, Train Loss: 0.040277, Test Loss: 0.070047
Epoch: 11, Train Loss: 0.041903, Test Loss: 0.073603
Epoch: 12, Train Loss: 0.037127, Test Loss: 0.027684
Epoch: 13, Train Loss: 0.032783, Test Loss: 0.050832
Epoch: 14, Train Loss: 0.032636, Test Loss: 0.048398
Epoch: 15, Train Loss: 0.031210, Test Loss: 0.025205
Epoch: 16, Train Loss: 0.029074, Test Loss: 0.016974
Epoch: 17, Train Loss: 0.023225, Test Loss: 0.020892
Epoch: 18, Train Loss: 0.031718, Test Loss: 0.017106
Epoch: 19, Train Loss: 0.024615, Test Loss: 0.017229
Ep