# Changelog
1. 02.06, Jacek - add dataset, dataloader, model prototype, training frames
2. 03.06, Jacek - fix seed(s), add train-test split, full train-loop, resnet18

In [None]:
import os
import re
from copy import deepcopy
from typing import List, Tuple

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from PIL import Image
from sklearn.model_selection import train_test_split

In [None]:
# GLOBALS

# Dataset location: every collaborator keeps a line here and uncomments their own.
# DATASET_PATH = ... # Ania
# DATASET_PATH = ... # Witek
DATASET_PATH = 'dataset/'  # Jacek

# data handling
TRAIN_SET_SIZE = 0.8  # share of the samples that goes to the training split
NUM_WORKERS = 8  # on Google Colab this shouldn't exceed 2
BATCH_SIZE = 64

# optimisation
LEARNING_RATE = 1e-3
EPOCHS = 100

In [None]:
# Seed PyTorch first, then NumPy, with the same fixed value so reruns draw
# the same random numbers.
for _seed_rng in (torch.manual_seed, np.random.seed):
    _seed_rng(42)

In [None]:
class AugmentedDataset(torch.utils.data.Dataset):
    """Image-regression dataset built from recording folders and CSV label files.

    Every sub-directory ``<name>/`` of ``path`` is treated as one recording and
    must have a sibling label file ``<name>.csv``.  The CSV is read without a
    header row, with its first column as the index (the photo id) and the
    remaining columns named ``speed`` and ``turn``.  An image is matched to its
    label row through the first run of digits found in its file name.
    """

    # The first run of digits in a file name is taken as the photo id.
    _PHOTO_ID_PATTERN = re.compile(r'\d+')

    def __init__(self, path: str) -> None:
        """Scan ``path`` and build the list of ``(image path, target)`` samples.

        Raises:
            FileNotFoundError: if ``path`` or the CSV of a recording is missing.
        """
        self.path = path
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split of it) reproducible.
        self.dirs = sorted(d for d in os.listdir(self.path) if os.path.isdir(os.path.join(self.path, d)))
        self.items = self._get_file_target_map()

    def _get_file_target_map(self) -> List[Tuple[str, Tuple[float, float]]]:
        """Pair every labelled image with its ``(speed, turn)`` target.

        Files whose name contains no digits, or whose id has no row in the
        recording's CSV, are skipped.

        Returns:
            A list of ``(file path, (speed, turn))`` tuples, ordered by
            directory name and then by file name.
        """
        result = []
        for dir_name in self.dirs:
            labels = pd.read_csv(os.path.join(self.path, f'{dir_name}.csv'), header=None, index_col=0,
                                 names=['speed', 'turn'])
            for file_name in sorted(os.listdir(os.path.join(self.path, dir_name))):
                match = self._PHOTO_ID_PATTERN.search(file_name)
                if match is None:
                    # e.g. hidden/system files without any id in their name
                    continue
                photo_id = int(match.group())
                if photo_id in labels.index:
                    target = labels.loc[photo_id]
                    file_path = os.path.join(self.path, dir_name, file_name)
                    # plain floats, so the stored targets match the annotation
                    result.append((file_path, (float(target['speed']), float(target['turn']))))
        return result

    def __len__(self) -> int:
        """Return the number of labelled samples."""
        return len(self.items)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """Load sample ``idx`` from disk.

        Returns:
            ``(img, target)``: the decoded image as a float32 tensor in the
            layout produced by ``np.asarray`` on the PIL image (presumably
            height x width x channels -- confirm against the data), and a
            float32 tensor holding ``(speed, turn)``.
        """
        path, target = self.items[idx]
        # The context manager closes the file handle once the pixels are read.
        with Image.open(path) as image:
            pixels = np.asarray(image)
        img = torch.tensor(pixels, dtype=torch.float32)
        target = torch.tensor(target, dtype=torch.float32)
        return img, target

In [None]:
# Index the whole dataset once, then split its sample list into two parts.
dataset = AugmentedDataset(DATASET_PATH)
train_items, test_items = train_test_split(
    dataset.items,
    train_size=TRAIN_SET_SIZE,
    shuffle=True,
    random_state=42,
)

# Each split is a copy of the full dataset object whose sample list is
# replaced by the corresponding part.
train_dataset = deepcopy(dataset)
test_dataset = deepcopy(dataset)
train_dataset.items = train_items
test_dataset.items = test_items

In [None]:
# Training batches are reshuffled every epoch.
train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
# Evaluation does not benefit from shuffling; a fixed order keeps the batches
# (and thus the reported test loss) identical from epoch to epoch.
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

In [None]:
# Inspect one sample taken straight from the dataset (no batching).
x, y = train_dataset[0]
print(
    f'x: {type(x)}, {x.shape}',
    f'y: {type(y)}, {y.shape}',
    sep='\n',
)

In [None]:
# Inspect the first batch produced by the training loader.
x, y = next(iter(train))
print(
    f'x: {type(x)}, {x.shape}',
    f'y: {type(y)}, {y.shape}',
    sep='\n',
)

In [None]:
class Model(nn.Module):
    """ResNet-18 backbone with a two-output linear head.

    The two outputs presumably correspond to the dataset's ``(speed, turn)``
    targets -- confirm against the training code.
    """

    def __init__(self) -> None:
        """Fetch an untrained ResNet-18 through ``torch.hub`` and swap its head."""
        super().__init__()
        # pretrained=False: the backbone starts from random weights.
        self.resnet18 = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=False)
        # Take the head's input width from the layer being replaced rather
        # than hard-coding it, so the two cannot drift apart.
        head_in_features = self.resnet18.fc.in_features
        self.resnet18.fc = nn.Linear(in_features=head_in_features, out_features=2, bias=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on channels-last input.

        Args:
            x: tensor whose last dimension is moved to position -3 before the
                backbone is applied (i.e. ``(..., H, W, C)`` -> ``(..., C, H, W)``).

        Returns:
            The backbone's output for the rearranged input.
        """
        x = torch.movedim(x, -1, -3)
        x = self.resnet18(x)
        return x

In [None]:
# Use the GPU when one is available; otherwise stay on the CPU instead of
# failing inside an unconditional .cuda() call.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Model().to(device)
model = torch.jit.script(model)
# Smoke test: push one training batch through the scripted model and show
# the shape of its output.
model(next(iter(train))[0].to(device)).shape

In [None]:
# Mean-squared-error objective, minimised with Adam over all model parameters.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [None]:
# Move batches to wherever the model's parameters live instead of assuming CUDA.
device = next(model.parameters()).device

for epoch in range(EPOCHS):
    print(f'epoch {epoch}')
    # Per-batch losses of the current epoch, plus the matching batch sizes so
    # the epoch average can be weighted by the number of samples.
    train_loss_history, test_loss_history = [], []
    train_batch_sizes, test_batch_sizes = [], []

    model.train()
    for i, (x_train, y_train) in enumerate(train):
        print(i)
        x_train = x_train.to(device)
        y_train = y_train.to(device)
        optimizer.zero_grad()
        y_hat = model(x_train)
        loss = criterion(y_hat, y_train)
        loss.backward()
        optimizer.step()
        train_loss_history.append(loss.item())
        train_batch_sizes.append(len(x_train))
    # The last batch may be smaller than the others; a plain mean of batch
    # means would over-weight it, so weight every batch by its size.
    train_loss = sum(l * n for l, n in zip(train_loss_history, train_batch_sizes)) / sum(train_batch_sizes)
    print(f'train loss: {train_loss}')

    model.eval()
    with torch.no_grad():
        for x_test, y_test in test:
            x_test = x_test.to(device)
            y_test = y_test.to(device)
            y_hat = model(x_test)
            loss = criterion(y_hat, y_test)
            test_loss_history.append(loss.item())
            test_batch_sizes.append(len(x_test))
    test_loss = sum(l * n for l, n in zip(test_loss_history, test_batch_sizes)) / sum(test_batch_sizes)
    print(f'test loss: {test_loss}')