# SSY340 Project - Trajectory Estimation

### Fill in group number and member names:

In [30]:
# Group members and the project-group identifier for this submission.
NAME1, NAME2 = "Bingcheng Chen", "Arvin Rokni"
GROUP = "Project groups 64"

## 1. Loading the data

In [31]:
import model_cbc
import matplotlib.pyplot as plt
import torch.nn.functional as F
import torch
from torch import nn
import numpy as np
from tqdm import tqdm 
import dataloader
from torch.utils.data import DataLoader

## 2. Pre-processing

In [32]:
# --- where trained models are saved ---
save_location = "./models"

# --- where the dataset lives ---
dataset_folder = "./trajectory-prediction-transformers/datasets"
dataset_name = "raw"

# validation size handed to the dataset builder; with val_size = 0 the split is 80-20
val_size = 0

# number of time steps in the sequence given to the encoder
gt = 8
# number of time steps in the sequence given to the decoder
horizon = 12

# Build the training and validation datasets from the tab-delimited files.
# The second value returned by create_dataset is not used in this notebook.
train_dataset, _ = dataloader.create_dataset(
    dataset_folder, dataset_name, val_size, gt, horizon, delim="\t", train=True
)
val_dataset, _ = dataloader.create_dataset(
    dataset_folder, dataset_name, val_size, gt, horizon, delim="\t", train=False
)

# Test dataset (currently disabled):
# test_dataset, _ = dataloader.create_dataset(
#     dataset_folder, dataset_name, val_size, gt, horizon, delim="\t", train=False, eval=True
# )

In [33]:
# Inspect one validation sample: a dict containing (at least) 'src' and 'trg' tensors.
val_dataset[10]

{'src': tensor([[ 4.9693e+00,  8.3395e+00,  0.0000e+00,  0.0000e+00],
         [ 5.1330e+00,  8.3350e+00,  1.6374e-01, -4.5338e-03],
         [ 5.2968e+00,  8.3307e+00,  1.6374e-01, -4.2963e-03],
         [ 5.3915e+00,  8.3490e+00,  9.4709e-02,  1.8377e-02],
         [ 5.4565e+00,  8.3774e+00,  6.5034e-02,  2.8400e-02],
         [ 5.5218e+00,  8.4058e+00,  6.5244e-02,  2.8400e-02],
         [ 5.5868e+00,  8.4342e+00,  6.5033e-02,  2.8400e-02],
         [ 5.5988e+00,  8.4497e+00,  1.1997e-02,  1.5512e-02]]),
 'trg': tensor([[ 5.5575e+00,  8.4521e+00, -4.1251e-02,  2.3870e-03],
         [ 5.5163e+00,  8.4548e+00, -4.1251e-02,  2.6255e-03],
         [ 5.4750e+00,  8.4571e+00, -4.1251e-02,  2.3861e-03],
         [ 5.4340e+00,  8.4595e+00, -4.1041e-02,  2.3870e-03],
         [ 5.3927e+00,  8.4622e+00, -4.1251e-02,  2.6245e-03],
         [ 5.3515e+00,  8.4645e+00, -4.1251e-02,  2.3870e-03],
         [ 5.3102e+00,  8.4669e+00, -4.1251e-02,  2.3861e-03],
         [ 5.2690e+00,  8.4696e+00, -4.

In [34]:
# Shape of the encoder input ('src') for a single sample.
val_dataset[10]['src'].shape

torch.Size([8, 4])

## 3. Data loaders

In [35]:
# defining batch size
batch_size = 64

# creating torch dataloaders
# Training batches are reshuffled every epoch. The validation loader keeps a
# fixed order: evaluation needs no shuffling, and a stable order makes repeated
# validation passes iterate the data identically.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

## 4. Training

#### 4.1 Create a model

In [36]:
# Encoder and decoder both take 4 features per time step, matching the
# 4-column 'src' / 'trg' tensors produced by the dataset.
transformer_model = model_cbc.Transformer(
    encoder_input_size=4,
    decoder_input_size=4,
    embedding_size=512,
    num_heads=8,
    num_layers=6,
    feedforward_size=2048,
)

In [37]:
# Display the module structure of the freshly created model.
transformer_model

Transformer(
  (encoder_embedding): Embedding(4, 512)
  (decoder_embedding): Embedding(4, 512)
  (positional_encoding): PositionalEncoding()
  (encoder_layers): ModuleList(
    (0-5): 6 x EncoderLayer(
      (self_attn): MultiHeadAttention(
        (W_q): Linear(in_features=512, out_features=512, bias=True)
        (W_k): Linear(in_features=512, out_features=512, bias=True)
        (W_v): Linear(in_features=512, out_features=512, bias=True)
        (W_o): Linear(in_features=512, out_features=512, bias=True)
      )
      (feed_forward): PositionWiseFeedForward(
        (fc1): Linear(in_features=512, out_features=2048, bias=True)
        (fc2): Linear(in_features=2048, out_features=512, bias=True)
        (relu): ReLU()
      )
      (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
  )
  (decoder_layers): ModuleList(
    (0-5): 6 x DecoderLayer(
      (

#### 4.2 The training loop

In [38]:
def train_epoch(model, optimizer, loss_fn, train_loader, val_loader, device, print_every):
    """Train `model` for one pass over `train_loader`.

    Args:
        model: Module called as ``model(encoder_input, decoder_input)``.
        optimizer: Optimizer over the parameters of `model`.
        loss_fn: Callable ``loss_fn(predictions, target)`` returning a scalar tensor.
        train_loader: Iterable of batches; each batch is a mapping with
            ``'src'`` and ``'trg'`` tensors.
        val_loader: Loader handed to `validate` for the periodic status print.
        device: Device the batches are moved to before the forward pass.
        print_every: Number of batches between status updates. ``None`` (or 0)
            disables them. Each update runs a full validation pass, which
            slows training down.

    Returns:
        Tuple ``(model, train_loss_batches)`` where `train_loss_batches` holds
        one float loss per processed batch.
    """
    model.train()
    train_loss_batches = []
    num_batches = len(train_loader)

    for batch_index, data in enumerate(train_loader):
        # The batches come from the loader on the CPU; the model may live elsewhere.
        encoder_input = data['src'].to(device)
        target = data['trg'].to(device)

        # Teacher forcing with a one-step shift: the decoder sees the last
        # observed step followed by target[0..T-2] and has to predict
        # target[0..T-1]. Feeding the unshifted target (and scoring against it)
        # would let the decoder copy its own input.
        # NOTE(review): this still relies on the decoder applying a causal mask
        # inside `model` — confirm in model_cbc.
        decoder_input = torch.cat((encoder_input[:, -1:, :], target[:, :-1, :]), dim=1)

        optimizer.zero_grad()
        # Use the `model` argument, not a notebook-level global.
        predictions = model(encoder_input, decoder_input)
        loss = loss_fn(predictions, target)

        loss.backward()
        optimizer.step()
        train_loss_batches.append(loss.item())

        # If you want to print your progress more often than every epoch you can
        # set `print_every` to the number of batches you want between every status update.
        # Note that the print out will trigger a full validation on the full val. set => slows down training
        if print_every and batch_index % print_every == 0:
            # `validate` returns a (mad, fad) pair, not a single loss value.
            val_mad, val_fad = validate(model, loss_fn, val_loader, device)
            model.train()  # validate() switches the model to eval mode
            # Average over the losses actually available (only one at batch 0).
            recent_losses = train_loss_batches[-print_every:]
            print(f"\tBatch {batch_index}/{num_batches}: "
                  f"\tTrain loss: {sum(recent_losses)/len(recent_losses):.3f}, "
                  f"\tVal. mad: {val_mad:.3f}, "
                  f"\tVal. fad: {val_fad:.3f}")

    return model, train_loss_batches

def validate(model, loss_fn, val_loader, device):
    """Autoregressively predict every validation batch and score it.

    For each batch the decoder is seeded with the last observed step and then
    extended one prediction at a time until it covers the target length. The
    predicted and ground-truth positions of all batches are collected and
    passed to ``dataloader.distance_metrics``.

    Args:
        model: Module called as ``model(encoder_input, decoder_input)``.
        loss_fn: Unused; kept so existing callers keep working.
        val_loader: Iterable of batches; each batch is a mapping with
            ``'src'`` and ``'trg'`` tensors.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mad, fad)`` as returned by ``dataloader.distance_metrics``.

    Raises:
        ValueError: If `val_loader` yields no batches.
    """
    gt = []
    pr = []
    model.eval()

    with torch.no_grad():
        for data in val_loader:
            # input to encoder
            val_input = data['src'].to(device)
            target = data['trg'].to(device)
            # Predict as many steps as the target holds instead of a fixed 12.
            horizon = target.shape[1]

            # Seed the decoder with the last observed step rather than with the
            # ground-truth target, which would leak the answer into the model.
            dec_inp = val_input[:, -1:, :]

            # prediction till horizon length
            for _ in range(horizon):
                # getting model prediction
                model_output = model(val_input, dec_inp)
                # appending the prediction to decoder input for next cycle
                dec_inp = torch.cat((dec_inp, model_output[:, -1:, :]), 1)

            # Drop the seed step; what remains are the `horizon` predictions.
            predictions = dec_inp[:, 1:, :]

            # assumes columns 0:2 hold the (x, y) positions and 2:4 the per-step
            # displacements, as the sample printed in this notebook suggests —
            # TODO confirm against dataloader.create_dataset
            gt.append(target[:, :, 0:2].cpu().numpy())
            pr.append(predictions[:, :, 0:2].cpu().numpy())

    if not gt:
        raise ValueError("val_loader produced no batches; cannot compute validation metrics")

    # calculating mad and fad evaluation metrics
    # presumably distance_metrics expects (samples, steps, 2) arrays — verify in dataloader
    gt = np.concatenate(gt, 0)
    pr = np.concatenate(pr, 0)
    mad, fad, _ = dataloader.distance_metrics(gt, pr)

    return mad, fad

def training_loop(model, optimizer, loss_fn, train_loader, val_loader, num_epochs, print_every):
    """Train `model` for `num_epochs` epochs, validating after each one.

    Args:
        model: Model to train; moved to CUDA when available, otherwise CPU.
        optimizer: Optimizer over the parameters of `model`.
        loss_fn: Training loss, forwarded to `train_epoch` and `validate`.
        train_loader: Loader with the training batches.
        val_loader: Loader with the validation batches.
        num_epochs: Number of passes over `train_loader`.
        print_every: Batches between in-epoch status updates (``None`` disables them).

    Returns:
        Tuple ``(model, train_losses, val_mads, val_fads)``: the trained model,
        the per-batch training losses of all epochs, and one validation
        mad / fad value per epoch.
    """
    print("Starting training")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    # History lists get their own names so the per-epoch scalars below cannot
    # overwrite them (appending to a float would fail).
    train_losses, val_mads, val_fads = [], [], []

    for epoch in range(1, num_epochs+1):
        model, train_loss = train_epoch(model, optimizer, loss_fn, train_loader, val_loader, device, print_every)
        epoch_mad, epoch_fad = validate(model, loss_fn, val_loader, device)
        # An empty training loader yields no losses; report NaN instead of dividing by zero.
        mean_train_loss = sum(train_loss)/len(train_loss) if train_loss else float("nan")
        print(f"Epoch {epoch}/{num_epochs}: "
              f"Train loss: {mean_train_loss:.3f}, "
              f"Val. mad: {epoch_mad:.3f}, "
              f"Val. fad.: {epoch_fad:.3f}")
        train_losses.extend(train_loss)
        val_mads.append(epoch_mad)
        val_fads.append(epoch_fad)

    return model, train_losses, val_mads, val_fads

#### 4.3 Train the model

In [39]:
# Loss used for training.
loss_fn = nn.MSELoss()

# A single epoch for now.
num_epochs = 1

# SGD with Nesterov momentum and weight decay.
optimizer = torch.optim.SGD(
    transformer_model.parameters(),
    lr=1e-4,
    momentum=0.9,
    weight_decay=1e-3,
    nesterov=True,
)

# Run training with a status update every 10 batches.
first_model, first_train_losses, first_val_mad, first_val_fad = training_loop(
    transformer_model,
    optimizer,
    loss_fn,
    train_loader,
    val_loader,
    num_epochs=num_epochs,
    print_every=10,
)


Starting training


RuntimeError: The size of tensor a (4) must match the size of tensor b (12) at non-singleton dimension 4

#### 4.4 Save the model

## 5. Visualisation

## 6. Evaluation on test set