# Transformer Application

## Configuration

### Hyperparameters

In [None]:
import torch.nn as nn

# --- Training hyperparameters ---
# Number of samples per mini-batch (used as `batch_size` by the DataLoaders).
BATCH_SIZE = 32
# Step size used as `lr` by the SGD optimizer.
LEARNING_RATE = 1e-2

# --- Transformer architecture ---
# TODO: should be larger
NUM_HEADS = 1
# Encoder and decoder stacks currently use the same depth.
NUM_ENCODER_LAYERS = NUM_DECODER_LAYERS = 8
DROPOUT = 0.1

# --- Training methods ---
# Optimizer: SGD.
# TODO: replace with a manual loss
loss_function = nn.MSELoss()

### Constants

In [None]:
# --- Constants ---
# Feature dimension of the data; handed to the model as `dim_model`.
# TODO: 269 is a prime number (its only divisors are 1 and itself), which
# is infeasible for multi-head attention with more than one head.
FEATURE_DIM = 269

## Load Prepared Tensors from Disk
Run the notebook `prototype_dataset.ipynb` first; the tensor files loaded below must already exist on disk.

In [None]:
import torch

# Load the prepared input/output sequence tensors for the train and test
# splits from disk.
#
# map_location='cpu' keeps the load working on CPU-only machines even when
# the tensors were serialized from a CUDA device; without it torch.load
# tries to restore them onto the device they were saved from.
# NOTE(review): this assumes the training loop moves each batch to the
# target device (it receives `device` as an argument) -- confirm.
train_sequence_input = torch.load(
    'data/prototype_dataset/train_sequence_input.pt', map_location='cpu')
train_sequence_output = torch.load(
    'data/prototype_dataset/train_sequence_output.pt', map_location='cpu')
test_sequence_input = torch.load(
    'data/prototype_dataset/test_sequence_input.pt', map_location='cpu')
test_sequence_output = torch.load(
    'data/prototype_dataset/test_sequence_output.pt', map_location='cpu')

## Build Dataloader with Batches

In [None]:
from torch.utils.data import DataLoader, TensorDataset

# Training batches: (input, output) pairs cast to float32, shuffled, with a
# trailing batch smaller than BATCH_SIZE dropped.
train_dataloader = DataLoader(
    TensorDataset(
        train_sequence_input.float(),
        train_sequence_output.float(),
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
)

# Validation batches: same pairing and cast, but kept in their stored order.
# NOTE(review): drop_last=True also discards a trailing partial batch here,
# so up to BATCH_SIZE - 1 test samples are never evaluated -- confirm this
# is intended.
val_dataloader = DataLoader(
    TensorDataset(
        test_sequence_input.float(),
        test_sequence_output.float(),
    ),
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=True,
)

## Initialize

In [None]:
# Switch on autograd anomaly detection globally for this session.
# NOTE: this is a debugging aid and adds overhead to every backward pass;
# consider turning it off for long training runs.
torch.autograd.set_detect_anomaly(mode=True)

# Bare expression so the notebook shows the installed PyTorch version as
# the cell's output.
torch.__version__

In [None]:
# Use the first CUDA GPU when one is available; otherwise fall back to the
# CPU. (To force CPU execution, assign "cpu" to `device` instead.)
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [None]:
from torch import nn
from AnimationTransformer import AnimationTransformer

# Build the transformer from the hyperparameters configured above, then
# move it to the selected device.
model = AnimationTransformer(
    dim_model=FEATURE_DIM,
    num_heads=NUM_HEADS,
    num_encoder_layers=NUM_ENCODER_LAYERS,
    num_decoder_layers=NUM_DECODER_LAYERS,
    dropout_p=DROPOUT,
)
model = model.to(device)

# Plain SGD over all model parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=LEARNING_RATE)

## Training

In [ ]:
from AnimationTransformer import fit

# Train for 5 epochs on the selected device.
# `fit` returns a pair that is unpacked into the two names below
# (presumably the training and validation loss histories -- confirm against
# AnimationTransformer.fit).
train_loss_list, validation_loss_list = fit(
    model,
    optimizer,
    loss_function,
    train_dataloader,
    val_dataloader,
    epochs=5,
    device=device,
)