# Train the models! 👟

This notebook imports training methods and models from the `src/` directory in order to train the models.

In [1]:
# setup imports
from src.utils import device, check_dir
from src.train import run_train
from src import models
from torchinfo import summary
import torch


print(device)  # check the device (the recorded run shows "cuda")

# As the last expression of the cell, the call's return value (a torch Generator)
# is echoed as the cell output.
# NOTE(review): only torch's RNG is seeded here; if src.train also draws from
# `random` or NumPy, those would need seeding too — confirm.
torch.manual_seed(0)  # make training reproducible

cuda


<torch._C.Generator at 0x1adb8d010f0>

## RNN - Recurrent Neural Network

In [2]:
# Instantiate the RNN model (hidden_size=2048) and show torchinfo's per-layer
# parameter counts as the cell output.
rnn_model = models.RNN(hidden_size=2048)

summary(rnn_model)

Layer (type:depth-idx)                   Param #
RNN                                      --
├─RNN: 1-1                               4,435,968
├─Linear: 1-2                            2,049
├─Sigmoid: 1-3                           --
Total params: 4,438,017
Trainable params: 4,438,017
Non-trainable params: 0

In [None]:
# Train the RNN. The two returned values are bound to throwaway names; no visible
# cell reads them afterwards.
# NOTE(review): check_dir presumably prepares/validates the "rnn" output
# directory — confirm in src.utils.
_m, _h = run_train(
    # --- model-specific settings: keep fixed ---
    model=rnn_model,
    output_path=check_dir("rnn"),
    data_as_sequence=True,
    sequence_len=8,
    # --- hyperparameters: adjust while tuning ---
    init_learning_rate=1e-5,
    weight_decay=1e-5,
    epochs=50,
    # --- enable for final training ---
    data_use_all=True,
)

## LSTM - Long Short-Term Memory

In [2]:
# Instantiate the LSTM model (hidden_size=2048) and show torchinfo's per-layer
# parameter counts as the cell output.
lstm_model = models.LSTM(hidden_size=2048)

summary(lstm_model)

Layer (type:depth-idx)                   Param #
LSTM                                     --
├─LSTM: 1-1                              17,743,872
├─Linear: 1-2                            2,049
├─Sigmoid: 1-3                           --
Total params: 17,745,921
Trainable params: 17,745,921
Non-trainable params: 0

In [None]:
# Train the LSTM. The two returned values are bound to throwaway names; no visible
# cell reads them afterwards.
# NOTE(review): check_dir presumably prepares/validates the "lstm" output
# directory — confirm in src.utils.
_m, _h = run_train(
    # --- model-specific settings: keep fixed ---
    model=lstm_model,
    output_path=check_dir("lstm"),
    data_as_sequence=True,
    sequence_len=8,
    # --- hyperparameters: adjust while tuning ---
    init_learning_rate=1e-4,
    weight_decay=1e-5,
    epochs=50,
    # --- enable for final training ---
    data_use_all=True,
)

## GRU - Gated Recurrent Unit

In [4]:
# Instantiate the GRU model (hidden_size=2048) and show torchinfo's per-layer
# parameter counts as the cell output.
gru_model = models.GRU(hidden_size=2048)

summary(gru_model)

Layer (type:depth-idx)                   Param #
GRU                                      --
├─GRU: 1-1                               13,307,904
├─Linear: 1-2                            2,049
├─Sigmoid: 1-3                           --
Total params: 13,309,953
Trainable params: 13,309,953
Non-trainable params: 0

In [None]:
# Train the GRU. The two returned values are bound to throwaway names; no visible
# cell reads them afterwards.
# NOTE(review): check_dir presumably prepares/validates the "gru" output
# directory — confirm in src.utils.
_m, _h = run_train(
    # --- model-specific settings: keep fixed ---
    model=gru_model,
    output_path=check_dir("gru"),
    data_as_sequence=True,
    sequence_len=8,
    # --- hyperparameters: adjust while tuning ---
    init_learning_rate=1e-4,
    weight_decay=1e-5,
    epochs=50,
    # --- enable for final training ---
    data_use_all=True,
)

## TCN - Temporal Convolutional Network

In [2]:
# Instantiate the TCN model with channels [256, 128, 64] and show torchinfo's
# per-layer parameter counts as the cell output.
tcn_model = models.TCN(channels=[256, 128, 64])

summary(tcn_model)

Layer (type:depth-idx)                                  Param #
TCN                                                     --
├─TCN: 1-1                                              --
│    └─ModuleList: 2-1                                  --
│    │    └─TemporalBlock: 3-1                          411,904
│    │    └─TemporalBlock: 3-2                          230,016
│    │    └─TemporalBlock: 3-3                          57,664
│    └─Conv1d: 2-2                                      65
│    └─Sigmoid: 2-3                                     --
Total params: 699,649
Trainable params: 699,649
Non-trainable params: 0

In [None]:
# Train the TCN. The two returned values are bound to throwaway names; no visible
# cell reads them afterwards. Unlike the RNN/LSTM/GRU cells, no weight_decay is
# passed here, so run_train's own default applies.
# NOTE(review): check_dir presumably prepares/validates the "tcn" output
# directory — confirm in src.utils.
_m, _h = run_train(
    # --- model-specific settings: keep fixed ---
    model=tcn_model,
    output_path=check_dir("tcn"),
    data_as_sequence=True,
    sequence_len=8,
    # --- hyperparameters: adjust while tuning ---
    init_learning_rate=1e-4 / 2,  # half of the 1e-4 used in the LSTM/GRU cells
    epochs=50,
    # --- enable for final training ---
    data_use_all=True,
)

## TE - Transformer Encoder

In [2]:
# Instantiate the TE model (hidden_size=3072, dropout=0.2) and show torchinfo's
# per-layer parameter counts as the cell output.
te_model = models.TE(hidden_size=3072, dropout=0.2)

summary(te_model)

Layer (type:depth-idx)                                  Param #
TE                                                      --
├─TransformerEncoderLayer: 1-1                          --
│    └─MultiheadAttention: 2-1                          2,586,336
│    │    └─NonDynamicallyQuantizableLinear: 3-1        862,112
│    └─Linear: 2-2                                      2,853,888
│    └─Dropout: 2-3                                     --
│    └─Linear: 2-4                                      2,851,744
│    └─LayerNorm: 2-5                                   1,856
│    └─LayerNorm: 2-6                                   1,856
│    └─Dropout: 2-7                                     --
│    └─Dropout: 2-8                                     --
├─Linear: 1-2                                           929
├─Sigmoid: 1-3                                          --
Total params: 9,158,721
Trainable params: 9,158,721
Non-trainable params: 0

In [None]:
# Train the TE model. The two returned values are bound to throwaway names; no
# visible cell reads them afterwards. This is the only training cell that passes
# data_as_sequence=False, and (like the TCN cell) it passes no weight_decay.
# NOTE(review): check_dir presumably prepares/validates the "te" output
# directory — confirm in src.utils.
_m, _h = run_train(
    # --- model-specific settings: keep fixed ---
    model=te_model,
    output_path=check_dir("te"),
    data_as_sequence=False,
    sequence_len=8,
    # --- hyperparameters: adjust while tuning ---
    init_learning_rate=1e-5,
    epochs=50,
    # --- enable for final training ---
    data_use_all=True,
)