In [1]:
import os
# Must be set before the numeric/plotting libraries below are imported.
# NOTE(review): KMP_DUPLICATE_LIB_OK=TRUE is presumably a workaround for the
# OpenMP "duplicate runtime already initialized" abort that can occur when
# several packages each bundle their own OpenMP library — confirm it is still
# needed in the current environment, since it hides the warning rather than
# fixing the duplicate.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import numpy as np
import matplotlib.pyplot as plt

import sys
# Add the directory two levels above the working directory to the import
# path so that the `src.*` imports below can be resolved. The path is
# relative, so this depends on the directory the kernel was started in.
sys.path.append("../../")


from src.DatasetManager import DatasetConvertor 
from src.context_free.config import ModelConfig, TrainingConfig, DatasetConfig
from src.context_free.preprocessing import PreparingDataset
from src.context_free.models import createModel
from src.context_free.training import trainModel
from src.context_free.evaluation import evaluateModel

In [2]:
# --- Experiment settings ---------------------------------------------------
# `direction` selects the dataset variant; it is also used as a key into
# `mapping` and as part of the checkpoint file name.
direction = "forward"
# Passed as `len_window` / `len_source` to DatasetConfig.initialize.
lenWindow, lenSource = 20, 10
# Forwarded to trainModel(..., verbose=verbose).
verbose = True

# --- Locations (relative to the notebook's working directory) --------------
rawDatasetFolder = "../../data/raw"
model_folder = "../../data/models/context_free"

# Direction name -> short suffix used to build the data-unit name
# (f"thumb_{mapping[direction]}").
mapping = dict(forward="fr", backward="bk")

# Keyword-argument sets for DatasetConvertor.processDataset; the notebook
# currently unpacks only the first entry.
dpdr_params = [
    dict(dbParameter=0.01, alpha=0.01, mode="fixed"),
]

In [None]:
# --- 1. Build the dataset ----------------------------------------------------
# Convert the raw recordings for the chosen direction and pick the data unit
# named "thumb_<suffix>", where the suffix comes from `mapping`.
datasetConverter = DatasetConvertor(rawDatasetFolder)
datasetConverter.processDataset(direction=direction, **dpdr_params[0])
thumbDataUnit = datasetConverter.getDataUnit(f"thumb_{mapping[direction]}")

# Training loaders are built with data augmentation switched on.
datasetConfig = DatasetConfig.initialize(len_window=lenWindow, len_source=lenSource, data_augment=True)
trainData, testData = PreparingDataset(thumbDataUnit, datasetConfig, verbose=False)

# --- 2. Train and checkpoint -------------------------------------------------
# Single source of truth for the checkpoint location (used for save AND load).
checkpointPath = f"{model_folder}/direction_{direction}_lenWindow_{lenWindow}.pth"
# Create the target folder up front so a missing directory is caught before
# the training run rather than after it.
os.makedirs(model_folder, exist_ok=True)

# Model and training both use the default configuration objects; nothing is
# overridden here. Pass arguments to ModelConfig / TrainingConfig to change
# model size or training load.
modelConfig = ModelConfig()
model, device = createModel(modelConfig)

trainingConfig = TrainingConfig()
model, avg_train_loss_history, avg_test_loss_history = trainModel(
    model, trainData, testData, trainingConfig, verbose=verbose
)
model.save_checkpoint(checkpointPath)

# --- 3. Reload and evaluate --------------------------------------------------
# Evaluation uses a freshly created model restored from the checkpoint and a
# test loader rebuilt WITHOUT augmentation.
datasetConfig = DatasetConfig.initialize(
    len_window=lenWindow, len_source=lenSource, data_augment=False)
model, device = createModel(modelConfig)
model.load_checkpoint(checkpointPath)
_, testData = PreparingDataset(thumbDataUnit, datasetConfig, verbose=False)
actual, predicted = evaluateModel(model, testData)

# --- 4. Plot the first 100 samples -------------------------------------------
# Labelled curves so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(actual[0:100], label="actual")
ax.plot(predicted[0:100], label="predicted")
ax.set_xlabel("sample index")
ax.set_title(f"Actual vs. predicted ({direction}, lenWindow={lenWindow})")
ax.legend()
plt.show()

Forward: Compression rate: 0.2231
Forward: Compression rate: 0.2456
Forward: Compression rate: 0.2491
Size of train loader: 8, Size of test loader: 6
Size of model: 1324289
Seq2Seq(
  (encoder): Encoder(
    (lstm_layers): ModuleList(
      (0): LSTM(1, 128, bidirectional=True)
      (1): LSTM(256, 128, bidirectional=True)
    )
    (ln_layers): ModuleList(
      (0-1): 2 x LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    )
    (residual_projections): ModuleList(
      (0): Linear(in_features=1, out_features=256, bias=True)
      (1): Identity()
    )
  )
  (decoder): Decoder(
    (lstm_layers): ModuleList(
      (0): LSTM(1, 256)
      (1): LSTM(256, 256)
    )
    (ln_layers): ModuleList(
      (0-1): 2 x LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    )
    (residual_projections): ModuleList(
      (0): Linear(in_features=1, out_features=256, bias=True)
      (1): Identity()
    )
    (fc_out): Linear(in_features=256, out_features=1, bias=True)
  )
)
Epoch [1/10],