In [1]:
%cd ..

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from ipywidgets import interact

import cufflinks as cf
cf.go_offline(connected=True)

import bokeh.io
bokeh.io.output_notebook()

np.random.seed(42)

/home/rosneft_user_2500/anomaly-detection


In [2]:
root_folder = %pwd
import sys
sys.path = [root_folder] + sys.path

# Загрузка данных

In [3]:
from sklearn.model_selection import train_test_split
from src.features.build_features import rolling_window

prediction_len = 1
window_len = 32
batch_size = 32

data = pd.read_csv('data/processed/tep_data.csv', index_col='Index')
print(f'Len of dataset: {data.shape[0]}')

Len of dataset: 12801


## Decomposition

In [4]:
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
from statsmodels.tsa.stattools import acf, pacf

@interact(component=(0, 40))
def myacf(component):
    plot_acf(data.values[:, component], lags=np.arange(0, 2000))

interactive(children=(IntSlider(value=20, description='component', max=40), Output()), _dom_classes=('widget-i…

In [90]:
import statsmodels.tsa.seasonal as seasonal
period = 750
decomposed = seasonal.seasonal_decompose(data.values,
                                         period=period,
                                         extrapolate_trend='freq')


@interact(comp=(0, 40))
def f(comp):
    plt.figure(figsize=(20, 10))
    plt.subplot(1, 2, 1)
    plt.title('Trend')
    plt.plot(decomposed.trend[:, comp])
    plt.subplot(1, 2, 2)
    plt.title('Seasonal')
    plt.plot(decomposed.seasonal[:, comp])

interactive(children=(IntSlider(value=20, description='comp', max=40), Output()), _dom_classes=('widget-intera…

# Обучение

In [6]:
from datetime import datetime
def get_log_path(name):
    return name + '_' + datetime.now().strftime('%Y-%m-%d-%H-%M')

## Тренд

### Train/test split

In [71]:
from src.models.torch.utils import to_dataloader

X_trend = rolling_window(decomposed.trend, window_len)[:-prediction_len]
y_trend = rolling_window(decomposed.trend, prediction_len, window_len)

X_tr, X_te, y_tr, y_te = train_test_split(X_trend, y_trend, train_size=0.7, shuffle=False)

train_set = to_dataloader(X_tr, y_tr, dict(batch_size=batch_size))
test_set = to_dataloader(X_te, y_te, dict(batch_size=batch_size))

### Обучение

In [9]:
import torch
from src.models.torch.models import LSTM, Trainer

config = dict(
    input_size=X_tr[0].shape[1],
    hidden_size=16,
    num_layers=1,
    batch_first=True,
    bidirectional=True,
)

device = torch.device('cpu')
model = LSTM(**config).to(device)
criterion = torch.nn.MSELoss()
optim = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim,
                                                       patience=3,
                                                       threshold=0.01)

trainer = Trainer(
    model,
    criterion,
    optim,
    scheduler,
    device,
    get_log_path(
        f'logs/trend-{config["num_layers"]}-layers-{config["hidden_size"]}-hidden-{window_len}-len'
    ),
    stateful=True)

In [10]:
# # for test purpouses
# sz = 200
# xx = torch.rand(sz, window_len, data.shape[1])
# yy = torch.rand(sz, data.shape[1])
# xxdatayy = to_dataloader(xx, yy, dict(batch_size=batch_size))
# trainer.train(xxdatayy, xxdatayy, 5)

In [10]:
trainer.train(train_set, test_set, 15)


To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).

Epoch 0 of train: :   1%|          | 2/280 [00:00<00:22, 12.52it/s, loss=0.488]

Epoch 0/14
----------


Epoch 0 of train: : 100%|██████████| 280/280 [00:16<00:00, 16.90it/s, loss=0.0189]
Epoch 0 of val: : 100%|██████████| 120/120 [00:06<00:00, 17.15it/s, loss=0.317] 
Epoch 1 of train: :   1%|          | 2/280 [00:00<00:17, 15.90it/s, loss=0.185]

Loss: 0.0601

Epoch 1/14
----------


Epoch 1 of train: : 100%|██████████| 280/280 [00:16<00:00, 16.90it/s, loss=0.0142]
Epoch 1 of val: : 100%|██████████| 120/120 [00:07<00:00, 16.99it/s, loss=0.302] 
Epoch 2 of train: :   1%|          | 2/280 [00:00<00:14, 19.27it/s, loss=0.0812]

Loss: 0.0480

Epoch 2/14
----------


Epoch 2 of train: : 100%|██████████| 280/280 [00:15<00:00, 18.27it/s, loss=0.0146] 
Epoch 2 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.34it/s, loss=0.187] 
Epoch 3 of train: :   1%|          | 2/280 [00:00<00:16, 17.01it/s, loss=0.0276]

Loss: 0.0357

Epoch 3/14
----------


Epoch 3 of train: : 100%|██████████| 280/280 [00:15<00:00, 18.50it/s, loss=0.0125] 
Epoch 3 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.89it/s, loss=0.185] 
Epoch 4 of train: :   1%|          | 2/280 [00:00<00:14, 19.19it/s, loss=0.0203]

Loss: 0.0303

Epoch 4/14
----------


Epoch 4 of train: : 100%|██████████| 280/280 [00:15<00:00, 18.41it/s, loss=0.0121] 
Epoch 4 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.52it/s, loss=0.228] 
Epoch 5 of train: :   1%|          | 2/280 [00:00<00:16, 16.73it/s, loss=0.0224]

Loss: 0.0328

Epoch 5/14
----------


Epoch 5 of train: : 100%|██████████| 280/280 [00:15<00:00, 18.50it/s, loss=0.0114] 
Epoch 5 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.51it/s, loss=0.219] 
Epoch 6 of train: :   1%|          | 2/280 [00:00<00:18, 15.41it/s, loss=0.0382]

Loss: 0.0313

Epoch 6/14
----------


Epoch 6 of train: : 100%|██████████| 280/280 [00:15<00:00, 18.36it/s, loss=0.0102] 
Epoch 6 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.35it/s, loss=0.193] 
Epoch 7 of train: :   1%|          | 2/280 [00:00<00:16, 16.52it/s, loss=0.0169]

Loss: 0.0278

Epoch 7/14
----------


Epoch 7 of train: : 100%|██████████| 280/280 [00:15<00:00, 18.58it/s, loss=0.0105] 
Epoch 7 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.39it/s, loss=0.197] 
Epoch 8 of train: :   1%|          | 2/280 [00:00<00:14, 18.84it/s, loss=0.0187]

Loss: 0.0291

Epoch 8/14
----------


Epoch 8 of train: : 100%|██████████| 280/280 [00:15<00:00, 18.50it/s, loss=0.0101] 
Epoch 8 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.39it/s, loss=0.185] 
Epoch 9 of train: :   1%|          | 2/280 [00:00<00:14, 19.49it/s, loss=0.016] 

Loss: 0.0269

Epoch 9/14
----------


Epoch 9 of train: : 100%|██████████| 280/280 [00:15<00:00, 18.66it/s, loss=0.00946]
Epoch 9 of val: : 100%|██████████| 120/120 [00:06<00:00, 19.02it/s, loss=0.186] 
Epoch 10 of train: :   1%|          | 2/280 [00:00<00:17, 15.88it/s, loss=0.0351]

Loss: 0.0280

Epoch 10/14
----------


Epoch 10 of train: : 100%|██████████| 280/280 [00:15<00:00, 18.45it/s, loss=0.0105] 
Epoch 10 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.96it/s, loss=0.196] 
Epoch 11 of train: :   1%|          | 2/280 [00:00<00:15, 18.47it/s, loss=0.0172]

Loss: 0.0293

Epoch 11/14
----------


Epoch 11 of train: : 100%|██████████| 280/280 [00:14<00:00, 18.84it/s, loss=0.00875]
Epoch 11 of val: : 100%|██████████| 120/120 [00:06<00:00, 19.02it/s, loss=0.177] 
Epoch 12 of train: :   1%|          | 2/280 [00:00<00:14, 19.81it/s, loss=0.0145]

Loss: 0.0286

Epoch 12/14
----------


Epoch 12 of train: : 100%|██████████| 280/280 [00:14<00:00, 19.16it/s, loss=0.0101] 
Epoch 12 of val: : 100%|██████████| 120/120 [00:06<00:00, 19.36it/s, loss=0.249] 
Epoch 13 of train: :   1%|          | 2/280 [00:00<00:15, 18.37it/s, loss=0.0493]

Loss: 0.0356

Epoch 13/14
----------


Epoch 13 of train: : 100%|██████████| 280/280 [00:14<00:00, 18.95it/s, loss=0.00902]
Epoch 13 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.87it/s, loss=0.161] 
Epoch 14 of train: :   1%|          | 3/280 [00:00<00:13, 21.14it/s, loss=0.0202]

Loss: 0.0254

Epoch 14/14
----------


Epoch 14 of train: : 100%|██████████| 280/280 [00:14<00:00, 18.69it/s, loss=0.00919]
Epoch 14 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.92it/s, loss=0.149] 


Loss: 0.0247



In [11]:
torch.save(model, 'trend.pth')

### Предсказание

In [68]:
import torch
model = torch.load('trend.pth')

In [69]:
import matplotlib.pyplot as plt
from tqdm import tqdm

from src.models.torch.utils import to_dataloader, get_prev_states

def next_value_foreceast(model, data, window_len):
    model.eval()
    pred = torch.zeros((0, data.shape[1]))
    for i in tqdm(range(window_len, data.shape[0])):
        inp = torch.tensor(data[i - window_len:i]).float()
        inp = inp.view(1, *inp.size())
        states = get_prev_states(model, 1)
        pred = torch.cat((pred, model(inp, states)), dim=0)
    return pred

def forecast(model, prior, window_len, n):
    model.eval()
    inp = prior.view(1, window_len, -1)
    for i in tqdm(range(n)):
        states = get_prev_states(model, 1)
        out = model(inp[:, -window_len:], states)
        inp = torch.cat((inp, out.view(1, 1, -1)), dim=1)
    return inp

In [72]:
model.reset_states()
split_point = len(X_tr) + window_len
train_pred = next_value_foreceast(model, decomposed.trend[:split_point],
                                   window_len)
test_pred = next_value_foreceast(model, decomposed.trend[split_point-window_len:],
                                  window_len)

train_pred = train_pred.detach().numpy()
test_pred = test_pred.detach().numpy()


To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).

100%|██████████| 8938/8938 [01:27<00:00, 102.64it/s]
100%|██████████| 3831/3831 [00:34<00:00, 110.38it/s]


#### Weighted MSE - Optional

На некоторых компонентах плохое предсказание, поэтому им нужно получить больше веса

In [18]:
# from sklearn.metrics import mean_squared_error

# width = 3000
# err = mean_squared_error(test_pred[:width], decomposed.trend[split_point:split_point+width], multioutput='raw_values')
# plt.bar(np.arange(len(err)), err)

# def weighted_mse_loss(weights):
#     weights = torch.tensor(weights)
#     criterion = torch.nn.MSELoss(reduction='none')
#     def mse(input, target):
#         nonlocal weights, criterion
#         loss = criterion(input, target)
#         loss = loss * weights.expand_as(loss)
#         return loss.mean()
#     return mse

# criterion = weighted_mse_loss(err / err.sum())
# optim = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim,
#                                                        patience=3,
#                                                        threshold=0.01)

# trainer = Trainer(
#     model,
#     criterion,
#     optim,
#     scheduler,
#     device,
#     get_log_path(
#         f'logs/trend-retraining-{config["num_layers"]}-layers-{config["hidden_size"]}-hidden-{window_len}-len'
#     ),
#     stateful=True)



# model.reset_states()
# trainer.train(train_set, test_set, 10)

# model.reset_states()
# split_point = len(X_tr) + window_len
# train_pred = next_value_prediction(model, decomposed.trend[:split_point],
#                                    window_len)
# test_pred = next_value_prediction(model, decomposed.trend[split_point:],
#                                   window_len)

# train_pred = train_pred.detach().numpy()
# test_pred = test_pred.detach().numpy()

### Plot

In [25]:
from ipywidgets import interact

@interact(component=(0, 40))
def plot(component):
    width = 3000
    global split_point
    plt.figure(figsize=(18, 8))
    plt.suptitle('Next value prediction of trend')
    
    plt.subplot(1, 2, 1)
    plt.title('Train')
    plt.plot(decomposed.trend[:width + window_len, component],
             label='real',
             alpha=0.7)
    plt.plot(list(range(window_len, window_len + width)),
             train_pred[:width, component],
             label='pred',
             alpha=0.8)

    plt.subplot(1, 2, 2)
    plt.title('Test')
    plt.plot(decomposed.trend[split_point:split_point + width, component],
             label='real',
             alpha=0.7)
    plt.plot(list(range(window_len, window_len + width)),
             test_pred[:width, component],
             label='pred',
             alpha=0.8)
    plt.legend()

interactive(children=(IntSlider(value=20, description='component', max=40), Output()), _dom_classes=('widget-i…

In [None]:
# inp = torch.cat(
#     (torch.tensor(decomposed.trend[len(X_tr) + 1 - window_len:len(X_tr)]).float(), torch.tensor(train_pred[-1, None, :])),
#     axis=0).view(1, window_len, data.shape[1])
# for i in tqdm(range(len(X_te))):
#     states = get_prev_states(model, 1)
#     out = model(inp[:, -window_len:], states)
#     inp = torch.cat((inp, out.view(1, 1, -1)), axis=1)
    
# pred = inp.squeeze().detach().numpy()[window_len:]

# @interact(component=(0, 40))
# def f(component):
#     sz = 100
#     plt.title(f'Next {sz} values prediction of trend')
#     plt.plot(pred[:sz, component], label='pred')
#     plt.plot(decomposed.trend[len(X_tr):len(X_tr)+sz, component], label='ground truth')
#     plt.legend()

## Сезонная 

### Преобразование фурье

In [26]:
X_seasonal = decomposed.seasonal

In [27]:
from scipy.signal import stft
from sklearn.preprocessing import StandardScaler

f, t, Zxx = stft(X_seasonal, axis=0, nperseg=window_len, noverlap=window_len-1, 
                 return_onesided=True, boundary=None, padded=False)

Zxx = np.swapaxes(Zxx, 0, 1)
Zxx = np.swapaxes(Zxx, 1, 2)
Zxx = np.concatenate((Zxx.real, Zxx.imag), axis=-1)

print(Zxx.shape)

ss = StandardScaler()
for i in range(Zxx.shape[0]):
    Zxx[i] = ss.fit_transform(Zxx[i])

(41, 12770, 34)


In [28]:
Zxx = np.swapaxes(Zxx, 0, 1)
print(Zxx.shape)

Zxx = Zxx.reshape(Zxx.shape[0], -1)

(12770, 41, 34)


In [29]:
# from sklearn.decomposition import PCA
# pca = PCA(svd_solver='full', whiten=True)
# pca.fit(Zxx)

# min_idx = np.argmax(np.where(np.isclose(np.cumsum(pca.explained_variance_ratio_), 1))[0])
# min_idx

# Zxx = pca.transform(Zxx)

In [30]:
X_seasonal = Zxx[:-prediction_len]
y_seasonal = Zxx[prediction_len:]

X_seas_tr, X_seas_te, y_seas_tr, y_seas_te = train_test_split(X_seasonal, y_seasonal, train_size=0.7, shuffle=False)

#### Train/test

In [31]:
from src.models.torch.models import LSTM, Trainer
from src.models.torch.utils import to_dataloader, get_prev_states
import torch

seas_train_set = to_dataloader(X_seas_tr, y_seas_tr, dict(batch_size=batch_size, shuffle=True))
seas_test_set = to_dataloader(X_seas_te, y_seas_te, dict(batch_size=batch_size, shuffle=True))

#### Обучение

In [34]:
import torch.nn as nn

latent_dim = 512

seas_model = nn.Sequential(
    nn.Linear(X_seasonal.shape[1], latent_dim),
    nn.Tanh(),
    nn.Linear(latent_dim, X_seasonal.shape[1])
)

In [35]:
from torchsummary import summary
summary(seas_model, input_size=(X_seasonal.shape[0], X_seasonal.shape[1]))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1           [-1, 12769, 512]         714,240
              Tanh-2           [-1, 12769, 512]               0
            Linear-3          [-1, 12769, 1394]         715,122
Total params: 1,429,362
Trainable params: 1,429,362
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 67.90
Forward/backward pass size (MB): 235.56
Params size (MB): 5.45
Estimated Total Size (MB): 308.92
----------------------------------------------------------------


In [36]:
from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Loss

optimizer = torch.optim.Adam(seas_model.parameters())
loss = nn.MSELoss()

trainer = create_supervised_trainer(seas_model, optimizer, loss)
evaluator = create_supervised_evaluator(seas_model, metrics={'loss': Loss(loss)})

@trainer.on(Events.EPOCH_COMPLETED)
def compute_metrics(engine):
    evaluator.run(seas_test_set)

In [37]:
from ignite.contrib.handlers.tensorboard_logger import *

# Create a logger
tb_logger = TensorboardLogger(log_dir=get_log_path('logs/seas'))

tb_logger.attach(trainer,
                 log_handler=OutputHandler(
                     tag="training",
                     output_transform=lambda loss: {'MSE': loss}),
                 event_name=Events.ITERATION_COMPLETED)

tb_logger.attach(evaluator,
                 log_handler=OutputHandler(
                     tag="validation",
                     metric_names=["loss"],
                     global_step_transform=global_step_from_engine(trainer)),
                 event_name=Events.EPOCH_COMPLETED)

tb_logger.attach(trainer,
                 log_handler=OptimizerParamsHandler(optimizer),
                 event_name=Events.ITERATION_STARTED)

tb_logger.attach(trainer,
                 log_handler=GradsHistHandler(seas_model),
                 event_name=Events.EPOCH_COMPLETED)

# We need to close the logger with we are done
tb_logger.close()

In [38]:
trainer.run(seas_train_set, max_epochs=30);

In [39]:
from tqdm import tqdm

def next_value_foreceast(model, data):
    model.eval()
    pred = torch.zeros((0, *data.shape[1:]))
    for i in tqdm(range(data.shape[0])):
        inp = torch.tensor(data[i]).float()
        inp = inp.view(1, *inp.size())
        pred = torch.cat((pred, model(inp)), dim=0)
    return pred

In [40]:
seas_forecast_tr = next_value_foreceast(seas_model, X_seas_tr).detach().numpy()
seas_forecast_te = next_value_foreceast(seas_model, X_seas_te).detach().numpy()

100%|██████████| 8938/8938 [02:57<00:00, 50.28it/s] 
100%|██████████| 3831/3831 [00:13<00:00, 284.55it/s]


In [23]:
# seas_forecast_tr = pca.inverse_transform(seas_forecast_tr)
# seas_forecast_te = pca.inverse_transform(seas_forecast_te)

In [41]:
seas_forecast_tr = seas_forecast_tr.reshape(-1, 41, 34)
seas_forecast_te = seas_forecast_te.reshape(-1, 41, 34)

In [45]:
from ipywidgets import interact
@interact(idx=(0, len(X_seas_te)))
def plot_diff_stft(idx):
    plt.figure(figsize=(6, 10))
    plt.title('Difference between true spectr and predicted')
#     diff = seas_forecast_te[idx] - pca.inverse_transform(X_seasonal[len(X_seas_tr) + idx]).reshape(41, 34)
    diff = seas_forecast_te[idx] - X_seasonal[len(X_seas_tr) + idx].reshape(41, 34)
    diff = np.abs(diff)
    plt.imshow(diff)
    print(diff.sum())
    plt.colorbar();

interactive(children=(IntSlider(value=1915, description='idx', max=3831), Output()), _dom_classes=('widget-int…

In [63]:
from scipy.signal import istft
def to_spectr(seas_forecast):
    spectr = np.split(seas_forecast, 2, axis=-1)
    spectr = spectr[0] + 1j * spectr[1]
    return istft(spectr, time_axis=0, freq_axis=2, nperseg=window_len, noverlap=window_len-1)[1]

seas_tr = to_spectr(seas_forecast_tr)
seas_te = to_spectr(seas_forecast_te)

In [67]:
from ipywidgets import interact

split_point = len(X_seasonal) + window_len

@interact(component=(0, 40))
def plot(component):
    width = 3000
    global split_point
    plt.figure(figsize=(18, 8))
    plt.suptitle('Next value prediction of Seasonal')
    
    plt.subplot(1, 2, 1)
    plt.title('Train')
    plt.plot(decomposed.seasonal[:width + window_len, component],
             label='real',
             alpha=0.7)
    plt.plot(list(range(window_len, window_len + width)),
             seas_tr[:width, component],
             label='pred',
             alpha=0.8)

    plt.subplot(1, 2, 2)
    plt.title('Test')
    plt.plot(decomposed.seasonal[split_point:split_point + width, component],
             label='real',
             alpha=0.7)
    plt.plot(list(range(window_len, window_len + width)),
             seas_te[:width, component],
             label='pred',
             alpha=0.8)
    plt.legend()

interactive(children=(IntSlider(value=20, description='component', max=40), Output()), _dom_classes=('widget-i…

### Residuals training

In [74]:
train_resid = np.array(y_tr).squeeze() - train_pred
test_resid = np.array(y_te).squeeze() - test_pred

In [97]:
@interact(component=(0, 40))
def plot_trend_resid(component):
    plt.figure(figsize=(20, 12))

    trend_pred = np.r_[train_pred, test_pred]
    residuals = np.r_[train_resid, test_resid]
    
    plt.subplot(1, 2, 1)
    plt.title('Trend')
    plt.plot(trend_pred[:, component], label='Next value prediction (LSTM)')
    plt.plot(decomposed.trend[:, component], label='Real')
    plt.legend()
    
    
    plt.subplot(1, 2, 2)
    plt.plot(residuals[:, component], label='Residuals (data - trend_pred)')
#     plt.plot(decomposed.seasonal[:, component] + decomposed.resid[:, component], 
#              label='Seasonal + Resid', 
#              alpha=0.6)
    plt.legend()

interactive(children=(IntSlider(value=20, description='component', max=40), Output()), _dom_classes=('widget-i…

In [34]:
from sklearn.preprocessing import StandardScaler
ss = StandardScaler().fit(train_resid)
train_resid_scaled = ss.transform(train_resid)
test_resid_scaled = ss.transform(test_resid)

In [41]:
train_set = to_dataloader(X_tr, train_resid_scaled, dict(batch_size=batch_size))
test_set = to_dataloader(X_te, test_resid_scaled, dict(batch_size=batch_size))

In [45]:
import torch
from src.models.torch.models import LSTM, Trainer

config = dict(
    input_size=X_tr[0].shape[1],
    hidden_size=16,
    num_layers=1,
    batch_first=True,
    bidirectional=True,
)

device = torch.device('cpu')
model = LSTM(**config).to(device)
criterion = torch.nn.MSELoss()
optim = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.01)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim,
                                                       patience=3,
                                                       threshold=0.01)

trainer = Trainer(
    model,
    criterion,
    optim,
    scheduler,
    device,
    get_log_path(
        f'logs/resid-{config["num_layers"]}-layers-{config["hidden_size"]}-hidden-{window_len}-len'
    ),
    stateful=True)

In [46]:
trainer.train(train_set, test_set, 10)

Epoch 0 of train: :   1%|          | 2/280 [00:00<00:19, 14.35it/s, loss=3.22]

Epoch 0/9
----------


Epoch 0 of train: : 100%|██████████| 280/280 [00:15<00:00, 18.00it/s, loss=1.1]  
Epoch 0 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.25it/s, loss=25.7]
Epoch 1 of train: :   1%|          | 2/280 [00:00<00:14, 19.41it/s, loss=2.03]

Loss: 4.3518

Epoch 1/9
----------


Epoch 1 of train: : 100%|██████████| 280/280 [00:16<00:00, 17.37it/s, loss=1.02] 
Epoch 1 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.27it/s, loss=26.5]
Epoch 2 of train: :   1%|          | 2/280 [00:00<00:18, 15.17it/s, loss=2.96]

Loss: 4.4708

Epoch 2/9
----------


Epoch 2 of train: : 100%|██████████| 280/280 [00:16<00:00, 16.96it/s, loss=1.05] 
Epoch 2 of val: : 100%|██████████| 120/120 [00:06<00:00, 17.27it/s, loss=25.5]
Epoch 3 of train: :   1%|          | 2/280 [00:00<00:14, 19.13it/s, loss=1.86]

Loss: 4.4141

Epoch 3/9
----------


Epoch 3 of train: : 100%|██████████| 280/280 [00:15<00:00, 17.66it/s, loss=1.07] 
Epoch 3 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.02it/s, loss=23.7]
Epoch 4 of train: :   1%|          | 2/280 [00:00<00:17, 16.09it/s, loss=2.92]

Loss: 4.2035

Epoch 4/9
----------


Epoch 4 of train: : 100%|██████████| 280/280 [00:15<00:00, 18.31it/s, loss=1.04] 
Epoch 4 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.20it/s, loss=25]  
Epoch 5 of train: :   1%|          | 2/280 [00:00<00:15, 18.15it/s, loss=1.86]

Loss: 4.3363

Epoch 5/9
----------


Epoch 5 of train: : 100%|██████████| 280/280 [00:15<00:00, 18.52it/s, loss=1.06] 
Epoch 5 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.50it/s, loss=24.9]
Epoch 6 of train: :   1%|          | 3/280 [00:00<00:13, 21.13it/s, loss=1.69]

Loss: 4.2944

Epoch 6/9
----------


Epoch 6 of train: : 100%|██████████| 280/280 [00:15<00:00, 18.61it/s, loss=1.06] 
Epoch 6 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.46it/s, loss=25.1]
Epoch 7 of train: :   1%|          | 2/280 [00:00<00:15, 17.60it/s, loss=1.82]

Loss: 4.3325

Epoch 7/9
----------


Epoch 7 of train: : 100%|██████████| 280/280 [00:14<00:00, 18.77it/s, loss=1.08] 
Epoch 7 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.62it/s, loss=26.5]
Epoch 8 of train: :   1%|          | 2/280 [00:00<00:13, 19.88it/s, loss=2.18]

Loss: 4.3813

Epoch 8/9
----------


Epoch 8 of train: : 100%|██████████| 280/280 [00:14<00:00, 18.86it/s, loss=1.1]  
Epoch 8 of val: : 100%|██████████| 120/120 [00:06<00:00, 19.02it/s, loss=24.7]
Epoch 9 of train: :   1%|          | 2/280 [00:00<00:15, 18.27it/s, loss=2.07]

Loss: 4.1734

Epoch 9/9
----------


Epoch 9 of train: : 100%|██████████| 280/280 [00:15<00:00, 18.57it/s, loss=1.1]  
Epoch 9 of val: : 100%|██████████| 120/120 [00:06<00:00, 18.97it/s, loss=24.1]


Loss: 4.1033

