In [1]:
# Move one directory up so the relative paths used by later cells
# ('data/processed/...', 'logs/...', `src.*` imports) resolve.
%cd ..

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from ipywidgets import interact

# Plotly/cufflinks offline mode.
# NOTE(review): connected=True presumably loads plotly.js from a CDN — confirm.
import cufflinks as cf
cf.go_offline(connected=True)

# Route Bokeh output to the notebook.
import bokeh.io
bokeh.io.output_notebook()

# Seed NumPy's global RNG (this does not seed PyTorch).
np.random.seed(42)

D:\anomaly-detection


# Загрузка данных

In [2]:
from sklearn.model_selection import train_test_split
from src.features.build_features import rolling_window

# Windowing / batching hyper-parameters shared by the cells below:
# steps predicted per sample, input window length, loader batch size.
prediction_len, window_len, batch_size = 1, 32, 32

# Read the pre-processed dataset; the 'Index' column becomes the row index.
data = pd.read_csv(
    'data/processed/tep_data.csv',
    index_col='Index',
)
print(f'Len of dataset: {data.shape[0]}')

Len of dataset: 12801


## Decomposition

In [3]:
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
from statsmodels.tsa.stattools import acf, pacf

@interact(component=(0, 40))
def myacf(component):
    """Plot the autocorrelation of one column of `data` for lags 0..1999.

    `component` is the column index selected with the slider.
    """
    series = data.values[:, component]
    lag_grid = np.arange(2000)
    plot_acf(series, lags=lag_grid)

interactive(children=(IntSlider(value=20, description='component', max=40), Output()), _dom_classes=('widget-i…

In [7]:
import statsmodels.tsa.seasonal as seasonal
# Seasonal period, in samples, used for the decomposition.
period = 750

# Decompose every column of `data`; extrapolate_trend='freq' asks statsmodels
# to extrapolate the trend at the series edges (see seasonal_decompose docs).
decomposed = seasonal.seasonal_decompose(
    data.values, period=period, extrapolate_trend='freq'
)


@interact(comp=(0, 40))
def f(comp):
    """Draw the trend and seasonal parts of column `comp` side by side."""
    plt.figure(figsize=(20, 10))
    # (panel title, decomposed array) in left-to-right order.
    panels = (
        ('Trend', decomposed.trend),
        ('Seasonal', decomposed.seasonal),
    )
    for position, (title, values) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.title(title)
        plt.plot(values[:, comp])

interactive(children=(IntSlider(value=20, description='comp', max=40), Output()), _dom_classes=('widget-intera…

## Train/test split

In [9]:
# Build supervised pairs from the trend component: inputs are windows of
# `window_len` steps, targets are windows of `prediction_len` steps.
# The trailing `prediction_len` input windows are dropped.
# NOTE(review): rolling_window is a project helper; its third argument is
# presumably a start offset so each target follows its input window — confirm
# in src.features.build_features.
X_trend = rolling_window(decomposed.trend, window_len)[:-prediction_len]
y_trend = rolling_window(decomposed.trend, prediction_len, window_len)

In [10]:
# Chronological 70/30 split: shuffle=False keeps the time order, so the test
# samples are the ones that follow the training samples.
X_tr, X_te, y_tr, y_te = train_test_split(X_trend, y_trend, train_size=0.7, shuffle=False)

# Обучение

In [45]:
from src.models.torch.models import LSTM, Trainer
from src.models.torch.utils import to_dataloader, get_prev_states
import torch

# Wrap the train / test arrays into loaders that share one batch size.
# NOTE(review): to_dataloader is a project helper; presumably the dict is
# forwarded as DataLoader keyword arguments — confirm in src.models.torch.utils.
train_set = to_dataloader(X_tr, y_tr, dict(batch_size=batch_size))
test_set = to_dataloader(X_te, y_te, dict(batch_size=batch_size))

In [71]:
# `get_prev_states` is deliberately left in scope: the prediction helpers
# defined further down call it, and deleting it here made a top-to-bottom
# run (Restart Kernel -> Run All) fail with NameError.

In [37]:
from datetime import datetime
def get_log_path(name):
    """Return `name` suffixed with '_' and the current local time.

    The timestamp has minute resolution (YYYY-MM-DD-HH-MM), so two calls
    within the same minute produce the same path.
    """
    timestamp = format(datetime.now(), '%Y-%m-%d-%H-%M')
    return '_'.join((name, timestamp))

In [41]:
# Seed PyTorch as well (only NumPy is seeded in the first cell) so that the
# weight initialisation, and therefore the training run, is repeatable.
torch.manual_seed(42)

# Keyword arguments for the project LSTM wrapper.
config = dict(
    input_size=X_tr[0].shape[1],  # second axis of one training window
    hidden_size=16,
    num_layers=1,
    batch_first=True,
    bidirectional=True,
)

device = torch.device('cpu')
model = LSTM(**config).to(device)
criterion = torch.nn.MSELoss()
optim = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
# Lower the learning rate once the monitored metric stops improving by more
# than `threshold` for `patience` consecutive scheduler steps.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim,
                                                       patience=3,
                                                       threshold=0.01)

# NOTE(review): Trainer is a project class; the positional string is
# presumably the log directory and `stateful=True` presumably carries LSTM
# state between batches — confirm in src.models.torch.models.
trainer = Trainer(
    model,
    criterion,
    optim,
    scheduler,
    device,
    get_log_path(
        f'logs/trend-{config["num_layers"]}-layers-{config["hidden_size"]}-hidden-{window_len}-len'
    ),
    stateful=True)

In [39]:
# # for test purposes
# sz = 200
# xx = torch.rand(sz, window_len, data.shape[1])
# yy = torch.rand(sz, data.shape[1])
# xxdatayy = to_dataloader(xx, yy, dict(batch_size=batch_size))
# trainer.train(xxdatayy, xxdatayy, 5)

In [42]:
# Train on `train_set`, evaluating on `test_set` after every epoch, for 10
# epochs (the captured log below runs from "Epoch 0/9" to "Epoch 9/9").
trainer.train(train_set, test_set, 10)

Epoch 0 of train: :   1%|▊                                                                              | 3/280 [00:00<00:13, 20.27it/s, loss=tensor(0.5058, grad_fn=<MseLossBackward>)]

Epoch 0/9
----------


Epoch 0 of train: : 100%|█████████████████████████████████████████████████████████████████████████████| 280/280 [00:13<00:00, 21.27it/s, loss=tensor(0.0210, grad_fn=<MseLossBackward>)]
Epoch 0 of val: : 100%|███████████████████████████████████████████████████████████████████████████████| 120/120 [00:05<00:00, 21.47it/s, loss=tensor(0.3273, grad_fn=<MseLossBackward>)]
Epoch 1 of train: :   1%|▌                                                                              | 2/280 [00:00<00:15, 18.35it/s, loss=tensor(0.1806, grad_fn=<MseLossBackward>)]

Loss: 0.0625

Epoch 1/9
----------


Epoch 1 of train: : 100%|█████████████████████████████████████████████████████████████████████████████| 280/280 [00:12<00:00, 21.91it/s, loss=tensor(0.0160, grad_fn=<MseLossBackward>)]
Epoch 1 of val: : 100%|███████████████████████████████████████████████████████████████████████████████| 120/120 [00:05<00:00, 20.09it/s, loss=tensor(0.2679, grad_fn=<MseLossBackward>)]
Epoch 2 of train: :   1%|▊                                                                              | 3/280 [00:00<00:11, 25.00it/s, loss=tensor(0.0905, grad_fn=<MseLossBackward>)]

Loss: 0.0471

Epoch 2/9
----------


Epoch 2 of train: : 100%|█████████████████████████████████████████████████████████████████████████████| 280/280 [00:12<00:00, 22.10it/s, loss=tensor(0.0134, grad_fn=<MseLossBackward>)]
Epoch 2 of val: : 100%|███████████████████████████████████████████████████████████████████████████████| 120/120 [00:05<00:00, 21.94it/s, loss=tensor(0.2384, grad_fn=<MseLossBackward>)]
Epoch 3 of train: :   1%|▊                                                                              | 3/280 [00:00<00:11, 25.00it/s, loss=tensor(0.0498, grad_fn=<MseLossBackward>)]

Loss: 0.0400

Epoch 3/9
----------


Epoch 3 of train: : 100%|█████████████████████████████████████████████████████████████████████████████| 280/280 [00:12<00:00, 22.02it/s, loss=tensor(0.0121, grad_fn=<MseLossBackward>)]
Epoch 3 of val: : 100%|███████████████████████████████████████████████████████████████████████████████| 120/120 [00:05<00:00, 21.99it/s, loss=tensor(0.2505, grad_fn=<MseLossBackward>)]
Epoch 4 of train: :   1%|▊                                                                              | 3/280 [00:00<00:11, 24.00it/s, loss=tensor(0.0299, grad_fn=<MseLossBackward>)]

Loss: 0.0390

Epoch 4/9
----------


Epoch 4 of train: : 100%|█████████████████████████████████████████████████████████████████████████████| 280/280 [00:12<00:00, 22.45it/s, loss=tensor(0.0112, grad_fn=<MseLossBackward>)]
Epoch 4 of val: : 100%|███████████████████████████████████████████████████████████████████████████████| 120/120 [00:05<00:00, 22.31it/s, loss=tensor(0.2367, grad_fn=<MseLossBackward>)]
Epoch 5 of train: :   1%|▊                                                                              | 3/280 [00:00<00:11, 23.62it/s, loss=tensor(0.0228, grad_fn=<MseLossBackward>)]

Loss: 0.0359

Epoch 5/9
----------


Epoch 5 of train: : 100%|█████████████████████████████████████████████████████████████████████████████| 280/280 [00:12<00:00, 22.21it/s, loss=tensor(0.0109, grad_fn=<MseLossBackward>)]
Epoch 5 of val: : 100%|███████████████████████████████████████████████████████████████████████████████| 120/120 [00:05<00:00, 22.09it/s, loss=tensor(0.2266, grad_fn=<MseLossBackward>)]
Epoch 6 of train: :   1%|▊                                                                              | 3/280 [00:00<00:11, 24.39it/s, loss=tensor(0.0177, grad_fn=<MseLossBackward>)]

Loss: 0.0338

Epoch 6/9
----------


Epoch 6 of train: : 100%|█████████████████████████████████████████████████████████████████████████████| 280/280 [00:12<00:00, 22.31it/s, loss=tensor(0.0106, grad_fn=<MseLossBackward>)]
Epoch 6 of val: : 100%|███████████████████████████████████████████████████████████████████████████████| 120/120 [00:05<00:00, 22.29it/s, loss=tensor(0.1958, grad_fn=<MseLossBackward>)]
Epoch 7 of train: :   1%|▊                                                                              | 3/280 [00:00<00:11, 23.81it/s, loss=tensor(0.0142, grad_fn=<MseLossBackward>)]

Loss: 0.0315

Epoch 7/9
----------


Epoch 7 of train: : 100%|█████████████████████████████████████████████████████████████████████████████| 280/280 [00:12<00:00, 21.59it/s, loss=tensor(0.0103, grad_fn=<MseLossBackward>)]
Epoch 7 of val: : 100%|███████████████████████████████████████████████████████████████████████████████| 120/120 [00:05<00:00, 20.12it/s, loss=tensor(0.2020, grad_fn=<MseLossBackward>)]
Epoch 8 of train: :   1%|▊                                                                              | 3/280 [00:00<00:13, 21.28it/s, loss=tensor(0.0143, grad_fn=<MseLossBackward>)]

Loss: 0.0319

Epoch 8/9
----------


Epoch 8 of train: : 100%|█████████████████████████████████████████████████████████████████████████████| 280/280 [00:12<00:00, 22.63it/s, loss=tensor(0.0101, grad_fn=<MseLossBackward>)]
Epoch 8 of val: : 100%|███████████████████████████████████████████████████████████████████████████████| 120/120 [00:05<00:00, 22.35it/s, loss=tensor(0.1663, grad_fn=<MseLossBackward>)]
Epoch 9 of train: :   1%|▊                                                                              | 3/280 [00:00<00:11, 24.19it/s, loss=tensor(0.0125, grad_fn=<MseLossBackward>)]

Loss: 0.0303

Epoch 9/9
----------


Epoch 9 of train: : 100%|█████████████████████████████████████████████████████████████████████████████| 280/280 [00:12<00:00, 22.12it/s, loss=tensor(0.0097, grad_fn=<MseLossBackward>)]
Epoch 9 of val: : 100%|███████████████████████████████████████████████████████████████████████████████| 120/120 [00:05<00:00, 22.23it/s, loss=tensor(0.1706, grad_fn=<MseLossBackward>)]


Loss: 0.0314



In [15]:
import matplotlib.pyplot as plt

In [15]:
# from __future__ import print_function
# from ipywidgets import interact, interactive, fixed, interact_manual
# import ipywidgets as widgets


# @interact(component=(0, data.shape[1] - 1), prediction_len=(1, 32))
# def plot(component, prediction_len):
#     model.eval()
#     plt.figure(figsize=(12, 12))
#     amount = 4  # need to be a square
#     idxs = np.random.randint(len(X_te), size=amount)
#     sample = torch.tensor(np.array(np.take(X_te, idxs, axis=0))).float()

#     for i in range(prediction_len):
#         predicted = model(sample[:, -window_len:])
#         sample = torch.cat((sample, predicted.view(amount, 1, -1)), dim=1)

#     for i in range(amount):
#         plt.subplot(amount // 2, amount // 2, i + 1)
#         plt.plot(list(range(window_len)),
#                  sample[i, :window_len, component].detach().numpy(),
#                  c='r')
#         plt.plot(list(range(window_len, window_len + prediction_len)),
#                  sample[i, window_len:, component].detach().numpy(),
#                  c='b')

In [85]:
from tqdm import tqdm

def next_value_prediction(model, data, window_len):
    """Run one-step-ahead prediction over a whole series.

    For every ``i`` in ``range(window_len, len(data) - 1)`` the model receives
    ``data[i - window_len:i]`` as a batch of one, together with fresh states
    from ``get_prev_states(model, 1)``, and its output row is recorded.

    Args:
        model: callable as ``model(inputs, states)``; assumed to return a
            ``(1, n_features)`` tensor — TODO confirm against the LSTM wrapper.
        data: 2-D array of shape ``(n_steps, n_features)``.
        window_len: number of past steps fed to the model per prediction.

    Returns:
        Tensor with ``n_features`` columns. Row 0 is an all-zero placeholder
        (kept so the output shape matches what existing callers and saved
        arrays expect); the remaining rows are the predictions in order.
        The result carries no autograd history.
    """
    # Collect rows in a list and concatenate once: growing a tensor with
    # torch.cat inside the loop re-copies the whole history on every step.
    rows = [torch.zeros((1, data.shape[1]))]
    # Inference only, so skip building an autograd graph for every step.
    with torch.no_grad():
        for i in tqdm(range(window_len, data.shape[0] - 1)):
            inp = torch.tensor(data[i - window_len:i]).float()
            inp = inp.view(1, *inp.size())  # add the batch dimension
            states = get_prev_states(model, 1)
            rows.append(model(inp, states))
    return torch.cat(rows, dim=0)

def next_test_value_prediction(model, prior, window_len, n_steps=None):
    """Autoregressive roll-out: feed the model its own predictions.

    Starting from ``prior``, the model is repeatedly applied to the last
    ``window_len`` steps of the running sequence and its output is appended
    as the next step.

    Args:
        model: callable as ``model(inputs, states)``.
        prior: tensor reshaped here to ``(1, window_len, n_features)``; the
            seed window for the roll-out.
        window_len: number of trailing steps fed to the model each iteration.
        n_steps: how many steps to generate. Defaults to ``len(X_te)`` (the
            notebook-level test set), which is what this function always
            used before the parameter existed.

    Returns:
        Tensor of shape ``(1, window_len + n_steps, n_features)``: the prior
        followed by the generated steps. It carries no autograd history.
    """
    if n_steps is None:
        n_steps = len(X_te)
    inp = prior.view(1, window_len, -1)
    # Inference only, so skip building an autograd graph for every step.
    with torch.no_grad():
        for _ in tqdm(range(n_steps)):
            states = get_prev_states(model, 1)
            out = model(inp[:, -window_len:], states)
            inp = torch.cat((inp, out.view(1, 1, -1)), dim=1)
    return inp

In [104]:
# NOTE(review): reset_states is a project method; presumably it clears the
# hidden state kept by the stateful LSTM before inference — confirm.
model.reset_states()
# The first len(X_tr) + window_len trend samples form the train segment and the
# remainder the test segment; each call slides its window over its own segment.
train_pred = next_value_prediction(model, decomposed.trend[:len(X_tr)+window_len], window_len)
test_pred = next_value_prediction(model, decomposed.trend[len(X_tr)+window_len:], window_len)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8937/8937 [03:19<00:00, 44.80it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3800/3800 [02:02<00:00, 31.00it/s]


In [107]:
# Drop autograd tracking and convert both prediction tensors to NumPy arrays.
# Not idempotent: re-running this cell on arrays raises AttributeError.
train_pred, test_pred = (
    tensor.detach().numpy() for tensor in (train_pred, test_pred)
)

In [5]:
import os

# Prediction cache. When both files exist they are loaded (and, as before,
# replace whatever `train_pred` / `test_pred` currently hold), so the slow
# inference cells can be skipped. When they do not exist yet, the predictions
# computed above are written out instead of failing with FileNotFoundError.
# Delete the .npy files to force a refresh after retraining.
if os.path.exists('tr-pred.npy') and os.path.exists('te-pred.npy'):
    train_pred = np.load('tr-pred.npy')
    test_pred = np.load('te-pred.npy')
else:
    np.save('tr-pred.npy', train_pred)
    np.save('te-pred.npy', test_pred)

In [11]:
from ipywidgets import interact

@interact(component=(0, 40))
def plot(component):
    """Compare the real trend with next-value predictions for one component.

    The left panel shows the start of the train segment, the right panel the
    start of the test segment. Each panel draws `width + window_len` real
    samples and up to `width` predictions; predictions are drawn from x =
    `window_len` onwards because the first window has no prediction.

    NOTE(review): row 0 of the arrays returned by `next_value_prediction` is
    an all-zero placeholder, so the predicted curve starts at 0 and may sit
    one step to the right of the value it predicts — confirm the alignment.
    """
    width = 3000
    test_start = len(X_tr) + window_len
    # (title, real series, predicted series) in left-to-right order.
    panels = (
        ('Train',
         decomposed.trend[:width + window_len, component],
         train_pred[:width, component]),
        ('Test',
         decomposed.trend[test_start:test_start + width + window_len, component],
         test_pred[:width, component]),
    )

    plt.figure(figsize=(18, 8))
    plt.suptitle('Next value prediction of trend')

    for position, (title, real, pred) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.title(title)
        plt.plot(real, label='real', alpha=0.7)
        # Size the x axis from the data so fewer than `width` predictions
        # still plot instead of raising a shape mismatch.
        plt.plot(list(range(window_len, window_len + len(pred))),
                 pred,
                 label='pred',
                 alpha=0.8)
        # Legend on every panel (previously only the Test panel had one).
        plt.legend()

interactive(children=(IntSlider(value=20, description='component', max=40), Output()), _dom_classes=('widget-i…

In [88]:
# inp = torch.cat(
#     (torch.tensor(decomposed.trend[len(X_tr) + 1 - window_len:len(X_tr)]).float(), torch.tensor(train_pred[-1, None, :])),
#     axis=0).view(1, window_len, data.shape[1])
# for i in tqdm(range(len(X_te))):
#     states = get_prev_states(model, 1)
#     out = model(inp[:, -window_len:], states)
#     inp = torch.cat((inp, out.view(1, 1, -1)), axis=1)
    
# pred = inp.squeeze().detach().numpy()[window_len:]

# @interact(component=(0, 40))
# def f(component):
#     sz = 100
#     plt.title(f'Next {sz} values prediction of trend')
#     plt.plot(pred[:sz, component], label='pred')
#     plt.plot(decomposed.trend[len(X_tr):len(X_tr)+sz, component], label='ground truth')
#     plt.legend()