In [1]:
%load_ext autoreload
%autoreload 2

In [44]:
import numpy as np
import matplotlib.pyplot as plt
import torch 
from torch import nn
import torch.nn.functional as F 
from torch.utils.data import Dataset, DataLoader, random_split, ConcatDataset

import fairseq 
import sequitur as seq
from sequitur.models import LSTM_AE


import os 
import sys
sys.path.append('/Users/ivan_zorin/Documents/DEV/code/ntl/')

from data.data import SGCCDataset
import models
from models import *

In [3]:
data_path = '/Users/ivan_zorin/Documents/DEV/data/sgcc/data.csv'

normal_data = SGCCDataset(data_path, label=0, nan_ratio=0.5)
anomal_data = SGCCDataset(data_path, label=1, nan_ratio=1.0)

print(len(normal_data), len(anomal_data))

28778 3612


In [4]:
# data preparation for sequitur
train_data = [torch.tensor(sample[1]) for sample in normal_data]
test_data = [torch.tensor(sample[1]) for sample in anomal_data]


In [55]:
train_data[0][1].shape

(1034, 1)

In [18]:
model_kwargs = {
    'input_dim': 1,
    'encoding_dim': 256,
    'h_dims': [512, 512],
    'h_activ': F.relu,
    'out_activ': F.relu
}

# encoding_dim = 256

model = LSTM_AE(**model_kwargs)
model


x = train_data[0]
out = model(x)

out

LSTM_AE(
  (encoder): Encoder(
    (layers): ModuleList(
      (0): LSTM(1, 512, batch_first=True)
      (1): LSTM(512, 512, batch_first=True)
      (2): LSTM(512, 256, batch_first=True)
    )
  )
  (decoder): Decoder(
    (layers): ModuleList(
      (0): LSTM(256, 512, batch_first=True)
      (1-2): 2 x LSTM(512, 512, batch_first=True)
    )
  )
)

In [32]:
model2_kwargs = {
    'input_dim': 1,
    'encoding_dim': 256,
    'h_dims': [512, 512],
    'h_activ': 'relu',
    'out_activ': 'relu'
}



In [33]:
model2 = SequiturLSTMAE(model2_kwargs)
model

LSTM_AE(
  (encoder): Encoder(
    (layers): ModuleList(
      (0): LSTM(1, 512, batch_first=True)
      (1): LSTM(512, 512, batch_first=True)
      (2): LSTM(512, 256, batch_first=True)
    )
  )
  (decoder): Decoder(
    (layers): ModuleList(
      (0): LSTM(256, 512, batch_first=True)
      (1-2): 2 x LSTM(512, 512, batch_first=True)
    )
  )
)

In [34]:
out = model2(x)

In [37]:
out[0].shape, out[1].shape

(torch.Size([256]), torch.Size([1034, 1]))

In [None]:
encoder, decoder, embs, losses = seq.quick_train(LSTM_AE, train_data, encoding_dim=encoding_dim, verbose=True, lr=1e-05, epochs=100, denoise=True, kwargs=model_kwargs)

In [42]:
experiment_config = '/Users/ivan_zorin/Documents/DEV/code/ntl/configs/trainer_debug.yaml'
path_config = '/Users/ivan_zorin/Documents/DEV/code/ntl/configs/local_pathes.yaml'

config = load_config(experiment_config, path_config)

device = config.device


In [41]:
normal_dataset = SGCCDataset(path=config.data_path, label=0, scale=config.scale, nan_ratio=config.nan_ratio)
anomal_dataset = SGCCDataset(path=config.data_path, label=1, scale=config.scale)

train_data, val_data, test_normal_data = random_split(normal_dataset, [len(normal_dataset) - 2*len(anomal_dataset), len(anomal_dataset), len(anomal_dataset)])
test_data = ConcatDataset([test_normal_data, anomal_dataset])

train_loader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True)
val_loader = DataLoader(test_data, batch_size=config.batch_size, shuffle=config.debug)

In [None]:
batch = next(iter(train_loader))
y, x, _ = batch
x = x.to(device)
x.shape

In [65]:
class SequiturLSTMAE(LSTM_AE):
    def __init__(self, input_dim, encoding_dim, h_dims=[], h_activ=nn.Sigmoid(), out_activ=nn.Tanh()):

        h_activ = getattr(nn, h_activ)()
        out_activ = getattr(nn, out_activ)()
        
        super().__init__(input_dim, encoding_dim, h_dims, h_activ, out_activ)
        
    def forward(self, x):
        seq_len = x.shape[1]
        print(x.shape)
        z = self.encoder(x)
        x = self.decoder(z, seq_len)
        
        return z, x



model = SequiturLSTMAE(**config.model_kwargs).to(device)
model

SequiturLSTMAE(
  (encoder): Encoder(
    (layers): ModuleList(
      (0): LSTM(1, 512, batch_first=True)
      (1): LSTM(512, 512, batch_first=True)
      (2): LSTM(512, 256, batch_first=True)
    )
    (h_activ): ReLU()
    (out_activ): ReLU()
  )
  (decoder): Decoder(
    (layers): ModuleList(
      (0): LSTM(256, 512, batch_first=True)
      (1-2): 2 x LSTM(512, 512, batch_first=True)
    )
    (h_activ): ReLU()
  )
)

In [67]:
model.encoder

Encoder(
  (layers): ModuleList(
    (0): LSTM(1, 512, batch_first=True)
    (1): LSTM(512, 512, batch_first=True)
    (2): LSTM(512, 256, batch_first=True)
  )
  (h_activ): ReLU()
  (out_activ): ReLU()
)

In [69]:
model.encoder(x)

tensor([[0.0000, 0.0000, 0.0345,  ..., 0.0198, 0.0200, 0.0106],
        [0.0000, 0.0000, 0.0345,  ..., 0.0191, 0.0192, 0.0110],
        [0.0000, 0.0000, 0.0344,  ..., 0.0197, 0.0198, 0.0106],
        ...,
        [0.0000, 0.0000, 0.0344,  ..., 0.0196, 0.0197, 0.0107],
        [0.0000, 0.0000, 0.0345,  ..., 0.0199, 0.0200, 0.0106],
        [0.0000, 0.0000, 0.0344,  ..., 0.0193, 0.0194, 0.0108]],
       grad_fn=<SqueezeBackward0>)

In [70]:
z, x_hat = model(x)

# FIXME problem with dimensions in decoder's output. probably x_hat needs to be reshaped like [batch, len, fetaures]. currently it's as [batch * len, features]

torch.Size([8, 1034, 1])


In [71]:
z.shape

torch.Size([8, 256])

In [72]:
x_hat.shape

torch.Size([8272, 1])

In [76]:
model.decoder.dense_matrix.shape

torch.Size([512, 1])