In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np 
from numpy import ndarray
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sk 
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler, robust_scale
from sktime.transformations.series.impute import Imputer

from functools import partial
from types import SimpleNamespace


import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# LOCAL
# PROJECT_PATH = '/Users/ivan_zorin/Documents/DEV/code/ntl/'
# DATA_PATH = '/Users/ivan_zorin/Documents/DEV/data/sgcc/data.csv'
# LOG_DIR = '/Users/ivan_zorin/Documents/DEV/runs/debug/trainer'

# ZHORES
PROJECT_PATH = '/trinity/home/ivan.zorin/dev/code/ntl/'
DATA_PATH = '/trinity/home/ivan.zorin/dev/data/sgcc/data.csv'
LOG_DIR = '/trinity/home/ivan.zorin/dev/logs/debug/'


import sys
sys.path.append(PROJECT_PATH)
from ntl.data import SGCCDataset, data_train_test_split
from ntl.data import FillNA, Scale, Reshape, ToTensor, Cutout
from ntl.models import AE2dCNN
from ntl.trainer import ArgsTrainer
from ntl.utils import fix_seed

In [30]:
fix_seed(42)

transforms = [FillNA('drift'), 
            Cutout(256), 
            Scale('robust'), 
            Reshape((16, 16)),
            lambda x: x[None],
            ToTensor()
]
normal_data = SGCCDataset(DATA_PATH, label=0, nan_ratio=0.75, transforms=transforms, year=2016)
anomal_data = SGCCDataset(DATA_PATH, label=1, nan_ratio=1.0, transforms=transforms, year=2016)

train, test = data_train_test_split(normal_data, anomal_data)

train_loader = DataLoader(train, batch_size=1, drop_last=True, shuffle=True)
test_loader = DataLoader(test, batch_size=1, shuffle=False)


Random seed set as 42


In [31]:

model = AE2dCNN()
loss = nn.MSELoss(reduction='none')
optim = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim, factor=0.5, patience=2)
logger = SummaryWriter(log_dir=LOG_DIR) 

config = SimpleNamespace(**{
    'debug': True,
    'n_debug_batches': np.nan,
    'log_step': 5,
    'n_epochs': 100
})

trainer = ArgsTrainer(
    train_loader=train_loader,
    val_loader=test_loader,
    model=model,
    loss=loss,
    optim=optim,
    scheduler=scheduler,
    config=config,
    logger=logger
)


In [29]:

trainer.train()

  0%|          | 0/8780 [00:00<?, ?it/s]



KeyboardInterrupt: 

In [17]:
emb = [np.random.rand(4, 1024), np.random.rand(1024)]
emb[0].shape, emb[1].shape

((4, 1024), (1024,))

In [26]:
emb[1][...].shape

(1024,)

In [21]:
emb2 = np.concatenate(emb)

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)

* normalize data to 0,1 scale | check
* fill nans | check
* cut-out some piece | check
* reshape into matrix (7x52, square, other?) | check
* use this matrix as input into AE | check


Two ways of transformation 
The first one 
1. cut-out some piece
2. fill na
3. scale 
4. reshape

The second one 
1. fill na
2. scale
3. cut-out some piece
5. reshape
