In [3]:
# Register the TensorBoard notebook extension (provides the %tensorboard magic).
%load_ext tensorboard
# Enable autoreload so that edits to imported project modules are picked up
# without restarting the kernel; mode 2 reloads all modules before each cell runs.
%load_ext autoreload
%autoreload 2

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [4]:
import os
import sys

import numpy as np
import torch
import torch.nn.functional as F
from torch import nn
from torch import utils
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm.auto import tqdm

# sys.path.append('/content/ntl/')
sys.path.append('/Users/ivan_zorin/Documents/DEV/code/ntl/')

from data.data import sgcc_train_test_split, SGCCDataset
from models import LSTMAE_old

In [10]:
# --- Data -------------------------------------------------------------------
# The CSV location can be overridden with the SGCC_DATA_PATH environment
# variable; the default is the original local path.
data_path = os.environ.get('SGCC_DATA_PATH', '/Users/ivan_zorin/Documents/DEV/data/sgcc/data.csv')
scale = 'minmax'    # forwarded to SGCCDataset(scale=...)
nan_ratio = 0.7     # forwarded to SGCCDataset(nan_ratio=...) for the label-0 dataset
batch_size = 32

# --- Model / optimisation ---------------------------------------------------
input_size = 1      # first argument of LSTMAE_old
hidden_size = [64]  # second argument of LSTMAE_old
lr = 0.0001         # Adam learning rate
factor = 0.5        # ReduceLROnPlateau multiplicative LR decay
patience = 3        # ReduceLROnPlateau patience (scheduler steps without improvement)

N_epochs = 20

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# --- Reproducibility --------------------------------------------------------
# Seed the global RNGs so that random_split and the shuffling DataLoader
# produce the same split / batch order after every "Restart & Run All".
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

In [11]:
# Two views of the same CSV, selected by label. Only the label-0 dataset is
# given nan_ratio; the label-1 dataset relies on SGCCDataset's default for it.
normal_dataset = SGCCDataset(path=data_path, label=0, scale=scale, nan_ratio=nan_ratio)
anomal_dataset = SGCCDataset(path=data_path, label=1, scale=scale)

# Hold out two label-0 subsets, each as large as the label-1 dataset:
# one for validation and one to balance the test set.
n_anomal = len(anomal_dataset)
n_train = len(normal_dataset) - 2 * n_anomal
assert n_train > 0, (
    f'not enough label-0 samples ({len(normal_dataset)}) to hold out '
    f'2 x {n_anomal} for validation and test'
)

train_data, val_data, test_normal_data = utils.data.random_split(
    normal_dataset, [n_train, n_anomal, n_anomal]
)
test_data = utils.data.ConcatDataset([test_normal_data, anomal_dataset])

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
# Validation must use the held-out validation split: val_loader drives the LR
# scheduler, so building it from test_data (as before) leaked the test set
# into training decisions and left val_data unused.
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
# The mixed label-0 / label-1 test set gets its own loader for final evaluation.
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [12]:
# The model has to live on the same device the batches are moved to in the
# training loop; move it before the optimizer captures its parameters.
model = LSTMAE_old(input_size, hidden_size).to(device)
optim = torch.optim.Adam(model.parameters(), lr=lr)
# The scheduler is stepped with the validation loss, hence mode='min'.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim, mode='min', factor=factor, patience=patience)
loss_fn = nn.L1Loss()
# '~' is not expanded by SummaryWriter (it would create a literal '~' directory
# in the working directory), so resolve the home directory explicitly.
logger = SummaryWriter(os.path.expanduser('~/Documents/DEV/runs/debug/'))

In [None]:
def _run_epoch(model, loader, loss_fn, device, logger, tag, epoch, optimizer=None):
    """Run one pass over ``loader`` and log every batch loss to TensorBoard.

    Args:
        model: module whose forward pass returns ``(z, x_hat)``.
        loader: iterable yielding 3-tuples; only the middle element is fed to
            the model, the other two are ignored.
        loss_fn: callable ``loss_fn(x_hat, x)`` returning a scalar tensor.
        device: device the inputs are moved to.
        logger: writer exposing ``add_scalar(tag, value, global_step)``.
        tag: prefix of the scalar tag; the loss is logged as ``f'{tag}/loss'``.
        epoch: zero-based epoch index, used to compute the global step.
        optimizer: when given, the model is put in train mode and updated after
            every batch; when ``None`` the model is put in eval mode and
            gradients are disabled.

    Returns:
        ``(losses, embeddings)``: the list of per-batch losses as Python floats
        and a 2-D array holding one embedding row per sample.
    """
    training = optimizer is not None
    model.train(training)
    n_batches = len(loader)
    losses, embeddings = [], []

    with torch.set_grad_enabled(training):
        for i, (_, x, _) in enumerate(tqdm(loader, desc=f'{tag} epoch {epoch}')):
            x = x.to(device)
            z, x_hat = model(x)
            # Conventional (prediction, target) order; L1 is symmetric, so the
            # value is the same as with the arguments swapped.
            loss = loss_fn(x_hat, x)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch_loss = loss.item()
            losses.append(batch_loss)
            # Flatten to (batch, features) instead of squeeze(): squeeze() also
            # drops the batch axis when the last batch holds a single sample,
            # which breaks the concatenation below.
            # NOTE(review): assumes any extra axes of z are singleton or
            # per-sample features - confirm against LSTMAE_old.
            embeddings.append(z.detach().cpu().numpy().reshape(x.shape[0], -1))
            # Each loader advances its own step counter by its own length.
            logger.add_scalar(f'{tag}/loss', batch_loss, i + n_batches * epoch)

    return losses, np.concatenate(embeddings)


# Kept as notebook-level names; the helper derives the step from len(loader).
train_len = len(train_loader)
val_len = len(val_loader)

for epoch in range(N_epochs):
    train_losses, train_embeddings = _run_epoch(
        model, train_loader, loss_fn, device, logger, 'train', epoch, optimizer=optim
    )
    # Losses are plain floats (loss.item()), so average them with numpy;
    # torch.cat only accepts tensors.
    train_loss = float(np.mean(train_losses))
    # add_embedding's first positional argument is the embedding matrix; the
    # tag and the step must be passed by keyword.
    logger.add_embedding(train_embeddings, global_step=epoch, tag='train/embs')

    val_losses, val_embeddings = _run_epoch(
        model, val_loader, loss_fn, device, logger, 'val', epoch
    )
    val_loss = float(np.mean(val_losses))
    logger.add_embedding(val_embeddings, global_step=epoch, tag='val/embs')

    scheduler.step(val_loss)
    logger.add_scalars('loss', {'train': train_loss, 'val': val_loss}, epoch)

# Make sure everything logged so far is written to disk.
logger.flush()
    
    