In [3]:
%load_ext tensorboard
%load_ext autoreload
%autoreload 2

%pip -q install lightning
%pip -q install wandb
%pip -q install fire

# clonning git repo
token = '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'
!git clone https://$token@github.com/izorin/ntl.git


# mounting google drive
from google.colab import drive
drive.mount('/content/drive')
import torch
from torch import nn
import torch.nn.functional as F
from torch import utils
from torch.utils.data import DataLoader

import numpy as np
from tqdm.auto import tqdm, trange

import sys
# sys.path.append('/Users/ivan_zorin/Documents/DEV/code/ntl/')
sys.path.append('/content/ntl/')


from data.data import sgcc_train_test_split, SGCCDataset
from models import LSTMAE_old

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [10]:
# configs
# Local (non-Colab) alternative for data_path:
# data_path = '/Users/ivan_zorin/Documents/DEV/data/sgcc/data.csv'
data_path = '/content/drive/MyDrive/4. MODELS/NTL/data/data.csv'
run_path = '/content/runs/'
scale = 'minmax'
nan_ratio = 0.7
batch_size = 32
seed = 42  # fixes the data split, weight init and shuffling order between runs

input_size = 1
hidden_size = [64]
lr = 0.0001
factor = 0.5
patience = 3

N_epochs = 20

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Seed the global RNG so model initialisation and DataLoader shuffling are repeatable.
torch.manual_seed(seed)

# data
normal_dataset = SGCCDataset(path=data_path, label=0, scale=scale, nan_ratio=nan_ratio)
anomal_dataset = SGCCDataset(path=data_path, label=1, scale=scale)

# The normal samples are split three ways: train / validation / test-normal.
# Validation and test-normal each get as many samples as there are anomalies.
n_anomal = len(anomal_dataset)
n_train = len(normal_dataset) - 2 * n_anomal
if n_train <= 0:
    raise ValueError(
        f'not enough normal samples ({len(normal_dataset)}) to hold out '
        f'2 x {n_anomal} samples for validation and test'
    )

train_data, val_data, test_normal_data = utils.data.random_split(
    normal_dataset,
    [n_train, n_anomal, n_anomal],
    generator=torch.Generator().manual_seed(seed),
)
# Test set = held-out normal samples + every anomalous sample.
test_data = utils.data.ConcatDataset([test_normal_data, anomal_dataset])

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
# Validation uses the held-out *validation* split. Previously this loader wrapped
# `test_data`, which left `val_data` unused and let the LR scheduler be driven by
# the test set (including the anomalies).
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
# Kept separate so the test set is only touched for final evaluation.
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# model and train utils
# `torch.utils.tensorboard` is a submodule that `import torch` does not load by
# itself, so import the writer explicitly instead of relying on attribute access.
from torch.utils.tensorboard import SummaryWriter

model = LSTMAE_old(input_size, hidden_size).to(device)
optim = torch.optim.Adam(model.parameters(), lr=lr)
# `verbose` is intentionally not passed: it is deprecated in recent PyTorch releases.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim, mode='min', factor=factor, patience=patience)
loss_fn = nn.L1Loss()
logger = SummaryWriter(run_path)

In [None]:
# Open the TensorBoard UI inline; /content/runs is the directory the SummaryWriter (`run_path`) logs to.
%tensorboard --logdir /content/runs

In [None]:
# training
# Per epoch: one optimisation pass over `train_loader`, one gradient-free pass over
# `val_loader`, then the LR scheduler is stepped on the mean validation loss.
# Per-batch losses, per-epoch mean losses and the latent embeddings are written
# to TensorBoard through `logger`.
train_len = len(train_loader)
val_len = len(val_loader)

for epoch in trange(N_epochs, total=N_epochs):
    train_losses, val_losses = [], []
    train_embeddings, val_embeddings = [], []

    model.train()
    # The progress bar is created per epoch: a tqdm wrapper is closed once it has
    # been exhausted, so one built outside the loop only shows up for epoch 0.
    for i, batch in enumerate(tqdm(train_loader, leave=False, desc='Train')):
        optim.zero_grad()
        _, x, _ = batch  # only the second element is used as the model input here
        x = x.to(device)
        z, x_hat = model(x)
        loss = loss_fn(x_hat, x)  # (prediction, target) order; L1 is symmetric

        loss.backward()
        optim.step()

        train_losses.append(loss.item())
        # Flatten to (batch, features) explicitly. A bare `.squeeze()` would also
        # drop the batch axis when the last batch holds a single sample, which
        # breaks np.concatenate below.
        # NOTE(review): assumes every non-batch axis of `z` except one is of size 1
        # (which the original squeeze also relied on) - confirm against LSTMAE_old.
        train_embeddings.append(z.detach().cpu().numpy().reshape(len(x), -1))
        step = i + train_len * epoch
        logger.add_scalar('train/loss', loss.item(), step)

    train_embeddings = np.concatenate(train_embeddings)
    train_loss = sum(train_losses) / len(train_losses)
    logger.add_embedding(tag='train/embs', mat=train_embeddings, global_step=epoch)

    model.eval()
    with torch.no_grad():  # entered once for the whole validation pass
        for i, batch in enumerate(tqdm(val_loader, leave=False, desc='Val')):
            _, x, _ = batch
            x = x.to(device)
            z, x_hat = model(x)
            loss = loss_fn(x_hat, x)

            val_losses.append(loss.item())
            val_embeddings.append(z.cpu().numpy().reshape(len(x), -1))
            # Validation has its own global step counter based on val_len
            # (it was computed from train_len before, leaving val_len unused).
            step = i + val_len * epoch
            logger.add_scalar('val/loss', loss.item(), step)

    val_embeddings = np.concatenate(val_embeddings)
    val_loss = sum(val_losses) / len(val_losses)
    logger.add_embedding(tag='val/embs', mat=val_embeddings, global_step=epoch)

    scheduler.step(val_loss)
    logger.add_scalars('loss', {'train': train_loss, 'val': val_loss}, epoch)
    # Track the learning rate so scheduler reductions are visible in TensorBoard.
    logger.add_scalar('lr', optim.param_groups[0]['lr'], epoch)

# Make sure everything buffered by the writer reaches disk.
logger.flush()
    
    