In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import pyarrow.parquet as pq
import sys
import torch
import random
import torch.nn as nn
import torch.nn.functional as F
import math
from IPython.display import clear_output
import logging
from pathlib import Path
from functools import partial

import matplotlib.pyplot as plt
sys.path.append('../../../')

from configs.data_configs.rosbank import data_configs
from configs.model_configs.gen.rosbank import model_configs
from src.models.mTAND.model import MegaNet, MegaNetCE, MegaNetSupervised
from src.data_load.dataloader import create_data_loaders, create_test_loader
from src.trainers.trainer_mTAND import MtandTrainer

from src.create_embeddings import create_embeddings

from src.data_load import split_strategy
from src.data_load.data_utils import prepare_data
from src.data_load.splitting_dataset import (
    ConvertingTrxDataset,
    TargetDataset,
    DropoutTrxDataset,
    SplittingDataset,
    TargetEnumeratorDataset,
)
from src.data_load.dataloader import collate_splitted_rows, padded_collate, PaddedBatch
from torch.utils.data import DataLoader

from sklearn.linear_model import LogisticRegression

from src.models.preprocessors import FeatureProcessor

from torch.autograd import Variable

from src.models.gen_models import SeqGen

from sklearn.metrics import roc_auc_score

  from tqdm.autonotebook import tqdm


In [None]:
def random_generator(batch_size, z_dim, T_mb, max_seq_len):
  """Sample zero-padded uniform noise sequences for a whole batch.

  Args:
    - batch_size: number of sequences to generate
    - z_dim: number of noise features per time step
    - T_mb: per-sequence lengths, indexable by position in the batch
    - max_seq_len: length every sequence is padded to

  Returns:
    - Z_mb: float32 tensor of shape (batch_size, max_seq_len, z_dim); for
      sequence i the first T_mb[i] steps hold U(0, 1) samples and the
      remaining steps are zeros
  """
  padded_samples = []
  for sample_idx in range(batch_size):
    seq_len = T_mb[sample_idx]
    # Draw only the valid steps; everything past seq_len stays zero padding.
    noise = np.random.uniform(0., 1, [seq_len, z_dim])
    padded = np.zeros([max_seq_len, z_dim])
    padded[:seq_len, :] = noise
    padded_samples.append(padded)
  stacked = np.stack(padded_samples)
  return torch.tensor(stacked).float()

def _weights_init(m):
    classname = m.__class__.__name__
    if isinstance(m, nn.Linear):
        init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0)
    elif classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('Norm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
    elif classname.find("GRU") != -1:
      for name,param in m.named_parameters():
        if 'weight_ih' in name:
          init.xavier_uniform_(param.data)
        elif 'weight_hh' in name:
          init.orthogonal_(param.data)
        elif 'bias' in name:
          param.data.fill_(0)

class Encoder(nn.Module):
    """Embedding network from the original feature space to the latent space.

    A stacked GRU followed by a per-time-step linear layer and an optional
    sigmoid.

    Args:
      - input_size: number of features per time step of the input
      - hidden_rnn: GRU hidden size, which is also the latent dimension
      - num_layers: number of stacked GRU layers

    Returns (from ``forward``):
      - H: embeddings, one ``hidden_rnn``-sized vector per time step
    """
    def __init__(self, input_size, hidden_rnn, num_layers):
        super(Encoder, self).__init__()
        # Stored as `self.rnn` (the name `forward` reads, and the name the
        # sibling networks use); `GRU` must be qualified as `nn.GRU`.
        self.rnn = nn.GRU(input_size=input_size, hidden_size=hidden_rnn, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_rnn, hidden_rnn)
        self.sigmoid = nn.Sigmoid()
        # The reference implementation uses a rather sketchy weight initialisation.
        self.apply(_weights_init)

    def forward(self, x, sigmoid=True):
        """Embed a batch of sequences.

        Args:
          - x: input features; the GRU is batch-first, so the layout is
            (batch, seq_len, input_size)
          - sigmoid: when true, squash the embeddings with a sigmoid

        Returns:
          - H: embeddings of shape (batch, seq_len, hidden_rnn)
        """
        # Feed the argument `x`, not the builtin `input`.
        e_outputs, _ = self.rnn(x)
        H = self.fc(e_outputs)
        if sigmoid:
            H = self.sigmoid(H)
        return H

class Recovery(nn.Module):
    """Decoder that maps latent sequences back to the original feature space.

    Constructor arguments:
      - input_size: number of features per time step of the recovered data
      - hidden_rnn: size of the latent vectors fed into the GRU
      - num_layers: number of stacked GRU layers

    ``forward`` returns:
      - X_tilde: recovered data, one ``input_size``-sized vector per time step
    """
    def __init__(self, input_size, hidden_rnn, num_layers):
        super().__init__()
        # The GRU consumes latent vectors and already emits data-sized states.
        gru_kwargs = dict(
            input_size=hidden_rnn,
            hidden_size=input_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.rnn = nn.GRU(**gru_kwargs)
        self.fc = nn.Linear(in_features=input_size, out_features=input_size)
        self.sigmoid = nn.Sigmoid()
        self.apply(_weights_init)

    def forward(self, input, sigmoid=True):
        """Decode latent sequences; apply a sigmoid unless ``sigmoid`` is false."""
        hidden_seq = self.rnn(input)[0]
        recovered = self.fc(hidden_seq)
        return self.sigmoid(recovered) if sigmoid else recovered

class Generator(nn.Module):
    """Generator function: generate time-series data in latent space.

    Maps noise sequences with ``input_size`` features per step to latent
    sequences with ``hidden_rnn`` features per step.

    Args:
      - input_size: number of features per time step of the noise ``Z``
      - hidden_rnn: latent dimension of the generated embedding
      - num_layers: number of stacked GRU layers

    Returns (from ``forward``):
      - E: generated embedding
    """
    def __init__(self, input_size, hidden_rnn, num_layers):
        super(Generator, self).__init__()
        # The dimensions were previously copied from Recovery (latent -> data).
        # TG feeds this network noise of the data width and passes its output
        # to the Supervisor, whose GRU expects `hidden_rnn` features, so the
        # mapping has to be data-width -> latent-width.
        self.rnn = nn.GRU(input_size=input_size, hidden_size=hidden_rnn, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_rnn, hidden_rnn)
        self.sigmoid = nn.Sigmoid()
        self.apply(_weights_init)

    def forward(self, input, sigmoid=True):
        """Generate latent sequences from noise.

        Args:
          - input: noise, batch-first: (batch, seq_len, input_size)
          - sigmoid: when true, squash the output with a sigmoid

        Returns:
          - E: tensor of shape (batch, seq_len, hidden_rnn)
        """
        g_outputs, _ = self.rnn(input)
        E = self.fc(g_outputs)
        if sigmoid:
            E = self.sigmoid(E)
        return E


class Supervisor(nn.Module):
    """Generate next sequence using the previous sequence.

    Operates entirely in latent space: input and output both have
    ``hidden_rnn`` features per time step.

    Args:
      - hidden_rnn: latent dimension (GRU input size and hidden size)
      - num_layers: number of stacked GRU layers

    Returns (from ``forward``):
      - S: sequence produced from the latent representations it was given
    """
    def __init__(self, hidden_rnn, num_layers):
        super(Supervisor, self).__init__()
        self.rnn = nn.GRU(input_size=hidden_rnn, hidden_size=hidden_rnn, num_layers=num_layers, batch_first=True)
        # `input_size` is not a parameter of this class; the GRU emits
        # `hidden_rnn` features, so the linear layer is hidden -> hidden.
        self.fc = nn.Linear(hidden_rnn, hidden_rnn)
        self.sigmoid = nn.Sigmoid()
        self.apply(_weights_init)

    def forward(self, input, sigmoid=True):
        """Run the supervisor over latent sequences.

        Args:
          - input: latent sequences, batch-first: (batch, seq_len, hidden_rnn)
          - sigmoid: when true, squash the output with a sigmoid

        Returns:
          - S: tensor of shape (batch, seq_len, hidden_rnn)
        """
        s_outputs, _ = self.rnn(input)
        S = self.fc(s_outputs)
        if sigmoid:
            S = self.sigmoid(S)
        return S


class Discriminator(nn.Module):
    """Discriminate the original and synthetic time-series data.

    A stacked GRU over latent sequences followed by a linear layer that emits
    one unnormalised score per time step (no sigmoid is applied here).

    Args:
      - hidden_rnn: latent dimension (GRU input size and hidden size)
      - num_layers: number of stacked GRU layers

    Returns (from ``forward``):
      - Y_hat: classification scores between original and synthetic time-series
    """
    def __init__(self, hidden_rnn, num_layers):
        super(Discriminator, self).__init__()
        self.rnn = nn.GRU(input_size=hidden_rnn, hidden_size=hidden_rnn, num_layers=num_layers, batch_first=True)
        # `input_size` is not a parameter of this class; the GRU emits
        # `hidden_rnn` features, which the linear layer reduces to one score.
        self.fc = nn.Linear(hidden_rnn, 1)
        self.apply(_weights_init)

    def forward(self, input):
        """Score latent sequences.

        Args:
          - input: latent sequences, batch-first: (batch, seq_len, hidden_rnn)

        Returns:
          - Y_hat: tensor of shape (batch, seq_len, 1)
        """
        d_outputs, _ = self.rnn(input)
        Y_hat = self.fc(d_outputs)
        return Y_hat


class TG(nn.Module):
    """Container wiring together the five sub-networks of the generative model.

    Holds an ``Encoder``, a ``Recovery`` decoder, a ``Supervisor``, a
    ``Generator`` and a ``Discriminator`` and exposes loss helpers for the
    separate training phases.

    Args:
      - input_size: number of features per time step of the data
      - hidden_rnn: latent (GRU hidden) dimension shared by the sub-networks
      - num_layers: number of stacked GRU layers in every sub-network

    The defaults exist only so that the zero-argument ``TG()`` call keeps
    working; pass values that match the real data.
    """

    def __init__(self, input_size=7, hidden_rnn=24, num_layers=3):
        # nn.Module.__init__ takes no positional arguments; the sizes belong to
        # this constructor's own signature.
        super().__init__()
        self.encoder = Encoder(input_size=input_size, hidden_rnn=hidden_rnn, num_layers=num_layers)
        self.decoder = Recovery(input_size=input_size, hidden_rnn=hidden_rnn, num_layers=num_layers)
        self.supervisor = Supervisor(hidden_rnn=hidden_rnn, num_layers=num_layers)
        self.generator = Generator(input_size=input_size, hidden_rnn=hidden_rnn, num_layers=num_layers)
        self.discriminator = Discriminator(hidden_rnn=hidden_rnn, num_layers=num_layers)

    def train_embedder(self, x):
        """Reconstruction loss of the encoder/decoder pair.

        Args:
          - x: data batch, (batch, seq_len, input_size)

        Returns:
          - scalar tensor: squared error between ``x`` and its reconstruction,
            summed over time and features and averaged over the batch
        """
        latens = self.encoder(x)
        decoded = self.decoder(latens)

        # Compare the reconstruction with the input. Comparing the latent code
        # with `x` (as before) ignores the decoder and mixes two spaces.
        mse = F.mse_loss(decoded, x, reduction='none').sum(dim=[1, 2]).mean()

        return mse

    def train_generator(self, x):
        """Loss pulling generated latent sequences towards encoded real ones.

        Args:
          - x: data batch, (batch, seq_len, input_size)

        Returns:
          - scalar tensor: squared error between supervised generator output
            and the encoder's latents, summed over time and features and
            averaged over the batch
        """
        bs, seq_len, d = x.size()
        # Full-length noise for every sequence, moved to the data's device.
        Z = random_generator(bs, d, [seq_len] * bs, seq_len).to(x.device)
        gen_latens = self.supervisor(self.generator(Z))
        latens = self.encoder(x)

        # NOTE(review): the previous loss was mse(latens, x), which never
        # touched `gen_latens` and so gave the generator no gradient. Matching
        # generated latents to real latents is the presumed intent — confirm,
        # and decide whether `latens` should be detached here.
        mse = F.mse_loss(gen_latens, latens, reduction='none').sum(dim=[1, 2]).mean()
        return mse

    def train_joint(self, x):
        """Joint training step. Placeholder: no body was ever written."""
        raise NotImplementedError("TG.train_joint is not implemented yet")

    def train_discriminator(self, x):
        """Discriminator training step. Placeholder: no body was ever written."""
        raise NotImplementedError("TG.train_discriminator is not implemented yet")

    def generate(self, bs, d, lens, max_len):
        """Sample latent sequences from the generator.

        Args:
          - bs: number of sequences
          - d: number of noise features per time step
          - lens: per-sequence valid lengths
          - max_len: length every noise sequence is padded to

        Returns:
          - gen_latens: the generator output for the sampled noise
        """
        Z = random_generator(bs, d, lens, max_len)
        # Noise is created on the CPU; move it next to the model weights.
        Z = Z.to(next(self.parameters()).device)
        # Generator.forward returns a single tensor, so there is nothing to unpack.
        gen_latens = self.generator(Z)
        return gen_latens

    def discriminate(self, gen_latens):
        """Return the discriminator scores for the given latent sequences."""
        d_scores = self.discriminator(gen_latens)
        return d_scores

    def discriminator_loss(self, gen_latens):
        """Discriminator scores summed over dim 1, then averaged."""
        scores = self.discriminate(gen_latens)
        return scores.sum(dim=[1]).mean()

    def generation_loss(self, x, gen_latens):
        """Squared error between ``x`` and ``gen_latens``.

        Summed over dims 1 and 2 and averaged over the batch.

        NOTE(review): the body used to reference an undefined ``recon_x``. The
        two arguments are compared directly, so they must have the same shape;
        if ``gen_latens`` is still in latent space it presumably has to be
        decoded first — confirm the intended call.
        """
        return F.mse_loss(x, gen_latens, reduction='none').sum(dim=[1, 2]).mean()

In [65]:
# Smoke-test input: a random tensor of shape (3, 5, 7).
# NOTE(review): presumably (batch, seq_len, features), since every GRU in this
# notebook is built with batch_first=True — confirm.
# NOTE(review): no RNG seed is set in the visible cells, so `x` and every
# number derived from it change between runs.
x = torch.rand(3, 5, 7) 
# Build the model with no constructor arguments.
net = TG()
    

In [66]:
# Call the model on `x` and unpack four results.
# NOTE(review): the TG class defined in this notebook has no forward() method,
# so on a fresh kernel this call raises. The cell and its saved output
# presumably come from an earlier revision of TG — restore forward() or update
# this cell.
d_scores, gen_latens, decoded, x1 = net(x)

(torch.Size([3, 5, 7]), torch.Size([3, 5, 7]))

In [72]:
# Element-wise squared error between `x1` and `decoded`, summed over dims 1
# and 2 and then averaged over what remains.
F.mse_loss(x1, decoded, reduction='none').sum(dim=[1,2]).mean()

tensor(14.1805, grad_fn=<MeanBackward0>)