# Imports

In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import plotly.express as px
import plotly.graph_objects as go

from utils import real_data_loading
from tqdm import tqdm

# from data_loading import sine_data_generation
# from utils import random_generator
# from data_loading import MinMaxScaler

from torch.utils.data import DataLoader


# from utils import extract_time


In [2]:
def train_test_divide (data_x, data_x_hat, data_t, data_t_hat, train_rate = 0.8):
  """Divide train and test data for both original and synthetic data.
  
  Args:
    - data_x: original data
    - data_x_hat: generated data
    - data_t: original time
    - data_t_hat: generated time
    - train_rate: ratio of training data from the original data
  """
  # Divide train/test index (original data)
  no = len(data_x)
  idx = np.random.permutation(no)
  train_idx = idx[:int(no*train_rate)]
  test_idx = idx[int(no*train_rate):]
    
  train_x = [data_x[i] for i in train_idx]
  test_x = [data_x[i] for i in test_idx]
  train_t = [data_t[i] for i in train_idx]
  test_t = [data_t[i] for i in test_idx]      
    
  # Divide train/test index (synthetic data)
  no = len(data_x_hat)
  idx = np.random.permutation(no)
  train_idx = idx[:int(no*train_rate)]
  test_idx = idx[int(no*train_rate):]
  
  train_x_hat = [data_x_hat[i] for i in train_idx]
  test_x_hat = [data_x_hat[i] for i in test_idx]
  train_t_hat = [data_t_hat[i] for i in train_idx]
  test_t_hat = [data_t_hat[i] for i in test_idx]
  
  return train_x, train_x_hat, test_x, test_x_hat, train_t, train_t_hat, test_t, test_t_hat


def extract_time (data):
  """Returns Maximum sequence length and each sequence length.
  
  Args:
    - data: original data
    
  Returns:
    - time: extracted time information
    - max_seq_len: maximum sequence length
  """
  time = list()
  max_seq_len = 0
  for i in range(len(data)):
    max_seq_len = max(max_seq_len, len(data[i][:,0]))
    time.append(len(data[i][:,0]))
    
  return time, max_seq_len

def random_generator (batch_size, z_dim, T_mb, max_seq_len):
  """Random vector generation.
  
  Args:
    - batch_size: size of the random vector
    - z_dim: dimension of random vector
    - T_mb: time information for the random vector
    - max_seq_len: maximum sequence length
    
  Returns:
    - Z_mb: generated random vector
  """
  Z_mb = list()
  for i in range(batch_size):
    temp = np.zeros([max_seq_len, z_dim])
    temp_Z = np.random.uniform(0., 1, [T_mb[i], z_dim])
    temp[:T_mb[i],:] = temp_Z
    Z_mb.append(temp_Z)
  return Z_mb


def batch_generator(data, time, batch_size):
  """Mini-batch generator.
  
  Args:
    - data: time-series data
    - time: time information
    - batch_size: the number of samples in each batch
    
  Returns:
    - X_mb: time-series data in each batch
    - T_mb: time information in each batch
  """
  no = len(data)
  idx = np.random.permutation(no)
  train_idx = idx[:batch_size]     
            
  X_mb = list(data[i] for i in train_idx)
  T_mb = list(time[i] for i in train_idx)
  
  return X_mb, T_mb

# Define Class for Module Construction

In [3]:
class Time_GAN_module(nn.Module):
    """
    Class from which a module of the Time GAN Architecture can be constructed, 
    consisting of a n_layer stacked RNN layers and a fully connected layer
    
    input_size = dim of data (depending if module operates on latent or non-latent space)
    """
    def __init__(self, input_size, output_size, hidden_dim, n_layers, activation=torch.sigmoid):
        super(Time_GAN_module, self).__init__()

        # Parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.sigma = activation

        #Defining the layers
        # RNN Layer
        self.rnn = nn.GRU(input_size, hidden_dim, n_layers, batch_first=True)   
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_size)
        
    def forward(self, x):
    
            batch_size = x.size(0)

            # Initializing hidden state for first input using method defined below
            hidden = self.init_hidden(batch_size)

            # Passing in the input and hidden state into the model and obtaining outputs
            out, hidden = self.rnn(x, hidden)
        
            # Reshaping the outputs such that it can be fit into the fully connected layer
            out = out.contiguous().view(-1, self.hidden_dim)
            out = self.fc(out)
            
            if self.sigma == nn.Identity:
                idendity = nn.Identity()
                return idendity(out)
                
            out = self.sigma(out)
            
            # HIDDEN STATES WERDEN IN DER PAPER IMPLEMENTIERUNG AUCH COMPUTED, ALLERDINGS NICHT BENUTZT?
            
            return out, hidden
    
    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        # We'll send the tensor holding the hidden state to the device we specified earlier as well
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
        return hidden

In [4]:
file = 'stock_data.csv'
seq_len = 24
data = real_data_loading(file, seq_len)
data = torch.Tensor(np.array(data))

In [5]:
file = 'GOOG.csv'
seq_len = 24
goog = real_data_loading(file, seq_len)
goog = torch.Tensor(np.array(goog))

In [6]:
input_size = 6 # sequence length = number of features
output_size = 24
hidden_dim = 24
n_layers = 3
gamma = 1

no, seq_len, dim = len(data), 24, 6

batch_size = 128
epoch = 50

In [6]:
net = nn.GRU(input_size, hidden_dim, n_layers, batch_first=True)
fc = nn.Linear(hidden_dim, output_size)
input = data[:batch_size,:,:]
h0 = torch.randn(n_layers, batch_size, hidden_dim)
output, hn = net(input, h0)

In [48]:
print("Model's state_dict")
for param_tensor in net.state_dict():
    print(param_tensor, '\t', net.state_dict()[param_tensor].size())

Model's state_dict
weight_ih_l0 	 torch.Size([72, 6])
weight_hh_l0 	 torch.Size([72, 24])
bias_ih_l0 	 torch.Size([72])
bias_hh_l0 	 torch.Size([72])
weight_ih_l1 	 torch.Size([72, 24])
weight_hh_l1 	 torch.Size([72, 24])
bias_ih_l1 	 torch.Size([72])
bias_hh_l1 	 torch.Size([72])
weight_ih_l2 	 torch.Size([72, 24])
weight_hh_l2 	 torch.Size([72, 24])
bias_ih_l2 	 torch.Size([72])
bias_hh_l2 	 torch.Size([72])


In [31]:
summary(Embedder, input_size=(batch_size, 24, 6))

Layer (type:depth-idx)                   Output Shape              Param #
Time_GAN_module                          [3072, 24]                --
├─GRU: 1-1                               [128, 24, 24]             9,504
├─Linear: 1-2                            [3072, 24]                600
Total params: 10,104
Trainable params: 10,104
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 31.04
Input size (MB): 0.07
Forward/backward pass size (MB): 1.18
Params size (MB): 0.04
Estimated Total Size (MB): 1.29

In [35]:
Embedder = Time_GAN_module(input_size=dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers)
Embedder

Time_GAN_module(
  (rnn): GRU(6, 24, num_layers=3, batch_first=True)
  (fc): Linear(in_features=24, out_features=24, bias=True)
)

In [36]:
Recovery = Time_GAN_module(input_size=hidden_dim, output_size=dim, hidden_dim=hidden_dim, n_layers=n_layers)
Recovery

Time_GAN_module(
  (rnn): GRU(24, 24, num_layers=3, batch_first=True)
  (fc): Linear(in_features=24, out_features=6, bias=True)
)

In [18]:
Generator = Time_GAN_module(input_size=dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers)
Generator

Time_GAN_module(
  (rnn): GRU(6, 24, num_layers=3, batch_first=True)
  (fc): Linear(in_features=24, out_features=24, bias=True)
)

In [19]:
Supervisor = Time_GAN_module(input_size=hidden_dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers-1)
Supervisor

Time_GAN_module(
  (rnn): GRU(24, 24, num_layers=2, batch_first=True)
  (fc): Linear(in_features=24, out_features=24, bias=True)
)

In [20]:
Discriminator = Time_GAN_module(input_size=hidden_dim, output_size=1, hidden_dim=hidden_dim, n_layers=n_layers, 
                               activation=nn.Identity)
Discriminator

Time_GAN_module(
  (rnn): GRU(24, 24, num_layers=3, batch_first=True)
  (fc): Linear(in_features=24, out_features=1, bias=True)
)

In [21]:
embedder_optimizer = optim.Adam(Embedder.parameters(), lr=0.001)
recovery_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
supervisor_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
discriminator_optimizer = optim.Adam(Discriminator.parameters(), lr=0.001)
generator_optimizer = optim.Adam(Generator.parameters(), lr=0.001)

In [7]:
loader = DataLoader(data, batch_size, shuffle=True, drop_last=True)
X = next(iter(loader))
H, _ = Embedder(X.float())
H_re = torch.reshape(H, (batch_size, seq_len, hidden_dim))
H.shape, H_re.shape

NameError: name 'Embedder' is not defined

# Embedder Training

In [27]:
Embedder = Time_GAN_module(input_size=dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers)
Recovery = Time_GAN_module(input_size=hidden_dim, output_size=dim, hidden_dim=hidden_dim, n_layers=n_layers)
embedder_optimizer = optim.Adam(Embedder.parameters(), lr=0.001)
recovery_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
epoch = 300

In [28]:
print('Start Embedding Network Training')
valLoss = []
trainLoss = []
for e in range(epoch): 
    for batch_index, X in enumerate(loader):
        Embedder.train()
        Recovery.train()
        
        MSE_loss = nn.MSELoss()
        
        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        X_tilde, _ = Recovery(H)
        X_tilde = torch.reshape(X_tilde, (batch_size, seq_len, dim))

        E_loss0 = 10 * torch.sqrt(MSE_loss(X, X_tilde))  

        Embedder.zero_grad()
        Recovery.zero_grad()

        E_loss0.backward(retain_graph=True)

        embedder_optimizer.step()
        recovery_optimizer.step()

        if e in range(1,epoch) and batch_index == 0:
            Embedder.eval()
            Recovery.eval()
            H, _ = Embedder(goog)
            H = torch.reshape(H, (goog.shape[0], seq_len, hidden_dim))
            X_tilde, _ = Recovery(H)
            X_tilde = torch.reshape(X_tilde, (goog.shape[0], seq_len, dim))
            valLoss.append(10 * torch.sqrt(MSE_loss(goog, X_tilde)).detach().numpy())
            H, _ = Embedder(data)
            H = torch.reshape(H, (data.shape[0], seq_len, hidden_dim))
            X_tilde, _ = Recovery(H)
            X_tilde = torch.reshape(X_tilde, (data.shape[0], seq_len, dim))
            trainLoss.append(10 * torch.sqrt(MSE_loss(data, X_tilde)).detach().numpy())
            print('step: '+ str(e) + '/' + str(epoch) + ', e_loss: ' + str(np.sqrt(E_loss0.detach().numpy())))

print('Finish Embedding Network Training')

Start Embedding Network Training
step: 1/300, e_loss: 1.4534404
step: 2/300, e_loss: 1.5355204
step: 3/300, e_loss: 1.330241
step: 4/300, e_loss: 1.1330954
step: 5/300, e_loss: 0.91477704
step: 6/300, e_loss: 0.67764413
step: 7/300, e_loss: 0.6088317
step: 8/300, e_loss: 0.5542601
step: 9/300, e_loss: 0.5431889
step: 10/300, e_loss: 0.5600908
step: 11/300, e_loss: 0.54986775
step: 12/300, e_loss: 0.5422837
step: 13/300, e_loss: 0.5432686
step: 14/300, e_loss: 0.5404009
step: 15/300, e_loss: 0.5210417
step: 16/300, e_loss: 0.47952184
step: 17/300, e_loss: 0.5238298
step: 18/300, e_loss: 0.51491195
step: 19/300, e_loss: 0.4887728
step: 20/300, e_loss: 0.49148282
step: 21/300, e_loss: 0.49025193
step: 22/300, e_loss: 0.5007103
step: 23/300, e_loss: 0.47561878
step: 24/300, e_loss: 0.471286
step: 25/300, e_loss: 0.49326468
step: 26/300, e_loss: 0.4748056
step: 27/300, e_loss: 0.4961755
step: 28/300, e_loss: 0.5065446
step: 29/300, e_loss: 0.48864236
step: 30/300, e_loss: 0.48656026
step: 3

In [26]:
fig = px.line(trainLoss)
fig.add_trace(go.Scatter(y=valLoss, mode="lines"))
fig.show()             

In [31]:
fig = px.line(trainLoss)
fig.add_trace(go.Scatter(y=valLoss, mode="lines"))
fig.show()

In [29]:
Embedder.eval()
Recovery.eval()

Time_GAN_module(
  (rnn): GRU(24, 24, num_layers=3, batch_first=True)
  (fc): Linear(in_features=24, out_features=6, bias=True)
)

In [19]:
H, _ = Embedder(goog)
H = torch.reshape(H, (goog.shape[0], seq_len, hidden_dim))
X_tilde, _ = Recovery(H)
X_tilde = torch.reshape(X_tilde, (goog.shape[0], seq_len, dim))
valLoss = 10 * torch.sqrt(MSE_loss(goog, X_tilde))  
valLoss

tensor(0.3777, grad_fn=<MulBackward0>)

In [22]:
px.line(X_tilde[0].detach())

In [21]:
px.line(goog[0])

# Training with supervised Loss

In [35]:
# Embedder = Time_GAN_module(input_size=dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers)
# Vamos usar o embedder pré treinado
Supervisor = Time_GAN_module(input_size=hidden_dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers-1)
embedder_optimizer = optim.Adam(Embedder.parameters(), lr=0.001)
supervisor_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
epoch = 100

In [38]:
print('Start Training with Supervised Loss Only')
valLoss = []
trainLoss = []
for e in range(epoch): 
    for batch_index, X in enumerate(loader):
        Embedder.train()
        Supervisor.train()

        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        H_hat_supervise, _ = Supervisor(H)
        H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))  

        G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])

        Embedder.zero_grad()
        Supervisor.zero_grad()

        G_loss_S.backward(retain_graph=True)

        embedder_optimizer.step()
        supervisor_optimizer.step()

        if e in range(1,epoch) and batch_index == 0:
            Embedder.eval()
            Supervisor.eval()
            H, _ = Embedder(goog)
            H = torch.reshape(H, (goog.shape[0], seq_len, hidden_dim))
            H_hat_supervise, _ = Supervisor(H)
            H_hat_supervise = torch.reshape(H_hat_supervise, (goog.shape[0], seq_len, hidden_dim)) 
            valLoss.append(MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:]).detach().numpy())
            H, _ = Embedder(data)
            H = torch.reshape(H, (data.shape[0], seq_len, hidden_dim))
            H_hat_supervise, _ = Supervisor(H)
            H_hat_supervise = torch.reshape(H_hat_supervise, (data.shape[0], seq_len, hidden_dim)) 
            trainLoss.append(MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:]).detach().numpy())
            print('step: '+ str(e) + '/' + str(epoch) + ', s_loss: ' + str(np.sqrt(G_loss_S.detach().numpy())))

print('Finish Training with Supervised Loss Only')

Start Training with Supervised Loss Only
step: 1/100, s_loss: 0.02368827
step: 2/100, s_loss: 0.018743552
step: 3/100, s_loss: 0.014971204
step: 4/100, s_loss: 0.012903114
step: 5/100, s_loss: 0.010869999
step: 6/100, s_loss: 0.010323133
step: 7/100, s_loss: 0.009339249
step: 8/100, s_loss: 0.008151464
step: 9/100, s_loss: 0.007883803
step: 10/100, s_loss: 0.0071520777
step: 11/100, s_loss: 0.006448887
step: 12/100, s_loss: 0.0067113275
step: 13/100, s_loss: 0.006464646
step: 14/100, s_loss: 0.0060999803
step: 15/100, s_loss: 0.00596619
step: 16/100, s_loss: 0.005176845
step: 17/100, s_loss: 0.0049726684
step: 18/100, s_loss: 0.004815283
step: 19/100, s_loss: 0.004754663
step: 20/100, s_loss: 0.0042526633
step: 21/100, s_loss: 0.0042157234
step: 22/100, s_loss: 0.0041143917
step: 23/100, s_loss: 0.0041654683
step: 24/100, s_loss: 0.0036213712
step: 25/100, s_loss: 0.0036716044
step: 26/100, s_loss: 0.0033999765
step: 27/100, s_loss: 0.0035637
step: 28/100, s_loss: 0.0034326573
step: 29

In [40]:
torch.save(Embedder.state_dict(), 'Embedder.pth')
torch.save(Supervisor.state_dict(), 'Supervisor.pth')
torch.save(Recovery.state_dict(), 'Recovery.pth')

In [7]:
Embedder = Time_GAN_module(input_size=dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers)
Recovery = Time_GAN_module(input_size=hidden_dim, output_size=dim, hidden_dim=hidden_dim, n_layers=n_layers)
Supervisor = Time_GAN_module(input_size=hidden_dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers-1)
Embedder.load_state_dict(torch.load('Embedder.pth'))
Supervisor.load_state_dict(torch.load('Supervisor.pth'))
Recovery.load_state_dict(torch.load('Recovery.pth'))

<All keys matched successfully>

In [39]:
fig = px.line(trainLoss)
fig.add_trace(go.Scatter(y=valLoss, mode="lines"))
fig.show()

# Treinamento do discriminador

In [8]:
random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                       T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])

In [9]:
loader = DataLoader(data, batch_size, shuffle=True, drop_last=True)
random_loader = DataLoader(random_data, batch_size, shuffle=True, drop_last=True)
binary_cross_entropy_loss = nn.BCEWithLogitsLoss()
MSE_loss = nn.MSELoss()

In [49]:
def TimeGAN(data, parameters):
  hidden_dim = parameters["hidden_dim"]
  num_layers = parameters["num_layers"]
  iterations = parameters["iterations"]
  batch_size = parameters["batch_size"]
  module = parameters["module"]
  epoch = parameters["epoch"]
  no, seq_len, dim = np.asarray(data).shape
  z_dim = dim
  gamma = 1

  Embedder = Time_GAN_module(input_size=z_dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=num_layers)
  Recovery = Time_GAN_module(input_size=hidden_dim, output_size=dim, hidden_dim=hidden_dim, n_layers=n_layers)
  Generator = Time_GAN_module(input_size=dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers)
  Supervisor = Time_GAN_module(input_size=hidden_dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers-1)
  Discriminator = Time_GAN_module(input_size=hidden_dim, output_size=1, hidden_dim=hidden_dim, n_layers=n_layers, activation=nn.Identity)

  embedder_optimizer = optim.Adam(Embedder.parameters(), lr=0.001)
  recovery_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
  supervisor_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
  discriminator_optimizer = optim.Adam(Discriminator.parameters(), lr=0.001)
  generator_optimizer = optim.Adam(Generator.parameters(), lr=0.001)

  # Embedding Network Training
  print('Start Embedding Network Training')
  for e in range(epoch): 
    for batch_index, X in enumerate(loader):
        
        MSE_loss = nn.MSELoss()
        
        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        X_tilde, _ = Recovery(H)
        X_tilde = torch.reshape(X_tilde, (batch_size, seq_len, dim))

        E_loss0 = 10 * torch.sqrt(MSE_loss(X, X_tilde))  

        Embedder.zero_grad()
        Recovery.zero_grad()

        E_loss0.backward(retain_graph=True)

        embedder_optimizer.step()
        recovery_optimizer.step()

        if e in range(1,epoch) and batch_index == 0:
            print('step: '+ str(e) + '/' + str(epoch) + ', e_loss: ' + str(np.sqrt(E_loss0.detach().numpy())))

  print('Finish Embedding Network Training')

  # Training only with supervised loss
  print('Start Training with Supervised Loss Only')
  for e in range(epoch): 
    for batch_index, X in enumerate(loader):

        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        H_hat_supervise, _ = Supervisor(H)
        H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))  

        G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])


        Embedder.zero_grad()
        Supervisor.zero_grad()

        G_loss_S.backward(retain_graph=True)

        embedder_optimizer.step()
        supervisor_optimizer.step()

        if e in range(1,epoch) and batch_index == 0:
            print('step: '+ str(e) + '/' + str(epoch) + ', s_loss: ' + str(np.sqrt(G_loss_S.detach().numpy())))

  print('Finish Training with Supervised Loss Only')
  # Joint Training
  print('Start Joint Training')
  for itt in range(epoch):
    for kk in range(2):
      X = next(iter(loader))
      random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                       T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
        
      # Generator Training 
      ## Train Generator
      z = torch.tensor(random_data)
      z = z.float()
        
      e_hat, _ = Generator(z)
      e_hat = torch.reshape(e_hat, (batch_size, seq_len, hidden_dim))
        
      H_hat, _ = Supervisor(e_hat)
      H_hat = torch.reshape(H_hat, (batch_size, seq_len, hidden_dim))
        
      Y_fake = Discriminator(H_hat)
      Y_fake = torch.reshape(Y_fake, (batch_size, seq_len, 1))
        
      x_hat, _ = Recovery(H_hat)
      x_hat = torch.reshape(x_hat, (batch_size, seq_len, dim))
        
      H, _ = Embedder(X.float())
      H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

      H_hat_supervise, _ = Supervisor(H)
      H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))

      Generator.zero_grad()
      Supervisor.zero_grad()
      Discriminator.zero_grad()
      Recovery.zero_grad()

      # line 267 of original implementation: 
      # G_loss_U, G_loss_S, G_loss_V
      G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])
        
      G_loss_U = binary_cross_entropy_loss(torch.ones_like(Y_fake), Y_fake)
        
      G_loss_V1 = torch.mean(torch.abs((torch.std(x_hat, [0], unbiased = False)) + 1e-6 - (torch.std(X, [0]) + 1e-6)))
      G_loss_V2 = torch.mean(torch.abs((torch.mean(x_hat, [0]) - (torch.mean(X, [0])))))
      G_loss_V = G_loss_V1 + G_loss_V2
        
      # doing a backward step for each loss should result in gradients accumulating 
      # so we should be able to optimize them jointly
      G_loss_S.backward(retain_graph=True)#
      G_loss_U.backward(retain_graph=True)
      G_loss_V.backward(retain_graph=True)#


      generator_optimizer.step()
      supervisor_optimizer.step()
      discriminator_optimizer.step()
      # Train Embedder 
      ## line 270: we only optimize E_loss_T0
      ## E_loss_T0 = just mse of x and x_tilde
      # but it calls E_solver which optimizes E_loss, which is a sum of 
      # E_loss0 and 0.1* G_loss_S
      MSE_loss = nn.MSELoss()
        
      H, _ = Embedder(X.float())
      H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

      X_tilde, _ = Recovery(H)
      X_tilde = torch.reshape(X_tilde, (batch_size, seq_len, dim))

      E_loss_T0 = MSE_loss(X, X_tilde)
      E_loss0 = 10 * torch.sqrt(MSE_loss(X, X_tilde))  
        
      H_hat_supervise, _ = Supervisor(H)
      H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))  

      G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])
      E_loss = E_loss0  + 0.1 * G_loss_S
        
      G_loss_S.backward(retain_graph=True)
      E_loss_T0.backward()
        
      Embedder.zero_grad()
      Recovery.zero_grad()
      Supervisor.zero_grad()
        
      embedder_optimizer.step()
      recovery_optimizer.step()
      supervisor_optimizer.step()
    # train Discriminator
    for batch_index, X in enumerate(loader):
      random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                       T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
      
      z = torch.tensor(random_data)
      z = z.float()

      H, _ = Embedder(X)
      H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

      Y_real = Discriminator(H)
      Y_real = torch.reshape(Y_real, (batch_size, seq_len, 1))
      
      e_hat, _ = Generator(z)
      e_hat = torch.reshape(e_hat, (batch_size, seq_len, hidden_dim))

      Y_fake_e = Discriminator(e_hat)
      Y_fake_e = torch.reshape(Y_fake_e, (batch_size, seq_len, 1))
        
      H_hat, _ = Supervisor(e_hat)
      H_hat = torch.reshape(H_hat, (batch_size, seq_len, hidden_dim))
        
      Y_fake = Discriminator(H_hat)
      Y_fake = torch.reshape(Y_fake, (batch_size, seq_len, 1))
        
      x_hat, _ = Recovery(H_hat)
      x_hat = torch.reshape(x_hat, (batch_size, seq_len, dim))

      Generator.zero_grad()
      Supervisor.zero_grad()
      Discriminator.zero_grad()
      Recovery.zero_grad()
      Embedder.zero_grad()

      # logits first, then targets
      # D_loss_real(Y_real, torch.ones_like(Y_real))
      D_loss_real = nn.BCEWithLogitsLoss()
      DLR = D_loss_real(Y_real, torch.ones_like(Y_real))

      D_loss_fake = nn.BCEWithLogitsLoss()
      DLF = D_loss_fake(Y_fake, torch.zeros_like(Y_fake))

      D_loss_fake_e = nn.BCEWithLogitsLoss()
      DLF_e = D_loss_fake_e(Y_fake_e, torch.zeros_like(Y_fake_e))

      D_loss = DLR + DLF + gamma * DLF_e

      # D_loss.backward(retain_graph=True)
      
      # discriminator_optimizer.step()

      # check discriminator loss before updating
      check_d_loss = D_loss
      if (check_d_loss > 0.15):
        D_loss.backward(retain_graph=True)
        discriminator_optimizer.step()
      
      print('step: '+ str(e) + '/' + str(epoch) + 
            ', D_loss: ' + str(D_loss.detach().numpy()) +
            ', G_loss_U: ' + str(G_loss_U.detach().numpy()) + 
            ', G_loss_S: ' + str(G_loss_S.detach().numpy()) + 
            ', E_loss_t0: ' + str(np.sqrt(E_loss0.detach().numpy()))
             )
  print('Finish Joint Training')

In [10]:
parameters = dict()
parameters['module'] = 'gru' 
parameters['hidden_dim'] = 24
parameters['num_layers'] = 3
parameters['iterations'] = 10000
parameters['batch_size'] = 128
parameters['epoch'] = 50

In [51]:
TimeGAN(data, parameters)

Start Embedding Network Training


RuntimeError: shape '[128, 24, 24]' is invalid for input of size 44352

In [57]:
hidden_dim = parameters["hidden_dim"]
num_layers = parameters["num_layers"]
iterations = parameters["iterations"]
batch_size = parameters["batch_size"]
module = parameters["module"]
epoch = parameters["epoch"]
no, seq_len, dim = np.asarray(data).shape
z_dim = dim
gamma = 1

Embedder = Time_GAN_module(input_size=z_dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=num_layers)
Recovery = Time_GAN_module(input_size=hidden_dim, output_size=dim, hidden_dim=hidden_dim, n_layers=n_layers)
Generator = Time_GAN_module(input_size=dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers)
Supervisor = Time_GAN_module(input_size=hidden_dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers-1)
Discriminator = Time_GAN_module(input_size=hidden_dim, output_size=1, hidden_dim=hidden_dim, n_layers=n_layers, activation=nn.Identity)

embedder_optimizer = optim.Adam(Embedder.parameters(), lr=0.001)
recovery_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
supervisor_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
discriminator_optimizer = optim.Adam(Discriminator.parameters(), lr=0.001)
generator_optimizer = optim.Adam(Generator.parameters(), lr=0.001)

# Embedding Network Training
print('Start Embedding Network Training')
for e in range(epoch): 
    for batch_index, X in enumerate(loader):
        
        MSE_loss = nn.MSELoss()
        
        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        X_tilde, _ = Recovery(H)
        X_tilde = torch.reshape(X_tilde, (batch_size, seq_len, dim))

        E_loss0 = 10 * torch.sqrt(MSE_loss(X, X_tilde))  

        Embedder.zero_grad()
        Recovery.zero_grad()

        E_loss0.backward(retain_graph=True)

        embedder_optimizer.step()
        recovery_optimizer.step()

        if e in range(1,epoch) and batch_index == 0:
            print('step: '+ str(e) + '/' + str(epoch) + ', e_loss: ' + str(np.sqrt(E_loss0.detach().numpy())))

print('Finish Embedding Network Training')

# Training only with supervised loss
print('Start Training with Supervised Loss Only')
for e in range(epoch): 
    for batch_index, X in enumerate(loader):

        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        H_hat_supervise, _ = Supervisor(H)
        H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))  

        G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])


        Embedder.zero_grad()
        Supervisor.zero_grad()

        G_loss_S.backward(retain_graph=True)

        embedder_optimizer.step()
        supervisor_optimizer.step()

        if e in range(1,epoch) and batch_index == 0:
            print('step: '+ str(e) + '/' + str(epoch) + ', s_loss: ' + str(np.sqrt(G_loss_S.detach().numpy())))

print('Finish Training with Supervised Loss Only')

Start Embedding Network Training
step: 1/5, e_loss: 1.5941393
step: 2/5, e_loss: 1.5449587
step: 3/5, e_loss: 1.5283412
step: 4/5, e_loss: 1.5067494
Finish Embedding Network Training
Start Training with Supervised Loss Only
step: 1/5, s_loss: 0.11959934
step: 2/5, s_loss: 0.028934417
step: 3/5, s_loss: 0.013511296
step: 4/5, s_loss: 0.009382767
Finish Training with Supervised Loss Only


In [14]:
Generator = Time_GAN_module(input_size=dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers)
Discriminator = Time_GAN_module(input_size=hidden_dim, output_size=1, hidden_dim=hidden_dim, n_layers=n_layers, activation=nn.Identity)

embedder_optimizer = optim.Adam(Embedder.parameters(), lr=0.001)
recovery_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
supervisor_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
discriminator_optimizer = optim.Adam(Discriminator.parameters(), lr=0.001)
generator_optimizer = optim.Adam(Generator.parameters(), lr=0.001)

In [15]:
print('Start Joint Training')
for e in range(epoch):
    for kk in range(2):
        X = next(iter(loader))
        random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                        T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
            
        # Generator Training 
        ## Train Generator
        z = torch.tensor(random_data)
        z = z.float()
            
        e_hat, _ = Generator(z)
        e_hat = torch.reshape(e_hat, (batch_size, seq_len, hidden_dim))
            
        H_hat, _ = Supervisor(e_hat)
        H_hat = torch.reshape(H_hat, (batch_size, seq_len, hidden_dim))
            
        Y_fake = Discriminator(H_hat)
        Y_fake = torch.reshape(Y_fake, (batch_size, seq_len, 1))
            
        x_hat, _ = Recovery(H_hat)
        x_hat = torch.reshape(x_hat, (batch_size, seq_len, dim))
            
        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        H_hat_supervise, _ = Supervisor(H)
        H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))

        Generator.zero_grad()
        Supervisor.zero_grad()
        Discriminator.zero_grad()
        Recovery.zero_grad()

        # line 267 of original implementation: 
        # G_loss_U, G_loss_S, G_loss_V
        G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])
            
        G_loss_U = binary_cross_entropy_loss(torch.ones_like(Y_fake), Y_fake)
            
        G_loss_V1 = torch.mean(torch.abs((torch.std(x_hat, [0], unbiased = False)) + 1e-6 - (torch.std(X, [0]) + 1e-6)))
        G_loss_V2 = torch.mean(torch.abs((torch.mean(x_hat, [0]) - (torch.mean(X, [0])))))
        G_loss_V = G_loss_V1 + G_loss_V2
            
        # doing a backward step for each loss should result in gradients accumulating 
        # so we should be able to optimize them jointly
        G_loss_S.backward(retain_graph=True)#
        G_loss_U.backward(retain_graph=True)
        G_loss_V.backward(retain_graph=True)#


        generator_optimizer.step()
        supervisor_optimizer.step()
        discriminator_optimizer.step()
        # Train Embedder 
        ## line 270: we only optimize E_loss_T0
        ## E_loss_T0 = just mse of x and x_tilde
        # but it calls E_solver which optimizes E_loss, which is a sum of 
        # E_loss0 and 0.1* G_loss_S
        MSE_loss = nn.MSELoss()
            
        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        X_tilde, _ = Recovery(H)
        X_tilde = torch.reshape(X_tilde, (batch_size, seq_len, dim))

        E_loss_T0 = MSE_loss(X, X_tilde)
        E_loss0 = 10 * torch.sqrt(MSE_loss(X, X_tilde))  
            
        H_hat_supervise, _ = Supervisor(H)
        H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))  

        G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])
        E_loss = E_loss0  + 0.1 * G_loss_S
            
        G_loss_S.backward(retain_graph=True)
        E_loss_T0.backward()
            
        Embedder.zero_grad()
        Recovery.zero_grad()
        Supervisor.zero_grad()
            
        embedder_optimizer.step()
        recovery_optimizer.step()
        supervisor_optimizer.step()
    # train Discriminator
    for batch_index, X in enumerate(loader):
        random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                        T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
        
        z = torch.tensor(random_data)
        z = z.float()

        H, _ = Embedder(X)
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        Y_real = Discriminator(H)
        Y_real = torch.reshape(Y_real, (batch_size, seq_len, 1))
        
        e_hat, _ = Generator(z)
        e_hat = torch.reshape(e_hat, (batch_size, seq_len, hidden_dim))

        Y_fake_e = Discriminator(e_hat)
        Y_fake_e = torch.reshape(Y_fake_e, (batch_size, seq_len, 1))
            
        H_hat, _ = Supervisor(e_hat)
        H_hat = torch.reshape(H_hat, (batch_size, seq_len, hidden_dim))
            
        Y_fake = Discriminator(H_hat)
        Y_fake = torch.reshape(Y_fake, (batch_size, seq_len, 1))
            
        x_hat, _ = Recovery(H_hat)
        x_hat = torch.reshape(x_hat, (batch_size, seq_len, dim))

        Generator.zero_grad()
        Supervisor.zero_grad()
        Discriminator.zero_grad()
        Recovery.zero_grad()
        Embedder.zero_grad()

        # logits first, then targets
        # D_loss_real(Y_real, torch.ones_like(Y_real))
        D_loss_real = nn.BCEWithLogitsLoss()
        DLR = D_loss_real(Y_real, torch.ones_like(Y_real))

        D_loss_fake = nn.BCEWithLogitsLoss()
        DLF = D_loss_fake(Y_fake, torch.zeros_like(Y_fake))

        D_loss_fake_e = nn.BCEWithLogitsLoss()
        DLF_e = D_loss_fake_e(Y_fake_e, torch.zeros_like(Y_fake_e))

        D_loss = DLR + DLF + gamma * DLF_e

        # D_loss.backward(retain_graph=True)
        
        # discriminator_optimizer.step()

        # check discriminator loss before updating
        check_d_loss = D_loss
        if (check_d_loss > 0.15):
            D_loss.backward(retain_graph=True)
            discriminator_optimizer.step()
        
        print('step: '+ str(e) + '/' + str(epoch) + 
                ', D_loss: ' + str(D_loss.detach().numpy()) +
                ', G_loss_U: ' + str(G_loss_U.detach().numpy()) + 
                ', G_loss_S: ' + str(G_loss_S.detach().numpy()) + 
                ', E_loss_t0: ' + str(np.sqrt(E_loss0.detach().numpy()))
                )
print('Finish Joint Training')

Start Joint Training
step: 0/50, D_loss: 2.294889, G_loss_U: 1.0195522, G_loss_S: 1.7623687e-06, E_loss_t0: 2.5916085
step: 0/50, D_loss: 2.3087003, G_loss_U: 1.0195522, G_loss_S: 1.7623687e-06, E_loss_t0: 2.5916085
step: 0/50, D_loss: 2.308175, G_loss_U: 1.0195522, G_loss_S: 1.7623687e-06, E_loss_t0: 2.5916085
step: 0/50, D_loss: 2.2991896, G_loss_U: 1.0195522, G_loss_S: 1.7623687e-06, E_loss_t0: 2.5916085
step: 0/50, D_loss: 2.285274, G_loss_U: 1.0195522, G_loss_S: 1.7623687e-06, E_loss_t0: 2.5916085
step: 0/50, D_loss: 2.2685602, G_loss_U: 1.0195522, G_loss_S: 1.7623687e-06, E_loss_t0: 2.5916085
step: 0/50, D_loss: 2.2504292, G_loss_U: 1.0195522, G_loss_S: 1.7623687e-06, E_loss_t0: 2.5916085
step: 0/50, D_loss: 2.2317314, G_loss_U: 1.0195522, G_loss_S: 1.7623687e-06, E_loss_t0: 2.5916085
step: 0/50, D_loss: 2.213047, G_loss_U: 1.0195522, G_loss_S: 1.7623687e-06, E_loss_t0: 2.5916085
step: 0/50, D_loss: 2.1946092, G_loss_U: 1.0195522, G_loss_S: 1.7623687e-06, E_loss_t0: 2.5916085
ste

In [17]:
real_data_loading

<function utils.real_data_loading(path, seq_len)>

In [45]:
  print('Start Joint Training')
  for itt in range(epoch):
    for batch_index, X in enumerate(loader):
      # X = next(iter(loader))
      random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                       T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
        
      # Generator Training 
      ## Train Generator
      z = torch.tensor(random_data)
      z = z.float()
        
      e_hat, _ = Generator(z)
      e_hat = torch.reshape(e_hat, (batch_size, seq_len, hidden_dim))
        
      H_hat, _ = Supervisor(e_hat)
      H_hat = torch.reshape(H_hat, (batch_size, seq_len, hidden_dim))
        
      Y_fake = Discriminator(H_hat)
      Y_fake = torch.reshape(Y_fake, (batch_size, seq_len, 1))
        
      x_hat, _ = Recovery(H_hat)
      x_hat = torch.reshape(x_hat, (batch_size, seq_len, dim))
        
      H, _ = Embedder(X.float())
      H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

      H_hat_supervise, _ = Supervisor(H)
      H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))

      Generator.zero_grad()
      Supervisor.zero_grad()
      Discriminator.zero_grad()
      Recovery.zero_grad()

      # line 267 of original implementation: 
      # G_loss_U, G_loss_S, G_loss_V
      G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])
        
      G_loss_U = binary_cross_entropy_loss(torch.ones_like(Y_fake), Y_fake)
        
      G_loss_V1 = torch.mean(torch.abs((torch.std(x_hat, [0], unbiased = False)) + 1e-6 - (torch.std(X, [0]) + 1e-6)))
      G_loss_V2 = torch.mean(torch.abs((torch.mean(x_hat, [0]) - (torch.mean(X, [0])))))
      G_loss_V = G_loss_V1 + G_loss_V2
        
      # doing a backward step for each loss should result in gradients accumulating 
      # so we should be able to optimize them jointly
      G_loss_S.backward(retain_graph=True)#
      G_loss_U.backward(retain_graph=True)
      G_loss_V.backward(retain_graph=True)#


      generator_optimizer.step()
      supervisor_optimizer.step()
      discriminator_optimizer.step()
      # Train Embedder 
      ## line 270: we only optimize E_loss_T0
      ## E_loss_T0 = just mse of x and x_tilde
      # but it calls E_solver which optimizes E_loss, which is a sum of 
      # E_loss0 and 0.1* G_loss_S
      MSE_loss = nn.MSELoss()
        
      H, _ = Embedder(X.float())
      H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

      X_tilde, _ = Recovery(H)
      X_tilde = torch.reshape(X_tilde, (batch_size, seq_len, dim))

      E_loss_T0 = MSE_loss(X, X_tilde)
      E_loss0 = 10 * torch.sqrt(MSE_loss(X, X_tilde))  
        
      H_hat_supervise, _ = Supervisor(H)
      H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))  

      G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])
      E_loss = E_loss0  + 0.1 * G_loss_S
        
      G_loss_S.backward(retain_graph=True)
      E_loss_T0.backward()
        
      Embedder.zero_grad()
      Recovery.zero_grad()
      Supervisor.zero_grad()
        
      embedder_optimizer.step()
      recovery_optimizer.step()
      supervisor_optimizer.step()
    # train Discriminator
    for batch_index, X in enumerate(loader):
      random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                       T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
      
      z = torch.tensor(random_data)
      z = z.float()

      H, _ = Embedder(X)
      H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

      Y_real = Discriminator(H)
      Y_real = torch.reshape(Y_real, (batch_size, seq_len, 1))
      
      e_hat, _ = Generator(z)
      e_hat = torch.reshape(e_hat, (batch_size, seq_len, hidden_dim))

      Y_fake_e = Discriminator(e_hat)
      Y_fake_e = torch.reshape(Y_fake_e, (batch_size, seq_len, 1))
        
      H_hat, _ = Supervisor(e_hat)
      H_hat = torch.reshape(H_hat, (batch_size, seq_len, hidden_dim))
        
      Y_fake = Discriminator(H_hat)
      Y_fake = torch.reshape(Y_fake, (batch_size, seq_len, 1))
        
      x_hat, _ = Recovery(H_hat)
      x_hat = torch.reshape(x_hat, (batch_size, seq_len, dim))

      Generator.zero_grad()
      Supervisor.zero_grad()
      Discriminator.zero_grad()
      Recovery.zero_grad()
      Embedder.zero_grad()

      D_loss_real = nn.BCEWithLogitsLoss()
      # i think we need the order: logits, targets 
      # https://discuss.pytorch.org/t/v1-0-1-nn-bcewithlogitsloss-returns-negative-loss-sigmoid-layer-not-deployed/57409/3
      DLR = D_loss_real(Y_real, torch.ones_like(Y_real))
      D_loss_fake = nn.BCEWithLogitsLoss()
      DLF = D_loss_fake(torch.zeros_like(Y_fake), Y_fake)
      D_loss_fake_e = nn.BCEWithLogitsLoss()
      DLF_e = D_loss_fake_e(torch.zeros_like(Y_fake_e), Y_fake_e)
      D_loss = DLR + DLF + gamma * DLF_e

      D_loss.backward(retain_graph=True)
      
      discriminator_optimizer.step()

      # check discriminator loss before updating
      #check_d_loss = D_loss
      #if (check_d_loss > 0.15):
      #  D_loss.backward(retain_graph=True)
      #  discriminator_optimizer.step()
      
      print('step: '+ str(e) + '/' + str(epoch) + 
            ', D_loss: ' + str(D_loss.detach().numpy()) +
            ', G_loss_U: ' + str(G_loss_U.detach().numpy()) + 
            ', G_loss_S: ' + str(G_loss_S.detach().numpy()) + 
            ', E_loss_t0: ' + str(np.sqrt(E_loss0.detach().numpy()))
             )
  print('Finish Joint Training')

Start Joint Training
step: 99/100, D_loss: -2.7969604, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -2.8291883, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -2.8611054, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -2.8930597, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -2.9248323, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -2.956499, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -2.9881005, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -3.0196328, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -3.0511103, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -3.0824876, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E

KeyboardInterrupt: ignored

In [47]:
      X = next(iter(loader))
      random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                       T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
        
      # Generator Training 
      ## Train Generator
      z = torch.tensor(random_data)
      z = z.float()
        
      e_hat, _ = Generator(z)
      e_hat = torch.reshape(e_hat, (batch_size, seq_len, hidden_dim))
        
      H_hat, _ = Supervisor(e_hat)
      H_hat = torch.reshape(H_hat, (batch_size, seq_len, hidden_dim))
        
      Y_fake = Discriminator(H_hat)
      Y_fake = torch.reshape(Y_fake, (batch_size, seq_len, 1))
        
      x_hat, _ = Recovery(H_hat)
      x_hat = torch.reshape(x_hat, (batch_size, seq_len, dim))
        
      H, _ = Embedder(X.float())
      H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

      H_hat_supervise, _ = Supervisor(H)
      H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))

      Generator.zero_grad()
      Supervisor.zero_grad()
      Discriminator.zero_grad()
      Recovery.zero_grad()

      # line 267 of original implementation: 
      # G_loss_U, G_loss_S, G_loss_V
      G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])
      print(G_loss_S)
      print(Y_fake.shape)
      # G_loss_U returns negative values? 
      G_loss_U = binary_cross_entropy_loss(torch.ones_like(Y_fake), Y_fake)
      print(G_loss_U)
      G_loss_V1 = torch.mean(torch.abs((torch.std(x_hat, [0], unbiased = False)) + 1e-6 - (torch.std(X, [0]) + 1e-6)))
      G_loss_V2 = torch.mean(torch.abs((torch.mean(x_hat, [0]) - (torch.mean(X, [0])))))
      G_loss_V = G_loss_V1 + G_loss_V2
      print(G_loss_V)
      # doing a backward step for each loss should result in gradients accumulating 
      # so we should be able to optimize them jointly
      G_loss_S.backward(retain_graph=True)#
      G_loss_U.backward(retain_graph=True)
      G_loss_V.backward(retain_graph=True)#


      generator_optimizer.step()
      supervisor_optimizer.step()
      discriminator_optimizer.step()
      

tensor(9.9414e-06, grad_fn=<MseLossBackward>)
torch.Size([128, 24, 1])
tensor(-4.8705, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
tensor(0.7864, grad_fn=<AddBackward0>)


In [61]:
# Discriminator outputs values larger than 1
Discriminator(H_hat)

(tensor([[0.4977],
         [0.5011],
         [0.5013],
         ...,
         [0.4923],
         [0.4923],
         [0.4923]], grad_fn=<SigmoidBackward>),
 tensor([[[ 0.3385, -0.2502, -0.0390,  ...,  0.0921, -0.1438,  0.1577],
          [ 0.3385, -0.2502, -0.0390,  ...,  0.0921, -0.1438,  0.1577],
          [ 0.3385, -0.2502, -0.0390,  ...,  0.0921, -0.1438,  0.1577],
          ...,
          [ 0.3385, -0.2502, -0.0390,  ...,  0.0921, -0.1438,  0.1577],
          [ 0.3385, -0.2502, -0.0390,  ...,  0.0921, -0.1438,  0.1577],
          [ 0.3385, -0.2502, -0.0390,  ...,  0.0921, -0.1438,  0.1577]],
 
         [[-0.1385,  0.3385,  0.2934,  ..., -0.1475,  0.2490,  0.0824],
          [-0.1385,  0.3385,  0.2934,  ..., -0.1475,  0.2490,  0.0824],
          [-0.1385,  0.3385,  0.2934,  ..., -0.1475,  0.2490,  0.0824],
          ...,
          [-0.1385,  0.3385,  0.2934,  ..., -0.1475,  0.2490,  0.0824],
          [-0.1385,  0.3385,  0.2934,  ..., -0.1475,  0.2490,  0.0824],
          [-0.1385

In [60]:
Discriminator = Time_GAN_module(input_size=hidden_dim, output_size=1, hidden_dim=hidden_dim, n_layers=n_layers, activation=torch.sigmoid)
Discriminator

Time_GAN_module(
  (rnn): GRU(24, 24, num_layers=3, batch_first=True)
  (fc): Linear(in_features=24, out_features=1, bias=True)
)

In [None]:
test = next(iter(loader))
test.shape

In [66]:
D_loss_real(Y_real, torch.ones_like(Y_real))

tensor(0.0023, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)

In [None]:
# outline of original training loop
"""print('Start Joint Training')

for e in range(epoch): 

    for batch_index, X in enumerate(loader):
        
        random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                       T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
        
        
        # Generator Training 
        ## Train Generator
        z = torch.tensor(random_data)
        z = z.float()
        
        e_hat, _ = Generator(z)
        e_hat = torch.reshape(e_hat, (batch_size, seq_len, hidden_dim))
        
        H_hat, _ = Supervisor(e_hat)
        H_hat = torch.reshape(H_hat, (batch_size, seq_len, hidden_dim))
        
        Y_fake = Discriminator(H_hat)
        Y_fake = torch.reshape(Y_fake, (batch_size, seq_len, 1))
        
        x_hat, _ = Recovery(H_hat)
        x_hat = torch.reshape(x_hat, (batch_size, seq_len, dim))
        
        
        Generator.zero_grad()
        Supervisor.zero_grad()
        Discriminator.zero_grad()
        Recovery.zero_grad()
        
        G_loss_U = binary_cross_entropy_loss(torch.ones_like(Y_fake), Y_fake)
        
        G_loss_V1 = torch.mean(torch.abs((torch.std(x_hat, [0], unbiased = False)) + 1e-6 - (torch.std(X, [0]) + 1e-6)))
        G_loss_V2 = torch.mean(torch.abs((torch.mean(x_hat, [0]) - (torch.mean(X, [0])))))
        G_loss_V = G_loss_V1 + G_loss_V2
        
 
        G_loss_U.backward(retain_graph=True)
        G_loss_V.backward()


        generator_optimizer.step()
        supervisor_optimizer.step()
        discriminator_optimizer.step()
        
        ## Train Embedder
        
        MSE_loss = nn.MSELoss()
        
        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        X_tilde, _ = Recovery(H)
        X_tilde = torch.reshape(X_tilde, (batch_size, seq_len, dim))

        E_loss0 = 10 * torch.sqrt(MSE_loss(X, X_tilde))  
        
        H_hat_supervise, _ = Supervisor(H)
        H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))  

        G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])
        E_loss = E_loss0  + 0.1 * G_loss_S
        
        G_loss_S.backward(retain_graph=True)
        E_loss.backward()
        
        Embedder.zero_grad()
        Recovery.zero_grad()
        Supervisor.zero_grad()
        
        embedder_optimizer.step()
        recovery_optimizer.step()
        supervisor_optimizer.step()
        
        # Train Discriminator 
        
        
        
        
        #if e in range(1,epoch) and batch_index == 0:
        print('step: '+ str(e) + '/' + str(epoch) + ', G_loss_U: ' + str(G_loss_U.detach().numpy()) + ', G_loss_S: ' + 
             str(G_loss_S.detach().numpy()) + ', E_loss_t0: ' + str(np.sqrt(E_loss0.detach().numpy()))
             )
        
        
 



    


        

print('Finish Joint Training')"""

# Exps

In [18]:
class Discriminator(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super(Discriminator, self).__init__()
        self.gru = nn.GRU(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x, seq_lengths):
        packed_input = nn.utils.rnn.pack_padded_sequence(x, seq_lengths, batch_first=True, enforce_sorted=False)
        _, h_n = self.gru(packed_input)
        logits = self.fc(h_n[-1])
        y_hat = self.sigmoid(logits)
        return logits, y_hat

In [19]:
hidden_dim = 3
data_dim = 6
disc = Discriminator(input_dim=data_dim, hidden_dim=hidden_dim)

In [34]:
random_data = random_generator(batch_size=3661, z_dim=dim, 
                                T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
z = torch.tensor(random_data)
z = z.float()

e_hat, _ = Generator(z)
e_hat = torch.reshape(e_hat, (3661, seq_len, 24))

H_hat, _ = Supervisor(e_hat)
H_hat = torch.reshape(H_hat, (3661, seq_len, 24))

x_hat, _ = Recovery(H_hat)
x_hat = torch.reshape(x_hat, (3661, seq_len, dim))

In [46]:
from torch.utils.data import DataLoader, TensorDataset, random_split
def train_test_divide(dataX, dataX_hat, dataT, test_split=0.2):
    # Combine real and fake data
    data = np.concatenate((dataX, dataX_hat), axis=0)
    labels = np.concatenate((np.ones(len(dataX)), np.zeros(len(dataX_hat))), axis=0)
    lengths = np.concatenate((dataT, dataT), axis=0)  # Use same lengths for real and fake
    
    # Convert to tensors
    data_tensor = torch.tensor(data, dtype=torch.float32)
    labels_tensor = torch.tensor(labels, dtype=torch.float32)
    lengths_tensor = torch.tensor(lengths, dtype=torch.int64)
    
    # Create dataset
    dataset = TensorDataset(data_tensor, labels_tensor, lengths_tensor)
    
    # Split into train/test
    test_size = int(len(dataset) * test_split)
    train_size = len(dataset) - test_size
    train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
    
    return train_dataset, test_dataset

In [51]:
# Parameters
hidden_dim = max(int(data[0].shape[1] / 2), 1)
iterations = 100
batch_size = 128
learning_rate = 0.001

# Prepare data
No = len(data)
data_dim = data[0].shape[1]

# Compute sequence lengths
dataT = [len(seq) for seq in data]

# Get the maximum sequence length
Max_Seq_Len = max(dataT)

# Padding the sequences to the maximum length
dataX_padded = [np.pad(seq, ((0, Max_Seq_Len - len(seq)), (0, 0)), 'constant') for seq in data]
dataX_hat_padded = [np.pad(seq, ((0, Max_Seq_Len - len(seq)), (0, 0)), 'constant') for seq in x_hat.detach()]

# Divide into training and test sets
train_dataset, test_dataset = train_test_divide(dataX_padded, dataX_hat_padded, dataT)

# Dataloaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [52]:
discriminator = Discriminator(input_dim=data_dim, hidden_dim=hidden_dim)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(discriminator.parameters(), lr=0.001)

# Training
discriminator.train()
for epoch in tqdm(range(iterations)):
    for real_data, labels, seq_lengths in train_loader:
        # Forward pass
        logits, predictions = discriminator(real_data, seq_lengths)
        loss = criterion(logits.squeeze(), labels)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

                                                 

In [53]:
from sklearn.metrics import accuracy_score

In [54]:
# Testing
discriminator.eval()
y_pred = []
y_true = []

with torch.no_grad():
    for real_data, labels, seq_lengths in test_loader:
        logits, predictions = discriminator(real_data, seq_lengths)
        y_pred.extend(predictions.squeeze().cpu().numpy())
        y_true.extend(labels.cpu().numpy())

# Calculate accuracy
y_pred = np.array(y_pred)
y_true = np.array(y_true)
accuracy = accuracy_score(y_true, y_pred > 0.5)

# Discriminative score
disc_score = np.abs(0.5 - accuracy)

In [55]:
disc_score

0.4979508196721312