In [18]:
import numpy as np
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

# from data_loading import sine_data_generation
# from utils import random_generator
# from data_loading import MinMaxScaler

from torch.utils.data import DataLoader


# from utils import extract_time


In [19]:
def train_test_divide (data_x, data_x_hat, data_t, data_t_hat, train_rate = 0.8):
  """Divide train and test data for both original and synthetic data.
  
  Args:
    - data_x: original data
    - data_x_hat: generated data
    - data_t: original time
    - data_t_hat: generated time
    - train_rate: ratio of training data from the original data
  """
  # Divide train/test index (original data)
  no = len(data_x)
  idx = np.random.permutation(no)
  train_idx = idx[:int(no*train_rate)]
  test_idx = idx[int(no*train_rate):]
    
  train_x = [data_x[i] for i in train_idx]
  test_x = [data_x[i] for i in test_idx]
  train_t = [data_t[i] for i in train_idx]
  test_t = [data_t[i] for i in test_idx]      
    
  # Divide train/test index (synthetic data)
  no = len(data_x_hat)
  idx = np.random.permutation(no)
  train_idx = idx[:int(no*train_rate)]
  test_idx = idx[int(no*train_rate):]
  
  train_x_hat = [data_x_hat[i] for i in train_idx]
  test_x_hat = [data_x_hat[i] for i in test_idx]
  train_t_hat = [data_t_hat[i] for i in train_idx]
  test_t_hat = [data_t_hat[i] for i in test_idx]
  
  return train_x, train_x_hat, test_x, test_x_hat, train_t, train_t_hat, test_t, test_t_hat


def extract_time (data):
  """Returns Maximum sequence length and each sequence length.
  
  Args:
    - data: original data
    
  Returns:
    - time: extracted time information
    - max_seq_len: maximum sequence length
  """
  time = list()
  max_seq_len = 0
  for i in range(len(data)):
    max_seq_len = max(max_seq_len, len(data[i][:,0]))
    time.append(len(data[i][:,0]))
    
  return time, max_seq_len

def random_generator (batch_size, z_dim, T_mb, max_seq_len):
  """Random vector generation.
  
  Args:
    - batch_size: size of the random vector
    - z_dim: dimension of random vector
    - T_mb: time information for the random vector
    - max_seq_len: maximum sequence length
    
  Returns:
    - Z_mb: generated random vector
  """
  Z_mb = list()
  for i in range(batch_size):
    temp = np.zeros([max_seq_len, z_dim])
    temp_Z = np.random.uniform(0., 1, [T_mb[i], z_dim])
    temp[:T_mb[i],:] = temp_Z
    Z_mb.append(temp_Z)
  return Z_mb


def batch_generator(data, time, batch_size):
  """Mini-batch generator.
  
  Args:
    - data: time-series data
    - time: time information
    - batch_size: the number of samples in each batch
    
  Returns:
    - X_mb: time-series data in each batch
    - T_mb: time information in each batch
  """
  no = len(data)
  idx = np.random.permutation(no)
  train_idx = idx[:batch_size]     
            
  X_mb = list(data[i] for i in train_idx)
  T_mb = list(time[i] for i in train_idx)
  
  return X_mb, T_mb

In [20]:
def MinMaxScaler(data):
  """Min Max normalizer.
  
  Args:
    - data: original data
  
  Returns:
    - norm_data: normalized data
  """
  numerator = data - np.min(data, 0)
  denominator = np.max(data, 0) - np.min(data, 0)
  norm_data = numerator / (denominator + 1e-7)
  return norm_data


def sine_data_generation (no, seq_len, dim):
  """Sine data generation.
  
  Args:
    - no: the number of samples
    - seq_len: sequence length of the time-series
    - dim: feature dimensions
    
  Returns:
    - data: generated data
  """  
  # Initialize the output
  data = list()

  # Generate sine data
  for i in range(no):      
    # Initialize each time-series
    temp = list()
    # For each feature
    for k in range(dim):
      # Randomly drawn frequency and phase
      freq = np.random.uniform(0, 0.1)            
      phase = np.random.uniform(0, 0.1)
          
      # Generate sine signal based on the drawn frequency and phase
      temp_data = [np.sin(freq * j + phase) for j in range(seq_len)] 
      temp.append(temp_data)
        
    # Align row/column
    temp = np.transpose(np.asarray(temp))        
    # Normalize to [0,1]
    temp = (temp + 1)*0.5
    # Stack the generated data
    data.append(temp)
                
  return data
    

def real_data_loading (data_name, seq_len):
  """Load and preprocess real-world datasets.
  
  Args:
    - data_name: stock or energy
    - seq_len: sequence length
    
  Returns:
    - data: preprocessed data.
  """  
  assert data_name in ['stock','energy']
  
  if data_name == 'stock':
    ori_data = np.loadtxt('data/stock_data.csv', delimiter = ",",skiprows = 1)
  elif data_name == 'energy':
    ori_data = np.loadtxt('data/energy_data.csv', delimiter = ",",skiprows = 1)
        
  # Flip the data to make chronological data
  ori_data = ori_data[::-1]
  # Normalize the data
  ori_data = MinMaxScaler(ori_data)
    
  # Preprocess the dataset
  temp_data = []    
  # Cut data by sequence length
  for i in range(0, len(ori_data) - seq_len):
    _x = ori_data[i:i + seq_len]
    temp_data.append(_x)
        
  # Mix the datasets (to make it similar to i.i.d)
  idx = np.random.permutation(len(temp_data))    
  data = []
  for i in range(len(temp_data)):
    data.append(temp_data[idx[i]])
    
  return data

Define Class for Module Construction

In [21]:
class Time_GAN_module(nn.Module):
    """
    Class from which a module of the Time GAN Architecture can be constructed, 
    consisting of a n_layer stacked RNN layers and a fully connected layer
    
    input_size = dim of data (depending if module operates on latent or non-latent space)
    """
    def __init__(self, input_size, output_size, hidden_dim, n_layers, activation=torch.sigmoid):
        super(Time_GAN_module, self).__init__()

        # Parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.sigma = activation

        #Defining the layers
        # RNN Layer
        self.rnn = nn.GRU(input_size, hidden_dim, n_layers, batch_first=True)   
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_size)
        
    def forward(self, x):
    
            batch_size = x.size(0)

            # Initializing hidden state for first input using method defined below
            hidden = self.init_hidden(batch_size)

            # Passing in the input and hidden state into the model and obtaining outputs
            out, hidden = self.rnn(x, hidden)
        
            # Reshaping the outputs such that it can be fit into the fully connected layer
            out = out.contiguous().view(-1, self.hidden_dim)
            out = self.fc(out)
            
            if self.sigma == nn.Identity:
                idendity = nn.Identity()
                return idendity(out)
                
            out = self.sigma(out)
            
            # HIDDEN STATES WERDEN IN DER PAPER IMPLEMENTIERUNG AUCH COMPUTED, ALLERDINGS NICHT BENUTZT?
            
            return out, hidden
    
    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        # We'll send the tensor holding the hidden state to the device we specified earlier as well
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
        return hidden

Parameters

In [22]:
input_size = 5 # sequence length = number of features
output_size = 20
hidden_dim = 20
n_layers = 3
gamma = 1

no, seq_len, dim = 12800, 24, 5 

batch_size = 128
epoch = 100

Data Generation

In [23]:
data = sine_data_generation(no, seq_len, dim)
data = MinMaxScaler(data)
data = torch.Tensor(data)
data.shape

torch.Size([12800, 24, 5])

Create Modules

In [24]:
# embedder: num_layers = num_layers, fully_connected dim = hidden_dim
# recovery: num_layers = num_layers, fully_connected dim = dim 
# generator: num layers = num_layers, fully_connected dim = hidden_dim
# supervisor: num_layers = num_layers-1, fully_connected dim = hidden_dim
# discriminator: num_layers = num_layers, fully_connected dim = 1

In [25]:
Embedder = Time_GAN_module(input_size=dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers)
Embedder

Time_GAN_module(
  (rnn): GRU(5, 20, num_layers=3, batch_first=True)
  (fc): Linear(in_features=20, out_features=20, bias=True)
)

In [26]:
Recovery = Time_GAN_module(input_size=hidden_dim, output_size=dim, hidden_dim=hidden_dim, n_layers=n_layers)
Recovery

Time_GAN_module(
  (rnn): GRU(20, 20, num_layers=3, batch_first=True)
  (fc): Linear(in_features=20, out_features=5, bias=True)
)

In [27]:
Generator = Time_GAN_module(input_size=dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers)
Generator

Time_GAN_module(
  (rnn): GRU(5, 20, num_layers=3, batch_first=True)
  (fc): Linear(in_features=20, out_features=20, bias=True)
)

In [28]:
Supervisor = Time_GAN_module(input_size=hidden_dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers-1)
Supervisor

Time_GAN_module(
  (rnn): GRU(20, 20, num_layers=2, batch_first=True)
  (fc): Linear(in_features=20, out_features=20, bias=True)
)

In [29]:
Discriminator = Time_GAN_module(input_size=hidden_dim, output_size=1, hidden_dim=hidden_dim, n_layers=n_layers, 
                               activation=nn.Identity)
Discriminator

Time_GAN_module(
  (rnn): GRU(20, 20, num_layers=3, batch_first=True)
  (fc): Linear(in_features=20, out_features=1, bias=True)
)

Create Optimizers

In [30]:
embedder_optimizer = optim.Adam(Embedder.parameters(), lr=0.001)
recovery_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
supervisor_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
discriminator_optimizer = optim.Adam(Discriminator.parameters(), lr=0.001)
generator_optimizer = optim.Adam(Generator.parameters(), lr=0.001)

Data Loader

In [31]:
loader = DataLoader(data, batch_size, shuffle=True)
X = next(iter(loader))
H, _ = Embedder(X.float())
H_re = torch.reshape(H, (batch_size, seq_len, hidden_dim))
H.shape, H_re.shape

(torch.Size([3072, 20]), torch.Size([128, 24, 20]))

In [32]:
batch_size, seq_len, hidden_dim

(128, 24, 20)

Embedder Training

In [33]:
print('Start Embedding Network Training')

for e in range(epoch): 
    for batch_index, X in enumerate(loader):
        
        MSE_loss = nn.MSELoss()
        
        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        X_tilde, _ = Recovery(H)
        X_tilde = torch.reshape(X_tilde, (batch_size, seq_len, dim))

        E_loss0 = 10 * torch.sqrt(MSE_loss(X, X_tilde))  

        Embedder.zero_grad()
        Recovery.zero_grad()

        E_loss0.backward(retain_graph=True)

        embedder_optimizer.step()
        recovery_optimizer.step()

        if e in range(1,epoch) and batch_index == 0:
            print('step: '+ str(e) + '/' + str(epoch) + ', e_loss: ' + str(np.sqrt(E_loss0.detach().numpy())))

print('Finish Embedding Network Training')

Start Embedding Network Training
step: 1/100, e_loss: 1.6197354
step: 2/100, e_loss: 1.4922516
step: 3/100, e_loss: 1.3177843
step: 4/100, e_loss: 1.3521059
step: 5/100, e_loss: 1.2815592
step: 6/100, e_loss: 1.3027968
step: 7/100, e_loss: 1.2591631
step: 8/100, e_loss: 1.32466
step: 9/100, e_loss: 1.1807969
step: 10/100, e_loss: 1.1071249
step: 11/100, e_loss: 1.0591449
step: 12/100, e_loss: 1.0917394
step: 13/100, e_loss: 0.9606062
step: 14/100, e_loss: 0.99057525
step: 15/100, e_loss: 0.9419938
step: 16/100, e_loss: 0.69091374
step: 17/100, e_loss: 0.6154215
step: 18/100, e_loss: 0.57664984
step: 19/100, e_loss: 0.54816735
step: 20/100, e_loss: 0.5283633
step: 21/100, e_loss: 0.49599373
step: 22/100, e_loss: 0.4994968
step: 23/100, e_loss: 0.48997918
step: 24/100, e_loss: 0.483458
step: 25/100, e_loss: 0.4680167
step: 26/100, e_loss: 0.46107087
step: 27/100, e_loss: 0.4470878
step: 28/100, e_loss: 0.4443866
step: 29/100, e_loss: 0.43648893
step: 30/100, e_loss: 0.43595234
step: 31/1

Training with supervised Loss

In [34]:
print('Start Training with Supervised Loss Only')

for e in range(epoch): 
    for batch_index, X in enumerate(loader):

        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        H_hat_supervise, _ = Supervisor(H)
        H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))  

        G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])


        Embedder.zero_grad()
        Supervisor.zero_grad()

        G_loss_S.backward(retain_graph=True)

        embedder_optimizer.step()
        supervisor_optimizer.step()

        if e in range(1,epoch) and batch_index == 0:
            print('step: '+ str(e) + '/' + str(epoch) + ', s_loss: ' + str(np.sqrt(G_loss_S.detach().numpy())))

print('Finish Training with Supervised Loss Only')

Start Training with Supervised Loss Only
step: 1/100, s_loss: 0.10398547
step: 2/100, s_loss: 0.09963137
step: 3/100, s_loss: 0.095322266
step: 4/100, s_loss: 0.088691786
step: 5/100, s_loss: 0.08786065
step: 6/100, s_loss: 0.08571244
step: 7/100, s_loss: 0.07921417
step: 8/100, s_loss: 0.07805923
step: 9/100, s_loss: 0.07324201
step: 10/100, s_loss: 0.070104145
step: 11/100, s_loss: 0.06643666
step: 12/100, s_loss: 0.061148997
step: 13/100, s_loss: 0.059470728
step: 14/100, s_loss: 0.057204135
step: 15/100, s_loss: 0.05500602
step: 16/100, s_loss: 0.0514243
step: 17/100, s_loss: 0.047650483
step: 18/100, s_loss: 0.046424177
step: 19/100, s_loss: 0.043680355
step: 20/100, s_loss: 0.042070862
step: 21/100, s_loss: 0.03882782
step: 22/100, s_loss: 0.03869168
step: 23/100, s_loss: 0.037388377
step: 24/100, s_loss: 0.035496406
step: 25/100, s_loss: 0.033455666
step: 26/100, s_loss: 0.031500757
step: 27/100, s_loss: 0.032185104
step: 28/100, s_loss: 0.02985961
step: 29/100, s_loss: 0.027589

In [35]:
epoch = 2

In [36]:
random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                       T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])

In [37]:
loader = DataLoader(data, batch_size, shuffle=True)

random_loader = DataLoader(random_data, batch_size, shuffle=True)

binary_cross_entropy_loss = nn.BCEWithLogitsLoss()

MSE_loss = nn.MSELoss()



In [38]:
X = next(iter(loader))
X.shape

torch.Size([128, 24, 5])

In [67]:
def TimeGAN(data, parameters):
  hidden_dim = parameters["hidden_dim"]
  num_layers = parameters["num_layers"]
  iterations = parameters["iterations"]
  batch_size = parameters["batch_size"]
  module = parameters["module"]
  epoch = parameters["epoch"]
  no, seq_len, dim = np.asarray(data).shape
  z_dim = dim
  gamma = 1

  Embedder = Time_GAN_module(input_size=z_dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=num_layers)
  Recovery = Time_GAN_module(input_size=hidden_dim, output_size=dim, hidden_dim=hidden_dim, n_layers=n_layers)
  Generator = Time_GAN_module(input_size=dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers)
  Supervisor = Time_GAN_module(input_size=hidden_dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers-1)
  Discriminator = Time_GAN_module(input_size=hidden_dim, output_size=1, hidden_dim=hidden_dim, n_layers=n_layers, activation=nn.Identity)

  embedder_optimizer = optim.Adam(Embedder.parameters(), lr=0.001)
  recovery_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
  supervisor_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
  discriminator_optimizer = optim.Adam(Discriminator.parameters(), lr=0.001)
  generator_optimizer = optim.Adam(Generator.parameters(), lr=0.001)

  # Embedding Network Training
  print('Start Embedding Network Training')
  for e in range(epoch): 
    for batch_index, X in enumerate(loader):
        
        MSE_loss = nn.MSELoss()
        
        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        X_tilde, _ = Recovery(H)
        X_tilde = torch.reshape(X_tilde, (batch_size, seq_len, dim))

        E_loss0 = 10 * torch.sqrt(MSE_loss(X, X_tilde))  

        Embedder.zero_grad()
        Recovery.zero_grad()

        E_loss0.backward(retain_graph=True)

        embedder_optimizer.step()
        recovery_optimizer.step()

        if e in range(1,epoch) and batch_index == 0:
            print('step: '+ str(e) + '/' + str(epoch) + ', e_loss: ' + str(np.sqrt(E_loss0.detach().numpy())))

  print('Finish Embedding Network Training')

  # Training only with supervised loss
  print('Start Training with Supervised Loss Only')
  for e in range(epoch): 
    for batch_index, X in enumerate(loader):

        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        H_hat_supervise, _ = Supervisor(H)
        H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))  

        G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])


        Embedder.zero_grad()
        Supervisor.zero_grad()

        G_loss_S.backward(retain_graph=True)

        embedder_optimizer.step()
        supervisor_optimizer.step()

        if e in range(1,epoch) and batch_index == 0:
            print('step: '+ str(e) + '/' + str(epoch) + ', s_loss: ' + str(np.sqrt(G_loss_S.detach().numpy())))

  print('Finish Training with Supervised Loss Only')
  # Joint Training
  print('Start Joint Training')
  for itt in range(epoch):
    for kk in range(2):
      X = next(iter(loader))
      random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                       T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
        
      # Generator Training 
      ## Train Generator
      z = torch.tensor(random_data)
      z = z.float()
        
      e_hat, _ = Generator(z)
      e_hat = torch.reshape(e_hat, (batch_size, seq_len, hidden_dim))
        
      H_hat, _ = Supervisor(e_hat)
      H_hat = torch.reshape(H_hat, (batch_size, seq_len, hidden_dim))
        
      Y_fake = Discriminator(H_hat)
      Y_fake = torch.reshape(Y_fake, (batch_size, seq_len, 1))
        
      x_hat, _ = Recovery(H_hat)
      x_hat = torch.reshape(x_hat, (batch_size, seq_len, dim))
        
      H, _ = Embedder(X.float())
      H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

      H_hat_supervise, _ = Supervisor(H)
      H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))

      Generator.zero_grad()
      Supervisor.zero_grad()
      Discriminator.zero_grad()
      Recovery.zero_grad()

      # line 267 of original implementation: 
      # G_loss_U, G_loss_S, G_loss_V
      G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])
        
      G_loss_U = binary_cross_entropy_loss(torch.ones_like(Y_fake), Y_fake)
        
      G_loss_V1 = torch.mean(torch.abs((torch.std(x_hat, [0], unbiased = False)) + 1e-6 - (torch.std(X, [0]) + 1e-6)))
      G_loss_V2 = torch.mean(torch.abs((torch.mean(x_hat, [0]) - (torch.mean(X, [0])))))
      G_loss_V = G_loss_V1 + G_loss_V2
        
      # doing a backward step for each loss should result in gradients accumulating 
      # so we should be able to optimize them jointly
      G_loss_S.backward(retain_graph=True)#
      G_loss_U.backward(retain_graph=True)
      G_loss_V.backward(retain_graph=True)#


      generator_optimizer.step()
      supervisor_optimizer.step()
      discriminator_optimizer.step()
      # Train Embedder 
      ## line 270: we only optimize E_loss_T0
      ## E_loss_T0 = just mse of x and x_tilde
      # but it calls E_solver which optimizes E_loss, which is a sum of 
      # E_loss0 and 0.1* G_loss_S
      MSE_loss = nn.MSELoss()
        
      H, _ = Embedder(X.float())
      H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

      X_tilde, _ = Recovery(H)
      X_tilde = torch.reshape(X_tilde, (batch_size, seq_len, dim))

      E_loss_T0 = MSE_loss(X, X_tilde)
      E_loss0 = 10 * torch.sqrt(MSE_loss(X, X_tilde))  
        
      H_hat_supervise, _ = Supervisor(H)
      H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))  

      G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])
      E_loss = E_loss0  + 0.1 * G_loss_S
        
      G_loss_S.backward(retain_graph=True)
      E_loss_T0.backward()
        
      Embedder.zero_grad()
      Recovery.zero_grad()
      Supervisor.zero_grad()
        
      embedder_optimizer.step()
      recovery_optimizer.step()
      supervisor_optimizer.step()
    # train Discriminator
    for batch_index, X in enumerate(loader):
      random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                       T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
      
      z = torch.tensor(random_data)
      z = z.float()

      H, _ = Embedder(X)
      H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

      Y_real = Discriminator(H)
      Y_real = torch.reshape(Y_real, (batch_size, seq_len, 1))
      
      e_hat, _ = Generator(z)
      e_hat = torch.reshape(e_hat, (batch_size, seq_len, hidden_dim))

      Y_fake_e = Discriminator(e_hat)
      Y_fake_e = torch.reshape(Y_fake_e, (batch_size, seq_len, 1))
        
      H_hat, _ = Supervisor(e_hat)
      H_hat = torch.reshape(H_hat, (batch_size, seq_len, hidden_dim))
        
      Y_fake = Discriminator(H_hat)
      Y_fake = torch.reshape(Y_fake, (batch_size, seq_len, 1))
        
      x_hat, _ = Recovery(H_hat)
      x_hat = torch.reshape(x_hat, (batch_size, seq_len, dim))

      Generator.zero_grad()
      Supervisor.zero_grad()
      Discriminator.zero_grad()
      Recovery.zero_grad()
      Embedder.zero_grad()

      # logits first, then targets
      # D_loss_real(Y_real, torch.ones_like(Y_real))
      D_loss_real = nn.BCEWithLogitsLoss()
      DLR = D_loss_real(Y_real, torch.ones_like(Y_real))

      D_loss_fake = nn.BCEWithLogitsLoss()
      DLF = D_loss_fake(Y_fake, torch.zeros_like(Y_fake))

      D_loss_fake_e = nn.BCEWithLogitsLoss()
      DLF_e = D_loss_fake_e(Y_fake_e, torch.zeros_like(Y_fake_e))

      D_loss = DLR + DLF + gamma * DLF_e

      # D_loss.backward(retain_graph=True)
      
      # discriminator_optimizer.step()

      # check discriminator loss before updating
      check_d_loss = D_loss
      if (check_d_loss > 0.15):
        D_loss.backward(retain_graph=True)
        discriminator_optimizer.step()
      
      print('step: '+ str(e) + '/' + str(epoch) + 
            ', D_loss: ' + str(D_loss.detach().numpy()) +
            ', G_loss_U: ' + str(G_loss_U.detach().numpy()) + 
            ', G_loss_S: ' + str(G_loss_S.detach().numpy()) + 
            ', E_loss_t0: ' + str(np.sqrt(E_loss0.detach().numpy()))
             )
  print('Finish Joint Training')

In [68]:
parameters = dict()
parameters['module'] = 'gru' 
parameters['hidden_dim'] = 24
parameters['num_layers'] = 3
parameters['iterations'] = 10000
parameters['batch_size'] = 128
parameters['epoch'] = 100

In [69]:
TimeGAN(data, parameters)

Start Embedding Network Training
step: 1/100, e_loss: 1.6255708
step: 2/100, e_loss: 1.4654692
step: 3/100, e_loss: 1.3650312
step: 4/100, e_loss: 1.3015138
step: 5/100, e_loss: 1.2590116
step: 6/100, e_loss: 1.2514571
step: 7/100, e_loss: 1.1545329
step: 8/100, e_loss: 1.0581712
step: 9/100, e_loss: 1.0139271
step: 10/100, e_loss: 0.99184287
step: 11/100, e_loss: 0.9825848
step: 12/100, e_loss: 0.7539981
step: 13/100, e_loss: 0.64375967
step: 14/100, e_loss: 0.57008266
step: 15/100, e_loss: 0.5210716
step: 16/100, e_loss: 0.4996004
step: 17/100, e_loss: 0.4849279
step: 18/100, e_loss: 0.46555084
step: 19/100, e_loss: 0.47098878
step: 20/100, e_loss: 0.44210142
step: 21/100, e_loss: 0.44026056
step: 22/100, e_loss: 0.42776528
step: 23/100, e_loss: 0.42436484
step: 24/100, e_loss: 0.40902647
step: 25/100, e_loss: 0.4068279
step: 26/100, e_loss: 0.3989191
step: 27/100, e_loss: 0.39009485
step: 28/100, e_loss: 0.3912966
step: 29/100, e_loss: 0.40551588
step: 30/100, e_loss: 0.40117383
ste

KeyboardInterrupt: ignored

In [43]:
  hidden_dim = parameters["hidden_dim"]
  num_layers = parameters["num_layers"]
  iterations = parameters["iterations"]
  batch_size = parameters["batch_size"]
  module = parameters["module"]
  epoch = parameters["epoch"]
  no, seq_len, dim = np.asarray(data).shape
  z_dim = dim
  gamma = 1

  Embedder = Time_GAN_module(input_size=z_dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=num_layers)
  Recovery = Time_GAN_module(input_size=hidden_dim, output_size=dim, hidden_dim=hidden_dim, n_layers=n_layers)
  Generator = Time_GAN_module(input_size=dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers)
  Supervisor = Time_GAN_module(input_size=hidden_dim, output_size=hidden_dim, hidden_dim=hidden_dim, n_layers=n_layers-1)
  Discriminator = Time_GAN_module(input_size=hidden_dim, output_size=1, hidden_dim=hidden_dim, n_layers=n_layers, activation=nn.Identity)

  embedder_optimizer = optim.Adam(Embedder.parameters(), lr=0.001)
  recovery_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
  supervisor_optimizer = optim.Adam(Recovery.parameters(), lr=0.001)
  discriminator_optimizer = optim.Adam(Discriminator.parameters(), lr=0.001)
  generator_optimizer = optim.Adam(Generator.parameters(), lr=0.001)

  # Embedding Network Training
  print('Start Embedding Network Training')
  for e in range(epoch): 
    for batch_index, X in enumerate(loader):
        
        MSE_loss = nn.MSELoss()
        
        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        X_tilde, _ = Recovery(H)
        X_tilde = torch.reshape(X_tilde, (batch_size, seq_len, dim))

        E_loss0 = 10 * torch.sqrt(MSE_loss(X, X_tilde))  

        Embedder.zero_grad()
        Recovery.zero_grad()

        E_loss0.backward(retain_graph=True)

        embedder_optimizer.step()
        recovery_optimizer.step()

        if e in range(1,epoch) and batch_index == 0:
            print('step: '+ str(e) + '/' + str(epoch) + ', e_loss: ' + str(np.sqrt(E_loss0.detach().numpy())))

  print('Finish Embedding Network Training')

  # Training only with supervised loss
  print('Start Training with Supervised Loss Only')
  for e in range(epoch): 
    for batch_index, X in enumerate(loader):

        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        H_hat_supervise, _ = Supervisor(H)
        H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))  

        G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])


        Embedder.zero_grad()
        Supervisor.zero_grad()

        G_loss_S.backward(retain_graph=True)

        embedder_optimizer.step()
        supervisor_optimizer.step()

        if e in range(1,epoch) and batch_index == 0:
            print('step: '+ str(e) + '/' + str(epoch) + ', s_loss: ' + str(np.sqrt(G_loss_S.detach().numpy())))

  print('Finish Training with Supervised Loss Only')

Start Embedding Network Training
step: 1/100, e_loss: 1.6018305
step: 2/100, e_loss: 1.55982
step: 3/100, e_loss: 1.4912727
step: 4/100, e_loss: 1.4599437
step: 5/100, e_loss: 1.3488882
step: 6/100, e_loss: 1.2394172
step: 7/100, e_loss: 1.2484871
step: 8/100, e_loss: 1.2529289
step: 9/100, e_loss: 1.2284856
step: 10/100, e_loss: 1.1609265
step: 11/100, e_loss: 1.0964484
step: 12/100, e_loss: 0.8498254
step: 13/100, e_loss: 0.68953836
step: 14/100, e_loss: 0.6233022
step: 15/100, e_loss: 0.5515359
step: 16/100, e_loss: 0.54300267
step: 17/100, e_loss: 0.51206124
step: 18/100, e_loss: 0.48474026
step: 19/100, e_loss: 0.4790923
step: 20/100, e_loss: 0.47527474
step: 21/100, e_loss: 0.45888883
step: 22/100, e_loss: 0.4562261
step: 23/100, e_loss: 0.44347155
step: 24/100, e_loss: 0.42158148
step: 25/100, e_loss: 0.42365658
step: 26/100, e_loss: 0.41077211
step: 27/100, e_loss: 0.41154054
step: 28/100, e_loss: 0.4007569
step: 29/100, e_loss: 0.4030361
step: 30/100, e_loss: 0.394878
step: 31

In [45]:
  print('Start Joint Training')
  for itt in range(epoch):
    for batch_index, X in enumerate(loader):
      # X = next(iter(loader))
      random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                       T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
        
      # Generator Training 
      ## Train Generator
      z = torch.tensor(random_data)
      z = z.float()
        
      e_hat, _ = Generator(z)
      e_hat = torch.reshape(e_hat, (batch_size, seq_len, hidden_dim))
        
      H_hat, _ = Supervisor(e_hat)
      H_hat = torch.reshape(H_hat, (batch_size, seq_len, hidden_dim))
        
      Y_fake = Discriminator(H_hat)
      Y_fake = torch.reshape(Y_fake, (batch_size, seq_len, 1))
        
      x_hat, _ = Recovery(H_hat)
      x_hat = torch.reshape(x_hat, (batch_size, seq_len, dim))
        
      H, _ = Embedder(X.float())
      H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

      H_hat_supervise, _ = Supervisor(H)
      H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))

      Generator.zero_grad()
      Supervisor.zero_grad()
      Discriminator.zero_grad()
      Recovery.zero_grad()

      # line 267 of original implementation: 
      # G_loss_U, G_loss_S, G_loss_V
      G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])
        
      G_loss_U = binary_cross_entropy_loss(torch.ones_like(Y_fake), Y_fake)
        
      G_loss_V1 = torch.mean(torch.abs((torch.std(x_hat, [0], unbiased = False)) + 1e-6 - (torch.std(X, [0]) + 1e-6)))
      G_loss_V2 = torch.mean(torch.abs((torch.mean(x_hat, [0]) - (torch.mean(X, [0])))))
      G_loss_V = G_loss_V1 + G_loss_V2
        
      # doing a backward step for each loss should result in gradients accumulating 
      # so we should be able to optimize them jointly
      G_loss_S.backward(retain_graph=True)#
      G_loss_U.backward(retain_graph=True)
      G_loss_V.backward(retain_graph=True)#


      generator_optimizer.step()
      supervisor_optimizer.step()
      discriminator_optimizer.step()
      # Train Embedder 
      ## line 270: we only optimize E_loss_T0
      ## E_loss_T0 = just mse of x and x_tilde
      # but it calls E_solver which optimizes E_loss, which is a sum of 
      # E_loss0 and 0.1* G_loss_S
      MSE_loss = nn.MSELoss()
        
      H, _ = Embedder(X.float())
      H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

      X_tilde, _ = Recovery(H)
      X_tilde = torch.reshape(X_tilde, (batch_size, seq_len, dim))

      E_loss_T0 = MSE_loss(X, X_tilde)
      E_loss0 = 10 * torch.sqrt(MSE_loss(X, X_tilde))  
        
      H_hat_supervise, _ = Supervisor(H)
      H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))  

      G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])
      E_loss = E_loss0  + 0.1 * G_loss_S
        
      G_loss_S.backward(retain_graph=True)
      E_loss_T0.backward()
        
      Embedder.zero_grad()
      Recovery.zero_grad()
      Supervisor.zero_grad()
        
      embedder_optimizer.step()
      recovery_optimizer.step()
      supervisor_optimizer.step()
    # train Discriminator
    for batch_index, X in enumerate(loader):
      random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                       T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
      
      z = torch.tensor(random_data)
      z = z.float()

      H, _ = Embedder(X)
      H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

      Y_real = Discriminator(H)
      Y_real = torch.reshape(Y_real, (batch_size, seq_len, 1))
      
      e_hat, _ = Generator(z)
      e_hat = torch.reshape(e_hat, (batch_size, seq_len, hidden_dim))

      Y_fake_e = Discriminator(e_hat)
      Y_fake_e = torch.reshape(Y_fake_e, (batch_size, seq_len, 1))
        
      H_hat, _ = Supervisor(e_hat)
      H_hat = torch.reshape(H_hat, (batch_size, seq_len, hidden_dim))
        
      Y_fake = Discriminator(H_hat)
      Y_fake = torch.reshape(Y_fake, (batch_size, seq_len, 1))
        
      x_hat, _ = Recovery(H_hat)
      x_hat = torch.reshape(x_hat, (batch_size, seq_len, dim))

      Generator.zero_grad()
      Supervisor.zero_grad()
      Discriminator.zero_grad()
      Recovery.zero_grad()
      Embedder.zero_grad()

      D_loss_real = nn.BCEWithLogitsLoss()
      # i think we need the order: logits, targets 
      # https://discuss.pytorch.org/t/v1-0-1-nn-bcewithlogitsloss-returns-negative-loss-sigmoid-layer-not-deployed/57409/3
      DLR = D_loss_real(Y_real, torch.ones_like(Y_real))
      D_loss_fake = nn.BCEWithLogitsLoss()
      DLF = D_loss_fake(torch.zeros_like(Y_fake), Y_fake)
      D_loss_fake_e = nn.BCEWithLogitsLoss()
      DLF_e = D_loss_fake_e(torch.zeros_like(Y_fake_e), Y_fake_e)
      D_loss = DLR + DLF + gamma * DLF_e

      D_loss.backward(retain_graph=True)
      
      discriminator_optimizer.step()

      # check discriminator loss before updating
      #check_d_loss = D_loss
      #if (check_d_loss > 0.15):
      #  D_loss.backward(retain_graph=True)
      #  discriminator_optimizer.step()
      
      print('step: '+ str(e) + '/' + str(epoch) + 
            ', D_loss: ' + str(D_loss.detach().numpy()) +
            ', G_loss_U: ' + str(G_loss_U.detach().numpy()) + 
            ', G_loss_S: ' + str(G_loss_S.detach().numpy()) + 
            ', E_loss_t0: ' + str(np.sqrt(E_loss0.detach().numpy()))
             )
  print('Finish Joint Training')

Start Joint Training
step: 99/100, D_loss: -2.7969604, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -2.8291883, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -2.8611054, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -2.8930597, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -2.9248323, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -2.956499, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -2.9881005, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -3.0196328, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -3.0511103, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E_loss_t0: 2.4685705
step: 99/100, D_loss: -3.0824876, G_loss_U: -4.151405, G_loss_S: 9.272844e-06, E

KeyboardInterrupt: ignored

In [47]:
      X = next(iter(loader))
      random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                       T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
        
      # Generator Training 
      ## Train Generator
      z = torch.tensor(random_data)
      z = z.float()
        
      e_hat, _ = Generator(z)
      e_hat = torch.reshape(e_hat, (batch_size, seq_len, hidden_dim))
        
      H_hat, _ = Supervisor(e_hat)
      H_hat = torch.reshape(H_hat, (batch_size, seq_len, hidden_dim))
        
      Y_fake = Discriminator(H_hat)
      Y_fake = torch.reshape(Y_fake, (batch_size, seq_len, 1))
        
      x_hat, _ = Recovery(H_hat)
      x_hat = torch.reshape(x_hat, (batch_size, seq_len, dim))
        
      H, _ = Embedder(X.float())
      H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

      H_hat_supervise, _ = Supervisor(H)
      H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))

      Generator.zero_grad()
      Supervisor.zero_grad()
      Discriminator.zero_grad()
      Recovery.zero_grad()

      # line 267 of original implementation: 
      # G_loss_U, G_loss_S, G_loss_V
      G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])
      print(G_loss_S)
      print(Y_fake.shape)
      # G_loss_U returns negative values? 
      G_loss_U = binary_cross_entropy_loss(torch.ones_like(Y_fake), Y_fake)
      print(G_loss_U)
      G_loss_V1 = torch.mean(torch.abs((torch.std(x_hat, [0], unbiased = False)) + 1e-6 - (torch.std(X, [0]) + 1e-6)))
      G_loss_V2 = torch.mean(torch.abs((torch.mean(x_hat, [0]) - (torch.mean(X, [0])))))
      G_loss_V = G_loss_V1 + G_loss_V2
      print(G_loss_V)
      # doing a backward step for each loss should result in gradients accumulating 
      # so we should be able to optimize them jointly
      G_loss_S.backward(retain_graph=True)#
      G_loss_U.backward(retain_graph=True)
      G_loss_V.backward(retain_graph=True)#


      generator_optimizer.step()
      supervisor_optimizer.step()
      discriminator_optimizer.step()
      

tensor(9.9414e-06, grad_fn=<MseLossBackward>)
torch.Size([128, 24, 1])
tensor(-4.8705, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
tensor(0.7864, grad_fn=<AddBackward0>)


In [61]:
# Discriminator outputs values larger than 1
Discriminator(H_hat)

(tensor([[0.4977],
         [0.5011],
         [0.5013],
         ...,
         [0.4923],
         [0.4923],
         [0.4923]], grad_fn=<SigmoidBackward>),
 tensor([[[ 0.3385, -0.2502, -0.0390,  ...,  0.0921, -0.1438,  0.1577],
          [ 0.3385, -0.2502, -0.0390,  ...,  0.0921, -0.1438,  0.1577],
          [ 0.3385, -0.2502, -0.0390,  ...,  0.0921, -0.1438,  0.1577],
          ...,
          [ 0.3385, -0.2502, -0.0390,  ...,  0.0921, -0.1438,  0.1577],
          [ 0.3385, -0.2502, -0.0390,  ...,  0.0921, -0.1438,  0.1577],
          [ 0.3385, -0.2502, -0.0390,  ...,  0.0921, -0.1438,  0.1577]],
 
         [[-0.1385,  0.3385,  0.2934,  ..., -0.1475,  0.2490,  0.0824],
          [-0.1385,  0.3385,  0.2934,  ..., -0.1475,  0.2490,  0.0824],
          [-0.1385,  0.3385,  0.2934,  ..., -0.1475,  0.2490,  0.0824],
          ...,
          [-0.1385,  0.3385,  0.2934,  ..., -0.1475,  0.2490,  0.0824],
          [-0.1385,  0.3385,  0.2934,  ..., -0.1475,  0.2490,  0.0824],
          [-0.1385

In [60]:
Discriminator = Time_GAN_module(input_size=hidden_dim, output_size=1, hidden_dim=hidden_dim, n_layers=n_layers, activation=torch.sigmoid)
Discriminator

Time_GAN_module(
  (rnn): GRU(24, 24, num_layers=3, batch_first=True)
  (fc): Linear(in_features=24, out_features=1, bias=True)
)

In [None]:
test = next(iter(loader))
test.shape

In [66]:
D_loss_real(Y_real, torch.ones_like(Y_real))

tensor(0.0023, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)

In [None]:
# outline of original training loop
"""print('Start Joint Training')

for e in range(epoch): 

    for batch_index, X in enumerate(loader):
        
        random_data = random_generator(batch_size=batch_size, z_dim=dim, 
                                       T_mb=extract_time(data)[0], max_seq_len=extract_time(data)[1])
        
        
        # Generator Training 
        ## Train Generator
        z = torch.tensor(random_data)
        z = z.float()
        
        e_hat, _ = Generator(z)
        e_hat = torch.reshape(e_hat, (batch_size, seq_len, hidden_dim))
        
        H_hat, _ = Supervisor(e_hat)
        H_hat = torch.reshape(H_hat, (batch_size, seq_len, hidden_dim))
        
        Y_fake = Discriminator(H_hat)
        Y_fake = torch.reshape(Y_fake, (batch_size, seq_len, 1))
        
        x_hat, _ = Recovery(H_hat)
        x_hat = torch.reshape(x_hat, (batch_size, seq_len, dim))
        
        
        Generator.zero_grad()
        Supervisor.zero_grad()
        Discriminator.zero_grad()
        Recovery.zero_grad()
        
        G_loss_U = binary_cross_entropy_loss(torch.ones_like(Y_fake), Y_fake)
        
        G_loss_V1 = torch.mean(torch.abs((torch.std(x_hat, [0], unbiased = False)) + 1e-6 - (torch.std(X, [0]) + 1e-6)))
        G_loss_V2 = torch.mean(torch.abs((torch.mean(x_hat, [0]) - (torch.mean(X, [0])))))
        G_loss_V = G_loss_V1 + G_loss_V2
        
 
        G_loss_U.backward(retain_graph=True)
        G_loss_V.backward()


        generator_optimizer.step()
        supervisor_optimizer.step()
        discriminator_optimizer.step()
        
        ## Train Embedder
        
        MSE_loss = nn.MSELoss()
        
        H, _ = Embedder(X.float())
        H = torch.reshape(H, (batch_size, seq_len, hidden_dim))

        X_tilde, _ = Recovery(H)
        X_tilde = torch.reshape(X_tilde, (batch_size, seq_len, dim))

        E_loss0 = 10 * torch.sqrt(MSE_loss(X, X_tilde))  
        
        H_hat_supervise, _ = Supervisor(H)
        H_hat_supervise = torch.reshape(H_hat_supervise, (batch_size, seq_len, hidden_dim))  

        G_loss_S = MSE_loss(H[:,1:,:], H_hat_supervise[:,:-1,:])
        E_loss = E_loss0  + 0.1 * G_loss_S
        
        G_loss_S.backward(retain_graph=True)
        E_loss.backward()
        
        Embedder.zero_grad()
        Recovery.zero_grad()
        Supervisor.zero_grad()
        
        embedder_optimizer.step()
        recovery_optimizer.step()
        supervisor_optimizer.step()
        
        # Train Discriminator 
        
        
        
        
        #if e in range(1,epoch) and batch_index == 0:
        print('step: '+ str(e) + '/' + str(epoch) + ', G_loss_U: ' + str(G_loss_U.detach().numpy()) + ', G_loss_S: ' + 
             str(G_loss_S.detach().numpy()) + ', E_loss_t0: ' + str(np.sqrt(E_loss0.detach().numpy()))
             )
        
        
 



    


        

print('Finish Joint Training')"""