In [1]:
## Necessary packages
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import warnings
warnings.filterwarnings("ignore")

# 1. TimeGAN model
from timegan import timegan
# 2. Data loading
from data_loading import real_data_loading_sce1, sine_data_generation_sce1
# 3. Metrics
from metrics.discriminative_metrics import discriminative_score_metrics
from metrics.predictive_metrics import predictive_score_metrics
from metrics.visualization_metrics import visualization

import tensorflow as tf

import os, time

In [2]:
# Expose only one GPU to this process; with PCI_BUS_ID ordering the index
# below refers to devices ordered by their PCI bus id.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# `config` below is not referenced again in the visible cells (timegan() is
# called without it), so by itself it does not change GPU memory allocation.
# TF_FORCE_GPU_ALLOW_GROWTH requests the same allow-growth behaviour through
# the environment; setdefault keeps any value already set by the user.
# NOTE(review): the variable is read when TensorFlow first initialises the
# GPU, so this cell must run before the first session is created.
os.environ.setdefault("TF_FORCE_GPU_ALLOW_GROWTH", "true")

# Kept for code that wants to pass an explicit session config.
config = tf.compat.v1.ConfigProto()  # Another Version: config = tf.ConfigProto()
config.gpu_options.allow_growth = True

In [3]:
## Data loading
# Select the dataset and build the train/test split used by every later cell.
data_name = 'sines'
seq_len = 24
test_per = 0.05 # testing percentage: 5%

if data_name in ['stock', 'energy']:
    train_data, test_data = real_data_loading_sce1(data_name, seq_len, test_per)
elif data_name == 'sines':
    # Set number of samples and its dimensions
    no, dim = 10000, 5
    train_data, test_data = sine_data_generation_sce1(no, seq_len, dim, test_per)
else:
    # Fail here with a clear message instead of a NameError on train_data
    # in a later cell.
    raise ValueError(
        f"Unsupported data_name {data_name!r}; expected 'stock', 'energy' or 'sines'."
    )

print(data_name + ' dataset is ready.')

sines dataset is ready.


In [4]:
# Show the dimensions of the train and test splits as a pair of shape tuples.
np.shape(train_data), np.shape(test_data)

((9500, 24, 5), (500, 24, 5))

In [5]:
## Network parameters
# Settings collected in one dict and passed to timegan() together with the
# training data.
parameters = {
    'module': 'gru',
    'hidden_dim': 20,
    'num_layer': 3,
    'iterations': 5000,
    'batch_size': 128,
}

In [None]:
# Run TimeGAN
# Call timegan() on the training split and report how long the call took.
# perf_counter() is a monotonic clock, so the measured duration is not
# affected by adjustments of the system clock during the (long) run.
start = time.perf_counter()
generated_data = timegan(train_data, parameters)
end = time.perf_counter()
print('Finish Synthetic Data Generation')
print(f"Runtime of the Generator is {end - start}")





Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons


2021-08-20 12:21:47.080668: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2021-08-20 12:21:47.129101: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1618] Found device 0 with properties: 
name: Tesla V100-PCIE-32GB major: 7 minor: 0 memoryClockRate(GHz): 1.38
pciBusID: 0000:3b:00.0
2021-08-20 12:21:47.129353: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.0
2021-08-20 12:21:47.130820: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0
2021-08-20 12:21:47.132294: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10.0
2021-08-20 12:21:47.132618: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10.0
2021-08-20 12:21:47.134380: I tensorflow/stream_executor/platfo

Start Embedding Network Training


2021-08-20 12:21:50.172343: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10.0


step: 0/5000, e_loss: 0.3296
step: 1000/5000, e_loss: 0.0434
step: 2000/5000, e_loss: 0.032
step: 3000/5000, e_loss: 0.0281
step: 4000/5000, e_loss: 0.0245
Finish Embedding Network Training
Start Training with Supervised Loss Only
step: 0/5000, s_loss: 0.1776
step: 1000/5000, s_loss: 0.019
step: 2000/5000, s_loss: 0.0139
step: 3000/5000, s_loss: 0.0108
step: 4000/5000, s_loss: 0.0088
Finish Training with Supervised Loss Only
Start Joint Training
step: 0/5000, d_loss: 2.0378, g_loss_u: 0.7408, g_loss_s: 0.0186, g_loss_v: 0.4587, e_loss_t0: 0.1236


In [None]:
# Save the synthetic samples to a .npy file. The iteration count in the file
# name is read from `parameters` so the name cannot drift from the run that
# produced the data (with iterations = 5000 the name is the same as before).
filename = f"{data_name}_data_TimeGAN_{parameters['iterations']}_sce1-wTest.npy"
np.save(filename, generated_data)

In [None]:
# Inspect the dimensions of the object returned by timegan().
# NOTE(review): requires generated_data to expose `.shape` (e.g. an ndarray);
# presumably (samples, seq_len, dim) like train_data — confirm, no output is recorded.
generated_data.shape

In [None]:
# Peek at the first element of the first generated sample
# (presumably the feature vector at time step 0 — TODO confirm).
generated_data[0][0]

In [None]:
# Discriminative score: evaluate the metric `metric_iteration` times on the
# held-out test split against the generated data, then report the individual
# scores, their mean/std, and the mean/std of the time each evaluation took.
metric_iteration = 5
discriminative_score = list()
time_usage = list()

for _ in range(metric_iteration):
    # perf_counter() is monotonic, so durations are immune to clock changes.
    start = time.perf_counter()
    temp_disc = discriminative_score_metrics(test_data, generated_data)
    end = time.perf_counter()
    discriminative_score.append(temp_disc)
    time_usage.append(end - start)


print("the discriminative scores are:", discriminative_score)
# These two lines previously said "predicted score" although they summarise
# the discriminative score computed above.
print("the average value of discriminative score: ", str(np.round(np.mean(discriminative_score), 4)))
print("the std value of discriminative score: ", str(np.round(np.std(discriminative_score), 4)))
#================Time calculation==========================#
print('Time usage avg. value: ' + str(np.round(np.mean(time_usage), 4)))
print('Time usage std. value: ' + str(np.round(np.std(time_usage), 4)))

In [None]:
# Predictive score: evaluate the metric `metric_iteration` times on the
# held-out test split against the generated data and keep each score and
# the time each evaluation took.
metric_iteration = 5

predictive_score = list()
time_usage = list()
for _ in range(metric_iteration):  # the loop index itself is not used
    # perf_counter() is monotonic, so durations are immune to clock changes.
    start = time.perf_counter()
    temp_pred = predictive_score_metrics(test_data, generated_data)
    end = time.perf_counter()
    predictive_score.append(temp_pred)
    time_usage.append(end - start)

print('Predictive score: ' + str(np.round(np.mean(predictive_score), 4)))

In [None]:
# Per-run predictive scores, followed by their mean and standard deviation
# rounded to four decimals.
print("all the predictive scores: ", predictive_score)
print(f"Predictive score avg. value: {np.round(np.mean(predictive_score), 4)!s}")
print(f"Predictive score std. value: {np.round(np.std(predictive_score), 4)!s}")

# Mean and standard deviation of the per-run durations kept in time_usage.
print(f"Time usage avg. value: {np.round(np.mean(time_usage), 4)!s}")
print(f"Time usage std. value: {np.round(np.std(time_usage), 4)!s}")

In [None]:
#ori_data = np.append(train_data, test_data, axis=0) # append the train and test data together

In [None]:
# Compare the training split with the generated data, once per analysis
# selected by the third argument ('pca', then 'tsne').
# NOTE(review): only train_data is passed here; test_data is not included
# (the commented-out cell above hints at a combined train+test alternative).
visualization(train_data, generated_data, 'pca')
visualization(train_data, generated_data, 'tsne')