In [111]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 15962041257352351903
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 16591054485076167791
physical_device_desc: "device: XLA_GPU device"
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 4424036384668240335
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 11276946637
locality {
  bus_id: 1
  links {
  }
}
incarnation: 15242822112926518825
physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7"
]


In [36]:
# Imports
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LSTM
from tensorflow.keras.models import Model, Sequential
from sklearn.model_selection import train_test_split
import time
from math import log10, floor

In [86]:
# Define variables
n_vars = 3
var_ids = list(range(n_vars))
var_names = ['var' + str(i) for i in var_ids]
var_weights = [0.1, 0.6, 0.3] # variable distribution of mock data
n_time_steps = 20
n_individuals = 10000

In [109]:
# Helper function(s)

# round a number to n significant digits
def round_to_n(x, n = 2):
    return round(x, -int(floor(log10(abs(x)))) + (n - 1)) if x != 0 else 0

def visualize_output(generator, z, n=2):
    p = np.reshape(generator.predict(z), (n_time_steps, n_vars))
    p.shape
    for t in range(p.shape[0]):
        tmp = []
        for f in range(p.shape[1]):
            tmp.append(round_to_n(p[t,f], n))
        print(tmp)

In [88]:
# Generate mock data

data = np.zeros(shape=(n_individuals, n_time_steps, n_vars))

start_time = time.time()

for indv in range(n_individuals):
    for t in range(n_time_steps):
        var = np.random.choice(var_ids, p=var_weights)
        data[indv][t][var] = 1
        
print('time taken:', round_to_n(time.time() - start_time), 'seconds')
        
#print(data)

time taken: 5.1 seconds


In [89]:
indv_sum = np.sum(data, axis=0)
indv_sum / n_individuals

array([[0.1009, 0.5885, 0.3106],
       [0.1012, 0.5952, 0.3036],
       [0.105 , 0.5953, 0.2997],
       [0.1007, 0.6035, 0.2958],
       [0.0968, 0.6109, 0.2923],
       [0.1011, 0.599 , 0.2999],
       [0.0952, 0.602 , 0.3028],
       [0.0997, 0.6086, 0.2917],
       [0.0982, 0.6037, 0.2981],
       [0.1039, 0.6   , 0.2961],
       [0.1017, 0.6059, 0.2924],
       [0.0988, 0.5932, 0.308 ],
       [0.0977, 0.607 , 0.2953],
       [0.0998, 0.5988, 0.3014],
       [0.1031, 0.5957, 0.3012],
       [0.0994, 0.6056, 0.295 ],
       [0.0994, 0.5891, 0.3115],
       [0.1011, 0.602 , 0.2969],
       [0.0985, 0.5967, 0.3048],
       [0.1044, 0.601 , 0.2946]])

In [90]:
# Divide the data into train and test sets 

data_train, data_test = train_test_split(data, test_size = 0.2)
print(data_train.shape, data_test.shape)

(8000, 20, 3) (2000, 20, 3)


In [91]:
# Define the discriminator

inp_d = Input(shape=(n_time_steps, n_vars))
lstm_d = LSTM(1, return_sequences=True)(inp_d)
out_d = Dense(1, activation='sigmoid')(lstm_d)
discriminator = Model(inp_d, out_d, name='discriminator')
discriminator.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_16 (InputLayer)        (None, 20, 3)             0         
_________________________________________________________________
lstm_16 (LSTM)               (None, 20, 1)             20        
_________________________________________________________________
dense_16 (Dense)             (None, 20, 1)             2         
Total params: 22
Trainable params: 22
Non-trainable params: 0
_________________________________________________________________


In [92]:
x = data_train[0]
discriminator.predict(np.reshape(x, (1, n_time_steps, n_vars))).shape

(1, 20, 1)

In [93]:
# Define the generator

noise_length = 5

inp_g = Input(shape=(n_time_steps, noise_length))
lstm_g = LSTM(3, return_sequences=True)(inp_g)
out_g = Dense(3, activation='softmax')(lstm_g)
generator = Model(inp_g, out_g, name='generator')
generator.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_17 (InputLayer)        (None, 20, 5)             0         
_________________________________________________________________
lstm_17 (LSTM)               (None, 20, 3)             108       
_________________________________________________________________
dense_17 (Dense)             (None, 20, 3)             12        
Total params: 120
Trainable params: 120
Non-trainable params: 0
_________________________________________________________________


In [94]:
z = np.random.normal(scale = 0.5, size = (1, n_time_steps, noise_length))
generator.predict(z).shape

(1, 20, 3)

In [95]:
# Define the trainable discriminator model
discriminator.trainable = True

discriminator_model = Sequential()
discriminator_model.add(discriminator)
discriminator_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
discriminator_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
discriminator (Model)        (None, 20, 1)             22        
Total params: 22
Trainable params: 22
Non-trainable params: 0
_________________________________________________________________


In [96]:
# Define the trainable adversarial model
discriminator.trainable = False

adversarial_model = Sequential()
adversarial_model.add(generator)
adversarial_model.add(discriminator)
adversarial_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
adversarial_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
generator (Model)            (None, 20, 3)             120       
_________________________________________________________________
discriminator (Model)        (None, 20, 1)             22        
Total params: 142
Trainable params: 120
Non-trainable params: 22
_________________________________________________________________


In [97]:
# Define a train function

def train(batch_size = 50, epochs = 10, print_step = 1):
    for epoch in range(epochs):
        idx_true = np.random.choice(data_train.shape[0], size = batch_size, replace = False)
        x_true = data_train[idx_true, :, :]
        z = np.random.normal(scale = 1, size = (batch_size, n_time_steps, noise_length))
        
        x_fake = generator.predict(z)
        
        x = np.concatenate((x_true, x_fake))
        y = np.ones([2 * batch_size, 20, 1])
        y[batch_size:, :, :] = 0
        discriminator.trainable = True
        d_loss = discriminator_model.train_on_batch(x, y)
        
        y = np.ones([batch_size, 20, 1])
        discriminator.trainable = False
        z = np.random.normal(scale = 1, size = (batch_size, n_time_steps, noise_length)) 
        a_loss = adversarial_model.train_on_batch(z, y)
        
        if (epoch % print_step) == 0:
            log_mesg = "%d: [D loss: %f, acc: %f]" % (epoch, d_loss[0], d_loss[1])
            log_mesg = "%s  [A loss: %f, acc: %f]" % (log_mesg, a_loss[0], a_loss[1])
            print(log_mesg)
        

In [100]:
# run training function

start_time = time.time()
train(batch_size = 100, epochs = 1000, print_step = 50)
print('time taken:', round_to_n(time.time() - start_time), 'seconds')

0: [D loss: 0.673320, acc: 0.673750]  [A loss: 0.738642, acc: 0.050000]
50: [D loss: 0.672162, acc: 0.686750]  [A loss: 0.738330, acc: 0.068000]
100: [D loss: 0.670003, acc: 0.540750]  [A loss: 0.847548, acc: 0.065000]
150: [D loss: 0.656923, acc: 0.558250]  [A loss: 0.843167, acc: 0.132000]
200: [D loss: 0.642969, acc: 0.619000]  [A loss: 0.851687, acc: 0.172500]
250: [D loss: 0.626151, acc: 0.679000]  [A loss: 0.878341, acc: 0.185000]
300: [D loss: 0.604707, acc: 0.750250]  [A loss: 0.899589, acc: 0.190000]
350: [D loss: 0.586880, acc: 0.785750]  [A loss: 0.913796, acc: 0.199500]
400: [D loss: 0.575091, acc: 0.800750]  [A loss: 0.930761, acc: 0.203000]
450: [D loss: 0.561313, acc: 0.808750]  [A loss: 0.946859, acc: 0.210500]
500: [D loss: 0.545479, acc: 0.823750]  [A loss: 0.963473, acc: 0.212000]
550: [D loss: 0.534572, acc: 0.826750]  [A loss: 0.985672, acc: 0.214000]
600: [D loss: 0.519236, acc: 0.839750]  [A loss: 0.998054, acc: 0.230000]
650: [D loss: 0.509956, acc: 0.838750]  [

In [110]:
z = np.random.normal(scale = 0.5, size = (1, n_time_steps, noise_length))
visualize_output(generator, z, 1)

[0.04, 0.9, 0.02]
[0.01, 1.0, 0.003]
[0.005, 1.0, 0.002]
[0.005, 1.0, 0.002]
[0.004, 1.0, 0.001]
[0.004, 1.0, 0.002]
[0.006, 1.0, 0.001]
[0.003, 1.0, 0.001]
[0.004, 1.0, 0.001]
[0.005, 1.0, 0.001]
[0.004, 1.0, 0.001]
[0.004, 1.0, 0.001]
[0.004, 1.0, 0.002]
[0.005, 1.0, 0.001]
[0.004, 1.0, 0.001]
[0.004, 1.0, 0.001]
[0.003, 1.0, 0.001]
[0.003, 1.0, 0.001]
[0.004, 1.0, 0.002]
[0.005, 1.0, 0.001]
