# Conditional GAN for timeseries generation

..

In [None]:
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
from tensorflow import keras

%matplotlib inline

In [None]:
# Suppress every warning for the remainder of the session.
warnings.simplefilter('ignore')
# Show floats with thousands separators and four decimals, padded to a width of 20.
pd.set_option('display.float_format', '{:20,.4f}'.format)

In [None]:
# Plot appearance: seaborn's "whitegrid" style with its "colorblind" palette.
sns.set_style(style="whitegrid")
sns.set_palette(palette="colorblind")

In [None]:
from keras.models import Sequential, Model
from keras.layers import Input
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error

In [None]:
# Settings shared by the generator, the discriminator and the training loop.
gan_params = dict(
    num_epochs=800,         # number of training iterations
    save_interval=100,
    sequence_length=6,      # rows (time steps) per window
    num_variables=gan_cols,  # columns (variables) per time step
    batch_size=64,          # windows per training batch
    lr=0.0001,
)

In [None]:
# Generator-specific settings; 'gru_units' sizes the GRU layer and 'lr' is the
# learning rate handed to the optimizer of the combined model.
generator_params = dict(
    noise_sigma=0.3,
    lstm_units=128,
    lstm_dropout=0.4,
    gru_units=64,
    lr=0.0001,
)

In [None]:
# Discriminator-specific settings: width of the bidirectional LSTM, dropout rate
# and the learning rate of the discriminator's optimizer.
discriminator_params = dict(
    bi_lstm_units=256,
    dropout_rate=0.4,
    lr=0.0001,
)

In [None]:
# Cut the series into consecutive, non-overlapping windows of
# gan_params['sequence_length'] rows. Every window's rows go to `gan_features`;
# the 'label' value of the window's last row goes to `gan_labels`.
gan_features = []
gan_labels = []

# The label column does not change between iterations, so look it up once.
gan_labels_df = gan_df['label']

for i in range(int(num_obs / gan_params['sequence_length'])):
    # Advance by a whole window per iteration. Advancing by a single row while
    # looping only num_obs / sequence_length times would restrict the windows to
    # the first rows of the data and leave the rest unused.
    # NOTE(review): assumes num_obs is the number of rows in gan_df - confirm.
    window_start = i * gan_params['sequence_length']
    window_stop = window_start + gan_params['sequence_length']
    gan_labels.append(gan_labels_df[window_start:window_stop].values[-1:])
    gan_features.append(gan_df[window_start:window_stop].values)

In [None]:
# Stack the windows into one array laid out as (window, time step, variable).
real = np.asarray(gan_features).reshape(
    (len(gan_features), gan_params['sequence_length'], gan_cols)
)

In [None]:
# Notebook cell output: display the dimensions of `real`.
real.shape

### Generator

In [None]:
from keras.layers import GaussianNoise, LSTM, Dropout, BatchNormalization, Dense, LocallyConnected2D, GRU, Reshape

def build_generator(params):
    """Build the generator network.

    The network is one GRU layer that returns the full sequence, followed by a
    softmax ``Dense`` layer of width ``num_variables`` and a ``Reshape`` to
    ``(sequence_length, num_variables)``. The summary of the inner Sequential
    model is printed as a side effect.

    Args:
        params: dict providing 'sequence_length', 'num_variables' and 'gru_units'.

    Returns:
        A Keras ``Model`` that applies the Sequential stack to an input of shape
        ``(sequence_length, num_variables)``.
    """
    n_vars = params['num_variables']
    sample_shape = (params['sequence_length'], n_vars)

    noise_in = Input(shape=sample_shape)

    stack = Sequential(
        [
            GRU(params['gru_units'], input_shape=sample_shape, return_sequences=True),
            Dense(n_vars, activation='softmax'),
            Reshape(target_shape=sample_shape),
        ],
        name='generator',
    )
    stack.summary()

    return Model(noise_in, stack(noise_in))

In [None]:
# Build the generator from the shared GAN settings overlaid with the
# generator-specific ones; for keys present in both (such as 'lr') the
# generator value wins.
generator = build_generator(dict(gan_params, **generator_params))

### Discriminator

In [None]:
from keras.layers import Bidirectional, LSTM, Dense, concatenate, Flatten

def build_discriminator(params):
    """Build the discriminator network.

    The model takes a generated and a real sequence, joins them along the time
    axis and scores the joined sequence with a bidirectional LSTM followed by
    dropout and a single sigmoid unit. The summary of the inner Sequential model
    is printed as a side effect.

    Args:
        params: dict providing 'sequence_length', 'num_variables',
            'bi_lstm_units' and 'dropout_rate'.

    Returns:
        A Keras ``Model`` with inputs ``[generated, real]``, each of shape
        ``(sequence_length, num_variables)``, and one sigmoid output.
    """
    dshape = params['sequence_length'], params['num_variables']
    # Concatenating the two inputs on axis 1 doubles the number of time steps,
    # so this is the shape the Sequential stack actually receives.
    joined_shape = 2 * params['sequence_length'], params['num_variables']

    real = Input(shape=dshape)
    generated = Input(shape=dshape)
    inputs = concatenate([generated, real], axis=1)

    d = Sequential(name='discriminator')
    # Declare the real input shape and leave the batch dimension open instead of
    # pinning it to params['batch_size'] with a mismatched sequence length.
    d.add(Bidirectional(LSTM(params['bi_lstm_units']), input_shape=joined_shape))
    d.add(Dropout(params['dropout_rate']))
    d.add(Dense(1, activation='sigmoid'))
    d.summary()
    return Model([generated, real], d(inputs))

In [None]:
# Build the discriminator from the shared GAN settings overlaid with the
# discriminator-specific ones, then compile it with a mean-squared-error loss
# and the same quantity as reported metric.
discriminator = build_discriminator(dict(gan_params, **discriminator_params))
discriminator.compile(
    optimizer=Adam(lr=discriminator_params['lr']),
    loss='mse',
    metrics=['mse'],
)

### CGAN

In [None]:
def build_cgan(generator, discriminator, params):
    """Chain generator and discriminator into the combined model.

    The combined model feeds noise through ``generator`` and passes the result,
    together with a second data input, to ``discriminator``. The discriminator
    is marked non-trainable before the combined model is assembled. The model
    summary is printed as a side effect.

    Args:
        generator: model mapping a ``(sequence_length, num_variables)`` input to
            a generated sequence.
        discriminator: model called with the list ``[generated, real]``.
        params: dict providing 'sequence_length' and 'num_variables'.

    Returns:
        A Keras ``Model`` named 'cgan' with inputs ``[noise, data]`` and the
        discriminator's prediction as its single output.
    """
    ganshape = params['sequence_length'], params['num_variables']
    # Freeze the discriminator inside the combined model.
    discriminator.trainable = False

    noise = Input(shape=ganshape)
    # Second model input: the data sequence shown to the discriminator next to
    # the generated one. It was previously referenced without being defined.
    data = Input(shape=ganshape)
    generated = generator(noise)

    # Keep the argument order the discriminator is built with: generated first,
    # real data second.
    prediction = discriminator([generated, data])

    gan = Model([noise, data], prediction, name='cgan')
    gan.summary()
    return gan

In [None]:
cgan = build_cgan(generator, discriminator, gan_params)
# The combined model has a single output (the discriminator's prediction), so it
# takes exactly one loss; a list of losses must have one entry per model output.
cgan.compile(loss='kullback_leibler_divergence',
             optimizer=Adam(lr=generator_params['lr']), metrics=['mse'])

In [None]:
def train_gan(real, batch_size, params):
    """Alternate discriminator and generator updates.

    Each of the ``params['num_epochs']`` iterations draws one contiguous batch
    of real windows, generates a batch from Gaussian noise, updates the
    discriminator on the (generated, real) pair and then updates the generator
    through the combined model. Uses the module-level ``generator``,
    ``discriminator`` and ``cgan`` models and prints the losses every 100
    iterations.

    Args:
        real: array of real windows indexed as (window, time step, variable).
        batch_size: number of windows per training batch.
        params: dict providing 'num_epochs' and 'sequence_length'.

    Returns:
        Tuple ``(d_metrics, g_metrics)`` holding, per iteration, the values
        returned by ``train_on_batch`` for the discriminator and for the
        combined model.

    Raises:
        ValueError: if ``real`` holds fewer than ``batch_size`` windows.
    """
    if len(real) < batch_size:
        raise ValueError(
            'need at least %s windows to draw a batch, got %s' % (batch_size, len(real))
        )

    g_metrics = []
    d_metrics = []

    # Target value 1 for every sample of a batch.
    reals = np.ones(batch_size)

    for i in range(params['num_epochs']):
        # Draw a random contiguous batch of real windows. randint's upper bound
        # is exclusive, hence the + 1 so the last possible start is reachable.
        random_index = np.random.randint(0, len(real) - batch_size + 1)
        half_real = real[random_index:random_index + batch_size]
        half_synth = np.random.normal(-1.0, 1.0, size=[batch_size, params['sequence_length'], real.shape[2]])

        # Turn the noise into generated sequences.
        generated = generator.predict(half_synth)

        # Train the discriminator on the (generated, real) pair.
        # NOTE(review): the discriminator is only ever trained towards the
        # target 1 here - confirm whether a step with target 0 is missing.
        disc = discriminator.train_on_batch([generated, half_real], reals)

        # Train the generator through the combined model. Its second input is
        # the data sequence, so it receives the real batch, and its single
        # output takes a single target array.
        gen_ = cgan.train_on_batch([half_synth, half_real], reals)
        if i % 100 == 0:
            print('Epoch %s losses: discriminator %.4f, generator: %.4f' %
                  (i, disc[0], gen_[0]))

        d_metrics.append(disc)
        g_metrics.append(gen_)
    return d_metrics, g_metrics

In [None]:
# Run the training loop and keep the per-iteration results of both models.
d_metrics, g_metrics = train_gan(
    real=real,
    batch_size=gan_params['batch_size'],
    params=gan_params,
)

In [None]:
# Plot the first recorded value (the loss) of every training iteration for
# both models.
plt.figure(figsize=(15, 7))
plt.plot([step[0] for step in d_metrics], label='discriminator loss')
plt.plot([step[0] for step in g_metrics], label='generator loss')
plt.title('CGAN losses')
plt.legend()
plt.show()

In [None]:
# Plot the second recorded value of every training iteration for both models.
# Both models are compiled with the 'mse' metric, so both curves are mean
# squared errors and are labelled as such.
plt.figure(figsize=(15,7))
plt.plot([metrics[1] for metrics in d_metrics], label='discriminator mean squared error')
plt.plot([metrics[1] for metrics in g_metrics], label='generator mean squared error')
plt.legend()
plt.title('CGAN performance metrics')
plt.show()

In [None]:
# Feed the generator noise drawn from the same Gaussian that train_gan samples
# from (np.random.normal(-1.0, 1.0)); uniform noise would be a different input
# distribution from the one the generator was trained on.
# [:, -1, -1] keeps the last variable at the last time step of every generated
# window - presumably the 'label' column; TODO confirm the column order.
generated_y = generator.predict(
    np.random.normal(-1.0, 1.0, size=[num_obs, gan_params['sequence_length'], gan_cols])
)[:, -1, -1]
gan_y = gan_df['label'].values

In [None]:
# Overlay the observed label series and the generated series in one figure.
plt.figure(figsize=(15, 7))
plt.plot(gan_y, label='observed cpi')
plt.plot(generated_y, label='gan-generated cpi')
plt.title(
    'Observed versus GAN-generated values for consumer price inflation in %s' % country
)
plt.legend()
plt.show()

In [None]:
# Report the root mean squared error between the observed and generated series
# together with the mean of each.
print(
    'rmse: %s\nmean observed: %s\nmean generated: %s'
    % (
        np.sqrt(mean_squared_error(gan_y, generated_y)),
        gan_y.mean(),
        generated_y.mean(),
    )
)