### TadGAN for Tensorflow 2.0 

#### Part 1
- Mount and authenticate the user's Google Drive
- Load and prepare the data

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')#, force_remount=True)  # force_remount forces the drive to be (re)mounted at this path.

In [None]:
# `os` is imported here because this cell runs before the general import cell below.
import os

# Switch to the project folder on the mounted drive.
os.chdir('/content/drive/My Drive/CoLab/TimeSeries/TadGAN')  # On later runs, only this cell needs to be executed.

In [None]:
# load generals
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler

from utils import plot, plot_ts, plot_rws, plot_error, unroll_ts

In [None]:
# Load the raw series; the relative path is resolved against the current working directory.
df = pd.read_csv('nyc_taxi.csv')

In [None]:
# Preview the first five rows of the loaded frame.
df.head(5)

In [None]:
def time_segments_aggregate(X, interval, time_column, method='mean'):
    """Aggregate values over consecutive, fixed-length time spans.

    The rows are sorted by ``time_column`` and walked in segments that start at
    the smallest time value and advance by ``interval``. Every aggregation named
    in ``method`` is applied to each segment; a segment without rows yields the
    aggregation's empty-input result (NaN for ``mean``).

    Args:
        X (ndarray or pandas.DataFrame):
            N-dimensional sequence of values.
        interval (int):
            Positive time span to compute each aggregation over.
        time_column (int or str):
            Column of X that contains time values.
        method (str or list):
            Optional. String describing aggregation method or list of strings describing
            multiple aggregation methods. If not given, `mean` is used.

    Returns:
        ndarray, ndarray:
            * Sequence of aggregated values, one column for each aggregation method.
            * Sequence of index values (first index of each aggregated segment).

    Raises:
        ValueError:
            If ``interval`` is not positive (the segment start would never advance).
    """
    if interval <= 0:
        raise ValueError('`interval` must be a positive number.')

    if isinstance(X, np.ndarray):
        X = pd.DataFrame(X)

    X = X.sort_values(time_column).set_index(time_column)

    if isinstance(method, str):
        method = [method]

    if len(X) == 0:
        # No rows: return empty results with the column count aggregation would produce.
        return np.empty((0, X.shape[1] * len(method))), np.asarray([])

    start_ts = X.index.values[0]
    max_ts = X.index.values[-1]

    values = []
    index = []
    while start_ts <= max_ts:
        end_ts = start_ts + interval
        # `.loc` slices by label and includes both ends, so stop one unit before
        # the next segment's start (assumes integer-like time values).
        subset = X.loc[start_ts:end_ts - 1]
        aggregated = [
            getattr(subset, agg)(skipna=True).values
            for agg in method
        ]
        values.append(np.concatenate(aggregated))
        index.append(start_ts)
        start_ts = end_ts

    return np.asarray(values), np.asarray(index)

In [None]:
# TimeSegments 
# Aggregate the raw frame into segments of 1800 time units (default `mean` aggregation).
X, index = time_segments_aggregate(df, interval=1800, time_column='timestamp')

In [None]:
# Fill missing values with SimpleImputer's default strategy.
imp = SimpleImputer()
X = imp.fit_transform(X)

In [None]:
# Rescale every column to [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))
X = scaler.fit_transform(X)

In [None]:
def rolling_window_sequences(X, index, window_size, target_size, step_size, target_column,
                             drop=None, drop_windows=False):
    """Create rolling window sequences out of time series data.

    The function creates an array of input sequences and an array of target sequences by rolling
    over the input sequence with a specified window.
    Optionally, windows that contain invalid values can be skipped.

    Args:
        X (ndarray):
            N-dimensional sequence to iterate over.
        index (ndarray):
            Array containing the index values of X.
        window_size (int):
            Length of the input sequences.
        target_size (int):
            Length of the target sequences.
        step_size (int):
            Positive number of steps to move the window forward each round.
        target_column (int):
            Indicating which column of X is the target.
        drop (ndarray or None or str or float or bool):
            Optional. Array of boolean values indicating which values of X are invalid, or value
            indicating which value should be dropped. If not given, `None` is used.
        drop_windows (bool):
            Optional. Indicates whether the dropping functionality should be enabled. If not
            given, `False` is used.

    Returns:
        ndarray, ndarray, ndarray, ndarray:
            * input sequences.
            * target sequences.
            * first index value of each input sequence.
            * first index value of each target sequence.

    Raises:
        ValueError:
            If ``step_size`` is smaller than 1 (the window would never advance), or if an
            array-like ``drop`` does not have the same length as ``X``.
    """
    if step_size < 1:
        raise ValueError('`step_size` must be a positive integer.')

    out_X = []
    out_y = []
    X_index = []
    y_index = []
    target = X[:, target_column]

    if drop_windows:
        if hasattr(drop, '__len__') and (not isinstance(drop, str)):
            # Array-like mask supplied by the caller: must line up with X row by row.
            if len(drop) != len(X):
                raise ValueError('Arrays `drop` and `X` must be of the same length.')
        else:
            # Scalar marker: build the mask by comparison (NaN needs isnan, since NaN != NaN).
            if isinstance(drop, float) and np.isnan(drop):
                drop = np.isnan(X)
            else:
                drop = X == drop

    start = 0
    max_start = len(X) - window_size - target_size + 1
    while start < max_start:
        end = start + window_size

        if drop_windows:
            drop_window = drop[start:end + target_size]
            to_drop = np.where(drop_window)[0]
            if to_drop.size:
                # Jump just past the last invalid row in this window.
                start += to_drop[-1] + 1
                continue

        out_X.append(X[start:end])
        out_y.append(target[end:end + target_size])
        X_index.append(index[start])
        y_index.append(index[end])
        start = start + step_size

    return np.asarray(out_X), np.asarray(out_y), np.asarray(X_index), np.asarray(y_index)

In [None]:
# Slice the series into overlapping 100-step windows, each with a 1-step target,
# advancing one step at a time; column 0 is used as the target.
X, y, X_index, y_index = rolling_window_sequences(X, index, 
                                                  window_size=100, 
                                                  target_size=1, 
                                                  step_size=1,
                                                  target_column=0)

In [None]:
# Report the shapes of the windowed inputs, targets and their index arrays.
print("Training data input shape: {}".format(X.shape))
print("Training data index shape: {}".format(X_index.shape))
print("Training y shape: {}".format(y.shape))
print("Training y index shape: {}".format(y_index.shape))


#### Part 2 

- GPU check for TadGAN 
- Load TensorFlow, Keras, and the required layer classes

In [None]:
# Check the GPU environment
import tensorflow as tf
import logging
import math

# GPUs that TensorFlow can see (empty list when there are none).
gpus = tf.config.experimental.list_physical_devices('GPU') 
if gpus: 
    try: 
        for gpu in gpus: 
            # Turn on memory growth for each listed GPU.
            tf.config.experimental.set_memory_growth(gpu, True) 
    except RuntimeError as e: 
        # Print the error and continue rather than aborting the notebook.
        print(e)
print (gpus)

In [None]:
# Logger named after the current module.
LOGGER = logging.getLogger(__name__)

In [None]:
#import tensorflow as tf
import keras
#import similaritymeasures as sm
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

from tensorflow.keras.layers import Bidirectional, LSTM, Flatten, Dense, Reshape, UpSampling1D, TimeDistributed
from tensorflow.keras.layers import Activation, Conv1D, LeakyReLU, Dropout, Add, Layer
from tensorflow.compat.v1.keras.layers import CuDNNLSTM
from tensorflow.keras.optimizers import Adam

from functools import partial
from scipy import integrate, stats

In [None]:
# Display the shape of the windowed input array.
X.shape

In [None]:
# Model building, split into individual functions

In [None]:
class RandomWeightedAverage(Layer):
    """Blend two batches of samples with a random per-sample weight.

    Called on a pair ``[a, b]`` of equally shaped rank-3 tensors, the layer
    returns ``alpha * a + (1 - alpha) * b`` where ``alpha`` is drawn uniformly
    for every sample in the batch and broadcast over the remaining two axes.
    """

    def call(self, inputs):
        """Return the random interpolation of ``inputs[0]`` and ``inputs[1]``.

        Keras runs a layer through ``call``. A plain ``Layer`` subclass never
        invokes ``_merge_function`` on its own, so without this override the
        inputs would be passed through unblended.
        """
        return self._merge_function(inputs)

    def _merge_function(self, inputs):
        # Use the runtime batch size instead of a fixed 64 so any batch size works.
        batch_size = K.shape(inputs[0])[0]
        alpha = K.random_uniform((batch_size, 1, 1))
        return (alpha * inputs[0]) + ((1 - alpha) * inputs[1])

In [None]:
def build_encoder_layer(input_shape, encoder_reshape_shape):
    """Build the encoder that maps an input window to a latent sequence.

    Args:
        input_shape (tuple):
            Shape of one input window, without the batch axis.
        encoder_reshape_shape (tuple):
            Shape of the latent output, without the batch axis.

    Returns:
        Model:
            Keras model with a bidirectional LSTM, a dense projection and a
            reshape to ``encoder_reshape_shape``.
    """
    # Size the projection from the requested latent shape so the Reshape layer
    # always receives a matching number of elements (20 for the default (20, 1)).
    latent_units = int(np.prod(encoder_reshape_shape))

    x = Input(shape=input_shape)
    model = tf.keras.models.Sequential([
        Bidirectional(LSTM(units=100, return_sequences=True)),
        Flatten(),
        Dense(latent_units),
        Reshape(target_shape=encoder_reshape_shape)])

    return Model(x, model(x))


def build_generator_layer(input_shape, generator_reshape_shape):
    """Build the generator that maps a latent sequence back to a signal window.

    Args:
        input_shape (tuple):
            Shape of one latent sample, without the batch axis, e.g. ``(20, 1)``.
        generator_reshape_shape (tuple):
            Shape the dense projection is reshaped to before the recurrent
            layers, e.g. ``(50, 1)``. ``UpSampling1D(size=2)`` doubles its first
            axis, so ``(50, 1)`` produces windows of length 100.

    Returns:
        Model:
            Keras model whose output is passed through ``tanh`` and has one
            channel per time step.
    """
    # Size the projection from the reshape target so both always agree
    # (50 for the default (50, 1)).
    projection_units = int(np.prod(generator_reshape_shape))

    x = Input(shape=input_shape)
    # `LSTM` is used instead of the GPU-only compat `CuDNNLSTM`, matching the
    # encoder; with default arguments it should still pick the cuDNN kernel on a GPU.
    model = tf.keras.models.Sequential([
        Flatten(),
        Dense(projection_units),
        Reshape(target_shape=generator_reshape_shape),
        Bidirectional(LSTM(units=64, return_sequences=True), merge_mode='concat'),
        Dropout(rate=0.2),
        UpSampling1D(size=2),
        Bidirectional(LSTM(units=64, return_sequences=True), merge_mode='concat'),
        Dropout(rate=0.2),
        TimeDistributed(Dense(1)),
        Activation(activation='tanh')])

    return Model(x, model(x))


def build_critic_x_layer(input_shape):
    """Build the critic that scores signal windows.

    Args:
        input_shape (tuple):
            Shape of one signal window, without the batch axis.

    Returns:
        Model:
            Keras model made of four Conv1D / LeakyReLU / Dropout stages
            followed by a flatten and a single-unit dense output.
    """
    signal = Input(shape=input_shape)

    critic = tf.keras.models.Sequential()
    # Four identical convolutional stages.
    for _ in range(4):
        critic.add(Conv1D(filters=64, kernel_size=5))
        critic.add(LeakyReLU(alpha=0.2))
        critic.add(Dropout(rate=0.25))
    critic.add(Flatten())
    critic.add(Dense(units=1))

    score = critic(signal)
    return Model(signal, score)


def build_critic_z_layer(input_shape):
    """Build the critic that scores latent samples.

    Args:
        input_shape (tuple):
            Shape of one latent sample, without the batch axis.

    Returns:
        Model:
            Keras model that flattens its input, applies two
            Dense(100) / LeakyReLU / Dropout stages and ends in a single-unit
            dense output.
    """
    latent = Input(shape=input_shape)

    critic = tf.keras.models.Sequential()
    critic.add(Flatten())
    # Two identical fully connected stages.
    for _ in range(2):
        critic.add(Dense(units=100))
        critic.add(LeakyReLU(alpha=0.2))
        critic.add(Dropout(rate=0.2))
    critic.add(Dense(units=1))

    score = critic(latent)
    return Model(latent, score)


In [None]:
def wasserstein_loss(y_true, y_pred):
    """Return the mean of the element-wise product of ``y_true`` and ``y_pred``."""
    signed_scores = y_true * y_pred
    return K.mean(signed_scores)

In [None]:
# Layer parameters
latent_dim = 20
shape = (100, 1)
learning_rate = 0.0005

# Input shapes of the four sub-networks (batch axis excluded).
encoder_input_shape = (100, 1)
generator_input_shape = (20, 1)
critic_x_input_shape = (100, 1)
critic_z_input_shape = (20, 1)

# Targets of the Reshape layers inside the encoder and the generator.
encoder_reshape_shape = (20, 1)
generator_reshape_shape = (50, 1)

# Instantiate the sub-networks in the same order as before.
encoder = build_encoder_layer(encoder_input_shape, encoder_reshape_shape)
generator = build_generator_layer(generator_input_shape, generator_reshape_shape)
critic_x = build_critic_x_layer(critic_x_input_shape)
critic_z = build_critic_z_layer(critic_z_input_shape)

# One Adam optimizer instance, used by all training steps below.
optimizer = tf.keras.optimizers.Adam(learning_rate)

In [None]:
# Symbolic inputs: a latent sample `z` and a real signal window `x`.
z = Input(shape=(latent_dim, 1))
x = Input(shape=shape)
x_ = generator(z)   # generated signal
z_ = encoder(x)     # encoding of the real signal
fake_x = critic_x(x_)   # critic score of the generated signal
valid_x = critic_x(x)   # critic score of the real signal
# Output of the averaging layer for the (real, generated) pair.
interpolated_x = RandomWeightedAverage()([x, x_])
# Leftovers of the earlier loss-function based gradient penalty (kept for reference):
#validity_interpolated_x = critic_x(interpolated_x)
#partial_gp_loss_x = partial(gradient_penalty_loss, averaged_samples=interpolated_x)
#partial_gp_loss_x.__name__ = 'gradient_penalty'

# Model used by the critic-x training step; its third output feeds the gradient penalty.
critic_x_model = Model(inputs=[x, z], outputs=[valid_x, fake_x, interpolated_x])

In [None]:
# Reuses `x`, `z` and `z_` from the previous cell.
fake_z = critic_z(z_)   # critic score of the encoded signal
valid_z = critic_z(z)   # critic score of the sampled latent
# Output of the averaging layer for the (sampled, encoded) latent pair.
interpolated_z = RandomWeightedAverage()([z, z_])
# Leftovers of the earlier loss-function based gradient penalty (kept for reference):
#validity_interpolated_z = critic_z(interpolated_z)
#partial_gp_loss_z = partial(gradient_penalty_loss, averaged_samples=interpolated_z)
#partial_gp_loss_z.__name__ = 'gradient_penalty'

# Model used by the critic-z training step; its third output feeds the gradient penalty.
critic_z_model = Model(inputs=[x, z], outputs=[valid_z, fake_z, interpolated_z])

In [None]:
# Separate symbolic inputs for the encoder/generator training graph.
z_gen = Input(shape=(latent_dim, 1))
x_gen_ = generator(z_gen)        # signal generated from the sampled latent
x_gen = Input(shape=shape)
z_gen_ = encoder(x_gen)          # encoding of the real signal
x_gen_rec = generator(z_gen_)    # reconstruction: encode, then generate
fake_gen_x = critic_x(x_gen_)    # critic-x score of the generated signal
fake_gen_z = critic_z(z_gen_)    # critic-z score of the encoded signal

# Outputs: both critic scores plus the reconstruction used for the MSE term.
encoder_generator_model = Model([x_gen, z_gen], [fake_gen_x, fake_gen_z, x_gen_rec])

In [None]:
@tf.function
def critic_x_train_on_batch(x, z, valid, fake, delta):
    """Run one optimisation step of the signal critic with a gradient penalty.

    Args:
        x: Batch of real signal windows.
        z: Batch of latent samples.
        valid: Target multiplied with the critic scores of the real signals.
        fake: Target multiplied with the critic scores of the generated signals.
        delta: Unused; kept so existing callers keep working.

    Returns:
        Scalar tensor: Wasserstein terms plus 10 times the gradient penalty.
    """
    with tf.GradientTape() as tape:

        valid_x, fake_x, interpolated = critic_x_model(inputs=[x, z], training=True)

        if isinstance(interpolated, (list, tuple)):
            # The averaging layer handed back the (real, generated) pair unblended,
            # so blend it here with one random weight per sample.
            real, generated = interpolated
            real = tf.cast(real, generated.dtype)
            alpha_shape = tf.stack([tf.shape(real)[0], 1, 1])
            alpha = tf.random.uniform(alpha_shape, dtype=generated.dtype)
            interpolated = alpha * real + (1.0 - alpha) * generated

        with tf.GradientTape() as gp_tape:
            gp_tape.watch(interpolated)
            pred = critic_x(interpolated, training=True)

        # Gradient for the whole batch: `tape.gradient` on a single tensor returns
        # a tensor, so indexing it with [0] would keep only the first sample.
        grads = gp_tape.gradient(pred, interpolated)
        per_sample_axes = list(range(1, len(grads.shape)))
        grad_norm = tf.sqrt(1e-8 + tf.reduce_sum(tf.square(grads), axis=per_sample_axes))
        gp_loss = tf.reduce_mean((grad_norm - 1.0) ** 2)

        loss = tf.reduce_mean(wasserstein_loss(valid, valid_x))
        loss += tf.reduce_mean(wasserstein_loss(fake, fake_x))
        loss += gp_loss * 10.0

    gradients = tape.gradient(loss, critic_x_model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, critic_x_model.trainable_weights))
    return loss

In [None]:
@tf.function
def critic_z_train_on_batch(x, z, valid, fake, delta):
    """Run one optimisation step of the latent critic with a gradient penalty.

    Args:
        x: Batch of real signal windows.
        z: Batch of latent samples.
        valid: Target multiplied with the critic scores of the sampled latents.
        fake: Target multiplied with the critic scores of the encoded signals.
        delta: Unused; kept so existing callers keep working.

    Returns:
        Scalar tensor: Wasserstein terms plus 10 times the gradient penalty.
    """
    with tf.GradientTape() as tape:

        valid_z, fake_z, interpolated = critic_z_model(inputs=[x, z], training=True)

        if isinstance(interpolated, (list, tuple)):
            # The averaging layer handed back the (sampled, encoded) pair unblended,
            # so blend it here with one random weight per sample.
            sampled, encoded = interpolated
            sampled = tf.cast(sampled, encoded.dtype)
            alpha_shape = tf.stack([tf.shape(sampled)[0], 1, 1])
            alpha = tf.random.uniform(alpha_shape, dtype=encoded.dtype)
            interpolated = alpha * sampled + (1.0 - alpha) * encoded

        with tf.GradientTape() as gp_tape:
            gp_tape.watch(interpolated)
            pred = critic_z(interpolated, training=True)

        # Gradient for the whole batch: `tape.gradient` on a single tensor returns
        # a tensor, so indexing it with [0] would keep only the first sample.
        grads = gp_tape.gradient(pred, interpolated)
        per_sample_axes = list(range(1, len(grads.shape)))
        grad_norm = tf.sqrt(1e-8 + tf.reduce_sum(tf.square(grads), axis=per_sample_axes))
        gp_loss = tf.reduce_mean((grad_norm - 1.0) ** 2)

        loss = tf.reduce_mean(wasserstein_loss(valid, valid_z))
        loss += tf.reduce_mean(wasserstein_loss(fake, fake_z))
        loss += gp_loss * 10.0

    gradients = tape.gradient(loss, critic_z_model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, critic_z_model.trainable_weights))
    return loss

In [None]:
@tf.function
def enc_gen_train_on_batch(x, z, valid):
    """Run one optimisation step of the encoder and the generator.

    Args:
        x: Batch of real signal windows.
        z: Batch of latent samples.
        valid: Target multiplied with both critic scores.

    Returns:
        Scalar tensor: both Wasserstein terms plus 10 times the reconstruction MSE.
    """
    with tf.GradientTape() as tape:

        fake_gen_x, fake_gen_z, x_gen_rec = encoder_generator_model(inputs=[x, z], training=True)

        # Flatten everything but the batch axis. A bare `tf.squeeze` would also
        # remove the batch axis when the batch holds a single sample.
        x_flat = tf.reshape(x, (tf.shape(x)[0], -1))
        rec_flat = tf.reshape(x_gen_rec, (tf.shape(x_gen_rec)[0], -1))

        loss = tf.reduce_mean(wasserstein_loss(valid, fake_gen_x))
        loss += tf.reduce_mean(wasserstein_loss(valid, fake_gen_z))
        # Per-sample reconstruction error, weighted by 10, then averaged over the batch.
        loss += tf.keras.losses.MSE(x_flat, rec_flat) * 10
        loss = tf.reduce_mean(loss)

    gradients = tape.gradient(loss, encoder_generator_model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, encoder_generator_model.trainable_weights))

    return loss

In [None]:
# Train parameters
batch_size = 64   # samples per training step
n_critics = 5     # critic steps per encoder/generator step
epochs = 50       # passes over the shuffled training windows

In [None]:
# Train
X = X.reshape((-1, shape[0], 1))
X_ = np.copy(X)  # shuffled copy, so X keeps its original order

# Targets multiplied with the critic scores inside wasserstein_loss.
fake = np.ones((batch_size, 1), dtype=np.float32)
valid = -np.ones((batch_size, 1), dtype=np.float32)
delta = np.ones((batch_size, 1), dtype=np.float32)

for epoch in range(1, epochs+1):

    np.random.shuffle(X_)

    epoch_g_loss = []
    epoch_cx_loss = []
    epoch_cz_loss = []

    # Each minibatch feeds `n_critics` critic steps and one encoder/generator step.
    minibatches_size = batch_size * n_critics
    num_minibatches = int(X_.shape[0] // minibatches_size)

    for i in range(num_minibatches):
        minibatch = X_[i * minibatches_size: (i + 1) * minibatches_size]

        # Critic phase: freeze encoder/generator and make sure the critics are
        # trainable again (they are frozen at the end of every iteration).
        generator.trainable = False
        encoder.trainable = False
        critic_x.trainable = True
        critic_z.trainable = True

        for j in range(n_critics):
            x = minibatch[j * batch_size: (j + 1) * batch_size]
            z = np.random.normal(size=(batch_size, latent_dim, 1))
            epoch_cx_loss.append(critic_x_train_on_batch(x, z, valid, fake, delta))
            epoch_cz_loss.append(critic_z_train_on_batch(x, z, valid, fake, delta))

        # Encoder/generator phase: freeze the critics, unfreeze encoder and generator.
        critic_x.trainable = False
        critic_z.trainable = False
        generator.trainable = True
        encoder.trainable = True

        # Uses the last `x` and `z` drawn in the critic phase.
        epoch_g_loss.append(enc_gen_train_on_batch(x, z, valid))

    cx_loss = np.mean(np.array(epoch_cx_loss), axis=0)
    cz_loss = np.mean(np.array(epoch_cz_loss), axis=0)
    g_loss = np.mean(np.array(epoch_g_loss), axis=0)
    print('Epoch: {}/{}, [Dx loss: {}] [Dz loss: {}] [G loss: {}]'.format(epoch, epochs, cx_loss, cz_loss, g_loss))