In [1]:
import os 
import pm4py
from pm4py.objects.conversion.log import converter as log_converter
import pandas as pd

curr_path = os.path.abspath('')
folder_path = os.path.join(curr_path, 'data')
filepath = os.path.join(folder_path, 'BPI_Challenge_2012.xes')
log = pm4py.read_xes(filepath)
df = log_converter.apply(log, variant=log_converter.Variants.TO_DATA_FRAME)

parsing log, completed traces ::   0%|          | 0/13087 [00:00<?, ?it/s]

In [2]:
unique_activities = pd.unique(df['concept:name'])
activity_encoder = dict(zip(unique_activities,range(3,len(unique_activities) + 3)))
activity_encoder['Start'] = 1
activity_encoder['End'] = 2
df['concept:encoded'] = df['concept:name'].apply(lambda x: activity_encoder[x])

In [3]:
import pandas as pd

def process_arrays(df, complete=False, W=False):
    proc_df = df
    if complete:
        proc_df = proc_df[proc_df['lifecycle:transition'] == 'COMPLETE']
    if W:
        proc_df = proc_df[proc_df['concept:name'].str.startswith('W_')]
    
    arrays = proc_df.groupby(['case:concept:name']).agg(list)
    arrays.reset_index(inplace=True)
    arrays['time:interarrival_min'] = arrays['time:timestamp'].apply(lambda x: [0] + [0] + [((x[i+1] - x[i]).total_seconds() / 60) for i in range(len(x)-1)] + [0])
    arrays['concept:encoded'] = arrays['concept:encoded'].apply(lambda x: [1] + x + [2])
    return arrays

In [4]:
arrays = process_arrays(df, complete=True, W=True)

In [5]:
from sklearn.model_selection import train_test_split, KFold
import numpy as np
sampled_arrays = arrays.sample(n=5000)

kf = KFold(n_splits=3)
splits = []
for train_index, test_index in kf.split(sampled_arrays['case:concept:name']):
    id_tr = arrays['case:concept:name'].iloc[train_index]
    id_te = arrays['case:concept:name'].iloc[test_index]
    splits.append((id_tr, id_te))

In [6]:
i = 1
id_train, id_test = splits[i]

In [7]:
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers
import numpy as np
# Many to one + context


activity_decoder = {v:k for k,v in activity_encoder.items()}

def many_to_one_prep(journey):
    """
    Setting up a many-to-one scheme.
    Function Input is a complete journey.
    Returns an RNN input of a history of activities, output is the following activity
    """
    inp = [journey[:i] for i in range(1,len(journey))]
    out = journey[1:]
    return (inp,out)

def many_to_many_hybrid_prep(journey):
    """
    Setting up a many-to-many scheme.
    Function Input is a complete journey.
    Returns an RNN input of a history of activities, output is the following activity
    """
    inp = [journey[:i] for i in range(1,len(journey))]
    out = [journey[1:i+1] for i in range(1, len(journey))]
    match = [journey[:i+1] for i in range(1, len(journey))]
    return (inp,out, match)

def many_to_many_prep(journey):
    """
    Setting up a many-to-many scheme shifted by 1.  
    Function input is a complete journey.
    Returns an RNN input of a history of activities omitting the final activity, output is the same history shifted by 1 timeslot (omitting the first activity)
    """
    inp = journey[:-1]
    out = journey[1:]
    return(inp, out)

def many_to_many_make_data(id_indexes, arrays_df):
    """
    Creates training and testing sets for an RNN model.  
    Function input are indices of a selected subset of data, a DataFrame consisting of aggregated list-like journey data, and a data preparation method.
    Returns a tuple of training and testing data for journeys and inter-arrival times.
    """
    X_j = []
    Y_j = []
    X_t = []
    Y_t = []

    selected = arrays_df[arrays_df["case:concept:name"].isin(id_indexes)]

    for index, row in selected.iterrows():
        j_inp, j_out = many_to_many_prep(row['concept:encoded'])
        t_inp, t_out = many_to_many_prep(row['time:interarrival_min'])
        X_j.append(j_inp)
        X_t.append(t_inp)
        Y_j.append(j_out)
        Y_t.append(t_out)
    X_j = keras.preprocessing.sequence.pad_sequences(X_j, padding='pre', maxlen=60)
    X_j = to_categorical(X_j)
    X_t = keras.preprocessing.sequence.pad_sequences(X_t, padding='pre', maxlen=60)
    Y_j = keras.preprocessing.sequence.pad_sequences(Y_j, padding='pre', maxlen=60)
    Y_j = to_categorical(Y_j)
    Y_t = keras.preprocessing.sequence.pad_sequences(Y_t, padding='pre', maxlen=60)
    return (X_j, X_t, Y_j, Y_t)

def many_to_one_make_data(id_indexes, arrays_df):
    """
    Creates training and testing sets for an RNN model.  
    Function input are indices of a selected subset of data, a DataFrame consisting of aggregated list-like journey data, and a data preparation method.
    Returns a tuple of training and testing data for journeys and inter-arrival times.
    """
    X_j = []
    Y_j = []
    
    X_t = []
    Y_t = []
    

    selected = arrays_df[arrays_df["case:concept:name"].isin(id_indexes)]

    for index, row in selected.iterrows():
        j_inp, j_out = many_to_one_prep(row['concept:encoded'])
        t_inp, t_out = many_to_one_prep(row['time:interarrival_min'])
        X_j.extend(j_inp)
        X_t.extend(t_inp)
        Y_j.extend(j_out)
        Y_t.extend(t_out)
    X_j = keras.preprocessing.sequence.pad_sequences(X_j, padding='pre', maxlen=60)
    X_j = to_categorical(X_j)
    X_t = keras.preprocessing.sequence.pad_sequences(X_t, padding='pre', maxlen=60)
    Y_j = np.asarray(Y_j).astype("float32")
    Y_j = to_categorical(Y_j)
    Y_t = np.asarray(Y_t).astype("float32")
    return (X_j, X_t, Y_j, Y_t)

def inject_noise(array, d):
    replaced = np.where(array==1, 0.9, array)
    replaced = np.where(replaced == 0, 0.1/(d-1), array)
    return replaced
    

def many_to_many_hybrid_make_data(id_indexes, arrays_df):
    """
    Creates training and testing sets for an RNN model.  
    Function input are indices of a selected subset of data, a DataFrame consisting of aggregated list-like journey data, and a data preparation method.
    Returns a tuple of training and testing data for journeys and inter-arrival times.
    """
    X_j = []
    Y_j = []
    match_j = []
    X_t = []
    Y_t = []
    match_t = []

    selected = arrays_df[arrays_df["case:concept:name"].isin(id_indexes)]

    for index, row in selected.iterrows():
        j_inp, j_out, j_match = many_to_many_hybrid_prep(row['concept:encoded'])
        t_inp, t_out, t_match = many_to_many_hybrid_prep(row['time:interarrival_min'])
        X_j.extend(j_inp)
        X_t.extend(t_inp)
        match_j.extend(j_match)
        match_t.extend(t_match)
        Y_j.extend(j_out)
        Y_t.extend(t_out)
    X_j = keras.preprocessing.sequence.pad_sequences(X_j, padding='pre', maxlen=60)
    X_j = to_categorical(X_j)
    X_t = keras.preprocessing.sequence.pad_sequences(X_t, padding='pre', maxlen=60)
    Y_j = keras.preprocessing.sequence.pad_sequences(Y_j, padding='pre', maxlen=60)
    Y_j = to_categorical(Y_j)
    Y_t = keras.preprocessing.sequence.pad_sequences(Y_t, padding='pre', maxlen=60)
    match_j = keras.preprocessing.sequence.pad_sequences(match_j, padding='pre', maxlen=60)
    match_j = to_categorical(match_j)
    match_t = keras.preprocessing.sequence.pad_sequences(match_t, padding='pre', maxlen=60)
    return (X_j, X_t, Y_j, Y_t, match_j, match_t)

In [8]:
X_j_train, X_t_train, Y_j_train, Y_t_train, match_j, match_t = many_to_many_hybrid_make_data(id_train.values, arrays)

In [1083]:
X_j_test, X_t_test, Y_j_test, Y_t_test, _, _ = many_to_many_hybrid_make_data(id_test.values, arrays)

In [30]:
X_j_test, X_t_test, Y_j_test, Y_t_test = many_to_one_make_data(id_test.values, arrays)

# VRNN Model (Runs properly)

In [35]:
import tensorflow as tf
import numpy as np
# Implementing Variational RNN's and variations by subclassing Keras RNN-type Cells

class VRNNCell(tf.keras.layers.GRUCell):
    def __init__(self, units, **kwargs):
        super(VRNNCell, self).__init__(units, **kwargs)
    

    def build(self, input_shape):
        # Taking most of the standard weight initiaalizations from the base GRU class
        super().build((input_shape[0], input_shape[1] + self.units))
        
        self.input_kernel = self.add_weight(shape=(input_shape[-1], input_shape[-1]), name="layer", initializer='truncated_normal')
        
        self.prior_kernel = self.add_weight(shape=(self.units, self.units),name="layer", initializer='truncated_normal')
        
        self.pos_kernel = self.add_weight(shape=(input_shape[-1] + self.units,input_shape[-1] + self.units), name="layer", initializer='truncated_normal')
        
        self.encoder_mu_kernel = self.add_weight(shape=(input_shape[-1] + self.units, self.units), name="layer", initializer='truncated_normal')
        
        self.encoder_logvar_kernel = self.add_weight(shape=(input_shape[-1] + self.units, self.units), name="layer", initializer='truncated_normal')
        
        self.prior_mu_kernel = self.add_weight(shape=(self.units, self.units), name="layer", initializer='truncated_normal')
        
        self.prior_logvar_kernel = self.add_weight(shape=(self.units, self.units), name="layer", initializer='truncated_normal')  
        
        self.z_kernel = self.add_weight(shape=(self.units, self.units), name="layer", initializer='truncated_normal')
        
        self.output_kernel = self.add_weight(shape=(self.units + self.units, self.units + self.units), name="layer", initializer='truncated_normal')


    def sample(self, mu, log_var):
        # Sample from unit Normal
        dims = tf.shape(mu)
        epsilon = tf.random.normal(dims)
        # All element-wise computations
        std = tf.math.exp(0.5 * log_var)
        z = tf.math.multiply(std, epsilon) + mu
        return z
    
    def call(self, inputs, states, training=False):
        # Some formulations:
        # Generation:
        # z_t ~ N(mu_(0, t), sigma_(0,t)), w here [mu_(0,t), sigma(0,t)] = phi_prior(h_(t-1))
        # Update: 
        # h_t = f_theta(h_(t-1), z_t, x_t) *recurrence equation
        # Inference:
        # z_t ~ N(mu_z, sigma_z), where [mu_z, sigma_z] = phi_post(x_t, h_(t-1))
        #
        # Let the base RNN cell handle the rest and add loss
        
        if training:
            x_t = tf.matmul(inputs, self.input_kernel)
            h_prev = states[0]

            prior = tf.matmul(h_prev, self.prior_kernel)
            p_mu = tf.matmul(prior, self.prior_mu_kernel)
            p_logvar = tf.matmul(prior, self.prior_logvar_kernel)
            
            input_state_concat = tf.concat([x_t, h_prev], axis=1)
            
            pos = tf.matmul(input_state_concat, self.pos_kernel)
            q_mu = tf.matmul(pos, self.encoder_mu_kernel)
            q_logvar = tf.matmul(pos, self.encoder_logvar_kernel)
            z_t = self.sample(q_mu, q_logvar)
            phi_z_t = tf.matmul(z_t, self.z_kernel)
            
            inp = tf.concat([x_t, phi_z_t], axis=1)
            
            
            output = tf.matmul(tf.concat([h_prev, phi_z_t], axis=1), self.output_kernel)
            
            _, h_next = super().call(inp, h_prev)
            
            all_output = (output, z_t, q_mu, p_mu, q_logvar, p_logvar)
            return all_output, h_next
        
        else:
            x_t = tf.matmul(inputs, self.input_kernel)
            h_prev = states[0]

            prior = tf.matmul(h_prev, self.prior_kernel)
            p_mu = tf.matmul(prior, self.prior_mu_kernel)
            p_logvar = tf.matmul(prior, self.prior_logvar_kernel)
            
            input_state_concat = tf.concat([x_t, h_prev], axis=1)
            
            pos = tf.matmul(input_state_concat, self.pos_kernel)
            q_mu = tf.matmul(pos, self.encoder_mu_kernel)
            q_logvar = tf.matmul(pos, self.encoder_logvar_kernel)
            z_t = self.sample(p_mu, p_logvar)
            phi_z_t = tf.matmul(z_t, self.z_kernel)
            
            inp = tf.concat([x_t, phi_z_t], axis=1)
            
            
            output = tf.matmul(tf.concat([h_prev, phi_z_t], axis=1), self.output_kernel)
            
            _, h_next = super().call(inp, h_prev)
            
            all_output = (output, z_t, q_mu, p_mu, q_logvar, p_logvar)
            return all_output, h_next
    
   
    def get_config(self):
        return {"units":self.units}

In [26]:
def kl_gauss(posterior_means, prior_means, posterior_log_var, prior_log_var):   
    kl = prior_log_var - posterior_log_var + (tf.exp(posterior_log_var) + 
                                       tf.square(posterior_means - prior_means)) / tf.exp(prior_log_var) - 1
    kl = 0.5 * tf.reduce_sum(kl, axis=(1,2))
    return kl

class VRNNGRU(tf.keras.Model):
    def __init__(self, feature_space, latent_dim, timesteps,**kwargs):
        super(VRNNGRU, self).__init__(**kwargs)
        vrnn_cell = VRNNCell(latent_dim)
        self.latent_dim = latent_dim
        
        vrnn_input = keras.layers.Input(shape=(timesteps, feature_space))
        vrnn_output = keras.layers.RNN(vrnn_cell, return_sequences=True)(vrnn_input)
        self.vrnn = keras.Model(vrnn_input, vrnn_output)
        
        decoder_input = keras.layers.Input(shape=(timesteps, latent_dim))
        output =  keras.layers.TimeDistributed(keras.layers.Dense(feature_space, activation='softmax'))(decoder_input)
        self.decoder =keras.Model(decoder_input, output)
        
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
        
    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        if isinstance(data, tuple):
            input_data = data[0]
            output_data = data[1]
            
            timesteps = input_data.shape[1]
            num_feats = input_data.shape[2]
        with tf.GradientTape() as tape:
            outputs = self.vrnn(input_data, training=True)
            z = outputs[0]
            preds = self.decoder(z)
            
            q_mu = tf.squeeze(tf.squeeze(outputs[1]))
            p_mu = tf.squeeze(tf.squeeze(outputs[2]))
            q_log_var = tf.squeeze(tf.squeeze(outputs[3]))
            p_log_var = tf.squeeze(tf.squeeze(outputs[4]))
            
            kl_loss = tf.reduce_mean(kl_gauss(q_mu, p_mu, q_log_var, p_log_var))
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(tf.keras.losses.categorical_crossentropy(output_data, preds), axis=1)
            )
            total_loss = reconstruction_loss + kl_loss 
            
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)       
        return {
            'total_loss': self.total_loss_tracker.result(),
            'loss': self.reconstruction_loss_tracker.result(),
            'kl': self.kl_loss_tracker.result()
        }

    def call(self, inputs):
        outputs = self.vrnn(inputs, training=True)
        z = outputs[0]
        preds = self.decoder(z)
        return preds
    
    def generate(self, inputs):
        outputs = self.vrnn(inputs, training=False)
        z = outputs[0]
        preds = self.decoder(z)
        return (outputs[0], outputs[1])
    

In [31]:
from tensorflow import keras

checkpoint_filepath = 'vrnn_checkpoints/vrnn_gan'

model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1)

timesteps = X_j_train.shape[1]
latent_dim = 64
feature_space = X_j_train.shape[2]
rvae = VRNNGRU(feature_space, latent_dim, timesteps)
rvae.compile(optimizer=keras.optimizers.Adam(lr=0.01))
rvae.fit(X_j_train, Y_j_train, batch_size=32, epochs=10, callbacks=[model_checkpoint_callback])

Epoch 1/10

Epoch 00001: loss improved from inf to 14.76518, saving model to vrnn_checkpoints/vrnn_gan
Epoch 2/10

Epoch 00002: loss improved from 14.76518 to 9.97605, saving model to vrnn_checkpoints/vrnn_gan
Epoch 3/10

Epoch 00003: loss improved from 9.97605 to 9.78835, saving model to vrnn_checkpoints/vrnn_gan
Epoch 4/10

Epoch 00004: loss improved from 9.78835 to 9.32669, saving model to vrnn_checkpoints/vrnn_gan
Epoch 5/10

Epoch 00005: loss improved from 9.32669 to 9.14287, saving model to vrnn_checkpoints/vrnn_gan
Epoch 6/10

Epoch 00006: loss improved from 9.14287 to 9.05269, saving model to vrnn_checkpoints/vrnn_gan
Epoch 7/10

Epoch 00007: loss improved from 9.05269 to 9.01745, saving model to vrnn_checkpoints/vrnn_gan
Epoch 8/10

Epoch 00008: loss did not improve from 9.01745
Epoch 9/10

Epoch 00009: loss did not improve from 9.01745
Epoch 10/10

Epoch 00010: loss did not improve from 9.01745


<tensorflow.python.keras.callbacks.History at 0x1c5832b10>

In [14]:

preds = rvae.predict(np.reshape(X_j_train[0], (1, 60 , 27)))

Tensor("vrnngru/model/rnn/vrnn_cell/Shape:0", shape=(2,), dtype=int32)
Tensor("vrnngru/model/rnn/while/vrnn_cell/Shape:0", shape=(2,), dtype=int32)


In [15]:
preds[1]

array([[[ 4.5525627e-03, -3.8317323e-03, -6.7129307e-03, ...,
         -9.1836817e-04, -4.6270853e-03,  1.1899514e-02],
        [ 3.0801143e-02, -5.5193239e-01, -2.7746875e-02, ...,
          1.9628802e-02, -8.0291688e-02,  4.2358845e-02],
        [ 2.9639471e-02, -5.7826042e-01, -2.6618993e-02, ...,
          1.8270109e-02, -8.2649373e-02,  4.1432567e-02],
        ...,
        [-7.1097133e-03, -9.4274098e-01, -2.8431911e-03, ...,
         -5.1156536e-02, -1.5139106e-01, -3.5635401e-02],
        [-6.7217015e-03, -9.6359527e-01, -3.2768643e-03, ...,
         -5.7400770e-02, -1.6003168e-01, -3.7894253e-02],
        [-7.7903368e-02, -9.3034852e-01,  1.7862072e-02, ...,
         -7.1719006e-02, -1.0803863e-01, -1.0212935e-01]]], dtype=float32)

In [17]:
preds[2]

array([[[ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.03121545, -0.5662317 , -0.02852057, ...,  0.02034646,
         -0.08453187,  0.0457094 ],
        [ 0.03064846, -0.59325385, -0.02824234, ...,  0.01876517,
         -0.08759888,  0.04468801],
        ...,
        [-0.0273687 , -0.94033605, -0.00321051, ..., -0.0533398 ,
         -0.13607945, -0.05029236],
        [-0.02858901, -0.9597806 , -0.00367656, ..., -0.06085788,
         -0.14209709, -0.05448633],
        [-0.02927848, -0.9454255 , -0.00635662, ..., -0.07087296,
         -0.14637978, -0.05943596]]], dtype=float32)

In [46]:
preds[3]

array([[[ 1.9071437e-04, -6.5758126e-05, -5.0967932e-04, ...,
          6.9155241e-04, -6.4638630e-04, -4.7218148e-04],
        [ 2.6102108e-01,  5.4398924e-01,  5.1196986e-01, ...,
          2.6153010e-01,  1.1261359e-01,  1.2793159e-01],
        [ 3.2691172e-01,  6.4562327e-01,  6.7707944e-01, ...,
          2.9886463e-01,  1.3468513e-01,  2.3776209e-01],
        ...,
        [-1.3218226e-01, -8.6901650e-02, -5.5364227e-01, ...,
          6.1484829e-02,  1.7979853e-03, -7.9711515e-01],
        [-1.5152630e-01, -1.1752264e-01, -6.1181307e-01, ...,
          4.3006521e-02, -3.0866228e-03, -8.4307444e-01],
        [-1.7699826e-01, -1.5612464e-01, -6.7000443e-01, ...,
          2.7960364e-02, -1.5543126e-02, -8.9365137e-01]]], dtype=float32)

In [1294]:
rvae2 =  VRNNGRU(feature_space, latent_dim, timesteps)

In [1295]:
rvae2.load_weights(checkpoint_filepath)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f77d21455e0>

## VRNN-GAN

WHAT WORKS: 
Batch_size = 64
Gen LR: 0.001
Disc LR: 0.0001
Disc Dim: 16
VRNN Dim: 64
Epochs: 50

HIGHLIGHTS:
- discrim fake loss is about 0.5, gen reconstruction accuracy is around 0.14, misled loss about 1.5
- larger batchsize seems to be more helpful in fooling the discriminator
- slowing the LR for the discriminator helps for stabilty
- using BCE with logits over sigmoid output layer
- large enough dimension for VRNN but not too large

In [38]:
from tensorflow.keras import backend as K
import tensorflow_probability as tfp
def kl_gauss(posterior_means, prior_means, posterior_log_var, prior_log_var):   
    kl = prior_log_var - posterior_log_var + (tf.exp(posterior_log_var) + 
                                       tf.square(posterior_means - prior_means)) / tf.exp(prior_log_var) - 1
    kl = 0.5 * tf.reduce_sum(kl, axis=[1,2])
    return kl

def wasserstein_loss(y_true, y_pred):
    return tf.reduce_mean(y_true * y_pred)


class kCallback(tf.keras.callbacks.Callback):
    def __init__(self, count, limit):
        self.limit = limit
        self.count = count
    def on_train_batch_end(self, batch, logs={}):
        if self.count == self.limit:
            self.count = 0
        else:
            self.count += 1
        print('k is currently {}'.format(self.count))


class VRNNGRUGAN(tf.keras.Model):
    def __init__(self, feature_space, latent_dim, timesteps, **kwargs):
        super(VRNNGRUGAN, self).__init__(**kwargs)
        vrnn_cell = VRNNCell(latent_dim)
        self.feature_space = feature_space
        self.latent_dim = latent_dim
        
        vrnn_input = keras.layers.Input(shape=(timesteps, feature_space))
        vrnn_output = keras.layers.RNN(vrnn_cell, return_sequences=True)(vrnn_input)
        self.vrnn = keras.Model(vrnn_input, vrnn_output)
        
        decoder_input = keras.layers.Input(shape=(timesteps, latent_dim*2))
        decoder_output = keras.layers.TimeDistributed(keras.layers.Dense(self.feature_space, activation='softmax'))(decoder_input)
        decoder_output = keras.layers.Dropout(0.5)(decoder_output)
        decoder_model = keras.Model(decoder_input, decoder_output)
        self.decoder = decoder_model
        
        
        disc_input = keras.layers.Input(shape=(timesteps, feature_space))
        disc_rnn = keras.layers.LSTM(8, recurrent_dropout=0.5, dropout=0.5)(disc_input)
        disc_output = keras.layers.Dense(1, activation='sigmoid')(disc_rnn)
        disc_output = keras.layers.Dropout(0.4)(disc_output)
        disc_model = keras.Model(disc_input, disc_output)
        self.discrim = disc_model
        
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.next_step_loss_tracker = keras.metrics.Mean(
            name="next_step_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
        self.discrim_loss_tracker = keras.metrics.Mean(name="discrim_loss")
        self.discrim_fake_loss_tracker = keras.metrics.Mean(name="discrim_fake_loss")
        self.discrim_real_loss_tracker = keras.metrics.Mean(name="discrim_real_loss")
        self.misled_loss_tracker = keras.metrics.Mean(name="misled_loss")
        
    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
            self.discrim_loss_tracker,
            self.discrim_fake_loss_tracker,
            self.discrim_real_loss_tracker,
            self.misled_loss_tracker,
#             self.next_step_loss_tracker
        ]
    def compile(self, vae_optimizer, discrim_optimizer):
        super(VRNNGRUGAN, self).compile()
        self.vae_optimizer = vae_optimizer
        self.discrim_optimizer = discrim_optimizer

    def train_step(self, data):
        if isinstance(data, tuple):
            input_data, match_data = data[0]
            output_data = data[1]
            
        with tf.GradientTape(persistent=True) as tape:
            outputs = self.vrnn(input_data, training=True)
            vrnn_out = outputs[0]            
            preds = self.decoder(vrnn_out)
            o_preds = tfp.distributions.RelaxedOneHotCategorical(0.01, probs=preds).sample()
            o_preds = tf.squeeze(o_preds)

            
            discrim_fake_output = self.discrim(o_preds)
            discrim_real_output = self.discrim(output_data)
            
            bce_logits = tf.keras.losses.BinaryCrossentropy(from_logits=True)

            discrim_output_loss_fake = tf.reduce_mean(
                tf.keras.losses.binary_crossentropy(tf.zeros_like(discrim_fake_output), discrim_fake_output)
            )
        
            discrim_output_loss_real = tf.reduce_mean(
                tf.keras.losses.binary_crossentropy(tf.ones_like(discrim_real_output), discrim_real_output)
            )
              
            discrim_loss = 0.5 * (discrim_output_loss_fake + discrim_output_loss_real)
        discrim_grads = tape.gradient(discrim_loss, self.discrim.trainable_weights)
        self.discrim_optimizer.apply_gradients(zip(discrim_grads, self.discrim.trainable_weights))
        self.discrim_loss_tracker.update_state(discrim_loss)
        self.discrim_fake_loss_tracker.update_state(discrim_output_loss_fake)
        self.discrim_real_loss_tracker.update_state(discrim_output_loss_real)
        del tape
        
        with tf.GradientTape(persistent=True) as tape:
            outputs = self.vrnn(input_data, training=True)
            vrnn_out = outputs[0]
            preds = self.decoder(vrnn_out)
            o_preds = tfp.distributions.RelaxedOneHotCategorical(0.01, probs=preds).sample()
            o_preds = tf.squeeze(o_preds)
            discrim_fake_output = self.discrim(o_preds)

            z = outputs[1]
            q_mu = outputs[2]
            p_mu = outputs[3]
            q_log_var = outputs[4]
            p_log_var = outputs[5]


            kl_loss = tf.reduce_mean(kl_gauss(q_mu, p_mu, q_log_var, p_log_var))
            bce_logits = tf.keras.losses.BinaryCrossentropy(from_logits=True)

            reconstruction_loss = tf.reduce_mean(
                tf.reduce_mean(tf.keras.losses.categorical_crossentropy(output_data, preds), axis=1)
            )
            
            mislead_output_discrim_loss = tf.reduce_mean(
                tf.keras.losses.binary_crossentropy(tf.ones_like(discrim_fake_output), discrim_fake_output) 
            )
            
            discrim_loss = tf.reduce_mean(
                tf.keras.losses.binary_crossentropy(tf.zeros_like(discrim_fake_output), discrim_fake_output)
            )
            total_loss =  reconstruction_loss + 0.1 * kl_loss + mislead_output_discrim_loss


        encoder_grads = tape.gradient(total_loss, self.vrnn.trainable_weights)
        decoder_grads = tape.gradient(total_loss, self.decoder.trainable_weights)
        encoder_grads = [(tf.clip_by_value(grad, clip_value_min=-5.0, clip_value_max=5.0))
                                  for grad in encoder_grads]
        decoder_grads = [(tf.clip_by_value(grad, clip_value_min=-5.0, clip_value_max=5.0))
                                  for grad in decoder_grads]

        self.vae_optimizer.apply_gradients(zip(encoder_grads, self.vrnn.trainable_weights))
        self.vae_optimizer.apply_gradients(zip(decoder_grads, self.decoder.trainable_weights))

        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        self.misled_loss_tracker.update_state(mislead_output_discrim_loss)
        del tape
            
        return {
            'total_loss': self.total_loss_tracker.result(),
            'loss': self.reconstruction_loss_tracker.result(),
            'kl': self.kl_loss_tracker.result(),
            'discrim_loss':self.discrim_loss_tracker.result(),
            'discrim_loss_fake':self.discrim_fake_loss_tracker.result(),
            'discrim_loss_real':self.discrim_real_loss_tracker.result(),
            'misled_loss':self.misled_loss_tracker.result(),
        }

    def call(self, inputs):
        outputs = self.vrnn(inputs, training=True)
        z = outputs[0]
        preds = self.decoder(z)
        return preds
    
    def generate(self, inputs):
        outputs = self.vrnn(inputs, training=False)
        z = outputs[0]
        preds = self.decoder(z)
        return (outputs[0], outputs[1])
    
    def test_step(self, data):
        inputs, match = data[0]
        outputs = data[1]
        preds = self(inputs, training=False)
        recon_loss = tf.keras.losses.categorical_crossentropy(outputs, preds)
        return {
            "loss": recon_loss
        }
        
    

In [39]:
checkpoint_filepath = 'vrnn_gan_checkpoints/vrnngan_best.epoch{epoch:02d}-loss{loss:.2f}'

model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='loss',
    mode='min',
    save_best_only=True)

timesteps = X_j_train.shape[1]
latent_dim = 32
feature_space = X_j_train.shape[2]
rvae = VRNNGRUGAN(feature_space, latent_dim, timesteps)
rvae.compile(keras.optimizers.Adam(lr=0.001), keras.optimizers.Adam(lr=0.001))

rvae.fit([X_j_train, match_j], Y_j_train, batch_size= 64, epochs=20, callbacks=[model_checkpoint_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20

KeyboardInterrupt: 

In [25]:
tf.keras.backend.clear_session()

In [224]:
preds = rvae.generate(X_j_train)
# preds = tf.squeeze(tf.slice(preds, [0,59,0], [-1, -1, -1])).numpy()
# preds = np.argmax(preds, axis=1)
truth = np.argmax(Y_j_train, axis=2)


In [227]:
preds[1]

<tf.Tensor: shape=(29198, 60, 2), dtype=float32, numpy=
array([[[0.77190506, 0.6786984 ],
        [1.3107011 , 1.2384986 ],
        [1.3951796 , 1.3275213 ],
        ...,
        [1.3959258 , 1.3283398 ],
        [1.3959258 , 1.3283398 ],
        [1.3693651 , 1.3517888 ]],

       [[0.77190506, 0.6786984 ],
        [1.3107011 , 1.2384986 ],
        [1.3951796 , 1.3275213 ],
        ...,
        [1.3959258 , 1.3283398 ],
        [1.3693651 , 1.3517888 ],
        [1.3657767 , 1.3594463 ]],

       [[0.77190506, 0.6786984 ],
        [1.3107011 , 1.2384986 ],
        [1.3951796 , 1.3275213 ],
        ...,
        [1.3693651 , 1.3517888 ],
        [1.3657767 , 1.3594463 ],
        [1.3807567 , 1.3310295 ]],

       ...,

       [[0.77190506, 0.6786984 ],
        [1.3107011 , 1.2384986 ],
        [1.3951796 , 1.3275213 ],
        ...,
        [1.3807567 , 1.3310295 ],
        [1.3807532 , 1.3310257 ],
        [1.3807533 , 1.3310257 ]],

       [[0.77190506, 0.6786984 ],
        [1.3107011 , 

In [228]:
preds[0]

<tf.Tensor: shape=(29198, 60, 2), dtype=float32, numpy=
array([[[0.5      , 0.5      ],
        [3.209653 , 3.0168333],
        [3.8856559, 3.621873 ],
        ...,
        [3.8920481, 3.6275563],
        [3.8920481, 3.6275563],
        [3.8920481, 3.6275563]],

       [[0.5      , 0.5      ],
        [3.209653 , 3.0168333],
        [3.8856559, 3.621873 ],
        ...,
        [3.8920481, 3.6275563],
        [3.8920481, 3.6275563],
        [3.8919394, 3.6274598]],

       [[0.5      , 0.5      ],
        [3.209653 , 3.0168333],
        [3.8856559, 3.621873 ],
        ...,
        [3.8920481, 3.6275563],
        [3.8919394, 3.6274598],
        [3.8919187, 3.6274414]],

       ...,

       [[0.5      , 0.5      ],
        [3.209653 , 3.0168333],
        [3.8856559, 3.621873 ],
        ...,
        [3.8919187, 3.6274414],
        [3.8918884, 3.6274145],
        [3.8918889, 3.6274147]],

       [[0.5      , 0.5      ],
        [3.209653 , 3.0168333],
        [3.8856559, 3.621873 ],
       

In [179]:
preds = np.argmax(preds, axis=2)

In [181]:
np.unique(preds, return_counts=True)

(array([ 0,  6, 12, 14, 23, 25]),
 array([1514348,   85507,   69943,   17069,   29138,   35875]))

In [176]:
np.unique(truth, return_counts=True)


(array([ 0,  2,  6, 12, 14, 23, 25, 26]),
 array([1514257,    3333,   81951,   78593,   16648,   13142,   43179,
            777]))

In [167]:
activity_encoder

{'A_SUBMITTED': 3,
 'A_PARTLYSUBMITTED': 4,
 'A_PREACCEPTED': 5,
 'W_Completeren aanvraag': 6,
 'A_ACCEPTED': 7,
 'O_SELECTED': 8,
 'A_FINALIZED': 9,
 'O_CREATED': 10,
 'O_SENT': 11,
 'W_Nabellen offertes': 12,
 'O_SENT_BACK': 13,
 'W_Valideren aanvraag': 14,
 'A_REGISTERED': 15,
 'A_APPROVED': 16,
 'O_ACCEPTED': 17,
 'A_ACTIVATED': 18,
 'O_CANCELLED': 19,
 'W_Wijzigen contractgegevens': 20,
 'A_DECLINED': 21,
 'A_CANCELLED': 22,
 'W_Afhandelen leads': 23,
 'O_DECLINED': 24,
 'W_Nabellen incomplete dossiers': 25,
 'W_Beoordelen fraude': 26,
 'Start': 1,
 'End': 2}

In [164]:
np.argmax(preds[16], axis=1)

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0, 23])

In [95]:
np.argmax(match_j[3], axis=1)

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  1,  6, 12, 12, 12])

In [168]:
np.argmax(X_j_test[0], axis=1)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])

In [165]:
np.argmax(Y_j_train[16], axis=1)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6])

In [733]:
activity_encoder

{'A_SUBMITTED': 3,
 'A_PARTLYSUBMITTED': 4,
 'A_PREACCEPTED': 5,
 'W_Completeren aanvraag': 6,
 'A_ACCEPTED': 7,
 'O_SELECTED': 8,
 'A_FINALIZED': 9,
 'O_CREATED': 10,
 'O_SENT': 11,
 'W_Nabellen offertes': 12,
 'O_SENT_BACK': 13,
 'W_Valideren aanvraag': 14,
 'A_REGISTERED': 15,
 'A_APPROVED': 16,
 'O_ACCEPTED': 17,
 'A_ACTIVATED': 18,
 'O_CANCELLED': 19,
 'W_Wijzigen contractgegevens': 20,
 'A_DECLINED': 21,
 'A_CANCELLED': 22,
 'W_Afhandelen leads': 23,
 'O_DECLINED': 24,
 'W_Nabellen incomplete dossiers': 25,
 'W_Beoordelen fraude': 26,
 'Start': 1,
 'End': 2}

In [925]:
np.argmax(Y_j_test[16])

14

In [933]:
np.argmax(preds[16], axis=1)

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0, 23,  6,  6, 12, 12, 14])

In [936]:
np.argmax(X_j_test[16])

0

In [937]:
np.argmax(X_j_test[16], axis=1)

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  1,  6, 12, 12, 14, 25])

In [72]:
from sklearn.metrics import f1_score, accuracy_score, balanced_accuracy_score
print("Accuracy:{}".format(accuracy_score(truth, preds)))


Accuracy:0.5695623293231973


In [140]:
np.unique(preds)

array([ 0,  2, 12, 23, 25])

In [141]:
preds

array([ 2,  0, 23, ..., 12,  2,  2])

In [116]:
X_j_test

array([[[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.]],

       [[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

In [17]:
a = tf.convert_to_tensor([0,1,2])

In [45]:
tf.reshape(a, [1,1,3])

<tf.Tensor: shape=(1, 1, 3), dtype=int32, numpy=array([[[0, 1, 2]]], dtype=int32)>

In [50]:
a = tf.convert_to_tensor([[[1,2,3]]])

In [51]:
a

<tf.Tensor: shape=(1, 1, 3), dtype=int32, numpy=array([[[1, 2, 3]]], dtype=int32)>

In [54]:
b = tf.reshape(tf.convert_to_tensor([4,5,6]),[1,3])

In [55]:
tf.concat([a,b], axis=1)

InvalidArgumentError: ConcatOp : Ranks of all input tensors should match: shape[0] = [1,1,3] vs. shape[1] = [1,3] [Op:ConcatV2] name: concat

In [45]:
from tfp.distributions import RelaxedOneHotCategorical

ModuleNotFoundError: No module named 'tfp'

In [48]:
import tensorflow_probability as tfp

In [150]:
temperature = 0.01
p = [0.1, 0.5, 0.4]
dist = tfp.distributions.RelaxedOneHotCategorical(temperature, logits=p)

In [55]:
tf.nn.softmax(p)

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0.26030254, 0.38832578, 0.3513717 ], dtype=float32)>

In [63]:
dist.sample(1)

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[0.000000e+00, 5.633773e-29, 1.000000e+00]], dtype=float32)>

In [140]:
p = [[0.99, 0.01], [0.01, 0.99]]
dist = tfp.distributions.RelaxedOneHotCategorical(0.5, probs=p)

In [151]:
dist.sample()

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0., 1., 0.], dtype=float32)>

In [183]:
a = [[[0.5,0.5,], [0.5,0.5]],[[0.2,0.8,], [0.8,0.3]]]

In [185]:
tfp.distributions.RelaxedOneHotCategorical(0.001, probs=a).sample()

<tf.Tensor: shape=(2, 2, 2), dtype=float32, numpy=
array([[[0., 1.],
        [1., 0.]],

       [[0., 1.],
        [1., 0.]]], dtype=float32)>