In [1]:
import os 
import pm4py
from pm4py.objects.conversion.log import converter as log_converter
import pandas as pd

curr_path = os.path.abspath('')
folder_path = os.path.join(curr_path, 'data')
filepath = os.path.join(folder_path, 'BPI_Challenge_2012.xes')
log = pm4py.read_xes(filepath)
df = log_converter.apply(log, variant=log_converter.Variants.TO_DATA_FRAME)

parsing log, completed traces ::   0%|          | 0/13087 [00:00<?, ?it/s]

In [2]:
unique_activities = pd.unique(df['concept:name'])
activity_encoder = dict(zip(unique_activities,range(3,len(unique_activities) + 3)))
activity_encoder['Start'] = 1
activity_encoder['End'] = 2
df['concept:encoded'] = df['concept:name'].apply(lambda x: activity_encoder[x])

In [3]:
import pandas as pd

def process_arrays(df, complete=False, W=False):
    proc_df = df
    if complete:
        proc_df = proc_df[proc_df['lifecycle:transition'] == 'COMPLETE']
    if W:
        proc_df = proc_df[proc_df['concept:name'].str.startswith('W_')]
    
    arrays = proc_df.groupby(['case:concept:name']).agg(list)
    arrays.reset_index(inplace=True)
    arrays['time:interarrival_min'] = arrays['time:timestamp'].apply(lambda x: [0] + [0] + [((x[i+1] - x[i]).total_seconds() / 60) for i in range(len(x)-1)] + [0])
    arrays['concept:encoded'] = arrays['concept:encoded'].apply(lambda x: [1] + x + [2])
    return arrays

In [861]:
arrays = process_arrays(df, complete=True, W=True)

In [865]:
arrays['case:concept:name'].sample(n=5000)

4756    193798
7562    205424
4695    193530
7454    205005
4113    191010
         ...  
3685    189274
6879    202614
4882    194341
1112    178287
2513    184102
Name: case:concept:name, Length: 5000, dtype: object

In [874]:
from sklearn.model_selection import train_test_split, KFold
import numpy as np
sampled_arrays = arrays.sample(n=3000)

kf = KFold(n_splits=3)
splits = []
for train_index, test_index in kf.split(sampled_arrays['case:concept:name']):
    id_tr = arrays['case:concept:name'].iloc[train_index]
    id_te = arrays['case:concept:name'].iloc[test_index]
    splits.append((id_tr, id_te))

In [875]:
i = 1
id_train, id_test = splits[i]

In [956]:
a = [1,2,3,4,5]
check = [a[:i+1] for i in range(1, len(a))]

In [957]:
check

[[1, 2], [1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5]]

In [964]:
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers
import numpy as np
# Many to one + context


activity_decoder = {v:k for k,v in activity_encoder.items()}

def many_to_one_prep(journey):
    """
    Setting up a many-to-one scheme.
    Function Input is a complete journey.
    Returns an RNN input of a history of activities, output is the following activity
    """
    inp = [journey[:i] for i in range(1,len(journey))]
    out = journey[1:]
    return (inp,out)

def many_to_many_hybrid_prep(journey):
    """
    Setting up a many-to-many scheme.
    Function Input is a complete journey.
    Returns an RNN input of a history of activities, output is the following activity
    """
    inp = [journey[:i] for i in range(1,len(journey))]
    out = [journey[1:i+1] for i in range(1, len(journey))]
    match = [journey[:i+1] for i in range(1, len(journey))]
    return (inp,out, match)

def many_to_many_prep(journey):
    """
    Setting up a many-to-many scheme shifted by 1.  
    Function input is a complete journey.
    Returns an RNN input of a history of activities omitting the final activity, output is the same history shifted by 1 timeslot (omitting the first activity)
    """
    inp = journey[:-1]
    out = journey[1:]
    return(inp, out)

def many_to_many_make_data(id_indexes, arrays_df):
    """
    Creates training and testing sets for an RNN model.  
    Function input are indices of a selected subset of data, a DataFrame consisting of aggregated list-like journey data, and a data preparation method.
    Returns a tuple of training and testing data for journeys and inter-arrival times.
    """
    X_j = []
    Y_j = []
    X_t = []
    Y_t = []

    selected = arrays_df[arrays_df["case:concept:name"].isin(id_indexes)]

    for index, row in selected.iterrows():
        j_inp, j_out = many_to_many_prep(row['concept:encoded'])
        t_inp, t_out = many_to_many_prep(row['time:interarrival_min'])
        X_j.append(j_inp)
        X_t.append(t_inp)
        Y_j.append(j_out)
        Y_t.append(t_out)
    X_j = keras.preprocessing.sequence.pad_sequences(X_j, padding='pre', maxlen=60)
    X_j = to_categorical(X_j)
    X_t = keras.preprocessing.sequence.pad_sequences(X_t, padding='pre', maxlen=60)
    Y_j = keras.preprocessing.sequence.pad_sequences(Y_j, padding='pre', maxlen=60)
    Y_j = to_categorical(Y_j)
    Y_t = keras.preprocessing.sequence.pad_sequences(Y_t, padding='pre', maxlen=60)
    return (X_j, X_t, Y_j, Y_t)

def many_to_one_make_data(id_indexes, arrays_df):
    """
    Creates training and testing sets for an RNN model.  
    Function input are indices of a selected subset of data, a DataFrame consisting of aggregated list-like journey data, and a data preparation method.
    Returns a tuple of training and testing data for journeys and inter-arrival times.
    """
    X_j = []
    Y_j = []
    
    X_t = []
    Y_t = []
    

    selected = arrays_df[arrays_df["case:concept:name"].isin(id_indexes)]

    for index, row in selected.iterrows():
        j_inp, j_out = many_to_one_prep(row['concept:encoded'])
        t_inp, t_out = many_to_one_prep(row['time:interarrival_min'])
        X_j.extend(j_inp)
        X_t.extend(t_inp)
        Y_j.extend(j_out)
        Y_t.extend(t_out)
    X_j = keras.preprocessing.sequence.pad_sequences(X_j, padding='pre', maxlen=60)
    X_j = to_categorical(X_j)
    X_t = keras.preprocessing.sequence.pad_sequences(X_t, padding='pre', maxlen=60)
    Y_j = np.asarray(Y_j).astype("float32")
    Y_j = to_categorical(Y_j)
    Y_t = np.asarray(Y_t).astype("float32")
    return (X_j, X_t, Y_j, Y_t)

def many_to_many_hybrid_make_data(id_indexes, arrays_df):
    """
    Creates training and testing sets for an RNN model.  
    Function input are indices of a selected subset of data, a DataFrame consisting of aggregated list-like journey data, and a data preparation method.
    Returns a tuple of training and testing data for journeys and inter-arrival times.
    """
    X_j = []
    Y_j = []
    match_j = []
    X_t = []
    Y_t = []
    match_t = []

    selected = arrays_df[arrays_df["case:concept:name"].isin(id_indexes)]

    for index, row in selected.iterrows():
        j_inp, j_out, j_match = many_to_many_hybrid_prep(row['concept:encoded'])
        t_inp, t_out, t_match = many_to_many_hybrid_prep(row['time:interarrival_min'])
        X_j.extend(j_inp)
        X_t.extend(t_inp)
        match_j.extend(j_match)
        match_t.extend(t_match)
        Y_j.extend(j_out)
        Y_t.extend(t_out)
    X_j = keras.preprocessing.sequence.pad_sequences(X_j, padding='pre', maxlen=60)
    X_j = to_categorical(X_j)
    X_t = keras.preprocessing.sequence.pad_sequences(X_t, padding='pre', maxlen=60)
    Y_j = keras.preprocessing.sequence.pad_sequences(Y_j, padding='pre', maxlen=60)
    Y_j = to_categorical(Y_j)
    Y_t = keras.preprocessing.sequence.pad_sequences(Y_t, padding='pre', maxlen=60)
    match_j = keras.preprocessing.sequence.pad_sequences(match_j, padding='pre', maxlen=60)
    match_j = to_categorical(match_j)
    match_t = keras.preprocessing.sequence.pad_sequences(match_t, padding='pre', maxlen=60)
    return (X_j, X_t, Y_j, Y_t, match_j, match_t)

In [965]:
X_j_train, X_t_train, Y_j_train, Y_t_train, match_j, match_t = many_to_many_hybrid_make_data(id_train.values, arrays)

In [1083]:
X_j_test, X_t_test, Y_j_test, Y_t_test, _, _ = many_to_many_hybrid_make_data(id_test.values, arrays)

In [869]:
X_j_test, X_t_test, Y_j_test, Y_t_test = many_to_one_make_data(id_test.values, arrays)

In [966]:
match_j.shape

(17367, 60, 27)

In [961]:
X_j_train.shape

(17367, 60, 27)

# VRNN Model (Runs properly)

In [1273]:
import tensorflow as tf
import numpy as np
# Implementing Variational RNN's and variations by subclassing Keras RNN-type Cells

class VRNNCell(tf.keras.layers.GRUCell):
    def __init__(self, units, **kwargs):
        super(VRNNCell, self).__init__(units, **kwargs)
    

    def build(self, input_shape):
        # Taking most of the standard weight initiaalizations from the base GRU class
        super().build((input_shape[0], input_shape[1] + self.units))
        
        self.input_kernel = self.add_weight(shape=(input_shape[-1], input_shape[-1]), initializer='truncated_normal')
        
        self.prior_kernel = self.add_weight(shape=(self.units, self.units), initializer='truncated_normal')
        
        self.pos_kernel = self.add_weight(shape=(input_shape[-1] + self.units, input_shape[-1] + self.units), initializer='truncated_normal')
        
        self.encoder_mu_kernel = self.add_weight(shape=(input_shape[-1] + self.units, self.units), initializer='truncated_normal')
        
        self.encoder_logvar_kernel = self.add_weight(shape=(input_shape[-1] + self.units, self.units), initializer='truncated_normal')
        
        self.prior_mu_kernel = self.add_weight(shape=(self.units, self.units), initializer='truncated_normal')
        
        self.prior_logvar_kernel = self.add_weight(shape=(self.units, self.units), initializer='truncated_normal')  
        
        self.batch_size = input_shape[0]


    def sample(self, mu, log_var):
        # Sample from unit Normal
        epsilon = tf.random.normal([1, self.units])
        half_constant = tf.convert_to_tensor(np.full((1, self.units), 0.5).astype('float32'))
        # All element-wise computations
        z = tf.math.multiply(half_constant, tf.math.exp(log_var)) + mu
        return z
    
    def call(self, inputs, states, training=False):
        # Some formulations:
        # Generation:
        # z_t ~ N(mu_(0, t), sigma_(0,t)), w here [mu_(0,t), sigma(0,t)] = phi_prior(h_(t-1))
        # Update: 
        # h_t = f_theta(h_(t-1), z_t, x_t) *recurrence equation
        # Inference:
        # z_t ~ N(mu_z, sigma_z), where [mu_z, sigma_z] = phi_post(x_t, h_(t-1))
        #
        # Let the base RNN cell handle the rest and add loss
        
        if training:
            x_t = tf.matmul(inputs, self.input_kernel)
            h_prev = states[0]

            prior = tf.matmul(h_prev, self.prior_kernel)
            p_mu = tf.matmul(prior, self.prior_mu_kernel)
            p_logvar = tf.matmul(prior, self.prior_logvar_kernel)
            
            input_state_concat = tf.concat([x_t, h_prev], axis=1)
            
            pos = tf.matmul(input_state_concat, self.pos_kernel)
            q_mu = tf.matmul(pos, self.encoder_mu_kernel)
            q_logvar = tf.matmul(pos, self.encoder_logvar_kernel)
            z_t = self.sample(q_mu, q_logvar)
            
            inp = tf.concat([x_t, z_t], axis=1)
            _, h_next = super().call(inp, h_prev)
            
            output = (z_t, q_mu, p_mu, q_logvar, p_logvar)
            return output, h_next
        
        else:
            # Return prior and posterior parameters
            x_t = tf.matmul(inputs, self.input_kernel)
            h_prev = states[0]

            prior = tf.matmul(h_prev, self.prior_kernel)
            p_mu = tf.matmul(prior, self.prior_mu_kernel)
            p_logvar = tf.matmul(prior, self.prior_logvar_kernel)
            
            input_state_concat = tf.concat([x_t, h_prev], axis=1)
            
            pos = tf.matmul(input_state_concat, self.pos_kernel)
            q_mu = tf.matmul(pos, self.encoder_mu_kernel)
            q_logvar = tf.matmul(pos, self.encoder_logvar_kernel)
            z_t = self.sample(p_mu, p_logvar)
            
            inp = tf.concat([x_t, z_t], axis=1)
            _, h_next = super().call(inp, h_prev)
            
            output = (z_t, q_mu, p_mu, q_logvar, p_logvar)
            return output, h_next
    
   
    def get_config(self):
        return {"units":self.units}

In [447]:
def kl_gauss(posterior_means, prior_means, posterior_log_var, prior_log_var):   
    kl = prior_log_var - posterior_log_var + (tf.exp(posterior_log_var) + 
                                       tf.square(posterior_means - prior_means)) / tf.exp(prior_log_var) - 1
    kl = 0.5 * tf.reduce_sum(kl)
    return kl

class VRNNGRU(tf.keras.Model):
    def __init__(self, feature_space, latent_dim, **kwargs):
        super(VRNNGRU, self).__init__(**kwargs)
        vrnn_cell = VRNNCell(3)
        self.latent_dim = latent_dim
        self.vrnn = keras.layers.RNN(vrnn_cell, return_sequences=True)
        self.decoder = keras.layers.TimeDistributed(keras.layers.Dense(feature_space, activation='softmax'))
        
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
        
    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        if isinstance(data, tuple):
            input_data = data[0]
            output_data = data[1]
            
            timesteps = input_data.shape[1]
            num_feats = input_data.shape[2]
        with tf.GradientTape() as tape:
            outputs = self.vrnn(input_data, training=True)
            z = outputs[0]
            preds = self.decoder(z)
            print(preds)
            
            q_mu = tf.squeeze(tf.squeeze(outputs[1]))
            p_mu = tf.squeeze(tf.squeeze(outputs[2]))
            q_log_var = tf.squeeze(tf.squeeze(outputs[3]))
            p_log_var = tf.squeeze(tf.squeeze(outputs[4]))
            
            kl_loss = tf.reduce_mean(kl_gauss(q_mu, p_mu, q_log_var, p_log_var))
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(tf.keras.losses.categorical_crossentropy(output_data, preds), axis=1)
            )
            total_loss = reconstruction_loss + kl_loss 
            
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)       
        return {
            'total_loss': self.total_loss_tracker.result(),
            'loss': self.reconstruction_loss_tracker.result(),
            'kl': self.kl_loss_tracker.result()
        }

    def call(self, inputs, training=False):
        outputs = self.vrnn(inputs, training)
        return outputs
    
    def generate(self, inputs):
        outputs = self.vrnn(inputs, training=True)
        return outputs
    

In [None]:
from tensorflow import keras

timesteps = X_j_train.shape[1]
latent_dim = 3
feature_space = X_j_train.shape[2]
rvae = VRNNGRU(feature_space, latent_dim)
rvae.compile(optimizer=keras.optimizers.Adam(lr=0.001))
rvae.fit(X_j_train, Y_j_train, batch_size=32, epochs=50)

In [54]:
a = tf.cast(tf.convert_to_tensor([[[1,2,3],[4,5,6]], [[5,7,9], [6,2,8]]]), "float32")
b = a * 8
c = a * 2
d = a * 9

In [61]:
tf.reduce_sum(tf.keras.losses.categorical_crossentropy(a, b), axis=1)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([22.346254, 38.080425], dtype=float32)>

In [305]:
-1 * tf.ones_like([11,1,1,1])

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([-1, -1, -1, -1], dtype=int32)>

## VRNN-GAN

WHAT WORKS: 
Batch_size = 64
Gen LR: 0.001
Disc LR: 0.0001
Disc Dim: 16
VRNN Dim: 64
Epochs: 50

HIGHLIGHTS:
- discrim fake loss is about 0.5, gen reconstruction accuracy is around 0.14, misled loss about 1.5
- larger batchsize seems to be more helpful in fooling the discriminator
- slowing the LR for the discriminator helps for stabilty
- using BCE with logits over sigmoid output layer
- large enough dimension for VRNN but not too large

In [1274]:
from tensorflow.keras import backend as K
def kl_gauss(posterior_means, prior_means, posterior_log_var, prior_log_var):   
    kl = prior_log_var - posterior_log_var + (tf.exp(posterior_log_var) + 
                                       tf.square(posterior_means - prior_means)) / tf.exp(prior_log_var) - 1
    kl = 0.5 * tf.reduce_sum(kl, axis=[1,2])
    return kl

def wasserstein_loss(y_true, y_pred):
    return tf.reduce_mean(y_true * y_pred)


class kCallback(tf.keras.callbacks.Callback):
    def __init__(self, count, limit):
        self.limit = limit
        self.count = count
    def on_train_batch_end(self, batch, logs={}):
        if self.count == self.limit:
            self.count = 0
        else:
            self.count += 1
        print('k is currently {}'.format(self.count))


class VRNNGRUGAN(tf.keras.Model):
    def __init__(self, feature_space, latent_dim, timesteps, **kwargs):
        super(VRNNGRUGAN, self).__init__(**kwargs)
        vrnn_cell = VRNNCell(latent_dim)
        self.feature_space = feature_space
        self.latent_dim = latent_dim
        self.vrnn = keras.layers.RNN(vrnn_cell, return_sequences=True)
        
        decoder_input = keras.layers.Input(shape=(timesteps, latent_dim))
        decoder_dense = keras.layers.TimeDistributed(keras.layers.Dense(32, activation='relu'))(decoder_input)
        decoder_dense = keras.layers.BatchNormalization()(decoder_dense)
        decoder_output = keras.layers.TimeDistributed(keras.layers.Dense(feature_space, activation='softmax'))(decoder_dense)
        decoder_model = keras.Model(decoder_input, decoder_output)
        self.decoder = decoder_model
        
        
        disc_input = keras.layers.Input(shape=(timesteps, feature_space))
        disc_rnn = keras.layers.Bidirectional(keras.layers.GRU(64, activation='tanh'))(disc_input)
        disc_output = keras.layers.Dense(1, activation='linear')(disc_rnn)
        disc_model = keras.Model(disc_input, disc_output)
        self.discrim = disc_model
        
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.next_step_loss_tracker = keras.metrics.Mean(
            name="next_step_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
        self.discrim_loss_tracker = keras.metrics.Mean(name="discrim_loss")
        self.discrim_fake_loss_tracker = keras.metrics.Mean(name="discrim_fake_loss")
        self.discrim_real_loss_tracker = keras.metrics.Mean(name="discrim_real_loss")
        self.misled_loss_tracker = keras.metrics.Mean(name="misled_loss")
        
    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
            self.discrim_loss_tracker,
            self.discrim_fake_loss_tracker,
            self.discrim_real_loss_tracker,
            self.misled_loss_tracker,
            self.next_step_loss_tracker
        ]
    def compile(self, vae_optimizer, discrim_optimizer):
        super(VRNNGRUGAN, self).compile()
        self.vae_optimizer = vae_optimizer
        self.discrim_optimizer = discrim_optimizer

    def train_step(self, data):
        if isinstance(data, tuple):
            input_data, match_data = data[0]
            output_data = data[1]
            
        with tf.GradientTape(persistent=True) as tape:
            outputs = self.vrnn(input_data, training=True)
            z = outputs[0]            
            preds = self.decoder(z)
            
            cut_input = tf.slice(input_data, [0,1,0], [-1,-1,-1])
            cut_pred = tf.slice(preds, [0,59,0], [-1,-1,-1])
            fake_seq = tf.concat([cut_input, cut_pred], axis=1)
            
            discrim_fake = self.discrim(fake_seq)
            discrim_real = self.discrim(match_data)
            
            discrim_fake_output = self.discrim(preds)
            discrim_real_output = self.discrim(output_data)
            
            bce_logits = tf.keras.losses.BinaryCrossentropy(from_logits=True)
            discrim_loss_fake = tf.reduce_mean(
                bce_logits(tf.zeros_like(discrim_fake), discrim_fake)
            )
            discrim_loss_real = tf.reduce_mean(
                bce_logits(tf.ones_like(discrim_real), discrim_real)
            )
            
            discrim_output_loss_fake = tf.reduce_mean(
                bce_logits(tf.zeros_like(discrim_fake_output), discrim_fake_output)
            )
        
            discrim_output_loss_real = tf.reduce_mean(
                bce_logits(tf.ones_like(discrim_real_output), discrim_real_output)
            )
             
            discrim_loss = 0.5 * (discrim_output_loss_fake + discrim_output_loss_real + discrim_loss_real + discrim_loss_fake)
        discrim_grads = tape.gradient(discrim_loss, self.discrim.trainable_weights)
        self.discrim_optimizer.apply_gradients(zip(discrim_grads, self.discrim.trainable_weights))
        self.discrim_loss_tracker.update_state(discrim_loss)
        self.discrim_fake_loss_tracker.update_state(discrim_output_loss_fake)
        self.discrim_real_loss_tracker.update_state(discrim_output_loss_real)
        del tape
        
        with tf.GradientTape(persistent=True) as tape:
            outputs = self.vrnn(input_data, training=True)
            z = outputs[0]            
            preds = self.decoder(z)    
            
            cut_input = tf.slice(input_data, [0,1,0], [-1,-1,-1])
            cut_pred = tf.slice(preds, [0,59,0], [-1,-1,-1])
            cut_output = tf.slice(output_data, [0,59,0], [-1, -1, -1])
            fake_seq = tf.concat([cut_input, cut_pred], axis=1)
            discrim_fake = self.discrim(fake_seq)
            discrim_fake_output = self.discrim(preds)

            q_mu = outputs[1]
            p_mu = outputs[2]
            q_log_var = outputs[3]
            p_log_var = outputs[4]


            kl_loss = tf.reduce_mean(kl_gauss(q_mu, p_mu, q_log_var, p_log_var))
            bce_logits = tf.keras.losses.BinaryCrossentropy(from_logits=True)

            reconstruction_loss = tf.reduce_mean(
                tf.keras.losses.categorical_crossentropy(output_data, preds)
            )
            
            next_step_loss = tf.reduce_mean(
                tf.keras.losses.categorical_crossentropy(cut_output, cut_pred)
            )

            mislead_discrim_loss = tf.reduce_mean(
                bce_logits(tf.ones_like(discrim_fake), discrim_fake)
            )
            
            mislead_output_discrim_loss = tf.reduce_mean(
                bce_logits(tf.ones_like(discrim_fake_output), discrim_fake_output) 
            )
            total_loss = next_step_loss + reconstruction_loss + 0.1 * kl_loss + ( mislead_output_discrim_loss + mislead_discrim_loss) 


        encoder_grads = tape.gradient(total_loss, self.vrnn.trainable_weights)
        decoder_grads = tape.gradient(total_loss, self.decoder.trainable_weights)
        encoder_grads, _ = tf.clip_by_global_norm(encoder_grads, 5.0)
        decoder_grads, _ = tf.clip_by_global_norm(decoder_grads, 5.0)

        self.vae_optimizer.apply_gradients(zip(encoder_grads, self.vrnn.trainable_weights))
        self.vae_optimizer.apply_gradients(zip(decoder_grads, self.decoder.trainable_weights))


        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        self.misled_loss_tracker.update_state(mislead_output_discrim_loss)
        self.next_step_loss_tracker.update_state(next_step_loss)
        del tape
            
        return {
            'total_loss': self.total_loss_tracker.result(),
            'loss': self.reconstruction_loss_tracker.result(),
            'kl': self.kl_loss_tracker.result(),
            'discrim_loss':self.discrim_loss_tracker.result(),
            'discrim_loss_fake':self.discrim_fake_loss_tracker.result(),
            'discrim_loss_real':self.discrim_real_loss_tracker.result(),
            'misled_loss':self.misled_loss_tracker.result(),
            'next_step_loss':self.next_step_loss_tracker.result()
        }

    def call(self, inputs):
        outputs = self.vrnn(inputs, training=True)
        z = outputs[0]
        preds = self.decoder(z)
        return preds
    
    def generate(self, inputs):
        outputs = self.vrnn(inputs, training=False)
        z = outputs[0]
        preds = self.decoder(z)
        return preds
    
    def test_step(self, data):
        inputs, match = data[0]
        outputs = data[1]
        preds = self(inputs, training=False)
        recon_loss = tf.keras.losses.categorical_crossentropy(outputs, preds)
        return {
            "loss": recon_loss
        }
        
    

In [1275]:
checkpoint_filepath = 'vrnn_checkpoints/vrnn_gan'

model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=False,
    monitor='loss',
    mode='min',
    save_best_only=True)

timesteps = X_j_train.shape[1]
latent_dim = 64
feature_space = X_j_train.shape[2]
rvae = VRNNGRUGAN(feature_space, latent_dim, timesteps)
rvae.compile(keras.optimizers.Adam(lr=0.001), keras.optimizers.Adam(lr=0.0001))

rvae.fit([X_j_train, match_j], Y_j_train, batch_size=64, epochs=25, validation_split=0.2, callbacks=[model_checkpoint_callback])

Epoch 1/25




TypeError: None has type NoneType, but expected one of: bytes, unicode

In [1270]:
tf.keras.backend.clear_session()

In [1244]:
preds = rvae.predict(X_j_test)
# preds = tf.squeeze(tf.slice(preds, [0,59,0], [-1, -1, -1])).numpy()
# preds = np.argmax(preds, axis=1)
truth = np.argmax(Y_j_test[:,59,:], axis=1)


In [1246]:
a_preds = preds

In [1247]:
a_preds = np.argmax(a_preds[:,59,:], axis=1)

In [1232]:
a_preds

array([ 6,  6,  6, ..., 12, 12, 12])

In [1251]:
np.sum(a_preds == truth) / len(truth)

0.6940651615519805

In [1234]:
inds = np.argwhere(truth == 2)

In [1249]:
a_preds = np.delete(a_preds, np.ndarray.flatten(inds))

In [1250]:
truth = np.delete(truth, np.ndarray.flatten(inds))

In [1237]:
truth

array([23, 23,  6, ..., 12, 12, 12])

In [1238]:
a_preds

array([ 6,  6,  6, ...,  6, 12, 12])

In [1245]:
np.argmax(preds[8], axis=1)

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  6,  6, 12, 12, 12, 12])

In [1240]:
np.argmax(match_j[3], axis=1)

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  1,  6, 12, 12, 12])

In [1088]:
np.argmax(X_j_test[15], axis=1)

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  1, 23,  6, 12])

In [1181]:
np.argmax(Y_j_test[8], axis=1)

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0, 23,  6, 12, 12, 12, 12, 12])

In [733]:
activity_encoder

{'A_SUBMITTED': 3,
 'A_PARTLYSUBMITTED': 4,
 'A_PREACCEPTED': 5,
 'W_Completeren aanvraag': 6,
 'A_ACCEPTED': 7,
 'O_SELECTED': 8,
 'A_FINALIZED': 9,
 'O_CREATED': 10,
 'O_SENT': 11,
 'W_Nabellen offertes': 12,
 'O_SENT_BACK': 13,
 'W_Valideren aanvraag': 14,
 'A_REGISTERED': 15,
 'A_APPROVED': 16,
 'O_ACCEPTED': 17,
 'A_ACTIVATED': 18,
 'O_CANCELLED': 19,
 'W_Wijzigen contractgegevens': 20,
 'A_DECLINED': 21,
 'A_CANCELLED': 22,
 'W_Afhandelen leads': 23,
 'O_DECLINED': 24,
 'W_Nabellen incomplete dossiers': 25,
 'W_Beoordelen fraude': 26,
 'Start': 1,
 'End': 2}

In [925]:
np.argmax(Y_j_test[16])

14

In [933]:
np.argmax(preds[16], axis=1)

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0, 23,  6,  6, 12, 12, 14])

In [936]:
np.argmax(X_j_test[16])

0

In [937]:
np.argmax(X_j_test[16], axis=1)

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  1,  6, 12, 12, 14, 25])

In [1195]:
from sklearn.metrics import f1_score, accuracy_score, balanced_accuracy_score
print("Accuracy:{}".format(accuracy_score(truth, a_preds)))


ValueError: Found input variables with inconsistent numbers of samples: [7397, 8397]

In [140]:
np.unique(preds)

array([ 0,  2, 12, 23, 25])

In [141]:
preds

array([ 2,  0, 23, ..., 12,  2,  2])

In [116]:
X_j_test

array([[[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.]],

       [[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

In [17]:
a = tf.convert_to_tensor([0,1,2])

In [45]:
tf.reshape(a, [1,1,3])

<tf.Tensor: shape=(1, 1, 3), dtype=int32, numpy=array([[[0, 1, 2]]], dtype=int32)>

In [50]:
a = tf.convert_to_tensor([[[1,2,3]]])

In [51]:
a

<tf.Tensor: shape=(1, 1, 3), dtype=int32, numpy=array([[[1, 2, 3]]], dtype=int32)>

In [54]:
b = tf.reshape(tf.convert_to_tensor([4,5,6]),[1,3])

In [55]:
tf.concat([a,b], axis=1)

InvalidArgumentError: ConcatOp : Ranks of all input tensors should match: shape[0] = [1,1,3] vs. shape[1] = [1,3] [Op:ConcatV2] name: concat