In [2]:
import os 
import pm4py
from pm4py.objects.conversion.log import converter as log_converter
import pandas as pd

curr_path = os.path.abspath('')
folder_path = os.path.join(curr_path, 'data')
filepath = os.path.join(folder_path, 'BPI_Challenge_2012.xes')
log = pm4py.read_xes(filepath)
df = log_converter.apply(log, variant=log_converter.Variants.TO_DATA_FRAME)

parsing log, completed traces ::   0%|          | 0/13087 [00:00<?, ?it/s]

In [3]:
unique_activities = pd.unique(df['concept:name'])
activity_encoder = dict(zip(unique_activities,range(3,len(unique_activities) + 3)))
activity_encoder['Start'] = 1
activity_encoder['End'] = 2
df['concept:encoded'] = df['concept:name'].apply(lambda x: activity_encoder[x])

In [4]:
import pandas as pd

def process_arrays(df, complete=False, W=False):
    proc_df = df
    if complete:
        proc_df = proc_df[proc_df['lifecycle:transition'] == 'COMPLETE']
    if W:
        proc_df = proc_df[proc_df['concept:name'].str.startswith('W_')]
    
    arrays = proc_df.groupby(['case:concept:name']).agg(list)
    arrays.reset_index(inplace=True)
    arrays['time:interarrival_min'] = arrays['time:timestamp'].apply(lambda x: [0] + [0] + [((x[i+1] - x[i]).total_seconds() / 60) for i in range(len(x)-1)] + [0])
    arrays['concept:encoded'] = arrays['concept:encoded'].apply(lambda x: [1] + x + [2])
    return arrays

In [5]:
arrays = process_arrays(df, complete=True, W=True)

In [6]:
from sklearn.model_selection import train_test_split, KFold
import numpy as np
# sampled_arrays = arrays['case:concept:name'].sample(n= 100000)

kf = KFold(n_splits=3)
splits = []
for train_index, test_index in kf.split(arrays['case:concept:name']):
    id_tr = arrays['case:concept:name'].iloc[train_index]
    id_te = arrays['case:concept:name'].iloc[test_index]
    splits.append((id_tr, id_te))

In [7]:
i = 0
id_train, id_test = splits[i]

In [26]:
from tensorflow import keras
from keras.utils import to_categorical
from tensorflow.keras import layers
import numpy as np
# Many to one + context


activity_decoder = {v:k for k,v in activity_encoder.items()}

def many_to_one_prep(journey):
    """
    Setting up a many-to-one scheme.
    Function Input is a complete journey.
    Returns an RNN input of a history of activities, output is the following activity
    """
    inp = [journey[:i] for i in range(1,len(journey))]
    out = journey[1:]
    return (inp,out)

def many_to_many_prep(journey):
    """
    Setting up a many-to-many scheme shifted by 1.  
    Function input is a complete journey.
    Returns an RNN input of a history of activities omitting the final activity, output is the same history shifted by 1 timeslot (omitting the first activity)
    """
    inp = journey[:-1]
    out = journey[1:]
    return(inp, out)

def many_to_one_make_data(id_indexes, arrays_df):
    """
    Creates training and testing sets for an RNN model.  
    Function input are indices of a selected subset of data, a DataFrame consisting of aggregated list-like journey data, and a data preparation method.
    Returns a tuple of training and testing data for journeys and inter-arrival times.
    """
    X_j = []
    Y_j = []
    X_t = []
    Y_t = []

    selected = arrays_df[arrays_df["case:concept:name"].isin(id_indexes)]

    for index, row in selected.iterrows():
        j_inp, j_out = many_to_many_prep(row['concept:encoded'])
        t_inp, t_out = many_to_many_prep(row['time:interarrival_min'])
        X_j.append(j_inp)
        X_t.append(t_inp)
        Y_j.append(j_out)
        Y_t.append(t_out)
    X_j = keras.preprocessing.sequence.pad_sequences(X_j, padding='pre', maxlen=60)
    X_j = to_categorical(X_j)
    X_t = keras.preprocessing.sequence.pad_sequences(X_t, padding='pre', maxlen=60)
    Y_j = keras.preprocessing.sequence.pad_sequences(Y_j, padding='pre', maxlen=60)
    Y_j = to_categorical(Y_j)
    Y_t = keras.preprocessing.sequence.pad_sequences(Y_t, padding='pre', maxlen=60)
    return (X_j, X_t, Y_j, Y_t)

In [27]:
X_j_train, X_t_train, Y_j_train, Y_t_train = many_to_one_make_data(id_train.values, arrays)
X_j_test, X_t_test, Y_j_test, Y_t_test = many_to_one_make_data(id_test.values, arrays)

# VRNN Model (Runs properly)

In [47]:
import tensorflow as tf
import numpy as np
# Implementing Variational RNN's and variations by subclassing Keras RNN-type Cells

class VRNNCell(tf.keras.layers.GRUCell):
    def __init__(self, units, **kwargs):
        super(VRNNCell, self).__init__(units, **kwargs)
    

    def build(self, input_shape):
        # Taking most of the standard weight initiaalizations from the base GRU class
        super().build((input_shape[0], input_shape[1] + self.units))
        
        self.input_kernel = self.add_weight(shape=(input_shape[-1], input_shape[-1]), initializer='uniform')
        
        self.state_kernel = self.add_weight(shape=(self.units, self.units), initializer='uniform')
        
        self.encoder_mu_kernel = self.add_weight(shape=(input_shape[-1] + self.units, self.units), initializer='uniform')
        
        self.encoder_logvar_kernel = self.add_weight(shape=(input_shape[-1] + self.units, self.units), initializer='uniform')
        
        self.prior_mu_kernel = self.add_weight(shape=(self.units, self.units), initializer='uniform')
        
        self.prior_logvar_kernel = self.add_weight(shape=(self.units, self.units), initializer='uniform')  


    def sample(self, mu, log_var):
        # Sample from unit Normal
        epsilon = tf.random.normal([1, self.units])
        half_constant = tf.convert_to_tensor(np.full((1, self.units), 0.5).astype('float32'))
        # All element-wise computations
        z = tf.math.multiply(half_constant, tf.math.exp(log_var)) + mu
        return z
    
    def call(self, inputs, states, training=False):
        # Some formulations:
        # Generation:
        # z_t ~ N(mu_(0, t), sigma_(0,t)), w here [mu_(0,t), sigma(0,t)] = phi_prior(h_(t-1))
        # Update: 
        # h_t = f_theta(h_(t-1), z_t, x_t) *recurrence equation
        # Inference:
        # z_t ~ N(mu_z, sigma_z), where [mu_z, sigma_z] = phi_post(x_t, h_(t-1))
        #
        # Let the base RNN cell handle the rest and add loss
        
        if training:
            x_t = tf.matmul(inputs, self.input_kernel)
            h_prev = tf.matmul(states[0], self.state_kernel)

            p_mu = tf.matmul(h_prev, self.prior_mu_kernel)
            p_logvar = tf.matmul(h_prev, self.prior_logvar_kernel)
            
            input_state_concat = tf.concat([x_t, h_prev], axis=1)
            
            q_mu = tf.matmul(input_state_concat, self.encoder_mu_kernel)
            q_logvar = tf.matmul(input_state_concat, self.encoder_logvar_kernel)
            z_t = self.sample(q_mu, q_logvar)
            
            inp = tf.concat([x_t, z_t], axis=1)
            _, h_next = super().call(inp, h_prev)
            
            output = (z_t, q_mu, p_mu, q_logvar, p_logvar)
            # self.add_loss(self.kl_gauss(q_mu, p_mu, q_logvar, p_logvar))
            return output, h_next
        
        else:
            # Return prior and posterior parameters
            x_t = inputs
            h_prev = states[0]

            p_mu = tf.matmul(h_prev, self.prior_mu_kernel)
            p_logvar = tf.matmul(h_prev, self.prior_logvar_kernel)
            z_t = self.sample(p_mu, p_logvar)
            
            input_state_concat = tf.concat([x_t, h_prev], axis=1)
            
            q_mu = tf.matmul(input_state_concat, self.encoder_mu_kernel)
            q_logvar = tf.matmul(input_state_concat, self.encoder_logvar_kernel)
            
            
            i = tf.concat([x_t, z_t], axis=1)
            _, h_next = super().call(i, h_prev)
            
            output = (z_t, q_mu, p_mu, q_logvar, p_logvar)
            
            return z_t, h_next
    
   
    def get_config(self):
        return {"units":self.units}

In [50]:
def kl_gauss(posterior_means, prior_means, posterior_log_var, prior_log_var):   
    kl = prior_log_var - posterior_log_var + (tf.exp(posterior_log_var) + 
                                       tf.square(posterior_means - prior_means)) / tf.exp(prior_log_var) - 1
    kl = 0.5 * tf.reduce_sum(kl)
    return kl

class VRNNGRU(tf.keras.Model):
    def __init__(self, feature_space, latent_dim, **kwargs):
        super(VRNNGRU, self).__init__(**kwargs)
        vrnn_cell = VRNNCell(3)
        self.latent_dim = latent_dim
        self.vrnn = keras.layers.RNN(vrnn_cell, return_sequences=True)
        self.decoder = keras.layers.TimeDistributed(keras.layers.Dense(feature_space, activation='softmax'))
        
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
        
    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        if isinstance(data, tuple):
            input_data = data[0]
            output_data = data[1]
            
            timesteps = input_data.shape[1]
            num_feats = input_data.shape[2]
        with tf.GradientTape() as tape:
            outputs = self.vrnn(input_data, training=True)
            z = outputs[0]
            preds = self.decoder(z)
            print(preds)
            
            q_mu = tf.squeeze(tf.squeeze(outputs[1]))
            p_mu = tf.squeeze(tf.squeeze(outputs[2]))
            q_log_var = tf.squeeze(tf.squeeze(outputs[3]))
            p_log_var = tf.squeeze(tf.squeeze(outputs[4]))
            
            kl_loss = tf.reduce_mean(kl_gauss(q_mu, p_mu, q_log_var, p_log_var))
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(tf.keras.losses.categorical_crossentropy(output_data, preds), axis=1)
            )
            total_loss = reconstruction_loss + kl_loss 
            
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)       
        return {
            'total_loss': self.total_loss_tracker.result(),
            'loss': self.reconstruction_loss_tracker.result(),
            'kl': self.kl_loss_tracker.result()
        }

    def call(self, inputs, training=False):
        outputs = self.vrnn(inputs, training)
        return outputs
    
    def generate(self, inputs):
        outputs = self.vrnn(inputs, training=True)
        return outputs
    

In [51]:
from tensorflow import keras

timesteps = X_j_train.shape[1]
latent_dim = 3
feature_space = X_j_train.shape[2]
rvae = VRNNGRU(feature_space, latent_dim)
rvae.compile(optimizer=keras.optimizers.Adam(lr=0.001))
rvae.fit(X_j_train, Y_j_train, batch_size=32, epochs=50)

Epoch 1/50
Tensor("time_distributed_6/Reshape_1:0", shape=(None, 60, 27), dtype=float32)
Tensor("time_distributed_6/Reshape_1:0", shape=(None, 60, 27), dtype=float32)
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1884a8290>