In [7]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
data = pd.read_csv('data/chicago.csv')

# Contextual Activity
context_data = data[['Case ID', '(case) customer:age', '(case) customer:currently_student', '(case) customer:household_income',
                    '(case) customer:employment_status', '(case) customer:disabled']]

# Journey Data
all_activities = pd.unique(data['Activity'])
activity_encoder = dict(zip(all_activities,range(3,len(all_activities) + 3)))
activity_encoder['Start'] = 1
activity_encoder['End'] = 2
journeys_data = data[['Case ID','Activity']]
journeys_data['Activity Label'] = journeys_data['Activity'].apply(lambda x: activity_encoder[x])
journeys_data = journeys_data.groupby(['Case ID'])['Activity Label'].apply(list)
journeys_data = journeys_data.reset_index(name="Activity")
journeys_data['Activity'] = journeys_data['Activity'].apply(lambda x: [1] + x + [2])
journey_vecs = keras.preprocessing.sequence.pad_sequences(journeys_data['Activity'], padding='post')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  journeys_data['Activity Label'] = journeys_data['Activity'].apply(lambda x: activity_encoder[x])


In [8]:
context = data[['(case) time:DATE_day_of_week_0_is_monday', '(case) customer:employed',
               '(case) customer:currently_student', '(case) customer:valid_license']]
context = pd.get_dummies(context, columns=['(case) time:DATE_day_of_week_0_is_monday', '(case) customer:employed',
                                          '(case) customer:currently_student','(case) customer:valid_license'])
context = pd.concat([data['Case ID'], context], axis=1)
context.drop_duplicates(subset='Case ID', inplace=True)
context.reset_index(inplace=True)

In [10]:
from sklearn.model_selection import train_test_split
id_train, id_test = train_test_split(journeys_data['Case ID'], test_size=0.2)

In [11]:
# Many to one + context
def mto_lstm_prep(journey):
    inp = [journey[:i] for i in range(1,len(journey))]
    out = journey[1:]
    return (inp,out)

def make_data(id_indexes, journey_df, context_df):
    X_j = []
    Y = []
    X_c = []

    selected = journey_df.iloc[id_indexes]

    for index, row in selected.iterrows():
        inp, out = mto_lstm_prep(row['Activity'])
        rep = len(inp)
        c = context_df.iloc[index].drop(labels=['index','Case ID']).to_numpy()
        c = np.tile(c, (rep, 1))
        X_j.extend(inp)
        Y.extend(out)
        X_c.extend(c)
    X_j = keras.preprocessing.sequence.pad_sequences(X_j, padding='pre')
    X_c = np.asarray(X_c).astype("float32")
    Y = np.asarray(Y).astype("float32")
    return (X_j, X_c, Y)

In [12]:
X_j_train, X_c_train, Y_train = make_data(id_train.index, journeys_data, context)
X_j_test, X_c_test, Y_test = make_data(id_test.index, journeys_data, context)

# VRNN Model

In [56]:
import tensorflow as tf
import numpy as np
# Implementing Variational RNN's and variations by subclassing Keras RNN-type Cells

class VRNNCell(tf.keras.layers.GRUCell):
    def __init__(self, units, **kwargs):
        super(VRNNCell, self).__init__(units, **kwargs)
    

    def build(self, input_shape):
        # Taking most of the standard weight initiaalizations from the base GRU class
        super().build((input_shape[0], input_shape[1] + self.units))
        
        self.input_kernel = self.add_weight(shape=(input_shape[-1], input_shape[-1]), initializer='uniform')
        
        self.state_kernel = self.add_weight(shape=(self.units, self.units), initializer='uniform')
        
        self.encoder_mu_kernel = self.add_weight(shape=(input_shape[-1] + self.units, self.units), initializer='uniform')
        
        self.encoder_logvar_kernel = self.add_weight(shape=(input_shape[-1] + self.units, self.units), initializer='uniform')
        
        self.prior_mu_kernel = self.add_weight(shape=(self.units, self.units), initializer='uniform')
        
        self.prior_logvar_kernel = self.add_weight(shape=(self.units, self.units), initializer='uniform')  


    def sample(self, mu, log_var):
        # Sample from unit Normal
        epsilon = tf.random.normal([1, self.units])
        half_constant = tf.convert_to_tensor(np.full((1, self.units), 0.5).astype('float32'))
        # All element-wise computations
        z = tf.math.multiply(half_constant, tf.math.exp(log_var)) + mu
        return z
    
    def call(self, inputs, states, training=False):
        # Some formulations:
        # Generation:
        # z_t ~ N(mu_(0, t), sigma_(0,t)), w here [mu_(0,t), sigma(0,t)] = phi_prior(h_(t-1))
        # Update: 
        # h_t = f_theta(h_(t-1), z_t, x_t) *recurrence equation
        # Inference:
        # z_t ~ N(mu_z, sigma_z), where [mu_z, sigma_z] = phi_post(x_t, h_(t-1))
        #
        # Let the base RNN cell handle the rest and add loss
        
        if training:
            x_t = tf.matmul(inputs, self.input_kernel)
            h_prev = tf.matmul(states[0], self.state_kernel)

            p_mu = tf.matmul(h_prev, self.prior_mu_kernel)
            p_logvar = tf.matmul(h_prev, self.prior_logvar_kernel)
            
            input_state_concat = tf.concat([x_t, h_prev], axis=1)
            
            q_mu = tf.matmul(input_state_concat, self.encoder_mu_kernel)
            q_logvar = tf.matmul(input_state_concat, self.encoder_logvar_kernel)
            z_t = self.sample(q_mu, q_logvar)
            
            inp = tf.concat([x_t, z_t], axis=1)
            _, h_next = super().call(inp, h_prev)
            
            output = (z_t, q_mu, p_mu, q_logvar, p_logvar)
            # self.add_loss(self.kl_gauss(q_mu, p_mu, q_logvar, p_logvar))
            return output, h_next
        
        else:
            # Return prior and posterior parameters
            x_t = inputs
            h_prev = states[0]

            p_mu = tf.matmul(h_prev, self.prior_mu_kernel)
            p_logvar = tf.matmul(h_prev, self.prior_logvar_kernel)
            z_t = self.sample(p_mu, p_logvar)
            
            input_state_concat = tf.concat([x_t, h_prev], axis=1)
            
            q_mu = tf.matmul(input_state_concat, self.encoder_mu_kernel)
            q_logvar = tf.matmul(input_state_concat, self.encoder_logvar_kernel)
            
            
            i = tf.concat([x_t, z_t], axis=1)
            _, h_next = super().call(i, h_prev)
            
            output = (z_t, q_mu, p_mu, q_logvar, p_logvar)
            
            return z_t, h_next
    
   
    def get_config(self):
        return {"units":self.units}

In [88]:
def kl_gauss(posterior_means, prior_means, posterior_log_var, prior_log_var):   
    kl = prior_log_var - posterior_log_var + (tf.exp(posterior_log_var) + 
                                       tf.square(posterior_means - prior_means)) / tf.exp(prior_log_var) - 1
    kl = 0.5 * tf.reduce_sum(kl)
    return kl

class VRNNGRU(tf.keras.Model):
    def __init__(self, vrnn, **kwargs):
        super(VRNNGRU, self).__init__(**kwargs)
        self.vrnn = vrnn
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")
        
    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        if isinstance(data, tuple):
            input_data = data[0]
            output_data = data[1]

        with tf.GradientTape() as tape:
            outputs = self.vrnn(input_data, training=True)
            preds = tf.slice(outputs[0], 
            print(preds)
            q_mu = tf.squeeze(tf.squeeze(outputs[1]))
            p_mu = tf.squeeze(tf.squeeze(outputs[2]))
            q_log_var = tf.squeeze(tf.squeeze(outputs[3]))
            p_log_var = tf.squeeze(tf.squeeze(outputs[4]))
            
            kl_loss = tf.reduce_mean(kl_gauss(q_mu, p_mu, q_log_var, p_log_var))
            reconstruction_loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(output_data, preds))
            total_loss = reconstruction_loss + kl_loss 
            
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)       
        return {
            'total_loss': self.total_loss_tracker.result(),
            'loss': self.reconstruction_loss_tracker.result(),
            'kl': self.kl_loss_tracker.result()
        }

    def call(self, inputs, training=False):
        outputs = self.vrnn(inputs, training)
        return outputs
    



In [89]:
from tensorflow import keras
batch_size = 64
timesteps = 20

cell = VRNNCell(3)
vrnn = keras.layers.RNN(cell, return_sequences=True)

X = np.random.random((batch_size * 1, timesteps, 32))
Y = np.random.random((batch_size * 1, 3))

rvae = VRNNGRU(vrnn)
rvae.compile(optimizer=keras.optimizers.Adam(lr=0.001))


In [90]:
input_1_data = np.random.random((batch_size * 1, timesteps, 32))
target_1_data = np.random.random((batch_size * 1, 3))
rvae.fit(input_1_data, target_1_data, batch_size=1, epochs=50)

Epoch 1/50
Tensor("rnn_23/transpose_1:0", shape=(1, 20, 3), dtype=float32)


ValueError: in user code:

    c:\users\jchle\work\thesis\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:855 train_function  *
        return step_function(self, iterator)
    c:\users\jchle\work\thesis\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:845 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    c:\users\jchle\work\thesis\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1285 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    c:\users\jchle\work\thesis\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2833 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    c:\users\jchle\work\thesis\venv\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3608 _call_for_each_replica
        return fn(*args, **kwargs)
    c:\users\jchle\work\thesis\venv\lib\site-packages\tensorflow\python\keras\engine\training.py:838 run_step  **
        outputs = model.train_step(data)
    <ipython-input-88-e61cf0c866a9>:40 train_step
        reconstruction_loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(output_data, preds))
    c:\users\jchle\work\thesis\venv\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    c:\users\jchle\work\thesis\venv\lib\site-packages\tensorflow\python\keras\losses.py:1643 categorical_crossentropy
        return backend.categorical_crossentropy(
    c:\users\jchle\work\thesis\venv\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    c:\users\jchle\work\thesis\venv\lib\site-packages\tensorflow\python\keras\backend.py:4862 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    c:\users\jchle\work\thesis\venv\lib\site-packages\tensorflow\python\framework\tensor_shape.py:1161 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (1, 3) and (1, 20, 3) are incompatible


In [5]:
cell = VRNNCell(3)
vrnn = keras.layers.RNN(cell)

input_1 = keras.Input((None, 32))
output = vrnn(input_1, training=True)
model = keras.models.Model(input_1, output)
model.compile(optimizer="adam", loss="mse", metrics=["accuracy"])

In [6]:
model.fit(X,Y )



<tensorflow.python.keras.callbacks.History at 0x28123e202e0>

In [44]:
vrnn(input_1_data, training=False)

(<tf.Tensor: shape=(64, 3), dtype=float32, numpy=
 array([[0.50068074, 0.5292831 , 0.49616778],
        [0.46271214, 0.5261049 , 0.5048643 ],
        [0.48982954, 0.5320121 , 0.50850064],
        [0.452219  , 0.518947  , 0.507816  ],
        [0.5122242 , 0.52833474, 0.5048509 ],
        [0.4915202 , 0.5279296 , 0.51092076],
        [0.5003726 , 0.5333755 , 0.5105326 ],
        [0.48690113, 0.5302101 , 0.5068163 ],
        [0.4777823 , 0.52243525, 0.50713694],
        [0.47992167, 0.5169551 , 0.51436675],
        [0.5010537 , 0.5340675 , 0.510652  ],
        [0.4922114 , 0.5282726 , 0.49006072],
        [0.48289537, 0.52634203, 0.5137888 ],
        [0.48125413, 0.52143514, 0.5062611 ],
        [0.4625477 , 0.5223661 , 0.5075189 ],
        [0.48342434, 0.5331997 , 0.50072455],
        [0.4521863 , 0.5157041 , 0.5132326 ],
        [0.4834373 , 0.53055817, 0.50949186],
        [0.48872975, 0.52493936, 0.5047332 ],
        [0.4389698 , 0.50863194, 0.5096451 ],
        [0.48403248, 0.5371503

In [13]:
vrnn(input_1_data, training=False)

(<tf.Tensor: shape=(64, 3), dtype=float32, numpy=
 array([[0.48544663, 0.50757957, 0.4699906 ],
        [0.50136703, 0.50307465, 0.49709392],
        [0.48719135, 0.50693065, 0.4732785 ],
        [0.4811251 , 0.5139315 , 0.45654422],
        [0.48267457, 0.5189062 , 0.45178062],
        [0.4924169 , 0.5194061 , 0.46450275],
        [0.4944159 , 0.5097243 , 0.47875574],
        [0.4922774 , 0.5230228 , 0.4596571 ],
        [0.4844971 , 0.51291144, 0.46177593],
        [0.4838476 , 0.52520555, 0.44552037],
        [0.4860503 , 0.50793403, 0.46988666],
        [0.48626116, 0.51788074, 0.45810208],
        [0.5016757 , 0.51223445, 0.48537055],
        [0.4957752 , 0.527188  , 0.4598345 ],
        [0.48279372, 0.5172111 , 0.45413494],
        [0.4909631 , 0.5158375 , 0.46712944],
        [0.5005782 , 0.5119957 , 0.48472178],
        [0.48909238, 0.52358216, 0.45513284],
        [0.4848975 , 0.51882964, 0.4550278 ],
        [0.49085367, 0.5058539 , 0.47870076],
        [0.48968858, 0.500719 