### MDN References:

[useful pytorch reference](https://github.com/tonyduan/mixture-density-network)

[keras version](https://github.com/cpmpercussion/keras-mdn-layer)

[another keras version](https://github.com/omimo/Keras-MDN/blob/master/kmdn/mdn.py)


In [1]:
import MDN
import tensorflow as tf
import tensorflow.keras.backend as K
import pandas as pd
from ProcessTrueStateActionData import read_df_in_chunks

### Load and preprocess the data 
(produce tf train+test datasets)

In [2]:
# Number of rows per training batch fed to the tf.data pipeline.
batch_size = 64
# Fraction of rows sampled into the training set; the remainder is the test set.
train_test_split = 0.8

# Passed to the MDN layer and its loss as the output dimension.
LATENT_SIZE = 8 # (mdn output_dimension)
# Number of mixture components in the MDN head.
NUMBER_MIXTURES = 5

# NOTE: the training-loop cell further down reassigns EPOCHS before looping.
EPOCHS = 1

  and should_run_async(code)


In [3]:
def get_columns_for_training(num_nodes=13, true_states=("pre", "blue", "red"),
                             status_kinds=("known_status", "access_status")):
    """Build the true-state column names, grouped by state.

    Column names follow the pattern ``"{node}_ts_{state}_{kind}"``. Within
    each state the columns are ordered node by node, and for every node the
    kinds appear in the order given by ``status_kinds``.

    Args:
        num_nodes: Number of nodes; node ids run from 0 to ``num_nodes - 1``.
        true_states: State labels to generate columns for.
        status_kinds: Per-node status suffixes.

    Returns:
        dict mapping each state label to its list of column names
        (``num_nodes * len(status_kinds)`` entries per state).
    """
    return {
        state: [
            f"{node}_ts_{state}_{kind}"
            for node in range(num_nodes)
            for kind in status_kinds
        ]
        for state in true_states
    }

In [4]:
# Per-state column-name lists, plus their concatenation in pre -> blue -> red order.
cols_dict = get_columns_for_training()
pre_cols = cols_dict["pre"]
blue_cols = cols_dict["blue"]
red_cols = cols_dict["red"]
all_cols = [*pre_cols, *blue_cols, *red_cols]

In [5]:
# Load only the true-state columns that are actually used, instead of reading
# the whole file and discarding the rest afterwards; this keeps peak memory
# and I/O down. Every selected column is stored as a pandas categorical.
df = pd.read_parquet(
    "csv_data/TrueStatesObsActsRwds_1221_4000_B_Line.parquet",
    columns=all_cols,
).astype("category")

In [6]:
# Per-column memory footprint in bytes (deep=True also inspects object contents).
df.memory_usage(deep=True)

  and should_run_async(code)


Index                      39072000
0_ts_pre_known_status       4884124
0_ts_pre_access_status      4884116
1_ts_pre_known_status       4884132
1_ts_pre_access_status      4884132
                             ...   
10_ts_red_access_status     4884132
11_ts_red_known_status      4884124
11_ts_red_access_status     4884132
12_ts_red_known_status      4884124
12_ts_red_access_status     4884132
Length: 79, dtype: int64

In [7]:
# Reproducible random split: sample the training fraction, then take every
# row whose index label was not sampled as the test set.
train_df = df.sample(frac=train_test_split, random_state=42)
test_df = df.drop(train_df.index)

# Slice each split into its pre / blue / red column groups.
train_pre_df, train_blue_df, train_red_df = (
    train_df[group] for group in (pre_cols, blue_cols, red_cols)
)
test_pre_df, test_blue_df, test_red_df = (
    test_df[group] for group in (pre_cols, blue_cols, red_cols)
)

train_size, test_size = len(train_df), len(test_df)

  and should_run_async(code)


In [8]:
# Sanity check: number of rows that ended up in the training split.
print(train_size)

3907200


  and should_run_async(code)


In [9]:
# Each element is ((pre_state, blue_state), red_state): the first tuple is the
# model input, the red state is the prediction target.
train_dataset = (
    tf.data.Dataset.from_tensor_slices(
        ((train_pre_df.values, train_blue_df.values), train_red_df.values)
    )
    .batch(batch_size)
)
# The test pipeline is shuffled over the full test set and yields single-row batches.
test_dataset = (
    tf.data.Dataset.from_tensor_slices(
        ((test_pre_df.values, test_blue_df.values), test_red_df.values)
    )
    .shuffle(test_size)
    .batch(1)
)

In [10]:
# Peek at a single training batch to check the element structure and dtypes.
for row in train_dataset.take(1):
  print(row)

((<tf.Tensor: shape=(64, 26), dtype=int64, numpy=
array([[0, 0, 0, ..., 0, 1, 0],
       [0, 0, 2, ..., 0, 2, 2],
       [0, 0, 0, ..., 0, 1, 0],
       ...,
       [0, 0, 2, ..., 2, 1, 0],
       [0, 0, 2, ..., 2, 1, 0],
       [1, 0, 2, ..., 0, 2, 2]])>, <tf.Tensor: shape=(64, 26), dtype=int64, numpy=
array([[0, 0, 0, ..., 0, 1, 0],
       [0, 0, 2, ..., 0, 2, 2],
       [0, 0, 0, ..., 0, 1, 0],
       ...,
       [0, 0, 2, ..., 2, 1, 0],
       [0, 0, 2, ..., 2, 1, 0],
       [1, 0, 2, ..., 0, 2, 2]])>), <tf.Tensor: shape=(64, 26), dtype=int64, numpy=
array([[0, 0, 0, ..., 0, 1, 0],
       [0, 0, 2, ..., 0, 2, 2],
       [0, 0, 0, ..., 0, 1, 0],
       ...,
       [0, 0, 2, ..., 2, 1, 0],
       [0, 0, 2, ..., 2, 1, 0],
       [1, 0, 2, ..., 0, 2, 2]])>)


### Create an MDN based model with pretrained encoder/decoder layers

In [11]:
class RedTSPrediction(tf.keras.Model):
    """MDN head on top of a frozen, pretrained true-state VAE.

    The model receives a pair of integer-coded state tensors (pre, blue),
    one-hot encodes and flattens each of them, maps both through the VAE's
    ``encode``/``reparameterize`` path, passes each latent through a shared
    dense layer, concatenates the two results and feeds them through a wider
    dense layer into an ``MDN.MDN`` layer.

    Args:
        vae_path: Path handed to ``tf.keras.models.load_model`` for the VAE.
        latent_size: ``output_dimension`` of the MDN layer.
        num_mixtures: Number of mixture components of the MDN layer.
        num_status_values: Depth used for one-hot encoding each status value.
        num_status_columns: Number of status columns per state tensor. The
            flattened one-hot width is
            ``num_status_columns * num_status_values`` (78 with the defaults).
    """

    def __init__(self, vae_path, latent_size, num_mixtures,
                 num_status_values=3, num_status_columns=26):
        super().__init__()
        # The VAE is loaded from disk and frozen; only the layers created
        # below contribute trainable variables.
        self.ts_vae = tf.keras.models.load_model(vae_path)
        self.ts_vae.trainable = False
        self.encoder = self.ts_vae.encoder
        self.decoder = self.ts_vae.decoder

        # One-hot geometry, kept in one place instead of repeating literals.
        self.num_status_values = num_status_values
        self.flat_one_hot_size = num_status_columns * num_status_values

        self.ts_dense = tf.keras.layers.Dense(128, activation=tf.nn.relu)
        self.cross_dense = tf.keras.layers.Dense(2048, activation=tf.nn.relu)

        self.mdn = MDN.MDN(output_dimension=latent_size, num_mixtures=num_mixtures)

    def one_hot_flatten(self, ts):
        """One-hot encode integer status codes and flatten them per row.

        Args:
            ts: Integer tensor of status codes.

        Returns:
            Tensor reshaped to ``(-1, flat_one_hot_size)``.
        """
        one_hot = tf.one_hot(ts, self.num_status_values)
        return tf.reshape(one_hot, (-1, self.flat_one_hot_size))

    def encode(self, x):
        """Map a flattened one-hot state to a latent sample from the frozen VAE.

        The latent is drawn through ``ts_vae.reparameterize`` rather than
        taken as the mean, so repeated calls on the same input may differ
        (assuming ``reparameterize`` samples noise; confirm against the VAE).
        """
        mean, logvar = self.ts_vae.encode(x)
        z = self.ts_vae.reparameterize(mean, logvar)
        return z

    def call(self, inputs):
        """Compute the MDN output for a ``(pre_state, blue_state)`` pair.

        Args:
            inputs: Indexable pair; ``inputs[0]`` is the pre state and
                ``inputs[1]`` the blue state, both integer-coded.

        Returns:
            The raw output of the MDN layer.
        """
        pre_ts = inputs[0]
        blue_ts = inputs[1]

        pre_ts_oh = self.one_hot_flatten(pre_ts)
        blue_ts_oh = self.one_hot_flatten(blue_ts)

        # The same dense layer (shared weights) processes both latents.
        pre_ts_encoded = self.ts_dense(self.encode(pre_ts_oh))
        blue_ts_encoded = self.ts_dense(self.encode(blue_ts_oh))

        combined = tf.keras.layers.concatenate([pre_ts_encoded, blue_ts_encoded])

        combined_hidden = self.cross_dense(combined)

        mdn_out = self.mdn(combined_hidden)

        return mdn_out

    def decode(self, latent_pred):
        """Run a latent vector through the frozen VAE decoder."""
        return self.decoder(latent_pred)
        


In [13]:
# def get_mixture_loss_func(output_dim, num_mixes, model_encode):
#     """Construct a loss functions for the MDN layer parametrised by number of mixtures."""
#     # Construct a loss function with the right number of mixtures and outputs
#     def mdn_loss_func(y_true, y_pred):
        
#         encoded_y_true = model_encode(y_true)
        
#         # Reshape inputs in case this is used in a TimeDistributed layer
#         y_pred = tf.reshape(y_pred, [-1, (2 * num_mixes * output_dim) + num_mixes], name='reshape_ypreds')
#         y_true = tf.reshape(encoded_y_true, [-1, output_dim], name='reshape_ytrue')
#         # Split the inputs into parameters
#         out_mu, out_sigma, out_pi = tf.split(y_pred, num_or_size_splits=[num_mixes * output_dim,
#                                                                          num_mixes * output_dim,
#                                                                          num_mixes],
#                                              axis=-1, name='mdn_coef_split')
#         # Construct the mixture models
#         cat = tfd.Categorical(logits=out_pi)
#         component_splits = [output_dim] * num_mixes
#         mus = tf.split(out_mu, num_or_size_splits=component_splits, axis=1)
#         sigs = tf.split(out_sigma, num_or_size_splits=component_splits, axis=1)
#         coll = [tfd.MultivariateNormalDiag(loc=loc, scale_diag=scale) for loc, scale
#                 in zip(mus, sigs)]
#         mixture = tfd.Mixture(cat=cat, components=coll)
#         loss = mixture.log_prob(y_true)
#         loss = tf.negative(loss)
#         loss = tf.reduce_mean(loss)
#         return loss

In [12]:
# Build the predictor on top of the saved VAE found at the given relative path.
red_ts_predictor = RedTSPrediction('models/trueStateVAE_7_L8', latent_size=LATENT_SIZE, num_mixtures=NUMBER_MIXTURES)



### Training loop

In [13]:
# Smoke test: run one test element through the model and show the raw MDN output.
# row[0] is the (pre, blue) input pair; row[1] (the red target) is not used here.
for row in test_dataset.take(1):
#   print(row)
  out = red_ts_predictor(row[0])
  print(out)

  and should_run_async(code)


tf.Tensor(
[[-0.04560362  0.1059904   0.0295794  -0.08081593  0.13998106 -0.20732507
  -0.07886654 -0.06673503 -0.07052223 -0.03862421  0.32742622  0.00316813
   0.1123829   0.00564317 -0.07104158  0.1187254   0.22052075 -0.13135926
  -0.28310847  0.12735707  0.0231918   0.10797485 -0.08647911  0.01596696
  -0.02562372  0.03188619  0.00454077  0.14835453  0.18995728  0.07866741
   0.24032064  0.21614648 -0.0921552  -0.27748036 -0.129478   -0.16879967
  -0.05060436 -0.04307874  0.1046778   0.23691055  0.883837    0.9813433
   1.0224891   1.0195285   1.2468647   1.159755    0.90905154  0.95568764
   0.79917455  0.8934724   0.88116556  0.7583916   1.070764    1.0735124
   0.91242427  1.3468823   0.9979929   0.90690356  1.1202555   0.9944802
   0.93671894  0.9106967   0.89256805  1.1723987   1.1330614   0.9654097
   1.0642014   0.9894095   0.94632083  1.2603776   1.2378305   0.9284266
   1.0535481   1.1399404   1.1566311   0.9621086   0.90264946  1.1852226
   1.1606067   1.2178311  -0.1930

In [31]:
@tf.function
def compute_loss(model, x, y, loss_func):
    """Evaluate the loss between the model output for x and the encoded target y.

    Args:
        model: Object that is callable on ``x`` and exposes ``encode``.
        x: Model input.
        y: Integer-coded target; one-hot encoded with depth 3 and reshaped
            to rows of width 78 before being encoded.
        loss_func: Callable invoked as ``loss_func(encoded_target, prediction)``.

    Returns:
        Whatever ``loss_func`` returns.
    """
    # The forward pass stays first so the op order matches the original trace.
    prediction = model(x)

    target_one_hot = tf.one_hot(y, 3)
    target_flat = tf.reshape(target_one_hot, (-1, 78))
    target_latent = model.encode(target_flat)

    return loss_func(target_latent, prediction)
    

@tf.function
def train_step(model, x, y, optimizer, loss_func):
    """Executes one training step and returns the loss.

    This function computes the loss and gradients, and uses the latter to
    update the model's parameters.

    Args:
        model: Model whose ``trainable_variables`` are updated.
        x: Input passed through to ``compute_loss``.
        y: Target passed through to ``compute_loss``.
        optimizer: Optimizer exposing ``apply_gradients``.
        loss_func: Loss callable passed through to ``compute_loss``.

    Returns:
        The loss computed for this batch, before the parameter update.
    """
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x, y, loss_func)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    # Return the loss as the docstring promises; previously nothing was returned.
    return loss

In [32]:
# The Keras compile/build path below is disabled; training uses the custom
# train_step loop instead.
# red_ts_predictor.compile(loss=get_mixture_loss_func(LATENT_SIZE,NUMBER_MIXTURES,red_ts_predictor.encode), optimizer=tf.keras.optimizers.Adam(),metrics=['mean_squared_error'])
# red_ts_predictor.build(((1,78),(1,78)))
# Print the layer / parameter overview of the (already called) model.
red_ts_predictor.summary()


Model: "red_ts_prediction"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 true_state_vae_4 (TrueState  multiple                 121736094 
 VAE)                                                            
                                                                 
 sequential_8 (Sequential)   (None, 16)                60892016  
                                                                 
 sequential_9 (Sequential)   (None, 78)                60844078  
                                                                 
 dense (Dense)               multiple                  1152      
                                                                 
 dense_1 (Dense)             multiple                  526336    
                                                                 
 mdn (MDN)                   multiple                  174165    
                                                 

In [33]:
# Loss callable built by the project's MDN module for the configured output
# dimension and mixture count (presumably a mixture negative log-likelihood;
# confirm in MDN.get_mixture_loss_func).
loss_func = MDN.get_mixture_loss_func(LATENT_SIZE,NUMBER_MIXTURES)
# Adam with learning rate 1e-4.
optimizer = tf.keras.optimizers.Adam(1e-4)

In [40]:
import time
from IPython import display

EPOCHS = 20
for epoch in range(1, EPOCHS + 1):
    start_time = time.time()
    # Only the first 10 training batches are used per epoch.
    for count, (train_x, train_y) in enumerate(train_dataset.take(10)):
        if (count % 1000) == 0:
            print(f"{count}={count*batch_size} samples")
        train_step(red_ts_predictor, train_x, train_y, optimizer, loss_func)
    end_time = time.time()

    # NOTE: this evaluates on the same 10 *training* batches, not on
    # test_dataset, so it is reported as a train-subset loss rather than a
    # test loss. Iterate over test_dataset here to get a held-out figure.
    epoch_loss = tf.keras.metrics.Mean()
    for eval_x, eval_y in train_dataset.take(10):
        epoch_loss.update_state(
            compute_loss(red_ts_predictor, eval_x, eval_y, loss_func)
        )
    loss = epoch_loss.result()
    display.clear_output(wait=False)
    print('Epoch: {}, Train-subset loss: {}, time elapse for current epoch: {}'
        .format(epoch, loss, end_time - start_time))

Epoch: 20, Test set loss: 4.044805526733398, time elapse for current epoch: 1.1332051753997803


In [34]:
# history = red_ts_predictor.fit(train_dataset, batch_size=64, epochs=1)

ValueError: in user code:

    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1249, in train_function  *
        return step_function(self, iterator)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1233, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1222, in run_step  **
        outputs = model.train_step(data)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 1025, in train_step
        self._validate_target_and_loss(y, loss)
    File "/usr/local/lib/python3.8/dist-packages/keras/engine/training.py", line 989, in _validate_target_and_loss
        raise ValueError(

    ValueError: No loss found. You may have forgotten to provide a `loss` argument in the `compile()` method.


### Test and evaluate