## True State VAE Scalar

Trying a version of the VAE where category ids 1-3 are scaled into (0, 1) using (id - 0.5) / 3.
I wonder whether this may work, because the categories are essentially ordinal:
    (unknown => known => scanned)
    (no_access => user => privileged)
This could work better because the VAE is designed for scalar values (images), and I could not get the multi-task categorical cross-entropy to work in the one-hot VAE attempt.

In [1]:
!pip install tensorflow-probability
from IPython import display

import glob
import imageio
import matplotlib.pyplot as plt
import numpy as np
import PIL
import tensorflow as tf
import tensorflow_probability as tfp
import time

from ray.rllib.offline.json_reader import JsonReader
import numpy_indexed as npi
import pandas as pd
from true_state_viewer import TrueStateTreeGraphViz, display_tree_pairs

Collecting tensorflow-probability
  Downloading tensorflow_probability-0.19.0-py2.py3-none-any.whl (6.7 MB)
[K     |████████████████████████████████| 6.7 MB 38.4 MB/s eta 0:00:01
Installing collected packages: tensorflow-probability
Successfully installed tensorflow-probability-0.19.0
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [38]:
# Code from https://www.tensorflow.org/tutorials/generative/cvae
class TrueStateVAE(tf.keras.Model):
  """Fully connected variational autoencoder for scaled true-state vectors.

  Adapted from the TensorFlow CVAE tutorial, with dense layers in place of
  the convolutional stacks. The encoder maps an input vector to the
  concatenated mean and log-variance of a diagonal Gaussian over the latent
  space; the decoder maps a latent vector back to one logit per input
  feature.

  Args:
    latent_dim: Size of the latent vector.
    input_dim: Number of features in each input vector. Defaults to 26,
      the width used throughout this notebook.
  """

  def __init__(self, latent_dim, input_dim=26):
    super(TrueStateVAE, self).__init__()
    self.latent_dim = latent_dim
    self.encoder = tf.keras.Sequential(
        [
            # input_shape must be a tuple; "(26)" without the trailing comma
            # is just the integer 26.
            tf.keras.layers.InputLayer(input_shape=(input_dim,)),
            tf.keras.layers.Dense(512, activation="relu"),
            tf.keras.layers.Dense(256, activation="relu"),
            tf.keras.layers.Dense(128, activation="relu"),
            # No activation: the output is split into mean and log-variance.
            tf.keras.layers.Dense(latent_dim + latent_dim)
        ]
    )

    self.decoder = tf.keras.Sequential(
        [
            tf.keras.layers.InputLayer(input_shape=(latent_dim,)),
            tf.keras.layers.Dense(128, activation="relu"),
            tf.keras.layers.Dense(256, activation="relu"),
            tf.keras.layers.Dense(512, activation="relu"),
            # No activation: raw logits, one per input feature.
            tf.keras.layers.Dense(input_dim)
        ]
    )

  @tf.function
  def sample(self, eps=None):
    """Decodes latent vectors into values in (0, 1).

    Args:
      eps: Latent vectors to decode. When None, 100 vectors are drawn from a
        standard normal distribution.

    Returns:
      The decoder output with a sigmoid applied.
    """
    if eps is None:
      eps = tf.random.normal(shape=(100, self.latent_dim))
    return self.decode(eps, apply_sigmoid=True)

  def encode(self, x):
    """Returns the (mean, logvar) halves of the encoder output for `x`."""
    mean, logvar = tf.split(self.encoder(x), num_or_size_splits=2, axis=1)
    return mean, logvar

  def reparameterize(self, mean, logvar):
    """Draws z = mean + eps * exp(logvar / 2) with eps ~ N(0, I).

    The noise shape is taken from tf.shape(mean) rather than the static
    mean.shape so this also works when the batch dimension is unknown at
    trace time.
    """
    eps = tf.random.normal(shape=tf.shape(mean))
    return eps * tf.exp(logvar * .5) + mean

  def decode(self, z, apply_sigmoid=False):
    """Runs the decoder on `z`.

    Args:
      z: Latent vectors.
      apply_sigmoid: When True, return sigmoid(logits) instead of logits.

    Returns:
      Decoder logits, or their sigmoid when `apply_sigmoid` is True.
    """
    logits = self.decoder(z)
    if apply_sigmoid:
      probs = tf.sigmoid(logits)
      return probs
    return logits

  and should_run_async(code)


In [39]:
# Adam with a fixed learning rate of 1e-4; passed to train_step on every batch.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)


def log_normal_pdf(sample, mean, logvar, raxis=1):
  """Log-density of `sample` under a diagonal Gaussian, summed over `raxis`.

  Args:
    sample: Points at which to evaluate the density.
    mean: Mean of the Gaussian (broadcast against `sample`).
    logvar: Log-variance of the Gaussian (broadcast against `sample`).
    raxis: Axis over which the per-dimension log-densities are summed.

  Returns:
    The summed log-density, with `raxis` reduced away.
  """
  log_two_pi = tf.math.log(2. * np.pi)
  squared_error = (sample - mean) ** 2.
  # Per-dimension term: -0.5 * ((x - mu)^2 / sigma^2 + log sigma^2 + log 2*pi).
  per_dimension = -.5 * (squared_error * tf.exp(-logvar) + logvar + log_two_pi)
  return tf.reduce_sum(per_dimension, axis=raxis)


def compute_loss(model, x):
  """Single-sample Monte Carlo estimate of the negative ELBO for batch `x`.

  Args:
    model: Object exposing encode(x), reparameterize(mean, logvar) and
      decode(z) returning logits.
    x: Batch of input vectors; also used as the (soft) targets of the
      per-feature sigmoid cross-entropy.

  Returns:
    Scalar: minus the batch mean of log p(x|z) + log p(z) - log q(z|x).
  """
  mean, logvar = model.encode(x)
  z = model.reparameterize(mean, logvar)
  reconstruction_logits = model.decode(z)

  # Reconstruction term: per-feature cross-entropy summed over features.
  per_feature_ce = tf.nn.sigmoid_cross_entropy_with_logits(
      labels=x, logits=reconstruction_logits)
  logpx_z = -tf.reduce_sum(per_feature_ce, axis=[1])

  # Standard-normal prior versus the encoder's posterior, both evaluated at z.
  logpz = log_normal_pdf(z, 0., 0.)
  logqz_x = log_normal_pdf(z, mean, logvar)

  per_example_elbo = logpx_z + logpz - logqz_x
  return -tf.reduce_mean(per_example_elbo)


@tf.function
def train_step(model, x, optimizer):
  """Executes one training step and returns the loss.

  This function computes the loss and gradients, and uses the latter to
  update the model's parameters.

  Args:
    model: Model passed to compute_loss; its trainable_variables are updated.
    x: Batch of training inputs.
    optimizer: Optimizer used to apply the gradients.

  Returns:
    The scalar loss computed for this batch, before the update is applied.
  """
  with tf.GradientTape() as tape:
    loss = compute_loss(model, x)
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))
  # The docstring always promised the loss; previously nothing was returned.
  return loss

In [40]:
# Seed the NumPy and TensorFlow global generators before the model is built so
# weight initialisation, dataset shuffling and latent sampling are repeatable
# after a kernel restart.
SEED = 200
np.random.seed(SEED)
tf.random.set_seed(SEED)

epochs = 50
# Dimensionality of the latent space.
latent_dim = 4
true_state_model = TrueStateVAE(latent_dim)

# Fraction of rows used for training; the remainder is held out for testing.
train_test_split = 0.98
batch_size = 128

In [41]:
# Load the three recorded true-state logs.
meander = pd.read_csv('csv_data/TrueStates_200_4000_Meander.csv')
bline = pd.read_csv('csv_data/TrueStates_1221_4000_B_Line.csv')
badbluemeander = pd.read_csv('csv_data/TrueStates_200_4000_Meander_badblue.csv')

# Stack the logs and drop exact duplicate rows.
# NOTE(review): drop_duplicates() compares every column; if the CSVs carry an
# index or step column, identical states will not be merged - confirm.
dataset = pd.concat(
    [meander, bline, badbluemeander], ignore_index=True
).drop_duplicates()

# Row counts, in order: meander, bline, badbluemeander, combined and de-duplicated.
for frame in (meander, bline, badbluemeander, dataset):
    print(f"number of rows = {frame.shape[0]}")

number of rows = 4445
number of rows = 305
number of rows = 7972
number of rows = 12716


In [42]:
# Level names in ascending ordinal order; level k (1-based) becomes (k - 0.5) / 3.
KNOWN_LEVELS = ("unknown", "known", "scanned")
ACCESS_LEVELS = ("none", "user", "privileged")


def _ordinal_scalar(frame, prefix, levels):
    """Collapse the three `{prefix}_{level}` columns of `frame` into one scalar column.

    Presumably the three columns are a 0/1 one-hot encoding, so the weighted
    sum recovers the 1-based level id - verify against the CSV schema.
    """
    low, mid, high = (frame[f"{prefix}_{level}"] for level in levels)
    return (low + (mid * 2) + (high * 3) - 0.5) / 3


# Build the columns in the order 0_known, 0_access, 1_known, 1_access, ...
scaled_columns = {}
for idx in range(13):
    scaled_columns[f"{idx}_known"] = _ordinal_scalar(dataset, idx, KNOWN_LEVELS)
    scaled_columns[f"{idx}_access"] = _ordinal_scalar(dataset, idx, ACCESS_LEVELS)
new_df = pd.DataFrame(scaled_columns)

# NOTE: this rebinds `dataset`, so the cell cannot be re-run without first
# re-running the loading cell (the source columns no longer exist afterwards).
dataset = new_df

  and should_run_async(code)


In [43]:
# Display the first rows of `dataset` (head() shows five rows by default).
dataset.head()

  and should_run_async(code)


Unnamed: 0,0_known,0_access,1_known,1_access,2_known,2_access,3_known,3_access,4_known,4_access,...,8_known,8_access,9_known,9_access,10_known,10_access,11_known,11_access,12_known,12_access
0,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,...,0.5,0.833333,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667
1,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,...,0.5,0.833333,0.5,0.166667,0.5,0.166667,0.5,0.166667,0.5,0.166667
2,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,...,0.5,0.833333,0.833333,0.166667,0.5,0.166667,0.5,0.166667,0.5,0.166667
3,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,...,0.833333,0.833333,0.833333,0.166667,0.5,0.166667,0.5,0.166667,0.5,0.166667
4,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,0.166667,...,0.833333,0.833333,0.833333,0.166667,0.5,0.166667,0.5,0.166667,0.833333,0.166667


In [44]:
# Draw a reproducible random subset of rows for training (fraction given by
# train_test_split); every row not drawn becomes the test set.
train_df = dataset.sample(frac=train_test_split, random_state=200)
test_df = dataset.drop(index=train_df.index)

# Also used as the shuffle buffer size when building the tf.data pipelines.
train_size = len(train_df)

  and should_run_async(code)


In [45]:
def _shuffled_batches(frame, rows_per_batch):
  """Wrap every row of `frame` in a tf.data pipeline, shuffled and then batched.

  The shuffle buffer is `train_size` for both splits, as in the original cell.
  """
  rows = tf.data.Dataset.from_tensor_slices(frame.values)
  return rows.shuffle(train_size).batch(rows_per_batch)


train_dataset = _shuffled_batches(train_df, batch_size)
# The test split is evaluated one row at a time.
test_dataset = _shuffled_batches(test_df, 1)

In [46]:

for epoch in range(1, epochs + 1):
  # --- optimisation pass over the training batches (timed) ---
  start_time = time.time()
  for train_x in train_dataset:
    train_step(true_state_model, tf.cast(train_x, tf.float32), optimizer)
  end_time = time.time()

  # --- evaluation pass: average the loss over the held-out rows ---
  loss = tf.keras.metrics.Mean()
  for test_x in test_dataset:
    loss(compute_loss(true_state_model, tf.cast(test_x, tf.float32)))
  # compute_loss returns the negative ELBO, so flip the sign for reporting.
  elbo = -loss.result()

  # Replace the previous epoch's line instead of accumulating output.
  display.clear_output(wait=False)
  elapsed = end_time - start_time
  print('Epoch: {}, Test set ELBO: {}, time elapse for current epoch: {}'
        .format(epoch, elbo, elapsed))

Epoch: 50, Test set ELBO: -14.92189884185791, time elapse for current epoch: 0.26542139053344727


In [47]:
def _scaled_to_one_hot(scaled):
  """Convert scaled ordinal values back into one-hot rows of width 3.

  Inverts scaled = (id - 0.5) / 3 to recover the 1-based category id, shifts
  it to a 0-based index and one-hot encodes it. The result is reshaped to
  (-1, 2, 3), i.e. consecutive pairs of features with three levels each.
  """
  category_idx = np.rint(scaled * 3 + 0.5).reshape(-1).astype(np.int64) - 1
  # A saturated sigmoid output of exactly 0.0 rounds to index -1, which
  # np.eye(3)[-1] would silently wrap to the highest category, and exactly
  # 1.0 rounds to index 3, which raises IndexError. Clamp to the valid range.
  category_idx = np.clip(category_idx, 0, 2)
  return np.eye(3)[category_idx].reshape(-1, 2, 3)


def get_state_pred_pair(model, state):
  """Reconstruct `state` with `model` and return both as one-hot arrays.

  The state is encoded, a latent vector is sampled through
  model.reparameterize (so the reconstruction is stochastic), and the sample
  is decoded with model.sample.

  Args:
    model: Object exposing encode, reparameterize and sample.
    state: Batch of scaled state vectors; cast to float32 before encoding.

  Returns:
    A (state_oh, pred_oh) tuple of NumPy arrays, each of shape (-1, 2, 3).
  """
  state = tf.cast(state, tf.float32)
  mean, logvar = model.encode(state)
  z = model.reparameterize(mean, logvar)
  predictions = model.sample(z)

  state_oh = _scaled_to_one_hot(state.numpy())
  pred_oh = _scaled_to_one_hot(predictions.numpy())
  return state_oh, pred_oh

In [50]:
# Score reconstructions one row at a time over the *training* rows.
train_dataset2 = tf.data.Dataset.from_tensor_slices(train_df.values).shuffle(train_size).batch(1)

total_matches = 0      # rows reconstructed with no category wrong
total = 0              # rows evaluated
nodes = 0              # one-hot entries compared
sum_diffs_sqrd = 0     # each wrong category adds 2 (one entry lost, one gained)
state_pred_pairs = []
state_pred_pair_tree_vis = []
for total, test_x in enumerate(train_dataset2, start=1):
    state_oh, pred_oh = get_state_pred_pair(true_state_model, test_x)

    state_pred_pairs.append([state_oh, pred_oh])
    state_pred_pair_tree_vis.append(
        [TrueStateTreeGraphViz(state_oh), TrueStateTreeGraphViz(pred_oh)])

    diffs = np.rint(state_oh) - np.rint(pred_oh)
    nodes += diffs.size
    diffs_sqrd = np.sum(diffs * diffs)
    sum_diffs_sqrd += diffs_sqrd
    if diffs_sqrd == 0:
        total_matches += 1

# "percentage wrong" is printed as a fraction: wrong categories
# (sum_diffs_sqrd / 2) over categorical features compared (nodes / 3).
print(f"accuracy = {total_matches}/{total} = {total_matches/total}, \nmean of squared diffs = {sum_diffs_sqrd}/{nodes}={sum_diffs_sqrd/nodes}\npercentage wrong = ({sum_diffs_sqrd}/{2})/({nodes}/{3})={(sum_diffs_sqrd/2)/(nodes/3)}")
display_tree_pairs(state_pred_pair_tree_vis)


accuracy = 0/12462 = 0.0, 
mean of squared diffs = 261250.0/972036=0.268765765876984
percentage wrong = (261250.0/2)/(972036/3)=0.40314864881547596
12462


object.__init__() takes exactly one argument (the instance to initialize)
This is deprecated in traitlets 4.2.This error will be raised in a future release of traitlets.
  super().__init__(**kwargs)


HBox(children=(Button(description='<<', style=ButtonStyle()), Button(description='>>', style=ButtonStyle()), I…

HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01\xfa\x00\x00\x01[\x08\x02\x00\x00\…