# Tutorial on Particle Smoothing Variational Objectives
## Setup

In [1]:
import os
import warnings

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

np.warnings.filterwarnings('ignore')          # to avoid np deprecation warning
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"      # to avoid lots of log about the device
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'   # hack to avoid OS bug...

In [2]:
# Switch the working directory to the project code folder
# (presumably so that `runner` can be imported in the next cell).
# NOTE: the path is relative, so re-running this cell without restarting the
# kernel resolves '../SMC_dev/' against the already-changed directory.
cwd = '../SMC_dev/'
os.chdir(cwd)

In [3]:
from runner import main

In [4]:
# Show the library versions this tutorial was written against next to the
# versions installed in the running kernel, so mismatches are easy to spot.
version_line = "\t {} version: {}"

print("the code is written in:")
for lib_name, lib_version in (("tensorflow", "1.12.0"),
                              ("tensorflow_probability", "0.5.0")):
    print(version_line.format(lib_name, lib_version))

print("the system uses:")
for lib_name, lib_module in (("tensorflow", tf),
                             ("tensorflow_probability", tfp)):
    print(version_line.format(lib_name, lib_module.__version__))

the code is written in:
	 tensorflow version: 1.12.0
	 tensorflow_probability version: 0.5.0
the system uses:
	 tensorflow version: 1.12.0
	 tensorflow_probability version: 0.5.0


## Flags

### Data

In [5]:
# Where the data set comes from:
#   True:  generate the data set from simulation
#   False: read the data set from the file given by datadir / datadict
generateTrainingData = False

# path of the data set file (relative to the repository directory) and its file name
datadir = '../data/fhn/[1,0]_obs_cov_0.01/'
datadict = 'datadict'

# Was the data pickled in Python 2?
isPython2 = False

### Model Specification

In [6]:
Dx = 2  # dimension of hidden states
Dy = 1  # dimension of observations
poisson_emission = False  # True: emission distribution is Poisson; False: Gaussian

### Training Hyperparameters

In [7]:
n_particles = 32  # number of particles
batch_size = 1  # batch size
lr = 2e-4  # (initial) learning rate; see lr_reduce_* / min_lr for the reduction settings
epoch = 300  # number of epochs
seed = 0  # random seed for np.random and tf

#### Optional Training Hyperparameters

In [8]:
# Only used when the data set is generated from simulation;
# overwritten when the data set is loaded from a file.
time = 200  # number of timesteps for simulated data
n_train = 200 * batch_size  # number of trajectories in the training set
n_test = 40 * batch_size  # number of trajectories in the testing set

In [9]:
# stop training early if validation set does not improve
early_stop_patience = 200

# reduce learning rate when testing loss doesn't improve for some time
lr_reduce_patience = 30

# the factor to reduce lr, new_lr = old_lr * lr_reduce_factor
lr_reduce_factor = 1 / np.sqrt(2)

# minimum lr
min_lr = lr / 10

### Networks

In [10]:
# Feed-Forward Network (FFN), number of units in each hidden layer
# For example, [64, 64] means 2 hidden layers, 64 neurons in each hidden layer
q0_layers = [64]        # proposal initial term q(x_1|y_1) or q(x_1|y_1:T)
q1_layers = [64]        # proposal evolution term q(x_t|x_{t-1}), including backward evolution term q(x_{t-1}|x_t)
q2_layers = [64]        # proposal encoding term q(x_t|y_t) or q(x_t|y_1:T)
f_layers = [64]         # target evolution
g_layers = [64]         # target emission

In [11]:
# Covariance Terms
q0_sigma_init, q0_sigma_min = 5, 1
q1_sigma_init, q1_sigma_min = 5, 1
q2_sigma_init, q2_sigma_min = 5, 1
f_sigma_init, f_sigma_min = 5, 1
g_sigma_init, g_sigma_min = 5, 1

In [12]:
# if q, f and g networks also output covariance (sigma)
output_cov = False

# if networks also output covariance, whether they only output diagonal value of cov matrix
diag_cov = False

In [13]:
# bidirectional RNN, number of units in each LSTM cells
# For example, [32, 32] means a bRNN composed of 2 LSTM cells, 32 units in each cell
y_smoother_Dhs = [32]
X0_smoother_Dhs = [32]

In [14]:
# whether use tf.contrib.rnn.stack_bidirectional_dynamic_rnn or tf.nn.bidirectional_dynamic_rnn for bRNN
# check https://stackoverflow.com/a/50552539 for differences between them
use_stack_rnn = True

# whether use a separate RNN for getting X0
X0_use_separate_RNN = True

### State Space Model Parameters

In [15]:
# whether q1 (evolution term in proposal) and f share the same network
# (Even if use_2_q == True, f and q1 can still use different networks)
use_bootstrap = True

# should q use true_X to sample particles? (useful for debugging)
q_uses_true_X = False

# Does proposal uses two networks q1(x_t|x_t-1) and q2(x_t|y_t)
# if True, q_uses_true_X will be overwritten as False
use_2_q = True

### Parameters for Inference Schemes

In [16]:
# Choose one of the following objectives
PSVO = True      # Particle Smoothing Variational Objective (use Forward Filtering Backward Simulation)
SVO = False      # Smoothing Variational Objective (use proposal based on bRNN)
AESMC = False    # Auto-Encoding Sequential Monte Carlo
IWAE = False     # Importance Weighted Auto-Encoder

#### Optional Parameters for Inference Schemes

In [17]:
# ----------------------- FFBSimulation flags----------------------- #
# number of subparticles sampled when augmenting the trajectory backwards
n_particles_for_BSim_proposal = 16

# whether Backward Simulation proposal use unidirectional RNN or bidirectional RNN (bRNN)
BSim_use_single_RNN = False

### Logging and Data Saving

In [18]:
# frequency to evaluate testing loss & other metrics and save results
print_freq = 1

# whether to save the followings during training
#   hidden trajectories
#   k-step y-hat
save_trajectory = True
save_y_hat = False

# dir to save all results
rslt_dir_name = "FHN"

# number of steps to predict y-hat and calculate R_square
MSE_steps = 30

# lattice shape [# of rows, # of columns] to draw arrows in quiver plot
lattice_shape = [25, 25]

# number of testing data used to save hidden trajectories, y-hat, gradient and etc
# will be clipped by number of testing data
saving_num = 30

# whether to save tensorboard
save_tensorboard = False

# whether to save model
save_model = False

## Pack Flags into `tf.flags`

In [19]:
def _as_comma_separated(values):
    """Return ``values`` as a comma-separated string, e.g. [64, 64] -> "64,64".

    A value that is already a string is returned unchanged, so executing this
    cell again on an already converted setting does not corrupt it (joining the
    characters of "64" would otherwise produce "6,4").
    """
    if isinstance(values, str):
        return values
    return ",".join(str(v) for v in values)


# List-valued settings are handed to the string flags below as comma-separated strings.
q0_layers = _as_comma_separated(q0_layers)
q1_layers = _as_comma_separated(q1_layers)
q2_layers = _as_comma_separated(q2_layers)
f_layers = _as_comma_separated(f_layers)
g_layers = _as_comma_separated(g_layers)
y_smoother_Dhs = _as_comma_separated(y_smoother_Dhs)
X0_smoother_Dhs = _as_comma_separated(X0_smoother_Dhs)
lattice_shape = _as_comma_separated(lattice_shape)

# NOTE(review): absl normally rejects redefining an existing flag, so the
# DEFINE_* calls below are expected to run once per kernel session - confirm.
flags = tf.app.flags

# --------------------- Training Hyperparameters --------------------- #

flags.DEFINE_integer("Dx", Dx, "dimension of hidden states")
flags.DEFINE_integer("Dy", Dy, "dimension of observations")

flags.DEFINE_integer("n_particles", n_particles, "number of particles")
flags.DEFINE_integer("batch_size", batch_size, "batch size")
flags.DEFINE_float("lr", lr, "learning rate")
flags.DEFINE_integer("epoch", epoch, "number of epoch")

flags.DEFINE_integer("seed", seed, "random seed for np.random and tf")


# ------------------------------- Data ------------------------------- #

flags.DEFINE_boolean("generateTrainingData", generateTrainingData, "True: generate data set from simulation; "
                                                                   "False: read data set from the file")
flags.DEFINE_string("datadir", datadir, "path of the data set file relative to the repository directory")
flags.DEFINE_string("datadict", datadict, "name of the data set file")
flags.DEFINE_boolean("isPython2", isPython2, "Was the data pickled in python 2?")


flags.DEFINE_integer("time", time, "number of timesteps for simulated data")
flags.DEFINE_integer("n_train", n_train, "number of trajectories for training set")
flags.DEFINE_integer("n_test", n_test, "number of trajectories for testing set")


# ------------------------ Networks parameters ----------------------- #
# Feed-Forward Network (FFN) architectures
flags.DEFINE_string("q0_layers", q0_layers, "architecture for q0 network, int separated by comma, "
                                            "for example: '50,50' ")
flags.DEFINE_string("q1_layers", q1_layers, "architecture for q1 network, int separated by comma, "
                                            "for example: '50,50' ")
flags.DEFINE_string("q2_layers", q2_layers, "architecture for q2 network, int separated by comma, "
                                            "for example: '50,50' ")
flags.DEFINE_string("f_layers",  f_layers,  "architecture for f network, int separated by comma, "
                                            "for example: '50,50' ")
flags.DEFINE_string("g_layers",  g_layers,  "architecture for g network, int separated by comma, "
                                            "for example: '50,50' ")

flags.DEFINE_float("q0_sigma_init", q0_sigma_init, "initial value of q0_sigma")
flags.DEFINE_float("q1_sigma_init", q1_sigma_init, "initial value of q1_sigma")
flags.DEFINE_float("q2_sigma_init", q2_sigma_init, "initial value of q2_sigma")
flags.DEFINE_float("f_sigma_init",  f_sigma_init,  "initial value of f_sigma")
flags.DEFINE_float("g_sigma_init",  g_sigma_init,  "initial value of g_sigma")

flags.DEFINE_float("q0_sigma_min", q0_sigma_min, "minimal value of q0_sigma")
flags.DEFINE_float("q1_sigma_min", q1_sigma_min, "minimal value of q1_sigma")
flags.DEFINE_float("q2_sigma_min", q2_sigma_min, "minimal value of q2_sigma")
flags.DEFINE_float("f_sigma_min",  f_sigma_min,  "minimal value of f_sigma")
flags.DEFINE_float("g_sigma_min",  g_sigma_min,  "minimal value of g_sigma")

flags.DEFINE_boolean("output_cov", output_cov, "whether q, f and g networks also output covariance (sigma)")
flags.DEFINE_boolean("diag_cov", diag_cov, "whether the networks only output diagonal value of cov matrix")

# bidirectional RNN
flags.DEFINE_string("y_smoother_Dhs", y_smoother_Dhs, "number of units for y_smoother bidirectional RNNs, "
                                                      "int separated by comma")
flags.DEFINE_string("X0_smoother_Dhs", X0_smoother_Dhs, "number of units for X0_smoother bidirectional RNNs, "
                                                        "int separated by comma")
flags.DEFINE_boolean("X0_use_separate_RNN", X0_use_separate_RNN, "whether use a separate RNN for getting X0")
flags.DEFINE_boolean("use_stack_rnn", use_stack_rnn, "whether use tf.contrib.rnn.stack_bidirectional_dynamic_rnn "
                                                     "or tf.nn.bidirectional_dynamic_rnn")

# ------------------------ State Space Model ------------------------- #
flags.DEFINE_boolean("use_bootstrap", use_bootstrap, "whether q1 and f share the same network, "
                                                     "(ATTENTION: even if use_2_q == True, "
                                                     "f and q1 can still use different networks)")
flags.DEFINE_boolean("q_uses_true_X", q_uses_true_X, "whether q1 uses true hidden states to sample")
flags.DEFINE_boolean("use_2_q", use_2_q, "whether q uses two networks q1(x_t|x_t-1) and q2(x_t|y_t), "
                                         "if True, q_uses_true_X will be overwritten as False")
flags.DEFINE_boolean("poisson_emission", poisson_emission, "whether emission uses Poisson distribution")

# ------------------------- Inference Schemes ------------------------ #

flags.DEFINE_boolean("PSVO", PSVO, "Particle Smoothing Variational Objective (use Forward Filtering Backward Simulation)")
flags.DEFINE_boolean("SVO", SVO, "Smoothing Variational Objective (use proposal based on bRNN)")
flags.DEFINE_boolean("AESMC", AESMC, "Auto-Encoding Sequential Monte Carlo")
flags.DEFINE_boolean("IWAE", IWAE, "Importance Weighted Auto-Encoder")

flags.DEFINE_integer("n_particles_for_BSim_proposal", n_particles_for_BSim_proposal, "number of particles used for"
                                                                                     " each trajectory in "
                                                                                     "backward simulation proposal")
flags.DEFINE_boolean("BSim_use_single_RNN", BSim_use_single_RNN, "whether Backward Simulation proposal "
                                                                 "use unidirectional RNN or bidirectional RNN")

# ----------------------------- Training ----------------------------- #

flags.DEFINE_integer("early_stop_patience", early_stop_patience,
                     "stop training early if validation set does not improve for certain epochs")

flags.DEFINE_integer("lr_reduce_patience", lr_reduce_patience,
                     "reduce learning rate when testing loss doesn't improve for some time")
flags.DEFINE_float("lr_reduce_factor", lr_reduce_factor,
                   "the factor to reduce learning rate, new_lr = old_lr * lr_reduce_factor")
flags.DEFINE_float("min_lr", min_lr, "minimum learning rate")

# --------------------- printing and data saving params --------------------- #

flags.DEFINE_integer("print_freq", print_freq, "frequency to evaluate testing loss & other metrics and save results")

flags.DEFINE_boolean("save_trajectory", save_trajectory, "whether to save hidden trajectories during training")
flags.DEFINE_boolean("save_y_hat", save_y_hat, "whether to save k-step y-hat during training")

flags.DEFINE_string("rslt_dir_name", rslt_dir_name, "dir to save all results")
flags.DEFINE_integer("MSE_steps", MSE_steps, "number of steps to predict y-hat and calculate R_square")

flags.DEFINE_string("lattice_shape", lattice_shape, "lattice shape [# of rows, # of columns] "
                                                    "to draw arrows in quiver plot")

flags.DEFINE_integer("saving_num", saving_num, "number of testing data used to "
                                               "save hidden trajectories, y-hat, gradient and etc, "
                                               "will be clipped by number of testing data")

flags.DEFINE_boolean("save_tensorboard", save_tensorboard, "whether to save tensorboard")
flags.DEFINE_boolean("save_model", save_model, "whether to save model")

# Handle through which the values defined above are read.
FLAGS = flags.FLAGS

In [20]:
# Parse the flags defined above and start the experiment. The entry point is
# passed explicitly: it is the `main` imported from `runner`, which
# tf.app.run() would otherwise look up implicitly as `__main__.main`.
tf.app.run(main=main)

finished preparing dataset
Experiment_params:
	AESMC: False
	BSim_use_single_RNN: False
	Dx: 2
	Dy: 1
	IWAE: False
	MSE_steps: 30
	PSVO: True
	SVO: False
	X0_smoother_Dhs: 32
	X0_use_separate_RNN: True
	batch_size: 1
	datadict: datadict
	datadir: ../data/fhn/[1,0]_obs_cov_0.01/
	diag_cov: False
	early_stop_patience: 200
	epoch: 300
	f_layers: 64
	f_sigma_init: 5.0
	f_sigma_min: 1.0
	g_layers: 64
	g_sigma_init: 5.0
	g_sigma_min: 1.0
	generateTrainingData: False
	isPython2: False
	lattice_shape: 25,25
	lr: 0.0002
	lr_reduce_factor: 0.7071067811865475
	lr_reduce_patience: 30
	min_lr: 2e-05
	n_particles: 32
	n_particles_for_BSim_proposal: 16
	n_test: 40
	n_train: 200
	output_cov: False
	poisson_emission: False
	print_freq: 1
	q0_layers: 64
	q0_sigma_init: 5.0
	q0_sigma_min: 1.0
	q1_layers: 64
	q1_sigma_init: 5.0
	q1_sigma_min: 1.0
	q2_layers: 64
	q2_sigma_init: 5.0
	q2_sigma_min: 1.0
	q_uses_true_X: False
	rslt_dir_name: FHN
	save_model: False
	save_tensorboard: False
	save_trajectory: Tru

epoch 7    took 255.844 seconds

iter 8
Train log_ZSMC: -1501.723, valid log_ZSMC: -1501.850
Train, Valid k-step Rsq:
 [-2.89366736e-03 -8.99893732e-05 -3.97500893e-04 -5.22809040e-04
 -4.77879004e-04 -6.03747407e-04 -7.36115423e-04 -8.63709090e-04
 -1.01634251e-03 -1.19371335e-03 -1.39289007e-03 -1.60777842e-03
 -1.84054816e-03 -2.07949705e-03 -2.32226678e-03 -2.56926677e-03
 -2.81282478e-03 -3.05030061e-03 -3.27362747e-03 -3.48735481e-03
 -3.67723747e-03 -3.84133278e-03 -3.97519687e-03 -4.07357011e-03
 -4.11604965e-03 -4.10575458e-03 -4.04322612e-03 -3.93150699e-03
 -3.76152462e-03 -3.56409588e-03 -3.33312321e-03] 
 [-0.00237631 -0.00011637 -0.00013982 -0.00011454 -0.00011588 -0.00017971
 -0.00025499 -0.00034995 -0.00047209 -0.00062765 -0.00081057 -0.00102029
 -0.00125891 -0.00153031 -0.00182338 -0.002124   -0.00243353 -0.00273043
 -0.00301837 -0.00327723 -0.00349073 -0.00364639 -0.00374755 -0.0037797
 -0.00376452 -0.00368942 -0.00356608 -0.00337773 -0.00313227 -0.0028768
 -0.0025806

epoch 15   took 257.233 seconds

iter 16
Train log_ZSMC: -1461.818, valid log_ZSMC: -1461.875
Train, Valid k-step Rsq:
 [-1.08325931e-03 -4.13069368e-04 -9.39538062e-05  3.78785268e-05
 -7.26834570e-06 -1.73589727e-05 -3.61734510e-05 -6.83177063e-05
 -1.16581048e-04 -1.81359613e-04 -2.63392068e-04 -3.60922651e-04
 -4.75185725e-04 -6.00103226e-04 -7.33637801e-04 -8.75420765e-04
 -1.02030318e-03 -1.16577520e-03 -1.30609300e-03 -1.44315731e-03
 -1.56713928e-03 -1.67577724e-03 -1.76544978e-03 -1.83182301e-03
 -1.86082966e-03 -1.85427671e-03 -1.81239460e-03 -1.73768148e-03
 -1.62497944e-03 -1.49574335e-03 -1.34714959e-03] 
 [-1.22564450e-03 -2.80345542e-04 -2.52963781e-04 -1.05066896e-04
 -8.42258218e-05 -5.55519813e-05 -2.65821325e-05 -5.74213820e-06
 -4.14300528e-07 -1.57668933e-05 -5.45966504e-05 -1.17968626e-04
 -2.07441583e-04 -3.25339610e-04 -4.67233051e-04 -6.24951365e-04
 -7.97579013e-04 -9.71031388e-04 -1.14544529e-03 -1.30683673e-03
 -1.44265451e-03 -1.54321249e-03 -1.60916911e-03

epoch 23   took 256.665 seconds

iter 24
Train log_ZSMC: -1419.332, valid log_ZSMC: -1417.838
Train, Valid k-step Rsq:
 [-1.21434592e-03 -8.82003325e-04 -2.69224677e-04 -1.19608742e-05
 -1.63345091e-04 -1.75710003e-04 -2.34996730e-04 -3.08904068e-04
 -4.04162912e-04 -5.18860449e-04 -6.52470824e-04 -8.01725187e-04
 -9.68284096e-04 -1.14354325e-03 -1.32531960e-03 -1.51360770e-03
 -1.70212311e-03 -1.88830162e-03 -2.06537797e-03 -2.23640786e-03
 -2.38961409e-03 -2.52285842e-03 -2.63215567e-03 -2.71272756e-03
 -2.74773567e-03 -2.73957940e-03 -2.68858499e-03 -2.59751987e-03
 -2.45947204e-03 -2.30007380e-03 -2.11505878e-03] 
 [-1.44153390e-03 -5.30650077e-04 -5.17343714e-04  2.16703531e-04
 -3.20223248e-05 -4.41792828e-06 -1.64227870e-05 -4.81448893e-05
 -9.90408618e-05 -1.76347610e-04 -2.78823220e-04 -4.06836279e-04
 -5.62189743e-04 -7.47827975e-04 -9.56434238e-04 -1.17723351e-03
 -1.41028927e-03 -1.63822870e-03 -1.86274198e-03 -2.06715806e-03
 -2.23727066e-03 -2.36212250e-03 -2.44361920e-03

epoch 32   took 253.687 seconds

iter 33
Train log_ZSMC: -1365.418, valid log_ZSMC: -1364.786
Train, Valid k-step Rsq:
 [ 0.00076815 -0.00934656 -0.00228258 -0.00732654 -0.00625411 -0.00644296
 -0.00701895 -0.00727458 -0.00775042 -0.00824204 -0.00874263 -0.00927956
 -0.0098275  -0.0103712  -0.01090521 -0.01143081 -0.01193473 -0.01241406
 -0.01285439 -0.01326796 -0.0136286  -0.01393598 -0.01418342 -0.01436409
 -0.01444015 -0.01441811 -0.01430032 -0.0140908  -0.01376989 -0.01339279
 -0.01294412] 
 [ 0.00317701 -0.00785078 -0.0006808  -0.00591973 -0.00457159 -0.00471802
 -0.00530371 -0.00557191 -0.00606997 -0.00660739 -0.00716036 -0.00776616
 -0.00839923 -0.00908157 -0.00977898 -0.01046147 -0.01113631 -0.01176338
 -0.01235519 -0.01287431 -0.01329611 -0.01359924 -0.01379481 -0.01385783
 -0.0138285  -0.01368329 -0.01344339 -0.01307353 -0.01258275 -0.01205976
 -0.01143936]
best valid cost on iter: 33

epoch 33   took 254.768 seconds

iter 34
Train log_ZSMC: -1357.821, valid log_ZSMC: -1357.6

KeyboardInterrupt: 