In [1]:
import tensorflow as tf
import tensorflow_probability as tfp
import tensorflow.math as tm
import numpy as np
import time
import matplotlib.pyplot as plt

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import initializers
from tensorflow.keras import Model
from tensorflow.keras import models
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

In [2]:
def convert2_zero_one(x):
    
    t = [tf.math.sigmoid(i) for i in x]    
    return t

def cont_bern_log_norm(lam, l_lim=0.49, u_lim=0.51):
    '''
    computes the log normalizing constant of a continuous Bernoulli distribution in a numerically stable way.
    returns the log normalizing constant for lam in (0, l_lim) U (u_lim, 1) and a Taylor approximation in
    [l_lim, u_lim].
    cut_y below might appear useless, but it is important to not evaluate log_norm near 0.5 as tf.where evaluates
    both options, regardless of the value of the condition.
    '''
    
    cut_lam = tf.where(tm.logical_or(tm.less(lam, l_lim), tm.greater(lam, u_lim)), lam, l_lim * tf.ones_like(lam))
    log_norm = tm.log(tm.abs(2.0 * tm.atanh(1 - 2.0 * cut_lam))) - tm.log(tm.abs(1 - 2.0 * cut_lam))
    taylor = tm.log(2.0) + 4.0 / 3.0 * tm.pow(lam - 0.5, 2) + 104.0 / 45.0 * tm.pow(lam - 0.5, 4)
    return tf.where(tm.logical_or(tm.less(lam, l_lim), tm.greater(lam, u_lim)), log_norm, taylor)

In [3]:
class StochasticMLP(Model):
    
    def __init__(self, hidden_layer_sizes=[100], n_outputs=10, lr=1e-3):
        super(StochasticMLP, self).__init__()
        self.hidden_layer_sizes = hidden_layer_sizes
        self.fc_layers = [Dense(layer_size) for layer_size in hidden_layer_sizes]
        self.output_layer = Dense(n_outputs)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate = lr)
        
    def call(self, x):
        
        network = []
        
        for i, layer in enumerate(self.fc_layers):
            
            logits = layer(x)
            x = tfp.distributions.Bernoulli(logits=logits).sample()
            network.append(x)

        final_logits = self.output_layer(x) # initial the weight of output layer
            
        return network
    
    def target_log_prob(self, x, h, y, is_gibbs = False, is_hmc = False):
        
        # get current state
        if is_hmc:
            h_current = tf.split(h, self.hidden_layer_sizes, axis = 1)
        else:    
            h_current = [tf.cast(h_i, dtype=tf.float32) for h_i in h]
        h_current = convert2_zero_one(h_current)
        h_previous = [x] + h_current[:-1]
    
        nlog_prob = 0. # negative log probability
        
        for i, (cv, pv, layer) in enumerate(zip(h_current, h_previous, self.fc_layers)):
            
            logits = layer(pv)
            ce = tf.nn.sigmoid_cross_entropy_with_logits(labels = cv, logits = logits)
            if not is_gibbs:
                ce += cont_bern_log_norm(tf.nn.sigmoid(logits))
            
            nlog_prob += tf.reduce_sum(ce, axis = -1)
        
        fce = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.cast(y, tf.float32), logits=self.output_layer(h_current[-1]))
        nlog_prob += tf.reduce_sum(fce, axis = -1)
            
        return -1 * nlog_prob
    
    def gibbs_new_state(self, x, h, y):
        
        '''
            generate a new state for the network node by node in Gibbs setting.
        '''
        
        h_current = h
        h_current = [tf.cast(h_i, dtype=tf.float32) for h_i in h_current]
        
        in_layers = self.fc_layers
        out_layers = self.fc_layers[1:] + [self.output_layer]
        
        prev_vals = [x] + h_current[:-1]
        curr_vals = h_current
        next_vals = h_current[1:] + [y]
        
        for i, (in_layer, out_layer, pv, cv, nv) in enumerate(zip(in_layers, out_layers, prev_vals, curr_vals, next_vals)):

            # node by node
            
            nodes = tf.transpose(cv)
            prob_parents = tm.sigmoid(in_layer(pv))
            
            out_layer_weights = out_layer.get_weights()[0]
            
            next_logits = out_layer(cv)
            
            new_layer = []
            
            for j, node in enumerate(nodes):
                
                # get info for current node (i, j)
                
                prob_parents_j = prob_parents[:, j]
                out_layer_weights_j = out_layer_weights[j]
                
                # calculate logits and logprob for node is 0 or 1
                next_logits_if_node_0 = next_logits[:, :] - node[:, None] * out_layer_weights_j[None, :]
                next_logits_if_node_1 = next_logits[:, :] + (1 - node[:, None]) * out_layer_weights_j[None, :]
                
                #print(next_logits_if_node_0, next_logits_if_node_1)
                
                logprob_children_if_node_0 = -1 * tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.cast(nv, dtype = tf.float32), logits=next_logits_if_node_0), axis = -1)
                
                logprob_children_if_node_1 = -1 * tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.cast(nv, dtype = tf.float32), logits=next_logits_if_node_1), axis = -1)
                
                # calculate prob for node (i, j)
                prob_0 = (1 - prob_parents_j) * tm.exp(logprob_children_if_node_0)
                prob_1 = prob_parents_j * tm.exp(logprob_children_if_node_1)
                prob_j = prob_1 / (prob_1 + prob_0)
            
                # sample new state with prob_j for node (i, j)
                new_node = tfp.distributions.Bernoulli(probs = prob_j).sample() # MAY BE SLOW
                
                # update nodes and logits for following calculation
                new_node_casted = tf.cast(new_node, dtype = "float32")
                next_logits = next_logits_if_node_0 * (1 - new_node_casted)[:, None] \
                            + next_logits_if_node_1 * new_node_casted[:, None] 
                
                # keep track of new node values (in prev/curr/next_vals and h_new)
                new_layer.append(new_node)
           
            new_layer = tf.transpose(new_layer)
            h_current[i] = new_layer
            prev_vals = [x] + h_current[:-1]
            curr_vals = h_current
            next_vals = h_current[1:] + [y]
        
        return h_current
    
    def generate_hmc_kernel(self, x, y, step_size = pow(1000, -1/4)):
        
        adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn = lambda v: self.target_log_prob(x, v, y, is_hmc = True),
            num_leapfrog_steps = 2,
            step_size = step_size),
            num_adaptation_steps=int(100 * 0.8))
        
        return adaptive_hmc
    
    # new proposing-state method with HamiltonianMonteCarlo
    def propose_new_state_hamiltonian(self, x, h, y, hmc_kernel, is_update_kernel = True):
    
        h_current = h
        h_current = [tf.cast(h_i, dtype=tf.float32) for h_i in h_current]
        h_current = tf.concat(h_current, axis = 1)

        # run the chain (with burn-in)
        num_burnin_steps = 0
        num_results = 1

        samples = tfp.mcmc.sample_chain(
            num_results = num_results,
            num_burnin_steps = num_burnin_steps,
            current_state = h_current, # may need to be reshaped
            kernel = hmc_kernel,
            trace_fn = None,
            return_final_kernel_results = True)
    
        # Generate new states of chains
        #h_state = rerange(samples[0][0])
        h_state = samples[0][0]
        h_new = tf.split(h_state, self.hidden_layer_sizes, axis = 1) 
        
        # Update the kernel if necesssary
        if is_update_kernel:
            new_step_size = samples[2].new_step_size.numpy()
            ker_new = self.generate_hmc_kernel(x, y, new_step_size)
            return(h_new, ker_new)
        else:
            return h_new
    
    def update_weights(self, x, h, y, is_gibbs = False):
        
        with tf.GradientTape() as tape:
            loss = -1 * tf.reduce_mean(self.target_log_prob(x, h, y, is_gibbs = is_gibbs))
        
        grads = tape.gradient(loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
    
    def get_predictions(self, x):

        logits = 0.0
        for layer in self.fc_layers:
            logits = layer(x)
            x = tm.sigmoid(logits)
        
        logits = self.output_layer(x)
        probs = tm.sigmoid(logits)
        labels = tf.cast(tm.greater(probs, 0.5), tf.int32)

        return labels
    
    def get_loss(self, x, y):
        
        logits = 0.0
        for layer in self.fc_layers:
            logits = layer(x)
            x = tm.sigmoid(logits)
            
        logits = self.output_layer(x)
        loss = tf.nn.sigmoid_cross_entropy_with_logits(labels = tf.cast(y, tf.float32), logits = logits)
        
        return tf.reduce_sum(loss, axis = -1)

In [4]:
def standard_backprop(size, dat_train, dat_val, epochs):
    '''
    Standard Backpropogation training
    '''
    
    batch_size = 4
    
    print("Start Standard Backprop")
    model = keras.Sequential(
        [
            layers.InputLayer(input_shape=(2,)),
            layers.Dense(size, activation = "sigmoid"),
            layers.Dense(1, activation = "sigmoid")
        ]
    )   
    opt = tf.keras.optimizers.Adam(learning_rate=0.01)
    st = time.time()
    model.compile(loss = "binary_crossentropy", optimizer = opt, metrics = ["accuracy"])
    history = model.fit(dat_train, batch_size = batch_size, epochs = epochs, validation_data = dat_val)
    train_time = time.time() - st
    
    return train_time, history

In [5]:
def hmc(size, dat_train, dat_val, epochs, burnin = 500):
    '''
    HMC training
    '''
    # Setting
    # Get train labels and val labels
    target_train = np.concatenate([target for data, target in dat_train.as_numpy_iterator()])
    target_val = np.concatenate([target for data, target in dat_val.as_numpy_iterator()])
    
    print("Start HMC")
    model = StochasticMLP(hidden_layer_sizes = [size], n_outputs = 1, lr = 0.01)
    network = [model.call(data) for data, target in dat_train]
    kernels = [model.generate_hmc_kernel(data, target) for data, target in dat_train]  
    
    # Burnin
    print("Start HMC Burning")
    burnin_losses = []
    for i in range(burnin):
        
        if(i % 100 == 0): print("Step %d" % i)

        res = []
        burnin_loss = 0.0
        for bs, (data, target) in enumerate(dat_train):
            res.append(model.propose_new_state_hamiltonian(data, network[bs], target, kernels[bs]))
            burnin_loss += -1 * tf.reduce_mean(model.target_log_prob(data, network[bs], target))
    
        network, kernels = zip(*res)
        burnin_losses.append(burnin_loss / (bs + 1))
    
    # Training
    print("Start HMC Training")
    train_losses = []
    train_accs = []
    val_losses = []
    val_accs = []
    start_time = time.time()
    
    for epoch in range(epochs):
        
        # train
        for bs, (data, target) in enumerate(dat_train):
        
            model.update_weights(data, network[bs], target)
            network = [model.propose_new_state_hamiltonian(x, net, y, ker, is_update_kernel = False) \
                       for (x, y), net, ker in zip(dat_train, network, kernels)]
            
        train_loss = 0.0
        for data, target in dat_train:
            train_loss += tf.reduce_mean(model.get_loss(data, target))
        train_loss /= (bs + 1)
        train_losses.append(train_loss)       
        
        train_preds = [model.get_predictions(data) for data, target in dat_train]
        train_acc = accuracy_score(np.concatenate(train_preds), target_train)
        train_accs.append(train_acc)        
        
        # validate
        
        val_loss = 0.0
        for bs, (data, target) in enumerate(dat_val):
            val_loss += tf.reduce_mean(model.get_loss(data, target))
        val_loss /= (bs + 1)
        val_losses.append(val_loss)  
        
        val_preds = [model.get_predictions(data) for data, target in dat_val]
        val_acc = accuracy_score(np.concatenate(val_preds), target_val)
        val_accs.append(val_acc)
        
        print("Epoch %d/%d: - %.4fs/step - train_loss: %.4f - train_acc: %.4f - val_loss: %.4f - val_acc: %.4f" 
            % (epoch + 1, epochs, (time.time() - start_time) / (epoch + 1), train_loss, train_acc, val_loss, val_acc))

    train_time = time.time() - start_time
    return burnin_losses, train_time, {"train_acc": train_accs, "train_loss": train_losses,
                             "val_acc": val_accs, "val_loss": val_losses}

In [6]:
def gibbs(size, dat_train, dat_val, epochs, burnin = 500):
    '''
    Gibbs Training
    '''
    # Setting
    # Get train labels and val labels
    target_train = np.concatenate([target for data, target in dat_train.as_numpy_iterator()])
    target_val = np.concatenate([target for data, target in dat_val.as_numpy_iterator()])
    
    print("Start Gibbs")
    model = StochasticMLP(hidden_layer_sizes = [size], n_outputs=1, lr = 0.01)
    network = [model.call(data) for data, target in dat_train]
    
    # Burnin
    print("Start Gibbs Burning")    
    burnin_losses = []
    for i in range(burnin):
    
        if(i % 100 == 0): print("Step %d" % i)

        res = []
        burnin_loss = 0.0
        for bs, (data, target) in enumerate(dat_train):
            res.append(model.gibbs_new_state(data, network[bs], target))
            burnin_loss += -1 * tf.reduce_mean(model.target_log_prob(data, network[bs], target, is_gibbs = True))
            
        network = res
        burnin_losses.append(burnin_loss / (bs + 1))
    
    # Training
    train_losses = []
    train_accs = []
    val_losses = []
    val_accs = []
    start_time = time.time()
    
    for epoch in range(epochs):
        
        # train
        for bs, (data, target) in enumerate(dat_train):
        
            model.update_weights(data, network[bs], target, is_gibbs = True)
            network = [model.gibbs_new_state(x, net, y) for (x, y), net in zip(dat_train, network)]
            
        train_loss = 0.0
        for data, target in dat_train:
            train_loss += tf.reduce_mean(model.get_loss(data, target))
        train_loss /= (bs + 1)
        train_losses.append(train_loss)       
        
        train_preds = [model.get_predictions(data) for data, target in dat_train]
        train_acc = accuracy_score(np.concatenate(train_preds), target_train)
        train_accs.append(train_acc)        
        
        # validate
        
        val_loss = 0.0
        for bs, (data, target) in enumerate(dat_val):
            val_loss += tf.reduce_mean(model.get_loss(data, target))
        val_loss /= (bs + 1)
        val_losses.append(val_loss)  
        
        val_preds = [model.get_predictions(data) for data, target in dat_val]
        val_acc = accuracy_score(np.concatenate(val_preds), target_val)
        val_accs.append(val_acc)
        
        print("Epoch %d/%d: - %.4fs/step - train_loss: %.4f - train_acc: %.4f - val_loss: %.4f - val_acc: %.4f" 
            % (epoch + 1, epochs, (time.time() - start_time) / (epoch + 1), train_loss, train_acc, val_loss, val_acc))

    train_time = time.time() - start_time
    return burnin_losses, train_time, {"train_acc": train_accs, "train_loss": train_losses,
                             "val_acc": val_accs, "val_loss": val_losses}

In [7]:
np.random.seed(1234)
X, Y = make_moons(200, noise = 0.3)

# Split into test and training data
x_train, x_val, y_train, y_val = train_test_split(X, Y, test_size = 0.2, random_state=73)
y_train = y_train.reshape(-1, 1)
y_val = y_val.reshape(-1, 1)

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(32)
val_ds = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(32)

2022-03-07 22:58:47.580464: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
# running multiple times
N = 5
size = 32
epochs = 100
burnin = 100

res_bp, res_hmc, res_gibbs = [], [], []

for i in range(N):
    
    time_bp, history_bp = standard_backprop(size, train_ds, val_ds, epochs)
    burnin_loss_hmc, time_hmc, history_hmc = hmc(size, train_ds, val_ds, epochs, burnin)
    burnin_loss_gibbs, time_gibbs, history_gibbs = gibbs(size, train_ds, val_ds, epochs, burnin)
    
    hist_bp = {"train_acc": history_bp.history['accuracy'], "train_loss": history_bp.history['loss'], 
               "val_acc": history_bp.history['val_accuracy'], "val_loss": history_bp.history['val_loss']}
    rbp = {'time': time_bp, 'history': hist_bp}
    rhmc = {'time': time_hmc, 'burnin': burnin_loss_hmc, 'history': history_hmc}
    rgibbs = {'time': time_gibbs, 'burnin': burnin_loss_gibbs, 'history': history_gibbs}
    
    res_bp.append(rbp)
    res_hmc.append(rhmc)
    res_gibbs.append(rgibbs)

Start Standard Backprop
Epoch 1/100
1/5 [=====>........................] - ETA: 1s - loss: 0.6554 - accuracy: 0.6250

2022-03-07 22:58:49.730220: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100


Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Start HMC
Start HMC Burning
Step 0
Start HMC Training
Epoch 1/100: - 0.6617s/step - train_loss: 0.6800 - train_acc: 0.5188 - val_loss: 0.7021 - val_acc: 0.4250
Epoch 2/100: - 0.6646s/step - train_loss: 0.6701 - train_acc: 0.6687 - val_loss: 0.6991 - val_acc: 0.3750
Epoch 3/100: - 0.6679s/step - train_loss: 0.6644 - train_acc: 0.6312 - val_loss: 0.6977 - val_acc: 0.4500
Epoch 4/100: - 0.6693s/step - train_loss: 0.6581 - train_acc: 0.7188 - val_loss: 0.6922 - val_

Epoch 22/100: - 0.6659s/step - train_loss: 0.5850 - train_acc: 0.8438 - val_loss: 0.6358 - val_acc: 0.6000
Epoch 23/100: - 0.6668s/step - train_loss: 0.5826 - train_acc: 0.8500 - val_loss: 0.6335 - val_acc: 0.6000
Epoch 24/100: - 0.6670s/step - train_loss: 0.5794 - train_acc: 0.8562 - val_loss: 0.6305 - val_acc: 0.5750
Epoch 25/100: - 0.6675s/step - train_loss: 0.5763 - train_acc: 0.8500 - val_loss: 0.6277 - val_acc: 0.6250
Epoch 26/100: - 0.6676s/step - train_loss: 0.5735 - train_acc: 0.8500 - val_loss: 0.6258 - val_acc: 0.6250
Epoch 27/100: - 0.6674s/step - train_loss: 0.5701 - train_acc: 0.8562 - val_loss: 0.6240 - val_acc: 0.6000
Epoch 28/100: - 0.6674s/step - train_loss: 0.5661 - train_acc: 0.8625 - val_loss: 0.6217 - val_acc: 0.5750
Epoch 29/100: - 0.6674s/step - train_loss: 0.5626 - train_acc: 0.8562 - val_loss: 0.6196 - val_acc: 0.6000
Epoch 30/100: - 0.6676s/step - train_loss: 0.5599 - train_acc: 0.8562 - val_loss: 0.6172 - val_acc: 0.6000
Epoch 31/100: - 0.6678s/step - train_

Epoch 99/100: - 0.6664s/step - train_loss: 0.4667 - train_acc: 0.8500 - val_loss: 0.5790 - val_acc: 0.6000
Epoch 100/100: - 0.6662s/step - train_loss: 0.4656 - train_acc: 0.8562 - val_loss: 0.5777 - val_acc: 0.6000
Start Gibbs
Start Gibbs Burning
Step 0
Epoch 1/100: - 2.2941s/step - train_loss: 0.6870 - train_acc: 0.5750 - val_loss: 0.6660 - val_acc: 0.6250
Epoch 2/100: - 2.3114s/step - train_loss: 0.7013 - train_acc: 0.5188 - val_loss: 0.6688 - val_acc: 0.4250
Epoch 3/100: - 2.3061s/step - train_loss: 0.6924 - train_acc: 0.5188 - val_loss: 0.6640 - val_acc: 0.4250
Epoch 4/100: - 2.2999s/step - train_loss: 0.6828 - train_acc: 0.5750 - val_loss: 0.6660 - val_acc: 0.5750
Epoch 5/100: - 2.3009s/step - train_loss: 0.6840 - train_acc: 0.5938 - val_loss: 0.6746 - val_acc: 0.7000
Epoch 6/100: - 2.3048s/step - train_loss: 0.6812 - train_acc: 0.7000 - val_loss: 0.6716 - val_acc: 0.8000
Epoch 7/100: - 2.3067s/step - train_loss: 0.6792 - train_acc: 0.5188 - val_loss: 0.6661 - val_acc: 0.4750
Epoc

Epoch 76/100: - 2.2391s/step - train_loss: 0.6441 - train_acc: 0.8500 - val_loss: 0.6633 - val_acc: 0.6000
Epoch 77/100: - 2.2388s/step - train_loss: 0.6447 - train_acc: 0.8625 - val_loss: 0.6600 - val_acc: 0.6500
Epoch 78/100: - 2.2385s/step - train_loss: 0.6450 - train_acc: 0.8500 - val_loss: 0.6582 - val_acc: 0.6250
Epoch 79/100: - 2.2378s/step - train_loss: 0.6430 - train_acc: 0.7750 - val_loss: 0.6567 - val_acc: 0.6000
Epoch 80/100: - 2.2374s/step - train_loss: 0.6398 - train_acc: 0.7688 - val_loss: 0.6557 - val_acc: 0.6000
Epoch 81/100: - 2.2371s/step - train_loss: 0.6413 - train_acc: 0.8625 - val_loss: 0.6590 - val_acc: 0.6500
Epoch 82/100: - 2.2362s/step - train_loss: 0.6435 - train_acc: 0.8625 - val_loss: 0.6599 - val_acc: 0.6500
Epoch 83/100: - 2.2360s/step - train_loss: 0.6419 - train_acc: 0.7875 - val_loss: 0.6566 - val_acc: 0.6000
Epoch 84/100: - 2.2356s/step - train_loss: 0.6402 - train_acc: 0.8438 - val_loss: 0.6557 - val_acc: 0.6250
Epoch 85/100: - 2.2348s/step - train_

Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100


Epoch 99/100
Epoch 100/100
Start HMC
Start HMC Burning
Step 0
Start HMC Training
Epoch 1/100: - 0.6539s/step - train_loss: 0.6600 - train_acc: 0.5188 - val_loss: 0.6708 - val_acc: 0.4250
Epoch 2/100: - 0.6541s/step - train_loss: 0.6506 - train_acc: 0.8438 - val_loss: 0.6687 - val_acc: 0.6250
Epoch 3/100: - 0.6545s/step - train_loss: 0.6466 - train_acc: 0.8562 - val_loss: 0.6665 - val_acc: 0.5750
Epoch 4/100: - 0.6512s/step - train_loss: 0.6429 - train_acc: 0.7688 - val_loss: 0.6617 - val_acc: 0.6500
Epoch 5/100: - 0.6526s/step - train_loss: 0.6408 - train_acc: 0.6750 - val_loss: 0.6590 - val_acc: 0.5500
Epoch 6/100: - 0.6474s/step - train_loss: 0.6379 - train_acc: 0.7312 - val_loss: 0.6570 - val_acc: 0.5750
Epoch 7/100: - 0.6454s/step - train_loss: 0.6346 - train_acc: 0.8125 - val_loss: 0.6550 - val_acc: 0.6250
Epoch 8/100: - 0.6419s/step - train_loss: 0.6314 - train_acc: 0.8313 - val_loss: 0.6524 - val_acc: 0.6000
Epoch 9/100: - 0.6398s/step - train_loss: 0.6282 - train_acc: 0.7625 - 

Epoch 74/100: - 0.6456s/step - train_loss: 0.4956 - train_acc: 0.8750 - val_loss: 0.5692 - val_acc: 0.6250
Epoch 75/100: - 0.6464s/step - train_loss: 0.4936 - train_acc: 0.8562 - val_loss: 0.5701 - val_acc: 0.6500
Epoch 76/100: - 0.6468s/step - train_loss: 0.4906 - train_acc: 0.8562 - val_loss: 0.5686 - val_acc: 0.6500
Epoch 77/100: - 0.6478s/step - train_loss: 0.4863 - train_acc: 0.8625 - val_loss: 0.5644 - val_acc: 0.6250
Epoch 78/100: - 0.6480s/step - train_loss: 0.4828 - train_acc: 0.8688 - val_loss: 0.5610 - val_acc: 0.6500
Epoch 79/100: - 0.6481s/step - train_loss: 0.4810 - train_acc: 0.8812 - val_loss: 0.5599 - val_acc: 0.6500
Epoch 80/100: - 0.6492s/step - train_loss: 0.4796 - train_acc: 0.8750 - val_loss: 0.5602 - val_acc: 0.6250
Epoch 81/100: - 0.6497s/step - train_loss: 0.4782 - train_acc: 0.8750 - val_loss: 0.5610 - val_acc: 0.6500
Epoch 82/100: - 0.6498s/step - train_loss: 0.4777 - train_acc: 0.8688 - val_loss: 0.5620 - val_acc: 0.6250
Epoch 83/100: - 0.6500s/step - train_

Epoch 51/100: - 2.2151s/step - train_loss: 0.6646 - train_acc: 0.7438 - val_loss: 0.6727 - val_acc: 0.6000
Epoch 52/100: - 2.2146s/step - train_loss: 0.6650 - train_acc: 0.7188 - val_loss: 0.6731 - val_acc: 0.5750
Epoch 53/100: - 2.2140s/step - train_loss: 0.6640 - train_acc: 0.7750 - val_loss: 0.6732 - val_acc: 0.6250
Epoch 54/100: - 2.2134s/step - train_loss: 0.6635 - train_acc: 0.8125 - val_loss: 0.6739 - val_acc: 0.6250
Epoch 55/100: - 2.2146s/step - train_loss: 0.6641 - train_acc: 0.8500 - val_loss: 0.6755 - val_acc: 0.6250
Epoch 56/100: - 2.2168s/step - train_loss: 0.6635 - train_acc: 0.8562 - val_loss: 0.6751 - val_acc: 0.6250
Epoch 57/100: - 2.2184s/step - train_loss: 0.6630 - train_acc: 0.7625 - val_loss: 0.6735 - val_acc: 0.6500
Epoch 58/100: - 2.2177s/step - train_loss: 0.6632 - train_acc: 0.7000 - val_loss: 0.6730 - val_acc: 0.5500
Epoch 59/100: - 2.2173s/step - train_loss: 0.6624 - train_acc: 0.7812 - val_loss: 0.6732 - val_acc: 0.6500
Epoch 60/100: - 2.2167s/step - train_

Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100


Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Start HMC
Start HMC Burning
Step 0
Start HMC Training
Epoch 1/100: - 0.6400s/step - train_loss: 0.7137 - train_acc: 0.5188 - val_loss: 0.6981 - val_acc: 0.4250
Epoch 2/100: - 0.6433s/step - train_loss: 0.6996 - train_acc: 0.4813 - val_loss: 0.6985 - val_acc: 0.5750
Epoch 3/100: - 0.6434s/step - train_loss: 0.6849 - train_acc: 0.4938 - val_loss: 0.6905 - val_acc: 0.5750
Epoch 4/100: - 0.6451s/step - train_loss: 0.6711 - train_acc: 0.6750 - val_loss: 0.6791 - val_acc: 0.5500
Epoch 5/100: - 0.6453s/step - train_loss: 0.6612 - train_acc: 0.5437 - val_loss: 0.6717 - val_acc: 0.4500
Epoch 6/100: - 0.6488s/step - train_loss: 0.6528 - train_acc: 0.6125 - val_loss: 0.6667 - val_acc: 0.5250
Epoch 7/100: - 0.6491s/step - train_

Epoch 49/100: - 0.6452s/step - train_loss: 0.5164 - train_acc: 0.8438 - val_loss: 0.6055 - val_acc: 0.6000
Epoch 50/100: - 0.6467s/step - train_loss: 0.5149 - train_acc: 0.8562 - val_loss: 0.6050 - val_acc: 0.5750
Epoch 51/100: - 0.6484s/step - train_loss: 0.5137 - train_acc: 0.8562 - val_loss: 0.6051 - val_acc: 0.5750
Epoch 52/100: - 0.6493s/step - train_loss: 0.5130 - train_acc: 0.8562 - val_loss: 0.6055 - val_acc: 0.5750
Epoch 53/100: - 0.6505s/step - train_loss: 0.5124 - train_acc: 0.8500 - val_loss: 0.6059 - val_acc: 0.5750
Epoch 54/100: - 0.6505s/step - train_loss: 0.5119 - train_acc: 0.8375 - val_loss: 0.6056 - val_acc: 0.5750
Epoch 55/100: - 0.6504s/step - train_loss: 0.5109 - train_acc: 0.8438 - val_loss: 0.6043 - val_acc: 0.5750
Epoch 56/100: - 0.6500s/step - train_loss: 0.5096 - train_acc: 0.8438 - val_loss: 0.6026 - val_acc: 0.5750
Epoch 57/100: - 0.6497s/step - train_loss: 0.5086 - train_acc: 0.8562 - val_loss: 0.6017 - val_acc: 0.5750
Epoch 58/100: - 0.6496s/step - train_

Epoch 26/100: - 2.2466s/step - train_loss: 0.6667 - train_acc: 0.8500 - val_loss: 0.6762 - val_acc: 0.6250
Epoch 27/100: - 2.2465s/step - train_loss: 0.6685 - train_acc: 0.8625 - val_loss: 0.6769 - val_acc: 0.6250
Epoch 28/100: - 2.2443s/step - train_loss: 0.6705 - train_acc: 0.7125 - val_loss: 0.6758 - val_acc: 0.6000
Epoch 29/100: - 2.2420s/step - train_loss: 0.6719 - train_acc: 0.5687 - val_loss: 0.6753 - val_acc: 0.5250
Epoch 30/100: - 2.2381s/step - train_loss: 0.6720 - train_acc: 0.5750 - val_loss: 0.6749 - val_acc: 0.5250
Epoch 31/100: - 2.2341s/step - train_loss: 0.6704 - train_acc: 0.5813 - val_loss: 0.6739 - val_acc: 0.5250
Epoch 32/100: - 2.2314s/step - train_loss: 0.6684 - train_acc: 0.6937 - val_loss: 0.6734 - val_acc: 0.5750
Epoch 33/100: - 2.2299s/step - train_loss: 0.6672 - train_acc: 0.8500 - val_loss: 0.6736 - val_acc: 0.6250
Epoch 34/100: - 2.2274s/step - train_loss: 0.6671 - train_acc: 0.8625 - val_loss: 0.6746 - val_acc: 0.7000
Epoch 35/100: - 2.2253s/step - train_

Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100


Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Start HMC
Start HMC Burning
Step 0
Start HMC Training
Epoch 1/100: - 0.7138s/step - train_loss: 0.6969 - train_acc: 0.5188 - val_loss: 0.6984 - val_acc: 0.4250
Epoch 2/100: - 0.7062s/step - train_loss: 0.6901 - train_acc: 0.5188 - val_loss: 0.6937 - val_acc: 0.4250
Epoch 3/100: - 0.7097s/step - train_loss: 0.6834 - train_acc: 0.7125 - val_loss: 0.6938 - val_acc: 0.4250
Epoch 4/100: - 0.7104s/step - train_loss: 0.6790 - train_acc: 0.5750 - val_loss: 0.6951 - val_acc: 0.5750
E

Epoch 24/100: - 0.6814s/step - train_loss: 0.6314 - train_acc: 0.8250 - val_loss: 0.6642 - val_acc: 0.5750
Epoch 25/100: - 0.6810s/step - train_loss: 0.6293 - train_acc: 0.8313 - val_loss: 0.6617 - val_acc: 0.5750
Epoch 26/100: - 0.6797s/step - train_loss: 0.6264 - train_acc: 0.8313 - val_loss: 0.6592 - val_acc: 0.5750
Epoch 27/100: - 0.6786s/step - train_loss: 0.6232 - train_acc: 0.8313 - val_loss: 0.6572 - val_acc: 0.6250
Epoch 28/100: - 0.6789s/step - train_loss: 0.6195 - train_acc: 0.8313 - val_loss: 0.6550 - val_acc: 0.6250
Epoch 29/100: - 0.6785s/step - train_loss: 0.6158 - train_acc: 0.8375 - val_loss: 0.6526 - val_acc: 0.6000
Epoch 30/100: - 0.6781s/step - train_loss: 0.6126 - train_acc: 0.8000 - val_loss: 0.6505 - val_acc: 0.6000
Epoch 31/100: - 0.6778s/step - train_loss: 0.6094 - train_acc: 0.8000 - val_loss: 0.6491 - val_acc: 0.6000
Epoch 32/100: - 0.6776s/step - train_loss: 0.6065 - train_acc: 0.8187 - val_loss: 0.6482 - val_acc: 0.6000
Epoch 33/100: - 0.6772s/step - train_

Epoch 1/100: - 2.1687s/step - train_loss: 0.7818 - train_acc: 0.4000 - val_loss: 0.7125 - val_acc: 0.4250
Epoch 2/100: - 2.1880s/step - train_loss: 0.7685 - train_acc: 0.3812 - val_loss: 0.7059 - val_acc: 0.4000
Epoch 3/100: - 2.1790s/step - train_loss: 0.7541 - train_acc: 0.2625 - val_loss: 0.7046 - val_acc: 0.5000
Epoch 4/100: - 2.1683s/step - train_loss: 0.7418 - train_acc: 0.2437 - val_loss: 0.7012 - val_acc: 0.5000
Epoch 5/100: - 2.1593s/step - train_loss: 0.7298 - train_acc: 0.3187 - val_loss: 0.6932 - val_acc: 0.4250
Epoch 6/100: - 2.1537s/step - train_loss: 0.7212 - train_acc: 0.4313 - val_loss: 0.6878 - val_acc: 0.4000
Epoch 7/100: - 2.1508s/step - train_loss: 0.7122 - train_acc: 0.4688 - val_loss: 0.6857 - val_acc: 0.4250
Epoch 8/100: - 2.1485s/step - train_loss: 0.7040 - train_acc: 0.4437 - val_loss: 0.6853 - val_acc: 0.4000
Epoch 9/100: - 2.1460s/step - train_loss: 0.6966 - train_acc: 0.4688 - val_loss: 0.6846 - val_acc: 0.4250
Epoch 10/100: - 2.1444s/step - train_loss: 0.6

Epoch 78/100: - 2.4718s/step - train_loss: 0.6442 - train_acc: 0.8313 - val_loss: 0.6552 - val_acc: 0.6500
Epoch 79/100: - 2.4717s/step - train_loss: 0.6460 - train_acc: 0.8688 - val_loss: 0.6577 - val_acc: 0.6750
Epoch 80/100: - 2.4719s/step - train_loss: 0.6455 - train_acc: 0.8812 - val_loss: 0.6578 - val_acc: 0.6750
Epoch 81/100: - 2.4725s/step - train_loss: 0.6453 - train_acc: 0.8500 - val_loss: 0.6562 - val_acc: 0.6500
Epoch 82/100: - 2.4738s/step - train_loss: 0.6444 - train_acc: 0.7125 - val_loss: 0.6550 - val_acc: 0.5750
Epoch 83/100: - 2.4746s/step - train_loss: 0.6441 - train_acc: 0.8500 - val_loss: 0.6568 - val_acc: 0.6250
Epoch 84/100: - 2.4767s/step - train_loss: 0.6418 - train_acc: 0.8688 - val_loss: 0.6581 - val_acc: 0.6500
Epoch 85/100: - 2.4779s/step - train_loss: 0.6372 - train_acc: 0.8688 - val_loss: 0.6548 - val_acc: 0.6500
Epoch 86/100: - 2.4792s/step - train_loss: 0.6365 - train_acc: 0.7812 - val_loss: 0.6526 - val_acc: 0.6000
Epoch 87/100: - 2.4808s/step - train_

Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


Start HMC
Start HMC Burning
Step 0
Start HMC Training
Epoch 1/100: - 0.7091s/step - train_loss: 0.6791 - train_acc: 0.5188 - val_loss: 0.6829 - val_acc: 0.4250
Epoch 2/100: - 0.7099s/step - train_loss: 0.6704 - train_acc: 0.8000 - val_loss: 0.6825 - val_acc: 0.6000
Epoch 3/100: - 0.7349s/step - train_loss: 0.6640 - train_acc: 0.8000 - val_loss: 0.6800 - val_acc: 0.5750
Epoch 4/100: - 0.7430s/step - train_loss: 0.6587 - train_acc: 0.7000 - val_loss: 0.6757 - val_acc: 0.5750
Epoch 5/100: - 0.7516s/step - train_loss: 0.6547 - train_acc: 0.6500 - val_loss: 0.6725 - val_acc: 0.5000
Epoch 6/100: - 0.7492s/step - train_loss: 0.6507 - train_acc: 0.6500 - val_loss: 0.6697 - val_acc: 0.5000
Epoch 7/100: - 0.7461s/step - train_loss: 0.6472 - train_acc: 0.6500 - val_loss: 0.6672 - val_acc: 0.5000
Epoch 8/100: - 0.7486s/step - train_loss: 0.6433 - train_acc: 0.6937 - val_loss: 0.6649 - val_acc: 0.5500
Epoch 9/100: - 0.7500s/step - train_loss: 0.6390 - train_acc: 0.7562 - val_loss: 0.6622 - val_acc:

Epoch 76/100: - 0.7699s/step - train_loss: 0.4730 - train_acc: 0.8625 - val_loss: 0.5676 - val_acc: 0.6000
Epoch 77/100: - 0.7692s/step - train_loss: 0.4689 - train_acc: 0.8750 - val_loss: 0.5658 - val_acc: 0.6000
Epoch 78/100: - 0.7685s/step - train_loss: 0.4658 - train_acc: 0.8625 - val_loss: 0.5643 - val_acc: 0.6250
Epoch 79/100: - 0.7677s/step - train_loss: 0.4642 - train_acc: 0.8500 - val_loss: 0.5628 - val_acc: 0.6250
Epoch 80/100: - 0.7678s/step - train_loss: 0.4632 - train_acc: 0.8500 - val_loss: 0.5605 - val_acc: 0.6250
Epoch 81/100: - 0.7683s/step - train_loss: 0.4624 - train_acc: 0.8688 - val_loss: 0.5578 - val_acc: 0.6250
Epoch 82/100: - 0.7690s/step - train_loss: 0.4631 - train_acc: 0.8750 - val_loss: 0.5570 - val_acc: 0.6000
Epoch 83/100: - 0.7692s/step - train_loss: 0.4644 - train_acc: 0.8688 - val_loss: 0.5572 - val_acc: 0.6250
Epoch 84/100: - 0.7692s/step - train_loss: 0.4655 - train_acc: 0.8688 - val_loss: 0.5579 - val_acc: 0.6250
Epoch 85/100: - 0.7692s/step - train_

Epoch 53/100: - 2.4336s/step - train_loss: 0.6635 - train_acc: 0.8438 - val_loss: 0.6690 - val_acc: 0.6250
Epoch 54/100: - 2.4331s/step - train_loss: 0.6631 - train_acc: 0.6375 - val_loss: 0.6682 - val_acc: 0.5500
Epoch 55/100: - 2.4333s/step - train_loss: 0.6637 - train_acc: 0.5563 - val_loss: 0.6696 - val_acc: 0.5000
Epoch 56/100: - 2.4333s/step - train_loss: 0.6635 - train_acc: 0.8125 - val_loss: 0.6724 - val_acc: 0.6500
Epoch 57/100: - 2.4334s/step - train_loss: 0.6638 - train_acc: 0.8500 - val_loss: 0.6736 - val_acc: 0.6250
Epoch 58/100: - 2.4348s/step - train_loss: 0.6634 - train_acc: 0.8125 - val_loss: 0.6721 - val_acc: 0.6500
Epoch 59/100: - 2.4347s/step - train_loss: 0.6619 - train_acc: 0.7562 - val_loss: 0.6703 - val_acc: 0.6000
Epoch 60/100: - 2.4346s/step - train_loss: 0.6610 - train_acc: 0.8250 - val_loss: 0.6698 - val_acc: 0.6500
Epoch 61/100: - 2.4353s/step - train_loss: 0.6604 - train_acc: 0.8125 - val_loss: 0.6685 - val_acc: 0.6000
Epoch 62/100: - 2.4416s/step - train_

In [10]:
res_bp_tmp = []
for i in range(N):
    hist_bp = {"train_acc": res_bp[i]['history']['train_acc'], "train_loss": res_bp[i]['history']['train_loss'], 
               "val_acc": res_bp[i]['history']['val_acc'], "val_loss": res_bp[i]['history']['val_loss']}
    r_bp = {"time": res_bp[i]['time'], "history": hist_bp}
    res_bp_tmp.append(r_bp)

In [11]:
# plot all the running times for each method
def plot_all(res, method, metric):
    
    plt.style.use('seaborn')
    nrow = 3
    ncol = 2
    
    fig, ax = plt.subplots(nrow, ncol, sharex = True)
    fig.suptitle(method + "_" + metric)
    for i in range(nrow):
        for j in range(ncol):
            if i * ncol + j < N:
                ax[i, j].plot(res[i * ncol + j]['history'][metric])
                ax[i, j].set_title(f"Run {i * ncol + j}")
    plt.savefig(method + "_" + metric + '.pdf')
    plt.close()

res_bp = res_bp_tmp

res_all = [res_bp, res_hmc, res_gibbs]
methods = ['bp', 'hmc', 'gibbs']
metrics = ['train_acc', 'train_loss', 'val_acc', 'val_loss']
for i, method in enumerate(methods):
    for metric in metrics:
        plot_all(res_all[i], method, metric)

In [12]:
# calculate average curve for each method
def cal_avg(res):
    
    metrics = ['train_acc', 'train_loss', 'val_acc', 'val_loss']
    avg = {}
    for metric in metrics:
        arr_metric = np.zeros((N, epochs))
        for i in range(N):
            arr_metric[i] = np.array(res[i]['history'][metric])
        avg_metric = np.mean(arr_metric, axis = 0)
        avg[metric] = avg_metric
        
    return avg

avg_bp = cal_avg(res_bp)
avg_hmc = cal_avg(res_hmc)
avg_gibbs = cal_avg(res_gibbs)

In [13]:
avg_all = [avg_bp, avg_hmc, avg_gibbs]

plt.style.use('seaborn')
for metric in metrics:
    for i, method in enumerate(methods):
        plt.plot(avg_all[i][metric], label = method)
    plt.title(metric)
    plt.xlabel("epochs")
    plt.ylabel(metric.split("_")[1])
    plt.legend()
    plt.savefig("average_" + metric + '.pdf')
    plt.close()

In [14]:
time_bp, time_hmc, time_gibbs = [], [], []
for i in range(N):
    time_bp.append(res_bp[i]['time'])
    time_hmc.append(res_hmc[i]['time'])
    time_gibbs.append(res_gibbs[i]['time'])

In [15]:
with open('time.npy', 'wb') as f:
    np.save(f, np.array(res_all))
    np.save(f, np.array(time_bp))
    np.save(f, np.array(time_hmc))
    np.save(f, np.array(time_gibbs))