In [1]:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import random
import time
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import initializers
from tensorflow.keras import Model
from tensorflow.keras.layers import Flatten, Dense, Conv2D, Dropout, MaxPooling2D
from sklearn.metrics import accuracy_score

In [2]:
# Load MNIST
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Select binary data
label_sub = [0,1]
x_train_sub = np.array([x for x, y in zip(x_train, y_train) if y in label_sub])
y_train_sub = np.array([y for y in y_train if y in label_sub])
x_test_sub = np.array([x for x, y in zip(x_test, y_test) if y in label_sub])
y_test_sub = np.array([y for y in y_test if y in label_sub])

print('There are', len(x_train_sub), 'training images.')
print('There are', len(x_test_sub), 'test images.')

There are 12665 training images.
There are 2115 test images.


In [3]:
# Reshaping the array to 4-dims so that it can work with the Keras API
x_train_sub = x_train_sub.reshape(x_train_sub.shape[0], 28, 28, 1)
x_test_sub = x_test_sub.reshape(x_test_sub.shape[0], 28, 28, 1)
input_shape = (28, 28, 1)

# Making sure that the values are float so that we can get decimal points after division
x_train_sub = x_train_sub.astype('float32')
x_test_sub = x_test_sub.astype('float32')

# Normalizing the RGB codes by dividing it to the max RGB value.
x_train_sub /= 255
x_test_sub /= 255
print('x_train_sub shape:', x_train_sub.shape)
print('Number of images in x_train_sub', x_train_sub.shape[0])
print('Number of images in x_test_sub', x_test_sub.shape[0])

x_train_sub shape: (12665, 28, 28, 1)
Number of images in x_train_sub 12665
Number of images in x_test_sub 2115


In [4]:
train_ds = tf.data.Dataset.from_tensor_slices((x_train_sub, y_train_sub)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test_sub, y_test_sub)).batch(32)

In [5]:
def convert2_zero_one(x):

    t = [tf.math.sigmoid(i) for i in x]
    
    return t

def rerange(x, r = 6.0):
    
    out_of_range = tf.cast(tf.math.greater(tf.math.abs(x), r), tf.float32)
    sign = tf.math.sign(x)
    
    return x * (1 - out_of_range) + sign * r * out_of_range

In [186]:
def convert2_zero_one2(x):

    t = tf.math.sigmoid(x)
    
    return t

In [254]:
# MLP model
class StochasticMLP(Model):
    
    def __init__(self, hidden_layers, n_outputs=10, input_shape = (28, 28, 1)):
        super(StochasticMLP, self).__init__()
        self.hidden_layers = hidden_layers
        self.output_layer = Dense(n_outputs)
        
    def call(self, x):
            
        network = []
            
        logits = self.hidden_layers[0](x)
        network = tfp.distributions.Bernoulli(logits=logits).sample()

        final_logits = self.output_layer(x) # initial the weight of output layer
            
        return network
    
    def target_log_prob(self, x, h, y):
        
        #x = Flatten()(x)
        y = [[i] for i in y]
        h_current = convert2_zero_one(tf.cast(h, dtype=tf.float32))
        h_previous = [x]
        #print("h_current:", h_current)
        #print("h_previous:", h_previous)
        
        nlog_prob = 0. # negative log probability
            
        for i, (cv, pv, layer) in enumerate(
            zip(h_current, h_previous, [self.hidden_layers[0]])):
            
            #print("labels:", cv)
            #print("logits:", layer(pv)[0])
        
            ce = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=cv, logits=layer(pv)[0])
            
            nlog_prob += tf.reduce_sum(ce, axis = -1)
        
        
        f_logits = self.hidden_layers[2](self.hidden_layers[1](self.hidden_layers[0](x)))
        print("y:", tf.cast(y, tf.float32))
        print("f_logits:", f_logits)
        
        fce = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.cast(y, tf.float32), logits=self.output_layer(f_logits))
        nlog_prob += tf.reduce_sum(fce, axis = -1)
            
        return -1 * nlog_prob

    def target_log_prob2(self, x, h, y):
        
        x = Flatten()(x)
        y = [[i] for i in y]
        h_current = convert2_zero_one(tf.split(h, self.hidden_layer_sizes, axis = 1))
        #h_current = [h_current[0]]
        h_previous = [x] + h_current[:-1]
        
        nlog_prob = 0.
        
        for i, (cv, pv, layer) in enumerate(
            zip(h_current, h_previous, self.fc_layers[0])):
            
            ce = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=cv, logits=layer(pv))
            
            nlog_prob += tf.reduce_sum(ce, axis = -1)
        
        fce = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.cast(y, tf.float32), logits=self.output_layer(h_current[-1]))
        nlog_prob += tf.reduce_sum(fce, axis = -1)
            
        return -1 * nlog_prob
    
    def generate_hmc_kernel(self, x, y, step_size = pow(1000, -1/4)):
        
        adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn = lambda v: self.target_log_prob(x, v, y),
            num_leapfrog_steps = 2,
            step_size = step_size),
            num_adaptation_steps=int(100 * 0.8))
        
        return adaptive_hmc
    
    # new proposing-state method with HamiltonianMonteCarlo
    def propose_new_state_hamiltonian(self, x, h, y, hmc_ker):
    
        h_current = h
        h_current = [tf.cast(h_i, dtype=tf.float32) for h_i in h_current]
        h_current = tf.concat([h[0], h[1]], axis=1)

        # initialize the HMC transition kernel
        
        adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn = lambda v: self.target_log_prob2(x, v, y),
            num_leapfrog_steps = 2,
            step_size = pow(1000, -1/4)),
            num_adaptation_steps=int(100*0.8))

        # run the chain (with burn-in)
        num_results = 1
        num_burnin_steps = 100

        samples = tfp.mcmc.sample_chain(
            num_results = num_results,
            num_burnin_steps = num_burnin_steps,
            current_state = h_current, # may need to be reshaped
            kernel = adaptive_hmc,
            trace_fn = None)

        h_new = tf.split(samples[0], self.hidden_layer_sizes, axis = 1)

        return(h_new)
    
    def update_weights(self, x, h, y, lr = 0.1):
        
        optimizer = tf.keras.optimizers.SGD(learning_rate = lr)
        with tf.GradientTape() as tape:
            loss = -1 * tf.reduce_mean(self.target_log_prob(x, h, y))
        
        grads = tape.gradient(loss, self.trainable_weights)
        optimizer.apply_gradients(zip(grads, self.trainable_weights))

    def get_predictions(self, x):

        x = Flatten()(x)
        logits = 0.0
        for layer in self.fc_layers:
            logits = layer(x)
            x = tf.math.sigmoid(logits)
        
        logits = self.output_layer(x)
        probs = tf.math.sigmoid(logits)
        #print(probs)
        labels = tf.cast(tf.math.greater(probs, 0.5), tf.int32)

        return labels

In [7]:
# define hidden layers
conv2D_layer = Conv2D(filters = 28, kernel_size = (3, 3), input_shape = (28, 28, 1), activation = 'sigmoid')
maxpooling_layer = MaxPooling2D(pool_size=(2, 2))
flatten_layer = Flatten()

In [255]:
model = StochasticMLP(hidden_layers = [conv2D_layer, maxpooling_layer, flatten_layer], n_outputs = 1)

In [144]:
network = [model.call(images) for images, labels in train_ds]

In [177]:
np.shape(network[395])

TensorShape([25, 26, 26, 28])

In [256]:
tlp = [model.target_log_prob(images, network[bs], labels) for bs, (images, labels) in enumerate(train_ds)]

y: tf.Tensor(
[[1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]], shape=(32, 1), dtype=float32)
f_logits: tf.Tensor(
[[0.5 0.5 0.5 ... 0.5 0.5 0.5]
 [0.5 0.5 0.5 ... 0.5 0.5 0.5]
 [0.5 0.5 0.5 ... 0.5 0.5 0.5]
 ...
 [0.5 0.5 0.5 ... 0.5 0.5 0.5]
 [0.5 0.5 0.5 ... 0.5 0.5 0.5]
 [0.5 0.5 0.5 ... 0.5 0.5 0.5]], shape=(32, 4732), dtype=float32)


InvalidArgumentError: Incompatible shapes: [26,26] vs. [32] [Op:AddV2]

In [15]:
np.shape(network[0])

TensorShape([32, 26, 26, 28])