In [1]:
"""
Mounts your drive for reading and writing datasets, results, weights.
"""
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [2]:
import struct
import numpy as np
import math

def one_hot(a, num_classes):
  """
  Converts an array of integer class labels to one hot notation.

  Parameters
  ==========

  a: numpy.array
    The input array of integer labels, each in the range
    [0, num_classes). It is flattened before encoding, so
    any input shape is accepted.
  num_classes: int
    The number of classes to be used for one hot notation.
  
  Returns
  =======

  np.array
    Array of dtype float32 and shape (a.size, num_classes).
    Row i has a single 1 in the column given by the i-th label.
  """
  labels = np.asarray(a).reshape(-1)
  # Row k of the identity matrix is the one hot vector of class k, so
  # fancy-indexing with the labels builds the whole encoding at once.
  # The result is deliberately NOT squeezed: squeezing collapsed the batch
  # axis whenever there was a single label (or a single class), returning
  # shape (num_classes,) and making callers misread shape[0] as the
  # number of samples.
  return np.eye(num_classes, dtype=np.float32)[labels]

def read_idx(filename, flatten=True, normalize=True, show_logs=True,
             num_data=1000000, to_one_hot=False,
             cache_result=False, use_cache=False):
    """
    Reads datasets from binary files in IDX format.

    filename: str
      The path to the file where the dataset is stored.
      Make sure to not add any extensions at the end of
      the filepath.
    flatten: bool
      If True, flattens every data point to one dimension, giving
      an array of shape (num_data, features). By default, True.
    normalize: bool
      If True, the data points are normalised
      by dividing each element by 126. By default, True.
      (The divisor is kept at 126 so that results stay compatible
      with previously cached data and trained weights.)
    show_logs: bool
      If True, prints one summary line per processing stage.
      By default, True.
    num_data: int
      Number of data points to be fetched from the 
      dataset. By default, 1000000.
    to_one_hot: bool
      If True, converts data points to one hot 
      notation using 0 for low and 1 for high, with 10 classes.
      Only meaningful for integer labels, i.e. together with
      `flatten=False` and `normalize=False`.
      By default, False.
    cache_result: bool
      If True, caches the pre-processed data to 
      `<filename>_cache.npy`. By default, False.
    use_cache: bool
      If True, returns the content of `<filename>_cache.npy` when that
      file can be opened, and falls back to reading `filename` otherwise.
      The cached array is returned as stored: `flatten`, `normalize`,
      `num_data` and `to_one_hot` are not applied to it.
      By default, False.
    
    Returns
    =======

    np.array
      float32 when normalised, flattened or one hot encoded,
      otherwise uint8 as stored in the file.

    Note
    ====

    This function was specifically written for MNIST database
    of handwritten digits. The element type byte of the header is
    not checked; the payload is always interpreted as unsigned bytes.
    """
    cache_path = filename + "_cache.npy"
    if use_cache:
        try:
            ret_val = np.load(cache_path)
        except OSError:
            # No readable cache yet: honour the "if possible" contract and
            # read the raw file instead of failing.
            if show_logs:
                print("No cached data at %s, reading %s"%(cache_path, filename))
        else:
            print("Loaded cached data from %s"%(cache_path))
            return ret_val

    with open(filename, 'rb') as f:
        # Header: two zero bytes, element type code, number of dimensions,
        # followed by one big-endian uint32 per dimension.
        _zero, _data_type, dims = struct.unpack('>HBB', f.read(4))
        shape = tuple(struct.unpack('>I', f.read(4))[0] for _ in range(dims))
        raw = np.frombuffer(f.read(), dtype=np.uint8).reshape(shape)

    num_data = max(0, min(raw.shape[0], num_data))
    # Copy so the result is writable and independent of the read buffer.
    ret_val = np.array(raw[:num_data])

    if normalize:
        # Divide in float64 and round once to float32, exactly as the
        # element-wise version did, but for arrays of any rank.
        ret_val = (ret_val.astype(np.float64) / 126.).astype(np.float32)
        if show_logs:
            print("Normalized %s data points"%(num_data))

    if flatten:
        features = int(np.prod(shape[1:], dtype=np.int64))
        ret_val = ret_val.reshape((num_data, features)).astype(np.float32)
        if show_logs:
            print("Flattened %s data points"%(num_data))

    if to_one_hot:
        ret_val = one_hot(ret_val[0:num_data], 10)

    if cache_result:
        np.save(cache_path, ret_val)
        print("Saved cached data to %s_cache"%(filename))
    return ret_val

In [3]:
import tensorflow as tf

class ANNLayer(tf.keras.layers.Layer):
    """
    Base class for ANN layers.

    A dense layer without bias: the output is
    `activation(input @ kernel_mu)`.

    Parameters
    ==========

    num_inputs: int
        Number of inputs to the layer.
    num_outputs: int
        Number of outputs from the layer.
    activation:
        Activation from tensorflow.keras.activations.
    """

    def __init__(self, num_inputs, num_outputs, activation):
        super(ANNLayer, self).__init__(dtype=tf.float32)
        self.num_outputs = num_outputs
        self.activation = activation
        # `add_weight` replaces the deprecated `add_variable` alias.
        # Everything is passed by keyword because the positional order of
        # `add_weight` differs between Keras versions.
        self.kernel_mu = self.add_weight(name="kernel_mu",
                                         shape=[num_inputs,
                                                self.num_outputs],
                                         initializer=tf.keras.initializers.TruncatedNormal(),
                                         dtype=tf.float32)

    def call(self, input):
        """
        Implements the feed forward operation when layer 
        is called on the given input.

        input: tf.Tensor
          The input, with `num_inputs` entries along its last axis.
        
        Returns
        =======

        tf.Tensor
          The activated product of `input` and `kernel_mu`.
        """
        return self.activation(tf.matmul(input, self.kernel_mu))

class BNNLayer(tf.keras.layers.Layer):
    """
    Base class for BNN layers.

    Holds two variables of shape (num_inputs, num_outputs):
    `kernel_mu` and `kernel_rho`. Sub-classes turn them into a weight
    sample in `_reparametrize`; `call` then applies an explicitly
    supplied weight tensor.

    Parameters
    ==========

    num_inputs: int
        Number of inputs to the layer.
    num_outputs: int
        Number of outputs from the layer.
    activation:
        Activation from tensorflow.keras.activations.
    """

    def __init__(self, num_inputs, num_outputs, activation):
        super(BNNLayer, self).__init__(dtype=tf.float32)
        self.num_outputs = num_outputs
        self.activation = activation
        # `add_weight` replaces the deprecated `add_variable` alias.
        # Everything is passed by keyword because the positional order of
        # `add_weight` differs between Keras versions.
        self.kernel_mu = self.add_weight(name="kernel_mu",
                                         shape=[num_inputs,
                                                self.num_outputs],
                                         initializer=tf.keras.initializers.TruncatedNormal(),
                                         dtype=tf.float32)
        # The variable keeps its historical name "kernel_sigma" even though
        # the attribute is `kernel_rho`; renaming it could invalidate
        # weights saved in name-based formats.
        self.kernel_rho = self.add_weight(name="kernel_sigma",
                                          shape=[num_inputs,
                                                 self.num_outputs],
                                          initializer=tf.keras.initializers.TruncatedNormal(),
                                          dtype=tf.float32)

    def _reparametrize(self):
        """
        Abstract method which implements the
        reparametrisation trick.

        Raises
        ======

        NotImplementedError
          Always; sub-classes must override this method and return a
          weight tensor with the shape of `kernel_mu`.
        """
        raise NotImplementedError(
            "%s must implement _reparametrize()" % type(self).__name__)

    def call(self, input, weights):
        """
        Implements the inference operation when layer 
        is called on the given input and weights.

        input: tf.Tensor
          The input.
        weights: tf.Tensor
          The weights multiplied with the input; the layer's own
          variables are not read here.
        
        Returns
        =======

        tf.Tensor
          The activated product of `input` and `weights`.
        """
        return self.activation(tf.matmul(input, weights))

class BNNLayer_Normal_Normal(BNNLayer):
    """
    BNN layer which draws its weights with the reparametrisation
    trick: Gaussian noise is scaled by a standard deviation derived
    from `kernel_rho` and shifted by `kernel_mu`.
    """

    def _reparametrize(self):
        """
        Draws one weight sample.

        Returns
        =======

        tf.Tensor
          `kernel_mu + eps * log(1 + exp(kernel_rho))`, where `eps` is
          sampled with mean 0 and standard deviation 0.01 and has the
          shape of `kernel_mu`.
        """
        shape = self.kernel_mu.shape
        noise = tf.random.normal(shape, 0, 0.01, dtype=tf.float32)
        # rho is clipped before exponentiation; the bounds are approximately
        # the arguments at which float32 `exp` underflows / overflows.
        bounded_rho = tf.clip_by_value(self.kernel_rho, -87.315, 88.722)
        ones = tf.constant(1., shape=shape, dtype=tf.float32)
        sigma = tf.math.log(tf.math.exp(bounded_rho) + ones)
        return self.kernel_mu + noise * sigma

In [4]:
from tensorflow.keras.activations import relu as Relu, elu as Elu, softmax as Softmax
import math

class ANN(tf.keras.Model):
    """
    Artificial Neural Network using point estimates
    of underlying distribution of training data.

    The network is input -> 400 (relu) -> 400 (relu) -> 10 (softmax),
    built from bias-free `ANNLayer`s.

    Parameters
    ==========

    input_shape: tuple
      `(batch_size, num_features)`; `batch_size` may be None.
      Required, although the signature defaults to None.
    """

    def __init__(self, input_shape=None):
        if input_shape is None:
            # Same exception type as subscripting None, with a usable message.
            raise TypeError("input_shape must be a (batch_size, num_features) tuple")
        super(ANN, self).__init__()
        self.InputLayer = tf.keras.layers.InputLayer(input_shape=(input_shape[1],),
                            batch_size=input_shape[0], dtype=tf.float32)
        self.Dense_1 = ANNLayer(int(input_shape[-1]), 400, activation=Relu)
        self.Dense_2 = ANNLayer(400, 400, activation=Relu)
        self.Output = ANNLayer(400, 10, activation=Softmax)
        self.Layers = [self.Dense_1, self.Dense_2, self.Output]

    def run(self, inputs):
        """
        Performs feed forward operation on the given inputs.

        Parameters
        ==========

        inputs: tf.Tensor

        Returns
        =======

        tf.Tensor
          Output of the last (softmax) layer.
        """
        layer_output = self.InputLayer(inputs)
        for layer in self.Layers:
            layer_output = layer(layer_output)
        return layer_output

    def get_loss(self, inputs, targets, inference=False):
        """
        Computes the total training loss.

        Parameters
        ==========

        inputs: tf.Tensor
            Input to the layers.
        targets: tf.Tensor
            True targets that the model wants to learn from,
            in one hot notation.
        inference: bool
            Used to determine the order of the outputs in the tuple
            being returned.
        
        Returns
        =======

        tuple
          `(loss, outputs)` by default and `(outputs, loss)` when
          `inference` is True. `loss` is the categorical cross entropy
          averaged over the given data, `outputs` the network output.
        """
        outputs = self.run(inputs)
        loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(targets, outputs))

        if inference:
            return outputs, loss
        return loss, outputs

    def compute_gradients(self, inputs, targets):
        """
        Computes gradients of cost function for given inputs, 
        targets.

        Parameters
        ==========

        inputs: tf.Tensor
        targets: tf.Tensor
        
        Returns
        =======

        list
          Containing gradients, tf.Tensor, w.r.t. `kernel_mu` of each
          layer, in the order of `self.Layers`.
        """
        variables = [layer.kernel_mu for layer in self.Layers]
        # Only the forward pass is recorded. The gradient is requested once,
        # after leaving the context, so a non-persistent tape suffices and
        # its resources are released right away.
        with tf.GradientTape() as tape:
            tape.watch(variables)
            loss, _ = self.get_loss(inputs, targets)
        return tape.gradient(loss, variables)

    def learn(self, inputs, targets, alpha):
        """
        Performs weight updates with one step of plain gradient descent.

        Parameters
        ==========

        inputs: tf.Tensor
        targets: tf.Tensor
        alpha: tf.float32 or equivalent
          The learning rate.
        
        Returns
        =======

        None
        """
        grads = self.compute_gradients(inputs, targets)
        for layer, grad in zip(self.Layers, grads):
            layer.kernel_mu.assign_sub(tf.scalar_mul(alpha, grad))

class BNN_Normal_Normal(tf.keras.Model):
    """
    Neural Network which uses, `BNNLayer_Normal_Normal` layers.

    The network is input -> 400 (relu) -> 400 (relu) -> 10 (softmax).
    Training (`learn`) only updates `kernel_rho` of each layer;
    `kernel_mu` is never modified by this class.

    Parameters
    ==========

    input_shape: tuple
      `(batch_size, num_features)`; `batch_size` may be None.
      Required, although the signature defaults to None.
    """

    # Bounds applied to every argument of `exp`; approximately the values
    # at which float32 `exp` underflows / overflows.
    _EXP_ARG_MIN = -87.315
    _EXP_ARG_MAX = 88.722

    def __init__(self, input_shape=None):
        if input_shape is None:
            # Same exception type as subscripting None, with a usable message.
            raise TypeError("input_shape must be a (batch_size, num_features) tuple")
        super(BNN_Normal_Normal, self).__init__()
        self.InputLayer = tf.keras.layers.InputLayer(input_shape=(input_shape[1],),
                            batch_size=input_shape[0], dtype=tf.float32)
        self.Dense_1 = BNNLayer_Normal_Normal(int(input_shape[-1]), 400, activation=Relu)
        self.Dense_2 = BNNLayer_Normal_Normal(400, 400, activation=Relu)
        self.Output = BNNLayer_Normal_Normal(400, 10, activation=Softmax)
        self.Layers = [self.Dense_1, self.Dense_2, self.Output]

    @staticmethod
    def _normal_pdf(w, mu, sigma):
        """
        Element-wise density of N(mu, sigma) at `w`, with the exponent
        clipped to [_EXP_ARG_MIN, _EXP_ARG_MAX] before exponentiation.
        """
        exponent = -tf.math.divide(tf.math.square(w - mu), tf.math.square(sigma)*2)
        exponent = tf.clip_by_value(exponent,
                                    BNN_Normal_Normal._EXP_ARG_MIN,
                                    BNN_Normal_Normal._EXP_ARG_MAX)
        return tf.math.divide(tf.math.exp(exponent), sigma*(2*math.pi)**0.5)

    def run(self, inputs, *weights):
        """
        Produces neural network's outputs for
        given inputs and weights.

        Parameters
        ==========

        inputs: tf.Tensor/np.array
        weights: tf.Tensor
          One weight tensor per layer, in the order of `self.Layers`.

        Returns
        =======

        tf.Tensor
        """
        layer_output = self.InputLayer(inputs)
        for layer, layer_weights in zip(self.Layers, weights):
            layer_output = layer(layer_output, layer_weights)
        return layer_output

    def log_prior(self, weights):
        """
        Computes the natural logarithm of scale
        mixture prior of weights.

        Parameters
        ==========

        weights: tf.Tensor

        Returns
        =======

        tf.Tensor
          Scalar, the log prior summed over all entries of `weights`.

        Note
        ====

        The two standard deviations of the scale mixture are,
        exp(0) and exp(-6). The zero-mean component with standard
        deviation exp(0) has weight 0.25, the one with exp(-6) has
        weight 0.75.
        """
        shape = weights.shape
        sigma_1 = tf.constant(math.exp(0), shape=shape, dtype=tf.float32)
        sigma_2 = tf.constant(math.exp(-6), shape=shape, dtype=tf.float32)
        part_1 = tf.clip_by_value(0.25*self._normal_pdf(weights, 0., sigma_1),
                                  tf.float32.min//2, tf.float32.max//2)
        part_2 = tf.clip_by_value(0.75*self._normal_pdf(weights, 0., sigma_2),
                                  tf.float32.min//2, tf.float32.max//2)
        return tf.math.reduce_sum(tf.math.log(part_1 + part_2))

    def log_posterior(self, weights, mu, rho):
        """
        Computes the natural logarithm of Gaussian
        posterior on weights.

        Parameters
        ==========

        weights: tf.Tensor
        mu: tf.Tensor
          The mean of the posterior Gaussian distribution.
        rho: tf.Tensor
          Used to compute the standard deviation of the posterior
          Gaussian distribution as log(1 + exp(rho)).
        
        Returns
        =======

        tf.Tensor
          Scalar, the log density summed over all entries of `weights`.
        """
        bounded_rho = tf.clip_by_value(rho, self._EXP_ARG_MIN, self._EXP_ARG_MAX)
        sigma = tf.math.log(tf.math.add(
                                  tf.math.exp(bounded_rho),
                                  tf.constant(1., shape=rho.shape, dtype=tf.float32)))
        log_q = tf.math.log(tf.clip_by_value(self._normal_pdf(weights, mu, sigma),
                                             tf.float32.min//2, tf.float32.max//2))
        return tf.math.reduce_sum(log_q)

    def get_loss(self, inputs, targets, samples, weight=1., inference=False):
        """
        Computes the total training loss.

        Parameters
        ==========

        inputs: tf.Tensor/np.array
            Input to the layers.
        targets: tf.Tensor/np.array
            True targets that the model wants to learn from,
            in one hot notation.
        samples: int
            The number of samples to be drawn for weights.
            Must be at least 1.
        weight: tf.float32 or equivalent.
            Weight given to the (log posterior - log prior) term of
            each batch. By default, 1.
        inference: bool
            If True, the loss is the cross entropy alone and the
            returned tuple is `(outputs_list, loss)`.
        
        Returns
        =======

        tuple
          `(loss, outputs_list)` by default. `loss` is averaged over
          the weight samples; `outputs_list` holds the network output
          of each weight sample.

        Raises
        ======

        ValueError
          If `samples` is smaller than 1.
        """
        if samples < 1:
            raise ValueError("samples must be at least 1, got %s" % (samples,))
        loss = tf.constant(0., dtype=tf.float32)
        outputs_list = []
        for _ in range(samples):
            weights = []
            pw, qw = tf.constant(0, dtype=tf.float32), tf.constant(0, dtype=tf.float32)
            for layer in self.Layers:
                weights.append(layer._reparametrize())
                # The prior/posterior terms only enter the training loss,
                # so they are not evaluated in inference mode.
                if not inference:
                    pw += self.log_prior(weights[-1])
                    qw += self.log_posterior(weights[-1], layer.kernel_mu, layer.kernel_rho)

            outputs = self.run(inputs, *weights)
            outputs_list.append(outputs)
            cse = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(targets, outputs))
            if inference:
                loss += cse
            else:
                loss += (qw - pw)*weight + tf.cast(cse, tf.float32)

        if inference:
            return outputs_list, loss/samples
        return loss/samples, outputs_list

    def compute_gradients(self, inputs, targets, weight, samples=10):
        """
        Computes gradients of cost function for given inputs, 
        targets.

        Parameters
        ==========

        inputs: tf.Tensor
        targets: tf.Tensor
        weight: tf.float32 or equivalent
          The weight given to each batch of input data.
        samples: int
          Number of weight samples used to estimate the loss.
          By default, 10.
        
        Returns
        =======

        list
          Containing gradients, tf.Tensor, w.r.t. `kernel_rho` of each
          layer, in the order of `self.Layers`.
        """
        variables = [layer.kernel_rho for layer in self.Layers]
        # Only the forward pass is recorded. The gradient is requested once,
        # after leaving the context, so a non-persistent tape suffices and
        # its resources are released right away.
        with tf.GradientTape() as tape:
            tape.watch(variables)
            loss, _ = self.get_loss(inputs, targets, samples, weight)
        return tape.gradient(loss, variables)

    def learn(self, inputs, targets, alpha, weight=1.):
        """
        Performs parameter updates with one step of plain gradient
        descent on `kernel_rho` of every layer.

        Parameters
        ==========

        inputs: tf.Tensor
        targets: tf.Tensor
        alpha: tf.float32 or equivalent
          The learning rate.
        weight: tf.float32 or equivalent
          The weight given to each batch of input data.
        
        Returns
        =======

        None
        """
        grads = self.compute_gradients(inputs, targets, weight)
        for layer, grad in zip(self.Layers, grads):
            layer.kernel_rho.assign_sub(tf.scalar_mul(alpha, grad))


In [5]:
from datetime import datetime

prefix = "./drive/My Drive/bnn/"

def random_shuffle(x, y):
    """
    Applies one common random permutation to inputs and labels.

    Parameters
    ==========

    x: np.array
      The input dataset.
    y: np.array
      The target labels, with as many rows as `x`.
    
    Returns
    =======

    tuple
      `(x, y)` reordered along the first axis with the same
      permutation, so that rows stay paired. The arguments
      themselves are not modified.
    """
    order = np.random.permutation(x.shape[0])
    shuffled_x = x[order]
    shuffled_y = y[order]
    return shuffled_x, shuffled_y

def take_subset(x, y, size, count):
    """
    Cuts the `count`-th consecutive window of length `size`
    out of both sequences.

    Parameters
    ==========

    x: np.array
      The input dataset.
    y: np.array
      The target labels.
    size: int
      The size of the subset.
    count: int
      Zero-based index of the window; the subset starts
      at position `count*size`.

    Returns
    =======

    tuple
      The slices of `x` and `y` covering positions
      `count*size` up to, but excluding, `count*size + size`.
    """
    start = count*size
    window = slice(start, start + size)
    return x[window], y[window]

def train(input_data_path, target_data_path, checkpoint=None, 
          alpha=0.01, batch_size=1, dataset_size=1,
          epochs=1, cache_result=True, use_cache=False):
    """
    Trains the neural network, `ANN`, and saves its weights to
    `<prefix>weights/mnist/_<run name>`. Progress is logged to
    `<prefix>logs/_<run name>`. The run name is `checkpoint` when
    given and a timestamp otherwise.

    Parameters
    ==========

    input_data_path: str
      The path to the file containing input data. Must be binary.
    target_data_path: str
      The path to the file containing target label data. Must be binary.
    checkpoint: str
      The name of the run whose weights are loaded before training.
      Logs are appended to, and weights saved over, that run.
      By default, None.
    alpha: tf.float32 or equivalent
      The learning rate.
    batch_size: int
      By default, 1. Samples beyond the last full batch are
      not used within an epoch.
    dataset_size: int
      By default, 1.
    epochs: int
      By default 1.
    cache_result: bool
      If True, caches the pre-processed data to 
      cache files of `npy` format. By default, True.
    use_cache: bool
      If True, uses the previously cached data, if possible.
      By default, False.
    
    Returns
    =======

    None
    """
    if checkpoint is not None:
        run_name = checkpoint
    else:
        run_name = str(datetime.now()).replace(' ', '_').replace(':', '_').replace('-', '_').replace('.', '_')
    log_path = prefix + "logs/_" + run_name
    weights_path = prefix + "weights/mnist/_" + run_name

    def log(message):
        # Open, append and close for every record: nothing stays buffered
        # and no handle is leaked when training fails or is interrupted.
        with open(log_path, 'a') as log_file:
            log_file.write(message + '\n')

    if checkpoint is None:
        # A new run starts from an empty log file.
        with open(log_path, 'w'):
            pass

    inputs = read_idx(input_data_path, True, True, True, dataset_size, False, cache_result, use_cache)
    targets = read_idx(target_data_path, False, False, True, dataset_size, True, cache_result, use_cache)
    log(str((inputs.shape, targets.shape)))
    size = targets.shape[0]
    num_batches = size//batch_size
    model = ANN(input_shape=(batch_size, int(inputs.shape[-1])))
    if checkpoint is not None:
        model.load_weights(weights_path)
    log("Number of batches per epoch: " + str(num_batches))
    log("Number of epochs: " + str(epochs))
    log("Initial Loss: " + str(model.get_loss(inputs, targets)[0]))
    for epoch in range(epochs):
        inputs, targets = random_shuffle(inputs, targets)
        for batch in range(num_batches):
            input_sub, target_sub = take_subset(inputs, targets, batch_size, batch)
            model.learn(input_sub, target_sub, alpha)
        log("Loss at completion of epoch " + str(epoch) + " is " + str(model.get_loss(inputs, targets)[0]))
        # Save every second epoch and always after the last one, so the
        # final weights are never lost when `epochs` is even.
        if epoch%2 == 0 or epoch == epochs - 1:
            model.save_weights(weights_path)

def train_bnn(input_data_path, target_data_path, ann_weights, checkpoint=None, 
              alpha=0.01, batch_size=1, dataset_size=1,
              epochs=1, cache_result=True, use_cache=False):
    """
    Trains the neural network, `BNN_Normal_Normal`, and saves its
    parameters to `<prefix>weights/mnist/<run name>`. Progress is
    logged to `<prefix>logs/<run name>`. The run name is `checkpoint`
    when given and a timestamp otherwise.

    Parameters
    ==========

    input_data_path: str
      The path to the file containing input data. Must be binary.
    target_data_path: str
      The path to the file containing target label data. Must be binary.
    ann_weights: str
      The path to the file which contains weights of a pre-trained ANN
      whose weights will be used as means of the Gaussian distributions
      of the weights of the new neural network. Only read when
      `checkpoint` is None.
    checkpoint: str
      The name of the run whose parameters are loaded before training.
      Logs are appended to, and parameters saved over, that run.
      By default, None.
    alpha: tf.float32 or equivalent
      The learning rate.
    batch_size: int
      By default, 1. Samples beyond the last full batch are
      not used within an epoch.
    dataset_size: int
      By default, 1.
    epochs: int
      By default 1.
    cache_result: bool
      If True, caches the pre-processed data to 
      cache files of `npy` format. By default, True.
    use_cache: bool
      If True, uses the previously cached data, if possible.
      By default, False.
    
    Returns
    =======

    None
    """
    if checkpoint is not None:
        run_name = checkpoint
    else:
        run_name = str(datetime.now()).replace(' ', '_').replace(':', '_').replace('-', '_').replace('.', '_')
    log_path = prefix + "logs/" + run_name
    weights_path = prefix + "weights/mnist/" + run_name

    def log(message):
        # Open, append and close for every record: nothing stays buffered
        # and no handle is leaked when training fails or is interrupted.
        with open(log_path, 'a') as log_file:
            log_file.write(message + '\n')

    if checkpoint is None:
        # A new run starts from an empty log file.
        with open(log_path, 'w'):
            pass

    inputs = read_idx(input_data_path, True, True, True, dataset_size, False, cache_result, use_cache)
    targets = read_idx(target_data_path, False, False, True, dataset_size, True, cache_result, use_cache)
    log(str((inputs.shape, targets.shape)))
    size = targets.shape[0]
    num_batches = size//batch_size
    num_features = int(inputs.shape[-1])
    model = BNN_Normal_Normal(input_shape=(batch_size, num_features))
    if checkpoint is not None:
        model.load_weights(weights_path)
    else:
        # Start from a trained point estimate: the ANN weights become the
        # means of the weight distributions.
        modelANN = ANN(input_shape=(None, num_features))
        modelANN.load_weights(ann_weights)
        for layerANN, layerBNN in zip(modelANN.Layers, model.Layers):
            layerBNN.kernel_mu.assign(layerANN.kernel_mu)
    log("Number of batches per epoch: " + str(num_batches))
    log("Number of epochs: " + str(epochs))
    log("Initial Loss: " + str(model.get_loss(inputs, targets, 1)[0]))
    # Each batch carries an equal share of the (posterior - prior) term.
    batch_weight = 1/num_batches if num_batches > 0 else 1.
    for epoch in range(epochs):
        inputs, targets = random_shuffle(inputs, targets)
        for batch in range(num_batches):
            input_sub, target_sub = take_subset(inputs, targets, batch_size, batch)
            model.learn(input_sub, target_sub, alpha, batch_weight)
        log("Loss at completion of epoch " + str(epoch) + " is " + str(model.get_loss(inputs, targets, 10)[0]))
        log("Cross entropy at completion of epoch " + str(epoch) + " is " + str(model.get_loss(inputs, targets, 1, 1., True)[1]))
        # Save every second epoch and always after the last one, so the
        # final parameters are never lost when `epochs` is even.
        if epoch%2 == 0 or epoch == epochs - 1:
            model.save_weights(weights_path)

In [6]:
"""
Training `ANN` on MNIST database of handwritten digits.
"""
train(prefix + "datasets/mnist_train_images", prefix + "datasets/mnist_train_labels", 
      None, 0.01, 20000, 60000, 100, False, True)

Loaded cached data from ./drive/My Drive/bnn/datasets/mnist_train_images_cache.npy
Loaded cached data from ./drive/My Drive/bnn/datasets/mnist_train_labels_cache.npy
Instructions for updating:
Please use `layer.add_weight` method instead.


KeyboardInterrupt: ignored

In [7]:
def test(file_path, input_shape, input_data, output_data, cache_result, use_cache):
    """
    Evaluates a trained `ANN` on a labelled dataset.

    Parameters
    ==========

    file_path: str
      The path to the file from where the saved `ANN` weights
      are to be loaded.
    input_shape: tuple
      Passed on to the `ANN` constructor.
    input_data: str
      The path to the file containing input data. Must be binary.
    output_data: str
      The path to the file containing target labels. Must be binary.
    cache_result: bool
      If True, caches the pre-processed data to 
      cache files of `npy` format.
    use_cache: bool
      If True, uses the previously cached data, if possible.
    
    Returns
    =======

    Tuple of the following,

    preds: tf.Tensor
      Predictions made by neural network.
    targets: np.array
      The target labels in one hot notation.
    total_loss: tf.float32
      Cross entropy of the predictions.

    Note
    ====

    At most 10000 data points are requested from each file.
    """
    network = ANN(input_shape=input_shape)
    network.load_weights(file_path)
    inputs = read_idx(input_data, flatten=True, normalize=True, show_logs=True,
                      num_data=10000, to_one_hot=False,
                      cache_result=cache_result, use_cache=use_cache)
    targets = read_idx(output_data, flatten=False, normalize=False, show_logs=True,
                       num_data=10000, to_one_hot=True,
                       cache_result=cache_result, use_cache=use_cache)
    # inference=True makes get_loss return (outputs, loss).
    preds, total_loss = network.get_loss(inputs, targets, inference=True)
    return preds, targets, total_loss

In [8]:
"""
Training loop for benchmarking. 
This cell keeps on optimising the weights of the ANN until the test accuracy keeps on increasing.
"""
curr_acc = 0
while True:
  train(prefix + "datasets/mnist_train_images", prefix + "datasets/mnist_train_labels", 
        "2020_08_02_06_33_12_410605", 0.01, 20000, 60000, 25, False, True)
  preds, targets, _ = test(prefix + "weights/mnist/_2020_08_02_06_33_12_410605", (None, 784), 
                           prefix + "datasets/mnist_test_images", prefix + "datasets/mnist_test_labels",
                           False, True)
  acc = 0
  for pred, target in zip(preds, targets):
    if tf.math.argmax(pred) == tf.math.argmax(target):
      acc += 1
  print("Accuracy: ", acc/100)
  if acc >= curr_acc:
    curr_acc = acc
  else:
    break

Loaded cached data from ./drive/My Drive/bnn/datasets/mnist_train_images_cache.npy
Loaded cached data from ./drive/My Drive/bnn/datasets/mnist_train_labels_cache.npy


KeyboardInterrupt: ignored

In [9]:
"""
Training `BNN_Normal_Normal` on MNIST database of handwritten digits
using the weights of a pre-trained `ANN`.
"""
train_bnn(prefix + "datasets/mnist_train_images", prefix + "datasets/mnist_train_labels", 
          prefix + "weights/mnist/_2020_08_02_06_33_12_410605", None, 0.01, 20000,
          60000, 20, False, True)

Loaded cached data from ./drive/My Drive/bnn/datasets/mnist_train_images_cache.npy
Loaded cached data from ./drive/My Drive/bnn/datasets/mnist_train_labels_cache.npy


KeyboardInterrupt: ignored

In [10]:
def test(file_path, input_shape, input_data, output_data, cache_result, use_cache):
    """
    Evaluates a trained `BNN_Normal_Normal` on a labelled dataset,
    drawing 10 weight samples.

    Parameters
    ==========

    file_path: str
      The path to the file from where the saved `BNN_Normal_Normal` is to be loaded.
    input_shape: tuple
      Passed on to the `BNN_Normal_Normal` constructor.
    input_data: str
      The path to the file containing input data. Must be binary.
    output_data: str
      The path to the file containing target labels. Must be binary.
    cache_result: bool
      If True, caches the pre-processed data to 
      cache files of `npy` format.
    use_cache: bool
      If True, uses the previously cached data, if possible.
    
    Returns
    =======

    Tuple of the following,

    preds: list containing tf.Tensor
      Predictions made by neural network, one tensor
      for each of the 10 weight samples.
    targets: np.array
      The target labels in one hot notation.
    total_loss: tf.float32
      Cross entropy averaged over the weight samples.

    Note
    ====

    At most 10000 data points are requested from each file.
    This definition replaces the `test` function for `ANN`
    defined in an earlier cell.
    """
    network = BNN_Normal_Normal(input_shape=input_shape)
    network.load_weights(file_path)
    inputs = read_idx(input_data, flatten=True, normalize=True, show_logs=True,
                      num_data=10000, to_one_hot=False,
                      cache_result=cache_result, use_cache=use_cache)
    targets = read_idx(output_data, flatten=False, normalize=False, show_logs=True,
                       num_data=10000, to_one_hot=True,
                       cache_result=cache_result, use_cache=use_cache)
    # inference=True makes get_loss return (outputs_list, loss).
    preds, total_loss = network.get_loss(inputs, targets, samples=10,
                                         weight=1., inference=True)
    return preds, targets, total_loss

In [11]:
# Evaluates the trained `BNN_Normal_Normal` on the MNIST test set and prints
# the accuracy in percent, averaged over all weight samples returned by `test`.
preds, targets, _ = test(prefix + "weights/mnist/2020_08_09_08_27_21_674420", (None, 784), 
                         prefix + "datasets/mnist_test_images", prefix + "datasets/mnist_test_labels",
                         False, True)
true_labels = tf.math.argmax(targets, axis=1)
acc = 0
for sample_preds in preds:
  # One vectorised comparison per weight sample instead of two TensorFlow
  # ops per test image.
  hits = tf.math.equal(tf.math.argmax(sample_preds, axis=1), true_labels)
  acc += int(tf.reduce_sum(tf.cast(hits, tf.int32)).numpy())
# Normalise by the actual number of weight samples and test images.
print("Accuracy: ", acc/(len(preds)*len(targets)/100))

Loaded cached data from ./drive/My Drive/bnn/datasets/mnist_test_images_cache.npy
Loaded cached data from ./drive/My Drive/bnn/datasets/mnist_test_labels_cache.npy
Accuracy:  97.288
