In [1]:
import os

#import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.compat.v1.disable_eager_execution()
print(tf.__version__)

import time

import numpy as np # linear algebra
import matplotlib.pyplot as plt

2.15.0


In [2]:
from tensorflow.keras.datasets import cifar10

# CIFAR-10 image dimensions
img_size = 32

# CIFAR-10 images are 32x32 pixels with 3 color channels (RGB)
input_size = 32 * 32 * 3

# CIFAR-10 has 10 different classes
n_classes = 10
output_size = 10

In [3]:
# Load CIFAR-10 dataset
print('\nLoading CIFAR-10')
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Normalize the data
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0

# Flatten the images for a fully connected network (if necessary)
x_train = np.reshape(x_train, [-1, 32*32*3])
x_test = np.reshape(x_test, [-1, 32*32*3])

# Convert class vectors to binary class matrices (one-hot encoding)
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Splitting data for validation
print('\nSplitting data')
validation_pct = 0.1
num_train = int(x_train.shape[0] * (1 - validation_pct))
x_valid = x_train[num_train:]
y_valid = y_train[num_train:]
x_train = x_train[:num_train]
y_train = y_train[:num_train]

# Print the number of examples in each set
train_num_examples = x_train.shape[0]
valid_num_examples = x_valid.shape[0]
test_num_examples = x_test.shape[0]

print(f"Training examples: {train_num_examples}")
print(f"Validation examples: {valid_num_examples}")
print(f"Test examples: {test_num_examples}")


Loading CIFAR-10

Splitting data
Training examples: 45000
Validation examples: 5000
Test examples: 10000


In [4]:
# Global Parameters
#--------------------------------
# learning rate
learning_rate = 0.05

#training_epochs = 1000
#batch_size = 30

training_epochs = 100
batch_size = 50

display_step = 10

In [5]:
def layer(x, weight_shape, bias_shape, activation_function='relu'):
    """
    Defines the network layers
    input:
        - x: input vector of the layer
        - weight_shape: shape the the weight maxtrix
        - bias_shape: shape of the bias vector
    output:
        - output vector of the layer after the matrix multiplication and non linear transformation
    """
    
    # comes from the study by He et al. for ReLU layers
    w_std = (2.0/weight_shape[0])**0.5
    #print(weight_shape[0])
    #w_std = 0.5;

    #initialization of the weights
    #you can try either
    w_0 = tf.random_normal_initializer(stddev=w_std)
    #w_0 = tf.random_uniform_initializer(minval=-1,maxval=1)

    b_0 = tf.constant_initializer(value=0)
    
    W = tf.get_variable("W", weight_shape, initializer=w_0)
    b = tf.get_variable("b", bias_shape,   initializer=b_0)
    
    print('Weight Matrix:', W)
    print('Bias Vector:', b)

    # (1) linear activation (not a good idea)
    linear_output = tf.matmul(x, W) + b
    
    # (2) tanh activation
    if activation_function == 'tanh':
        return tf.nn.tanh(linear_output)
    # (3) sigmoid activation
    elif activation_function == 'sigmoid':
        return tf.nn.sigmoid(linear_output)
    # (4) leaky_relu activation
    elif activation_function == 'leaky_relu':
        return tf.nn.leaky_relu(linear_output)
    # (5) relu activation
    elif activation_function == 'relu':
        return tf.nn.relu(linear_output)
    # (6) linear output
    else:
        return linear_output  # Linear activation


In [6]:
def loss_1(output, y):
    """
    computes the average error per data sample 
    by computing the cross-entropy loss over a minibatch
    intput:
        - output: the output of the inference function 
        - y: true value of the sample batch
        
        the two have the same shape (batch_size * num_of_classes)
    output:
        - loss: loss of the corresponding batch (scalar tensor)
    
    """
    dot_product = y * tf.log(output)
    
    #tf.reduce_sum: Computes the sum of elements across dimensions of a tensor.
    xentropy = -tf.reduce_sum(dot_product, 1)
    
    #tf.reduce_mean: Computes the mean of elements across dimensions of a tensor.
    loss = tf.reduce_mean(xentropy)

    return loss

In [7]:
def loss_2(output, y):
    """
    Computes softmax cross entropy between logits and labels and then the loss 
    
    intput:
        - output: the output of the inference function 
        - y: true value of the sample batch
        
        the two have the same shape (batch_size * num_of_classes)
    output:
        - loss: loss of the corresponding batch (scalar tensor)
    
    """

    #mean square error
    #loss = tf.reduce_mean(tf.reduce_sum(tf.square(y-output)))
    
    #Computes softmax cross entropy between logits and labels.
    xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=y)
    loss = tf.reduce_mean(xentropy)

    return loss

In [8]:
def training(cost, global_step):
    """
    defines the necessary elements to train the network
    
    intput:
        - cost: the cost is the loss of the corresponding batch
        - global_step: number of batch seen so far, it is incremented by one 
        each time the .minimize() function is called
    """

    tf.summary.scalar("cost", cost)
    
    # tf.train.GradientDescentOptimizer
    # optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    
    # try `tf.train.RMSPropOptimizer` as desired
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
    
    train_op = optimizer.minimize(cost, global_step=global_step)

    return train_op

In [9]:
def evaluate(output, y):
    """
    evaluates the accuracy on the validation set 
    input:
        -output: prediction vector of the network for the validation set
        -y: true value for the validation set
    output:
        - accuracy: accuracy on the validation set (scalar between 0 and 1)
    """
    #correct prediction is a binary vector which equals one when the output and y match
    #otherwise the vector equals 0
    #tf.cast: change the type of a tensor into another one
    #then, by taking the mean of the tensor, we directly have the average score, so the accuracy
    
    correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1))
    
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    tf.summary.scalar("validation_error", (1.0 - accuracy))

    return accuracy

In [12]:
def main_function(layer_name):
    if __name__ == '__main__':

        start_time = time.time()

        if not os.path.isdir('./logs/'):
            os.makedirs('./logs/')
        log_files_path = './logs/'

        with tf.Graph().as_default():

            with tf.variable_scope("layer_name"):
                #neural network definition 

                #the input variables are first define as placeholder 
                # a placeholder is a variable/data which will be assigned later 
                # image vector & label
                x = tf.placeholder("float", [None, input_size])   # MNIST data image of shape 28*28=784
                y = tf.placeholder("float", [None, output_size])  # 0-9 digits recognition

                #the network is defined using the inference function defined above in the code
                output = inference(x)

                cost = loss_2(output, y)

                #initialize the value of the global_step variable 
                # recall: it is incremented by one each time the .minimise() is called
                global_step = tf.Variable(0, name='global_step', trainable=False)

                train_op = training(cost, global_step)
                #train_op = training(cost, global_step=None)

                #evaluate the accuracy of the network (done on a validation set)
                eval_op = evaluate(output, y)

                summary_op = tf.summary.merge_all()

                #save and restore variables to and from checkpoints.
                saver = tf.train.Saver()

                #defines a session
                sess = tf.Session()

                # summary writer
                #https://www.tensorflow.org/api_docs/python/tf/summary/FileWriter
                #
                summary_writer = tf.summary.FileWriter(log_files_path + '{layer}/'.format(layer = layer_name), sess.graph)

                #initialization of all the variables
                init_op = tf.global_variables_initializer()
                sess.run(init_op)

                #will work with this later
                #saver.restore(sess, log_files_path+'multi_layer/model-checkpoint-66000')

                loss_trace = []

                # Training cycle
                for epoch in range(training_epochs):

                    avg_cost = 0.

                    #total_batch = int(mnist.train.num_examples/batch_size)
                    total_batch = int((train_num_examples+batch_size-1) / batch_size)

                    # Loop over all batches
                    for i in range(total_batch):

                        #option 1
                        #minibatch_x, minibatch_y = mnist.train.next_batch(batch_size, shuffle=False)

                        #option 2
                        start = i * batch_size
                        end = min(train_num_examples, start + batch_size)
                        minibatch_x = x_train[start:end]
                        minibatch_y = y_train[start:end]

                        # Fit training using batch data
                        #the training is done using the training dataset
                        sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y})

                        # Compute average loss
                        avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y})/total_batch

                    # Display logs per epoch step
                    if epoch % display_step == 0:

                        #the accuracy is evaluated using the validation dataset

                        #option 1
                        #accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels})

                        #option 2
                        accuracy = sess.run(eval_op, feed_dict={x: x_valid, y: y_valid})
                        loss_trace.append(1-accuracy)    
                        print("Epoch:", '%03d' % epoch, "cost function=", "{:0.7f}".format(avg_cost), " Validation Error:", (1.0 - accuracy))
                        summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y})
                        summary_writer.add_summary(summary_str, sess.run(global_step))

                        #save to use later
                        #https://www.tensorflow.org/api_docs/python/tf/train/Saver
                        #saver.save(sess, log_files_path+'model-checkpoint', global_step=global_step)
                        saver.save(sess, log_files_path+'{layer}/model-checkpoint'.format(layer = layer_name), global_step=global_step)

                print("Optimization Finished!")
                #accuracy evaluated with the whole test dataset

                #option 1
                #accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels})

                #option 2
                accuracy = sess.run(eval_op, feed_dict={x: x_test, y: y_test})
                print("Test Accuracy:", accuracy)

                elapsed_time = time.time() - start_time
                print('Execution time (seconds) was %0.3f' % elapsed_time)

                # Visualization of the results
                # loss function
                #plt.plot(loss_trace)
                #plt.title('Cross Entropy Loss')
                #plt.xlabel('epoch')
                #plt.ylabel('loss')
                #plt.show()

In [13]:
# updated parameters - 200 neurons
#Network Architecture
# -----------------------------------------
#
# Three hidden layers
#
#------------------------------------------
# number of neurons in layer 1
n_hidden_1 = 200
# number of neurons in layer 2
n_hidden_2 = 200
# number of neurons in layer 3
n_hidden_3 = 200

def inference(x):
    
    with tf.variable_scope("hidden_layer_1"):
        hidden_1 = layer(x, [input_size, n_hidden_1], [n_hidden_1], 'tanh')

    with tf.variable_scope("hidden_layer_2"):
        hidden_2 = layer(hidden_1, [n_hidden_1, n_hidden_2], [n_hidden_2], 'sigmoid')

    with tf.variable_scope("hidden_layer_3"):
        hidden_3 = layer(hidden_2, [n_hidden_2, n_hidden_3], [n_hidden_3], 'leaky_relu')

    with tf.variable_scope("output"):
        output = layer(hidden_3, [n_hidden_3, output_size], [output_size], 'linear')  # Linear activation for output

    return output

main_function('multi_layer_4_1_200')

Weight Matrix: <tf.Variable 'layer_name/hidden_layer_1/W:0' shape=(3072, 200) dtype=float32>
Bias Vector: <tf.Variable 'layer_name/hidden_layer_1/b:0' shape=(200,) dtype=float32>
Weight Matrix: <tf.Variable 'layer_name/hidden_layer_2/W:0' shape=(200, 200) dtype=float32>
Bias Vector: <tf.Variable 'layer_name/hidden_layer_2/b:0' shape=(200,) dtype=float32>
Weight Matrix: <tf.Variable 'layer_name/hidden_layer_3/W:0' shape=(200, 200) dtype=float32>
Bias Vector: <tf.Variable 'layer_name/hidden_layer_3/b:0' shape=(200,) dtype=float32>
Weight Matrix: <tf.Variable 'layer_name/output/W:0' shape=(200, 10) dtype=float32>
Bias Vector: <tf.Variable 'layer_name/output/b:0' shape=(10,) dtype=float32>
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the construc

2023-12-21 20:23:09.607795: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled


Epoch: 000 cost function= 6.7399019  Validation Error: 0.9024000018835068
Epoch: 010 cost function= 10.1236504  Validation Error: 0.8962000012397766
Epoch: 020 cost function= 10.6184374  Validation Error: 0.9050000011920929
Epoch: 030 cost function= 9.9545677  Validation Error: 0.9024000018835068
Epoch: 040 cost function= 9.8812203  Validation Error: 0.9024000018835068
Epoch: 050 cost function= 10.6476730  Validation Error: 0.9024000018835068
Instructions for updating:
Use standard file APIs to delete files with this prefix.
Epoch: 060 cost function= 10.5869318  Validation Error: 0.9013999998569489
Epoch: 070 cost function= 11.1056826  Validation Error: 0.9024000018835068
Epoch: 080 cost function= 11.1319421  Validation Error: 0.8941999971866608
Epoch: 090 cost function= 11.1444225  Validation Error: 0.9024000018835068
Optimization Finished!
Test Accuracy: 0.1
Execution time (seconds) was 159.389


In [14]:
# updated parameters - 100 neurons
#Network Architecture
# -----------------------------------------
#
# Four hidden layers
#
#------------------------------------------
# number of neurons in layer 1
n_hidden_1 = 100
# number of neurons in layer 2
n_hidden_2 = 100
# number of neurons in layer 3
n_hidden_3 = 100
# number of neurons in layer 4
n_hidden_4 = 100

def inference(x):
    with tf.variable_scope("hidden_layer_1"):
        hidden_1 = layer(x, [input_size, n_hidden_1], [n_hidden_1], 'tanh')

    with tf.variable_scope("hidden_layer_2"):
        hidden_2 = layer(hidden_1, [n_hidden_1, n_hidden_2], [n_hidden_2], 'sigmoid')

    with tf.variable_scope("hidden_layer_3"):
        hidden_3 = layer(hidden_2, [n_hidden_2, n_hidden_3], [n_hidden_3], 'sigmoid')

    with tf.variable_scope("hidden_layer_4"):
        hidden_4 = layer(hidden_3, [n_hidden_3, n_hidden_4], [n_hidden_4], 'relu')

    with tf.variable_scope("output"):
        output = layer(hidden_4, [n_hidden_4, output_size], [output_size], 'linear')  # Linear activation for output

    return output

main_function('multi_layer_4_2_100')

Weight Matrix: <tf.Variable 'layer_name/hidden_layer_1/W:0' shape=(3072, 100) dtype=float32>
Bias Vector: <tf.Variable 'layer_name/hidden_layer_1/b:0' shape=(100,) dtype=float32>
Weight Matrix: <tf.Variable 'layer_name/hidden_layer_2/W:0' shape=(100, 100) dtype=float32>
Bias Vector: <tf.Variable 'layer_name/hidden_layer_2/b:0' shape=(100,) dtype=float32>
Weight Matrix: <tf.Variable 'layer_name/hidden_layer_3/W:0' shape=(100, 100) dtype=float32>
Bias Vector: <tf.Variable 'layer_name/hidden_layer_3/b:0' shape=(100,) dtype=float32>
Weight Matrix: <tf.Variable 'layer_name/hidden_layer_4/W:0' shape=(100, 100) dtype=float32>
Bias Vector: <tf.Variable 'layer_name/hidden_layer_4/b:0' shape=(100,) dtype=float32>
Weight Matrix: <tf.Variable 'layer_name/output/W:0' shape=(100, 10) dtype=float32>
Bias Vector: <tf.Variable 'layer_name/output/b:0' shape=(10,) dtype=float32>
Epoch: 000 cost function= 2.2975094  Validation Error: 0.9013999998569489
Epoch: 010 cost function= 2.2887199  Validation Error

In [15]:
# updated parameters - 50 neurons
#Network Architecture
# -----------------------------------------
#
# Four hidden layers
#
#------------------------------------------
# number of neurons in layer 1
n_hidden_1 = 50
# number of neurons in layer 2
n_hidden_2 = 50
# number of neurons in layer 3
n_hidden_3 = 50
# number of neurons in layer 4
n_hidden_4 = 50

def inference(x):
    with tf.variable_scope("hidden_layer_1"):
        hidden_1 = layer(x, [input_size, n_hidden_1], [n_hidden_1], 'tanh')

    with tf.variable_scope("hidden_layer_2"):
        hidden_2 = layer(hidden_1, [n_hidden_1, n_hidden_2], [n_hidden_2], 'sigmoid')

    with tf.variable_scope("hidden_layer_3"):
        hidden_3 = layer(hidden_2, [n_hidden_2, n_hidden_3], [n_hidden_3], 'sigmoid')

    with tf.variable_scope("hidden_layer_4"):
        hidden_4 = layer(hidden_3, [n_hidden_3, n_hidden_4], [n_hidden_4], 'relu')

    with tf.variable_scope("output"):
        output = layer(hidden_4, [n_hidden_4, output_size], [output_size], 'linear')  # Linear activation for output

    return output

main_function('multi_layer_4_2_50')

Weight Matrix: <tf.Variable 'layer_name/hidden_layer_1/W:0' shape=(3072, 50) dtype=float32>
Bias Vector: <tf.Variable 'layer_name/hidden_layer_1/b:0' shape=(50,) dtype=float32>
Weight Matrix: <tf.Variable 'layer_name/hidden_layer_2/W:0' shape=(50, 50) dtype=float32>
Bias Vector: <tf.Variable 'layer_name/hidden_layer_2/b:0' shape=(50,) dtype=float32>
Weight Matrix: <tf.Variable 'layer_name/hidden_layer_3/W:0' shape=(50, 50) dtype=float32>
Bias Vector: <tf.Variable 'layer_name/hidden_layer_3/b:0' shape=(50,) dtype=float32>
Weight Matrix: <tf.Variable 'layer_name/hidden_layer_4/W:0' shape=(50, 50) dtype=float32>
Bias Vector: <tf.Variable 'layer_name/hidden_layer_4/b:0' shape=(50,) dtype=float32>
Weight Matrix: <tf.Variable 'layer_name/output/W:0' shape=(50, 10) dtype=float32>
Bias Vector: <tf.Variable 'layer_name/output/b:0' shape=(10,) dtype=float32>
Epoch: 000 cost function= 2.2903074  Validation Error: 0.9013999998569489
Epoch: 010 cost function= 2.2887199  Validation Error: 0.90139999