# Neural Network from Scratch in TensorFlow

# Task 1: Introduction and Importing Libraries

In [27]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import nn_utils
%matplotlib inline

# Record which TensorFlow version this notebook is being run with.
print('TensorFlow Version:', tf.__version__)

TensorFlow Version: 2.15.0


# Task 2: Initializing Neural Network

In [8]:
# Example: 10 features, 100 units in the hidden layer, 10 classes
#   net = NeuralNetwork([10, 100, 10])
class NeuralNetwork:
    """Fully connected feed-forward network whose parameters are tf.Variables.

    Args:
        layers: Sequence of layer sizes, e.g. ``[10, 100, 10]``. The first
            entry is the number of input features, the last entry is the
            number of classes, and every entry in between is a hidden layer.

    Raises:
        ValueError: If fewer than two layer sizes are given (an input size
            and an output size are the minimum needed to have parameters).
    """

    def __init__(self, layers):
        layers = list(layers)
        # Fail early with a clear message instead of an IndexError (empty
        # list) or a silently parameter-less network (single entry).
        if len(layers) < 2:
            raise ValueError(
                "layers must contain at least an input size and an output "
                "size, got {!r}".format(layers)
            )

        self.layers = layers
        self.L = len(layers)  # number of layers, counting the input layer
        self.num_features = layers[0]
        self.num_classes = layers[-1]

        # Parameters, keyed by layer index 1..L-1.
        self.W = {}  # weights
        self.b = {}  # biases

        # Gradients of the loss w.r.t. the parameters, same keys as above.
        self.dW = {}
        self.db = {}

        self.setup()

    def setup(self):
        """Create randomly initialised weights and biases for layers 1..L-1."""
        # Index 0 is the input layer; it only fixes the number of features
        # and has no parameters of its own.
        for i in range(1, self.L):
            units_out, units_in = self.layers[i], self.layers[i - 1]
            # A uniform initialiser could be used here as well.
            self.W[i] = tf.Variable(tf.random.normal(shape=(units_out, units_in)))
            self.b[i] = tf.Variable(tf.random.normal(shape=(units_out, 1)))

# Task 3: Forward Pass

In [9]:
# Extend the NeuralNetwork class
class NeuralNetwork(NeuralNetwork):
    def forward_pass(self, X):
        """Propagate a batch through every layer and return the raw logits.

        ``X`` is converted to a float32 tensor and multiplied by the
        transposed weights, so rows of ``X`` are examples and columns are
        features. Hidden layers apply ReLU; the last layer stays linear.
        """
        activation = tf.convert_to_tensor(X, dtype=tf.float32)
        output_layer = self.L - 1
        for layer in range(1, self.L):
            # W is stored as (units_out, units_in) and b as (units_out, 1),
            # so both are transposed to line up with row-major batches.
            weights_t = tf.transpose(self.W[layer])
            bias_row = tf.transpose(self.b[layer])
            pre_activation = tf.matmul(activation, weights_t) + bias_row
            if layer == output_layer:
                # Final layer: keep the linear output as-is.
                activation = pre_activation
            else:
                activation = tf.nn.relu(pre_activation)
        return activation

# Task 4: Computing Loss and Updating Parameters

In [10]:
# Loss computation and gradient-descent parameter update
class NeuralNetwork(NeuralNetwork):
    def compute_loss(self, A, Y):
        """Return the mean softmax cross-entropy of logits ``A`` against labels ``Y``."""
        per_example = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=A)
        # Collapse the per-example losses into one scalar for the batch.
        return tf.reduce_mean(per_example)

    def update_params(self, lr):
        """Apply one gradient-descent step in place using the stored gradients.

        ``lr`` is the learning rate; ``self.dW`` and ``self.db`` must already
        hold a gradient for every layer index 1..L-1.
        """
        for layer in range(1, self.L):
            weight_step = lr * self.dW[layer]
            bias_step = lr * self.db[layer]
            self.W[layer].assign_sub(weight_step)
            self.b[layer].assign_sub(bias_step)

# Task 5: Predict and Info Functions

In [11]:
class NeuralNetwork(NeuralNetwork):
    def predict(self, X):
        """Return the index of the highest-probability class for each row of ``X``."""
        logits = self.forward_pass(X)
        probabilities = tf.nn.softmax(logits)
        # axis=1 picks the best class across the columns of each example.
        return tf.argmax(probabilities, axis=1)

    def info(self):
        """Print input/output sizes, the hidden layers and the parameter count."""
        # Every parameterised layer contributes a weight matrix and a bias column.
        num_params = sum(
            self.W[i].shape[0] * self.W[i].shape[1] + self.b[i].shape[0]
            for i in range(1, self.L)
        )

        print('Input Features:', self.num_features)
        print('Number of Classes:', self.num_classes)
        print('Hidden Layers:')
        print('--------------')
        # Skip the first (input) and last (output) entries of self.layers.
        for idx, units in enumerate(self.layers[1:-1], start=1):
            print('Layer {}, Units {}'.format(idx, units))
        print('--------------')
        print('Number of parameters:', num_params)

# Task 6: Training on Batch

In [12]:
class NeuralNetwork(NeuralNetwork):
    def train_on_batch(self, X, Y, lr):
        """Run one gradient-descent step on a single batch.

        Args:
            X: Batch of inputs; converted to a float32 tensor.
            Y: Batch of labels in the form ``compute_loss`` expects;
                converted to a float32 tensor.
            lr: Learning rate passed on to ``update_params``.

        Returns:
            The batch loss (computed before the parameter update) as a
            NumPy scalar.
        """
        X = tf.convert_to_tensor(X, dtype=tf.float32)
        Y = tf.convert_to_tensor(Y, dtype=tf.float32)

        layer_ids = list(range(1, self.L))
        weights = [self.W[i] for i in layer_ids]
        biases = [self.b[i] for i in layer_ids]

        # tf.Variables are watched automatically, so recording the forward
        # pass is enough to differentiate with respect to all of them.
        with tf.GradientTape() as tape:
            A = self.forward_pass(X)
            loss = self.compute_loss(A, Y)

        # One backward pass for every parameter at once, instead of keeping a
        # persistent tape alive and back-propagating once per variable.
        grads = tape.gradient(loss, weights + biases)
        num_layers = len(layer_ids)
        for offset, i in enumerate(layer_ids):
            self.dW[i] = grads[offset]
            self.db[i] = grads[num_layers + offset]

        self.update_params(lr)

        return loss.numpy()

# Task 7: Training on Complete Set

In [13]:
class NeuralNetwork(NeuralNetwork):
    def train(self, x_train, y_train, x_test, y_test, epochs, steps_per_epoch, batch_size, lr):
        """Train with mini-batch gradient descent, validating after every epoch.

        Each epoch takes ``steps_per_epoch`` consecutive slices of
        ``batch_size`` examples from the start of the training data (no
        shuffling), then evaluates loss and accuracy on the whole test set.

        Args:
            x_train, y_train: Training inputs and labels, sliceable along
                the first axis.
            x_test, y_test: Validation inputs and labels. The true class is
                taken as ``argmax(y_test, axis=1)``.
            epochs: Number of passes to run.
            steps_per_epoch: Number of batches per epoch; also the divisor
                of the average training loss, so it must be at least 1.
            batch_size: Number of examples per batch.
            lr: Learning rate.

        Returns:
            A dict with the lists ``"train_loss"``, ``"val_loss"`` and
            ``"val_acc"``, each holding one value per epoch.
        """
        history = {
            "val_loss": [],
            "train_loss": [],
            "val_acc": []
        }

        # Print a progress dot about every tenth of an epoch. The interval is
        # clamped to 1 because int(steps_per_epoch / 10) is 0 for fewer than
        # 10 steps, which previously caused a modulo-by-zero.
        dot_interval = max(1, int(steps_per_epoch / 10))

        for e in range(0, epochs):
            epoch_train_loss = 0.
            print("Epoch {}".format(e), end = ".")
            for i in range(0, steps_per_epoch):
                # Batch i is the i-th contiguous slice of the training set.
                start, stop = i * batch_size, (i + 1) * batch_size
                x_batch = x_train[start:stop]
                y_batch = y_train[start:stop]

                batch_loss = self.train_on_batch(x_batch, y_batch, lr)
                epoch_train_loss += batch_loss

                if i % dot_interval == 0:
                    print(end = ".")

            history["train_loss"].append(epoch_train_loss/steps_per_epoch)

            val_A = self.forward_pass(x_test)
            val_loss = self.compute_loss(val_A, y_test).numpy()
            history["val_loss"].append(val_loss)

            val_preds = self.predict(x_test)
            # The mean of the element-wise match is the fraction classified
            # correctly (without the mean it would be a per-example mask).
            val_acc = np.mean(np.argmax(y_test, axis = 1) == val_preds.numpy())
            history["val_acc"].append(val_acc)
            # If this does not improve across epochs, something is wrong.
            print("Val acc:", val_acc)

        return history


# Task 8: Application

In [28]:
# Unpack the four arrays returned by nn_utils.load_data() and show the figure
# that nn_utils.plot_random_examples builds from the training pair.
# NOTE(review): the recorded output of this cell is an AttributeError
# (nn_utils has no attribute 'load_data') — confirm that the intended
# nn_utils module is the one being imported; the later cells depend on
# x_train / x_test being defined here.
(x_train, y_train), (x_test, y_test) = nn_utils.load_data()
nn_utils.plot_random_examples(x_train, y_train).show()

AttributeError: module 'nn_utils' has no attribute 'load_data'

In [30]:
# Layer sizes chosen for the dataset: 784 input features, two hidden layers
# of 128 units each, and 10 output classes.
layer_sizes = [784, 128, 128, 10]
net = NeuralNetwork(layer_sizes)
net.info()

Input Features: 784
Number of Classes: 10
Hidden Layers:
--------------
Layer 1, Units 128
Layer 2, Units 128
--------------
Number of parameters: 118282


In [31]:
# Training hyperparameters.
epochs = 5
batch_size = 120
lr = 3e-3

# Only whole batches are counted; any trailing examples are left out.
steps_per_epoch = x_train.shape[0] // batch_size
print("Steps per Epoch:", steps_per_epoch)

NameError: name 'x_train' is not defined

In [None]:
# Train the network and keep the per-epoch metrics for plotting.
history = net.train(
    x_train,
    y_train,
    x_test,
    y_test,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    batch_size=batch_size,
    lr=lr,
)

# Task 9: Results

In [None]:
# Pass the training history to nn_utils.plot_results and display what it
# returns (assumes the returned object exposes .show()).
nn_utils.plot_results(history).show()

In [None]:
# Predicted class index for every test example.
preds = net.predict(x_test) # returned as a tf.Tensor, not a NumPy array

In [None]:
# Same preview helper as for the training data, now also given the
# predictions; .numpy() converts the prediction Tensor to a NumPy array.
nn_utils.plot_random_examples(x_test, y_test, preds.numpy()).show()