In [60]:
import tensorflow as tf
import numpy as np

A simple Python class to implement the Dense neural network layer

Here, W and b are model parameters, and activation is an element-wise function (usually relu, but softmax for the last layer).

NaiveDense creates two tensor variables, W and b, and exposes a __call__() method that applies the forward pass.

```output = activation(dot(input, W) + b)```

It's like tf.keras.layers.Dense


In [61]:
class NaiveDense:
    """Minimal fully connected layer: ``activation(matmul(inputs, W) + b)``.

    Holds two TensorFlow variables:
      * ``W`` of shape ``(input_size, output_size)``, initialised uniformly
        at random in ``[0, 0.1)``.
      * ``b`` of shape ``(output_size,)``, initialised to zeros.

    Args:
        input_size: size of the last dimension of the incoming tensor.
        output_size: number of units produced by the layer.
        activation: callable applied to the affine output.
    """

    def __init__(self, input_size, output_size, activation):
        self.activation = activation

        # Kernel: small positive random values.
        self.W = tf.Variable(
            tf.random.uniform((input_size, output_size), minval=0, maxval=1e-1)
        )

        # Bias: one zero per output unit.
        self.b = tf.Variable(tf.zeros((output_size,)))

    def __call__(self, inputs):
        """Run the forward pass on ``inputs`` and return the activated output."""
        affine = tf.matmul(inputs, self.W) + self.b
        return self.activation(affine)

    @property
    def weights(self):
        """The layer's variables as the list ``[W, b]``."""
        return [self.W, self.b]
        

A simple Sequential Class to chain the dense layers.

It wraps a list of layers and exposes a __call__() method that simply calls the underlying layers on the inputs, in order. It also features a weights property to easily keep track of the layers’ parameters.

In [62]:
class NaiveSequential:
    """Chain of layers applied one after another.

    Args:
        layers: iterable of callables; each one must also expose a
            ``weights`` attribute that yields its parameters.
    """

    def __init__(self, layers):
        self.layers = layers

    def __call__(self, inputs):
        """Feed ``inputs`` through every layer in order and return the result."""
        outputs = inputs
        for transform in self.layers:
            outputs = transform(outputs)
        return outputs

    @property
    def weights(self):
        """Flat list of all layers' weights, in layer order."""
        return [param for layer in self.layers for param in layer.weights]
    

Using this NaiveDense class and this NaiveSequential class, we can create a mock Keras model:

In [63]:
# Two-layer classifier: 784 inputs -> 512 relu units -> 10 softmax outputs.
hidden_layer = NaiveDense(input_size=28 * 28, output_size=512, activation=tf.nn.relu)
output_layer = NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax)
model = NaiveSequential([hidden_layer, output_layer])

# Each dense layer contributes a kernel and a bias.
assert len(model.weights) == 4

A batch Generator to iterate over the MNIST data in mini-batches.

In [64]:
import math

class BatchGenerator:
    """Serve consecutive mini-batches sliced from paired images and labels.

    Args:
        images: sliceable sequence of samples.
        labels: sliceable sequence of targets, same length as ``images``.
        batch_size: number of samples per batch (default 128).

    Attributes:
        num_batches: number of ``next()`` calls needed to cover the data once.
        index: start offset of the batch the next ``next()`` call returns.
    """

    def __init__(self, images, labels, batch_size=128):
        assert len(images) == len(labels)
        self.index = 0
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        # Round up so a final, shorter batch is still counted.
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        """Return the next ``(images, labels)`` batch and advance the cursor."""
        window = slice(self.index, self.index + self.batch_size)
        batch = self.images[window], self.labels[window]
        self.index = window.stop
        return batch

The difficult part is the "training step": updating the weights of the model after running it on one batch of data.

1. Compute the predictions of the model for the images in the batch.
2. Compute the loss value for these predictions, given the actual labels.
3. Compute the gradient of the loss with regard to the model’s weights.
4. Move the weights by a small amount in the direction opposite to the gradient.

TensorFlow's tf.GradientTape object is used to compute the gradient.

In [65]:
def one_training_step(model, images_batch, labels_batch):
    """Run one gradient-descent step on a single batch.

    Computes the model's predictions under a ``tf.GradientTape``, averages the
    sparse categorical cross-entropy over the batch, differentiates that loss
    with respect to ``model.weights`` and hands the gradients to
    ``update_weights``.

    Returns:
        The average loss for this batch.
    """
    loss_fn = tf.keras.losses.sparse_categorical_crossentropy

    with tf.GradientTape() as tape:
        predictions = model(images_batch)
        average_loss = tf.reduce_mean(loss_fn(labels_batch, predictions))

    # Differentiate outside the tape context, then apply the update.
    trainable = model.weights
    update_weights(tape.gradient(average_loss, trainable), trainable)
    return average_loss

Update the weights by moving them "a bit" in a direction that will reduce the loss on this batch. The magnitude of the move is determined by the "learning rate," typically a small quantity.

In [66]:
# learning_rate = 1e-3

# def update_weights(gradients, weights):
#     for g, w  in zip(gradients, weights):
#         w.assign_sub(g*learning_rate)
        
# In practice, we don't implement the weight-update step by hand as above.
# Instead, we use an Optimizer instance from Keras, like:

from tensorflow.keras import optimizers

# Plain stochastic gradient descent with a fixed learning rate.
optimizer = optimizers.SGD(learning_rate=1e-3)

def update_weights(gradients, weights):
    """Apply ``gradients`` to ``weights`` using the module-level optimizer.

    ``gradients`` and ``weights`` are paired positionally.
    """
    grads_and_vars = zip(gradients, weights)
    optimizer.apply_gradients(grads_and_vars)


An epoch of training consists of repeating the step for each batch in the training data, and the full training loop is simply the repetition of one epoch.

In [67]:
def fit(model, images, labels, epochs, batch_size=128):
    """Train ``model`` for ``epochs`` passes over the data.

    Each epoch builds a fresh ``BatchGenerator`` over ``images``/``labels``
    and runs ``one_training_step`` on every batch, printing the loss every
    100 batches.

    Args:
        model: object accepted by ``one_training_step``.
        images: training samples.
        labels: training targets, same length as ``images``.
        epochs: number of passes over the data.
        batch_size: number of samples per mini-batch (default 128).
    """
    for epoch_counter in range(epochs):
        print(f"Epoch {epoch_counter}")

        # Forward the caller's batch_size; it was previously dropped, so the
        # generator always fell back to its own default.
        batch_generator = BatchGenerator(images, labels, batch_size=batch_size)
        for batch_counter in range(batch_generator.num_batches):
            images_batch, labels_batch = batch_generator.next()
            loss = one_training_step(model, images_batch, labels_batch)
            if batch_counter % 100 == 0:
                print(f"loss at batch {batch_counter}: {loss:.2f}")
    

Running the model

In [68]:
from tensorflow.keras.datasets import mnist

# Fetch the MNIST train/test splits.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten every 28x28 image into a 784-long float32 vector and divide by 255.
train_images = train_images.reshape((60000, 28 * 28)).astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28)).astype("float32") / 255

# Train for ten epochs on the training split.
fit(model, train_images, train_labels, epochs=10, batch_size=128)

Epoch 0
loss at batch 0: 5.13
loss at batch 100: 2.22
loss at batch 200: 2.19
loss at batch 300: 2.10
loss at batch 400: 2.22
Epoch 1
loss at batch 0: 1.90
loss at batch 100: 1.87
loss at batch 200: 1.82
loss at batch 300: 1.72
loss at batch 400: 1.84
Epoch 2
loss at batch 0: 1.57
loss at batch 100: 1.57
loss at batch 200: 1.50
loss at batch 300: 1.43
loss at batch 400: 1.52
Epoch 3
loss at batch 0: 1.32
loss at batch 100: 1.33
loss at batch 200: 1.24
loss at batch 300: 1.22
loss at batch 400: 1.29
Epoch 4
loss at batch 0: 1.12
loss at batch 100: 1.15
loss at batch 200: 1.04
loss at batch 300: 1.06
loss at batch 400: 1.12
Epoch 5
loss at batch 0: 0.98
loss at batch 100: 1.01
loss at batch 200: 0.90
loss at batch 300: 0.94
loss at batch 400: 1.00
Epoch 6
loss at batch 0: 0.87
loss at batch 100: 0.91
loss at batch 200: 0.80
loss at batch 300: 0.85
loss at batch 400: 0.91
Epoch 7
loss at batch 0: 0.79
loss at batch 100: 0.82
loss at batch 200: 0.72
loss at batch 300: 0.78
loss at batch 40

Evaluate the model by taking the argmax of its predictions over the test images and comparing the result to the expected labels.

In [69]:
# Forward pass on the test set, converted to a NumPy array.
predictions = model(test_images).numpy()

# Predicted class = index of the largest value along axis 1.
predicted_labels = predictions.argmax(axis=1)

# Fraction of test samples whose predicted class equals the true label.
matches = predicted_labels == test_labels
print(f"accuracy: {matches.mean():.2f}")

accuracy: 0.82
