In [None]:
from tensorflow.keras.datasets import mnist
import tensorflow as tf
import math
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the MNIST train and test splits; each split is an (images, labels) pair.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# Visual sanity check: render the training image at index 1 in grayscale.
sample_digit = x_train[1]
plt.imshow(sample_digit, cmap='gray')

In [None]:
def _scale_to_unit_float32(images):
    """Return `images` as float32 with pixel values scaled by 1/255.

    The division is applied only while the array still has an integer dtype
    (i.e. it holds raw pixel values). Because this cell overwrites `x_train`
    and `x_test`, that guard makes re-running the cell a no-op instead of
    dividing already-normalised data by 255 a second time.
    """
    if images.dtype.kind in ("u", "i"):
        images = images / 255
    return images.astype(np.float32)


x_train = _scale_to_unit_float32(x_train)
x_test = _scale_to_unit_float32(x_test)

In [None]:
class MLP:
    """Fully connected network 784 -> 200 -> 200 -> 10 with sigmoid hidden layers.

    Data flows through the network in column layout: each sample is one
    column, so activations have shape (units, batch_size) and every weight
    matrix has shape (units_out, units_in).
    """

    def __init__(self):
        """Create the trainable parameters.

        Weights are drawn from a zero-mean normal distribution whose standard
        deviation is 1 / sqrt(number of inputs of the layer); biases start at
        zero and are stored as column vectors so they broadcast over the batch.
        """
        # 784 -> 200 -> 200 -> 10
        self.W1 = tf.Variable(tf.random.normal(shape=(200, 784), mean=0.0, stddev=1 / math.sqrt(784)))
        self.b1 = tf.Variable(tf.zeros((200, 1)))
        self.W2 = tf.Variable(tf.random.normal(shape=(200, 200), mean=0.0, stddev=1 / math.sqrt(200)))
        self.b2 = tf.Variable(tf.zeros((200, 1)))
        self.W3 = tf.Variable(tf.random.normal(shape=(10, 200), mean=0.0, stddev=1 / math.sqrt(200)))
        self.b3 = tf.Variable(tf.zeros((10, 1)))

    def get_weights(self):
        """Return all trainable variables, ordered layer by layer (W, then b)."""
        return [self.W1, self.b1, self.W2, self.b2, self.W3, self.b3]

    def call(self, x):
        """Run the forward pass.

        Args:
            x: input of shape (784, batch_size), one flattened image per column.

        Returns:
            Tensor of shape (10, batch_size); each column holds the class
            probabilities of one sample and sums to 1.
        """
        # x.shape = 784 x batch_size

        z1 = self.W1 @ x + self.b1
        a1 = tf.sigmoid(z1)  # 200 x batch_size
        z2 = self.W2 @ a1 + self.b2
        a2 = tf.sigmoid(z2)  # 200 x batch_size
        z3 = self.W3 @ a2 + self.b3  # 10 x batch_size
        # Classes live on axis 0 in this layout. softmax defaults to the last
        # axis, which here is the batch axis, so the class axis must be named
        # explicitly to get one probability distribution per sample.
        y_hat = tf.math.softmax(z3, axis=0)  # 10 x batch_size
        return y_hat

In [None]:
# Fix TensorFlow's global seed right before building the network so that the
# random weight initialisation is the same every time this cell is executed.
# Note that re-running this cell replaces `mlp`, discarding any trained weights.
tf.random.set_seed(42)
mlp = MLP()

In [None]:
def accuracy(y_true, y_pred):
    """Return the fraction of entries of `y_pred` equal to `y_true` as a Python float.

    Both arguments are compared element-wise, so they must have matching
    shapes and dtypes.
    """
    matches = tf.math.equal(y_true, y_pred)
    n_correct = tf.reduce_sum(tf.cast(matches, dtype=tf.float32))
    n_total = tf.cast(tf.size(y_true), dtype=tf.float32)
    fraction = n_correct / n_total
    return fraction.numpy().item()

In [None]:
EPOCHS = 10
BATCH_SIZE = 32
ALPHA = 1e-2     # learning rate of the gradient-descent update
LOG_EVERY = 200  # print the mini-batch loss once every LOG_EVERY batches


def _as_columns(images):
    """Flatten every image and return the batch as a (features, n_images) tensor.

    The number of images is read from the data itself, so a shorter final
    mini-batch (or a different dataset size) is reshaped correctly.
    """
    flat = tf.reshape(images, shape=(len(images), -1))
    return tf.transpose(flat, perm=(1, 0))


n_train = len(x_train)
weights = mlp.get_weights()

for epoch in range(EPOCHS):
    print(f"inizio l'epoca {epoch+1}")
    for i in range(0, n_train, BATCH_SIZE):
        # build the mini-batch (the last one may hold fewer than BATCH_SIZE samples)
        x_batch = x_train[i:i+BATCH_SIZE]
        y_batch = y_train[i:i+BATCH_SIZE]

        # forward pass
        with tf.GradientTape() as tape:
            y_batch_hat = mlp.call(_as_columns(x_batch))  # 10 x batch
            # The loss expects the class axis last, so hand it (batch, 10)
            # predictions instead of relying on a non-default `axis` argument.
            losses = tf.keras.losses.sparse_categorical_crossentropy(
                y_batch, tf.transpose(y_batch_hat, perm=(1, 0))
            )
            mean_loss = tf.reduce_mean(losses)

        if (i//BATCH_SIZE)%LOG_EVERY == 0:
            print(mean_loss.numpy().item())

        # gradient of the mean loss with respect to every parameter
        grads = tape.gradient(mean_loss, weights)

        # mini-batch gradient descent step
        for weight, grad in zip(weights, grads):
            weight.assign(weight - ALPHA*grad)

    # end of the epoch: evaluate the accuracy on the test set
    y_test_hat = mlp.call(_as_columns(x_test))  # 10 x n_test
    test_preds = tf.argmax(y_test_hat, axis=0)  # most likely class of each column
    acc = accuracy(tf.cast(y_test, dtype=tf.int64), test_preds)
    print(f'accuratezza sul test set = {acc}')