In [2]:
import tensorflow as tf

In [3]:
class NaiveDense:
    """A minimal dense layer computing ``activation(inputs @ W + b)``.

    ``W`` has shape ``(input_size, output_size)`` and is initialised with
    ``tf.random.uniform`` using ``minval=0`` and ``maxval=1e-1``. ``b`` has
    shape ``(output_size,)`` and starts at zero. Both are ``tf.Variable``s.
    """

    def __init__(self, input_size, output_size, activation):
        self.activation = activation

        # Kernel: small random starting values.
        kernel_init = tf.random.uniform((input_size, output_size), minval=0, maxval=1e-1)
        self.W = tf.Variable(kernel_init)

        # Bias: one zero per output unit.
        self.b = tf.Variable(tf.zeros((output_size,)))

    def __call__(self, inputs):
        """Apply the layer to ``inputs`` and return the activated result."""
        pre_activation = tf.matmul(inputs, self.W) + self.b
        return self.activation(pre_activation)

    @property
    def weights(self):
        """The layer's variables as the list ``[W, b]``."""
        return [self.W, self.b]

In [4]:
class NaiveSequential:
    """Chains layers so that each one consumes the previous one's output."""

    def __init__(self, layers):
        self.layers = layers

    def __call__(self, inputs):
        """Feed ``inputs`` through every layer in order; return the final output."""
        output = inputs
        for apply_layer in self.layers:
            output = apply_layer(output)
        return output

    @property
    def weights(self):
        """A new flat list holding each layer's ``weights``, in layer order."""
        return [w for layer in self.layers for w in layer.weights]


In [5]:
# Two-layer classifier: 28*28 = 784 inputs -> 512 ReLU units -> 10 softmax outputs.
model = NaiveSequential(
    [
        NaiveDense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),
        NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax),
    ]
)
# Each dense layer contributes a kernel and a bias, hence four variables in total.
assert len(model.weights) == 4
print(model.weights)

[<tf.Variable 'Variable:0' shape=(784, 512) dtype=float32, numpy=
array([[5.2536141e-02, 6.8787746e-02, 3.1176150e-02, ..., 5.7678249e-02,
        5.6425691e-02, 7.5108610e-02],
       [1.1675107e-02, 7.2693823e-05, 4.6605121e-02, ..., 2.3337245e-02,
        9.1218069e-02, 3.4834411e-02],
       [1.2419260e-02, 5.4947890e-02, 6.3348711e-02, ..., 7.9210999e-04,
        3.8046706e-02, 6.3851647e-02],
       ...,
       [5.7920363e-02, 6.9742143e-02, 6.1024975e-02, ..., 6.2181104e-02,
        6.1143186e-02, 5.5132043e-02],
       [5.3363550e-02, 2.8427100e-02, 2.1193469e-02, ..., 3.8916279e-02,
        9.2123091e-02, 3.6185481e-02],
       [9.1769099e-03, 8.6427830e-02, 4.3111898e-02, ..., 9.5324151e-02,
        6.9187187e-02, 5.9669878e-02]], dtype=float32)>, <tf.Variable 'Variable:0' shape=(512,) dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0.

In [6]:
import math

class BatchGenerator:
    """Hands out consecutive ``batch_size``-sized slices of paired images and labels.

    ``num_batches`` is the number of ``next()`` calls needed to cover the data
    once; the last batch may be shorter than ``batch_size``.
    """

    def __init__(self, images, labels, batch_size=128):
        assert len(images) == len(labels)
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        self.index = 0  # start offset of the next batch
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        """Return the next ``(images, labels)`` slice pair and advance the cursor."""
        start = self.index
        stop = start + self.batch_size
        batch = (self.images[start:stop], self.labels[start:stop])
        self.index = stop
        return batch
    


In [31]:
def one_training_step(model, images_batch, labels_batch):
    """Run one gradient-descent update on a single batch and return its mean loss.

    The forward pass and loss are recorded under a ``tf.GradientTape``; the
    gradients of the mean loss with respect to ``model.weights`` are then
    handed to ``update_weights``.
    """
    with tf.GradientTape() as recorder:
        outputs = model(images_batch)
        batch_loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(labels_batch, outputs)
        )
    grads = recorder.gradient(batch_loss, model.weights)
    update_weights(grads, model.weights)
    return batch_loss

In [32]:
learning_rate = 1e-3


def update_weights(gradients, weights):
    """Take one plain gradient-descent step, updating each weight in place.

    Every weight is decreased by its gradient times the module-level
    ``learning_rate``, which is looked up at call time. Gradients and weights
    are paired positionally.
    """
    for grad, weight in zip(gradients, weights):
        weight.assign_sub(grad * learning_rate)

In [33]:
def fit(model, images, labels, epochs, batch_size=128):
    """Train ``model`` with mini-batch gradient descent.

    Args:
        model: Model passed through to ``one_training_step``.
        images: Training inputs, sliceable along the first axis.
        labels: Targets aligned with ``images`` (same length).
        epochs: Number of full passes over the data.
        batch_size: Number of samples per training step.

    Prints the epoch index at the start of each epoch and the loss of every
    100th batch.
    """
    for epoch_counter in range(epochs):
        print(f"Epoch {epoch_counter}")
        # A fresh generator per epoch restarts iteration from the first sample.
        # batch_size must be forwarded explicitly; otherwise the generator
        # silently falls back to its own default and the argument has no effect.
        batch_generator = BatchGenerator(images, labels, batch_size=batch_size)
        for batch_counter in range(batch_generator.num_batches):
            images_batch, labels_batch = batch_generator.next()
            loss = one_training_step(model, images_batch, labels_batch)
            if batch_counter % 100 == 0:
                print(f"loss at batch {batch_counter}: {loss:.2f}")

In [34]:
from tensorflow.keras.datasets import mnist
# Load MNIST as two (images, labels) pairs: one for training, one for testing.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [35]:
# Flatten every 28x28 image into a 784-long row, convert to float32 and divide
# by 255 (presumably mapping 0-255 pixel intensities into [0, 1]).
# NOTE: this rebinds the same names, so running the cell a second time divides
# by 255 again -- reload the data before re-running.
train_images = train_images.reshape((60000, 28 * 28)).astype("float32") / 255
test_images = test_images.reshape((10000, 28 * 28)).astype("float32") / 255


In [36]:
# Run the training loop: 20 passes (epochs) over the training data.
fit(model, train_images, train_labels, epochs=20, batch_size=128)

Epoch 0
loss at batch 0: 0.35
loss at batch 100: 0.31
loss at batch 200: 0.28
loss at batch 300: 0.35
loss at batch 400: 0.46
Epoch 1
loss at batch 0: 0.33
loss at batch 100: 0.30
loss at batch 200: 0.28
loss at batch 300: 0.35
loss at batch 400: 0.46
Epoch 2
loss at batch 0: 0.33
loss at batch 100: 0.30
loss at batch 200: 0.27
loss at batch 300: 0.35
loss at batch 400: 0.46
Epoch 3
loss at batch 0: 0.33
loss at batch 100: 0.30
loss at batch 200: 0.27
loss at batch 300: 0.35
loss at batch 400: 0.46
Epoch 4
loss at batch 0: 0.32
loss at batch 100: 0.30
loss at batch 200: 0.27
loss at batch 300: 0.35
loss at batch 400: 0.45
Epoch 5
loss at batch 0: 0.32
loss at batch 100: 0.29
loss at batch 200: 0.27
loss at batch 300: 0.34
loss at batch 400: 0.45
Epoch 6
loss at batch 0: 0.32
loss at batch 100: 0.29
loss at batch 200: 0.27
loss at batch 300: 0.34
loss at batch 400: 0.45
Epoch 7
loss at batch 0: 0.32
loss at batch 100: 0.29
loss at batch 200: 0.27
loss at batch 300: 0.34
loss at batch 40

In [None]:
# Toy weight matrix: a 2x3 variable of uniform random values, used in the matmul below.
w = tf.Variable(tf.random.uniform((2,3)))

In [35]:
# Inspect the variable: its shape, dtype and current values.
print(w)

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[0.8875872 , 0.13927293, 0.38986433],
       [0.3410679 , 0.8972597 , 0.2995832 ]], dtype=float32)>


In [36]:
# Toy bias: a length-3 variable of uniform random values, added to A below.
b = tf.Variable(tf.random.uniform((3,)))

In [37]:
# Inspect the bias variable.
print(b)

<tf.Variable 'Variable:0' shape=(3,) dtype=float32, numpy=array([0.96188235, 0.39057565, 0.6048988 ], dtype=float32)>


In [38]:
# Toy input: a 5x2 tensor of uniform random values (five rows of two features each).
x = tf.random.uniform((5,2))

In [39]:
# Inspect the input tensor.
print(x)

tf.Tensor(
[[0.69592273 0.89160204]
 [0.86045563 0.7796519 ]
 [0.48028994 0.07891655]
 [0.9005668  0.64324784]
 [0.4388777  0.27938116]], shape=(5, 2), dtype=float32)


In [41]:
# Matrix product (5, 2) @ (2, 3) -> (5, 3): the same operation NaiveDense.__call__ performs.
A = tf.matmul(x, w)

In [42]:
# Inspect the product; its shape is (5, 3).
print(A)

tf.Tensor(
[[0.92178893 0.89692175 0.53842443]
 [1.0296437  0.8193884  0.5690316 ]
 [0.4532151  0.13770002 0.21089   ]
 [1.0187228  0.7025849  0.5438051 ]
 [0.48483017 0.31180125 0.25480068]], shape=(5, 3), dtype=float32)


In [None]:
# Adding the length-3 b to the (5, 3) A broadcasts b across every row of A.
print(A + b)

In [44]:
# Apply ReLU elementwise to the biased product, mirroring a NaiveDense layer with a relu activation.
print(tf.nn.relu(A+b))

tf.Tensor(
[[1.8836713  1.2874974  1.1433232 ]
 [1.991526   1.209964   1.1739304 ]
 [1.4150975  0.52827567 0.8157888 ]
 [1.9806051  1.0931606  1.1487039 ]
 [1.4467125  0.7023769  0.8596995 ]], shape=(5, 3), dtype=float32)


In [37]:
# Run the trained model on all test images in a single forward pass.
predictions = model(test_images)

In [38]:
import numpy as np

# Convert the model output to a NumPy array and use that array below. It gets
# its own name (rather than rebinding `predictions`) so the cell can be re-run
# without calling .numpy() on something that is already an ndarray.
predictions_np = predictions.numpy()
# Index of the highest-scoring output per sample is the predicted class.
predicted_labels = np.argmax(predictions_np, axis=1)
# Elementwise comparison with the true labels gives one boolean per test sample.
matches = predicted_labels == test_labels
print(matches)

[ True  True  True ...  True  True  True]


In [39]:
# Check what kind of object the comparison produced (a NumPy array).
type(matches)

numpy.ndarray

In [40]:
# Mean of the boolean array = fraction of test samples predicted correctly (accuracy).
matches.mean()

0.9017