In [2]:
#%pip install tensorflow
import numpy as np
import tensorflow as tf
import random

In [10]:
class Network:
    """Fully connected feed-forward network with sigmoid activations.

    Training uses mini-batch stochastic gradient descent with gradients
    obtained by backpropagation of a quadratic cost
    ``0.5 * ||output - target||^2``.
    """

    def __init__(self, sizes: list, learning_rate) -> None:
        """Create a network with randomly initialised parameters.

        Args:
            sizes: Number of neurons in each layer, input layer first
                (e.g. ``[784, 100, 50, 10]``).
            learning_rate: Step size applied to the averaged mini-batch
                gradient in ``update_batch``.
        """
        # Total number of layers, including the input layer.
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.learning_rate = learning_rate
        # One (n_out, 1) bias column per non-input layer.
        self.biases = [np.random.randn(n_out, 1) for n_out in sizes[1:]]
        # One (n_out, n_in) weight matrix per pair of adjacent layers.
        # Scale by 1/sqrt(fan-in) so the weighted input of a neuron keeps
        # roughly unit variance regardless of how many inputs feed it.
        self.weights = [
            np.random.randn(n_out, n_in) / np.sqrt(n_in)
            for n_out, n_in in zip(sizes[1:], sizes[:-1])
        ]

    def feed_forward(self, activations):
        """Propagate an input through the network.

        Args:
            activations: Input activations. A 1-D vector is treated as a
                single column; 2-D input is used as given.

        Returns:
            The output layer activations flattened to 1-D.
        """
        activations = np.asarray(activations)
        if activations.ndim == 1:
            # Without this, ``w @ a + b`` would broadcast (n,) + (n, 1)
            # into an (n, n) matrix instead of a column.
            activations = activations.reshape(-1, 1)

        for bias, weight in zip(self.biases, self.weights):
            activations = self.sigmoid(np.dot(weight, activations) + bias)

        return activations.reshape(-1)

    @staticmethod
    def sigmoid(x):
        """Element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1.0 / (1.0 + np.exp(-x))

    @staticmethod
    def cost(activations_out, y_vector):
        """Derivative of the quadratic cost w.r.t. the output activations.

        Args:
            activations_out: Output layer activations as a column.
            y_vector: Target vector; reshaped to a column before use.

        Returns:
            ``activations_out - y`` (note: the gradient, not the cost value).
        """
        y = np.asarray(y_vector).reshape(-1, 1)
        return activations_out - y

    def sigmoid_derivate(self, z):
        """Derivative of the sigmoid evaluated at ``z``."""
        s = self.sigmoid(z)  # evaluate once and reuse
        return s * (1.0 - s)

    def mini_batches(self, batch_size, shuffled_training_data):
        """Split the data into consecutive slices of ``batch_size`` items.

        The last slice is shorter when the data length is not a multiple of
        ``batch_size``.
        """
        return [
            shuffled_training_data[k:k + batch_size]
            for k in range(0, len(shuffled_training_data), batch_size)
        ]

    def backpropagation(self, x, y):
        """Compute the cost gradient for a single training example.

        Args:
            x: Input sample; reshaped to a column vector.
            y: Target vector for ``x``.

        Returns:
            Tuple ``(bias_gradients, weight_gradients)``: two lists with the
            same layout as ``self.biases`` and ``self.weights``.
        """
        grad_b = [np.zeros(b.shape) for b in self.biases]
        grad_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass. Keep every weighted input (z) and activation because
        # the backward pass needs them. The input is stored as a column so
        # the first-layer outer product below has the right shape.
        activation = np.asarray(x).reshape(-1, 1)
        activations = [activation]
        zs = []

        for bias, weight in zip(self.biases, self.weights):
            z = np.dot(weight, activation) + bias
            zs.append(z)
            activation = self.sigmoid(z)
            activations.append(activation)

        # Error at the output layer.
        delta = self.cost(activations[-1], y) * self.sigmoid_derivate(zs[-1])
        grad_b[-1] = delta
        grad_w[-1] = np.dot(delta, activations[-2].transpose())

        # Walk backwards: ``layer`` counts from the end, so -layer is the
        # layer being updated and -layer + 1 the one just processed.
        for layer in range(2, self.num_layers):
            delta = (
                np.dot(self.weights[-layer + 1].transpose(), delta)
                * self.sigmoid_derivate(zs[-layer])
            )
            grad_b[-layer] = delta
            grad_w[-layer] = np.dot(delta, activations[-layer - 1].transpose())

        return (grad_b, grad_w)

    def update_batch(self, mini_batch):
        """Apply one gradient-descent step using a whole mini-batch.

        Gradients of all samples are accumulated at the *current* parameters
        and the parameters are updated once with their average.

        Args:
            mini_batch: Sequence of ``(x, y)`` pairs. An empty batch is a
                no-op.
        """
        batch_len = len(mini_batch)
        if batch_len == 0:
            return

        sum_w = [np.zeros(w.shape) for w in self.weights]
        sum_b = [np.zeros(b.shape) for b in self.biases]

        for x, y in mini_batch:
            grad_b, grad_w = self.backpropagation(x, y)
            sum_w = [acc + g for acc, g in zip(sum_w, grad_w)]
            sum_b = [acc + g for acc, g in zip(sum_b, grad_b)]

        # The update must happen after the loop: updating per sample would
        # re-apply earlier gradients and evaluate later ones at moved weights.
        step = self.learning_rate / batch_len
        self.weights = [w - step * acc for w, acc in zip(self.weights, sum_w)]
        self.biases = [b - step * acc for b, acc in zip(self.biases, sum_b)]

    def train(self, data, epochs, batch_size=10, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            data: Sequence of ``(x, y)`` training pairs.
            epochs: Number of passes over ``data``.
            batch_size: Number of samples per mini-batch.
            test_data: Optional sequence of ``(x, y)`` pairs; when given,
                the number of correct predictions is printed after each
                epoch.
        """
        n = len(data)
        test_data_len = len(test_data) if test_data else None

        for e in range(epochs):
            # random.sample with k == len(data) yields a shuffled copy and
            # leaves the caller's data untouched.
            shuffled_data = random.sample(data, k=n)
            for batch in self.mini_batches(batch_size, shuffled_data):
                self.update_batch(batch)

            if test_data:
                print(f"Epoch {e}: {self.evaluate(test_data)} / {test_data_len}")
            else:
                print(f"Epoch {e} complete!")

    def evaluate(self, data):
        """Count the samples whose predicted class matches the label.

        Args:
            data: Iterable of ``(x, y)`` pairs, where ``y`` is either a
                one-hot vector or a scalar class index.

        Returns:
            Number of correctly classified samples.
        """
        correct = 0
        for x, y in data:
            predicted = np.argmax(self.feed_forward(x))
            # argmax of a scalar is always 0, so scalar labels are compared
            # directly instead.
            expected = int(y) if np.ndim(y) == 0 else np.argmax(y)
            correct += int(predicted == expected)

        return correct



In [11]:
def vectorized_result(digit, num_classes=10):
    """Return a one-hot column vector encoding ``digit``.

    Args:
        digit: Integer class index in ``[0, num_classes)``.
        num_classes: Length of the returned vector (defaults to 10).

    Returns:
        A ``(num_classes, 1)`` float array that is 1.0 at row ``digit`` and
        0.0 everywhere else.

    Raises:
        IndexError: If ``digit`` lies outside ``[0, num_classes)``. Negative
            values are rejected explicitly because NumPy would otherwise wrap
            them around and silently encode the wrong class.
    """
    if not 0 <= digit < num_classes:
        raise IndexError(
            f"digit {digit} is out of range for {num_classes} classes"
        )

    e = np.zeros((num_classes, 1))
    e[digit] = 1.0
    return e

In [12]:
# Fetch the MNIST train/test split through the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Reshape every image into a (784, 1) column vector.
X_train_flat = [image.reshape((784, 1)) for image in X_train]
X_test_flat = [image.reshape((784, 1)) for image in X_test]

# Divide each pixel value by 255.0 (presumably 8-bit pixels, which would
# map the inputs into [0, 1] - confirm against the dataset documentation).
X_train_normalized = [np.divide(column, 255.0) for column in X_train_flat]
X_test_normalized = [np.divide(column, 255.0) for column in X_test_flat]

# Encode every label with vectorized_result.
y_train_vector = list(map(vectorized_result, y_train))
y_test_vector = list(map(vectorized_result, y_test))

# Pair each input column with its encoded label: lists of (x, y) tuples.
train_data = [(x, y) for x, y in zip(X_train_normalized, y_train_vector)]
test_data = [(x, y) for x, y in zip(X_test_normalized, y_test_vector)]


In [None]:
# Seed both random sources before building the network so that a
# "Restart Kernel -> Run All" reproduces the same run: NumPy's global RNG
# drives the weight/bias initialisation in Network.__init__, and the stdlib
# `random` module drives the per-epoch shuffling in Network.train.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# 784 inputs, two hidden layers (100 and 50 neurons), 10 outputs;
# learning rate 0.5.
n = Network([784, 100, 50, 10], 0.5)
n.train(data=train_data, epochs=10, batch_size=10, test_data=test_data)
