In [22]:
import numpy as np

In [74]:
import numpy as np

def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, computed without overflow.

    The naive formula evaluates ``exp(-z)``, which overflows to ``inf`` (and
    emits a RuntimeWarning) for large negative ``z``. Here the exponential is
    only ever taken of a non-positive number, so it stays within ``[0, 1]``.

    Args:
        z: A scalar or array-like of real numbers.

    Returns:
        The element-wise sigmoid of ``z``, with the same shape as the input
        (a NumPy scalar when ``z`` is a scalar).
    """
    z = np.asarray(z)
    # exp(-|z|) is always in (0, 1], so neither branch below can overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays untouched.
    return result[()]

def sigmoid_prime(z):
    """Derivative of the sigmoid function, evaluated element-wise at ``z``.

    Uses the identity ``sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z))`` so the
    sigmoid itself is only computed once.
    """
    activation = sigmoid(z)
    return activation * (1 - activation)

class Network(object):
    """Fully connected feedforward network with sigmoid activations.

    Training uses mini-batch stochastic gradient descent with gradients
    obtained by backpropagation; the cost derivative used is that of the
    quadratic cost (see ``cost_derivative``).

    Attributes:
        num_layers: Number of layers, including the input layer.
        sizes: The layer sizes passed to the constructor.
        biases: One ``(n_out, 1)`` array per non-input layer.
        weights: One ``(n_out, n_in)`` array per pair of adjacent layers.
    """

    def __init__(self, sizes):
        """Create a network with standard-normal random weights and biases.

        Args:
            sizes: Sequence of layer sizes, e.g. ``[784, 20, 20, 10]``. The
                first entry is the input dimension, the last the output
                dimension.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        # The input layer has no biases, hence sizes[1:].
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    def feedforward(self, a):
        """Return the network output for input ``a``.

        Args:
            a: Input activation; expected to be a column vector of shape
                ``(sizes[0], 1)`` so that adding the ``(n, 1)`` biases does
                not broadcast into a matrix.
        """
        for w, b in zip(self.weights, self.biases):
            a = sigmoid(np.dot(w, a) + b)
        return a

    def SGD(self, train_data, epochs, batch_size, eta, test_data=None):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            train_data: Iterable of ``(x, y)`` pairs. It is copied into a
                private list before shuffling, so the caller's sequence is
                never reordered.
            epochs: Number of full passes over ``train_data``.
            batch_size: Number of samples per mini-batch; must be positive.
            eta: Learning rate.
            test_data: Optional iterable of ``(x, y)`` pairs. When it is
                provided and non-empty, the number of correct predictions is
                printed after every epoch; otherwise only a completion
                message is printed.

        Raises:
            ValueError: If ``batch_size`` is not positive (a negative value
                would otherwise yield no mini-batches and silently skip
                training).
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")

        # Private copies: shuffling must not reorder the caller's data, and
        # list() also lets iterators/arrays be passed safely.
        train_data = list(train_data)
        test_data = list(test_data) if test_data is not None else []
        n_test = len(test_data)
        n = len(train_data)

        for i in range(epochs):
            # Uses NumPy's global RNG; seed it beforehand for reproducibility.
            np.random.shuffle(train_data)
            mini_batches = [train_data[k:k + batch_size] for k in range(0, n, batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)

            if n_test:
                print(f"Epoch {i}: {self.evaluate(test_data)} / {n_test}")
            else:
                print(f"Epoch {i} complete")

    def update_mini_batch(self, mini_batch, eta):
        """Apply one gradient-descent step computed from ``mini_batch``.

        The per-sample gradients are summed and the parameters are moved by
        ``eta / len(mini_batch)`` times that sum.

        Args:
            mini_batch: Sequence of ``(x, y)`` pairs. An empty batch leaves
                the parameters unchanged instead of dividing by zero.
            eta: Learning rate.
        """
        if len(mini_batch) == 0:
            return

        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.back_propagation(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        step = eta / len(mini_batch)
        self.weights = [w - step * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - step * nb for b, nb in zip(self.biases, nabla_b)]

    def back_propagation(self, x, y):
        """Compute the cost gradient for a single training sample.

        Args:
            x: Input activation (see ``feedforward`` for the expected shape).
            y: Desired output, same shape as the network output.

        Returns:
            A tuple ``(nabla_b, nabla_w)`` of per-layer gradient lists shaped
            like ``self.biases`` and ``self.weights`` respectively.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass, remembering every weighted input and activation.
        activation = x
        activations = [x]  # activations[k] is the output of layer k
        zs = []            # zs[k] is the weighted input to layer k + 1

        for w, b in zip(self.weights, self.biases):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass: error at the output layer first.
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].T)

        # ``layer`` counts from the end: 2 is the second-to-last layer, etc.
        for layer in range(2, self.num_layers):
            sp = sigmoid_prime(zs[-layer])
            # Propagate the error back through the weights of the next layer.
            delta = np.dot(self.weights[-layer + 1].T, delta) * sp
            nabla_b[-layer] = delta
            nabla_w[-layer] = np.dot(delta, activations[-layer - 1].T)

        return nabla_b, nabla_w

    def evaluate(self, test_data):
        """Count the test samples the network classifies correctly.

        The predicted class is the index of the largest output activation and
        is compared with the index of the largest entry of ``y`` (so ``y`` is
        expected to be one-hot encoded).

        Args:
            test_data: Iterable of ``(x, y)`` pairs.

        Returns:
            The number of correct predictions as an ``int``.
        """
        test_results = [(np.argmax(self.feedforward(x)), np.argmax(y)) for (x, y) in test_data]
        return sum(int(pred == actual) for (pred, actual) in test_results)

    def cost_derivative(self, output_activations, y):
        """Derivative of the quadratic cost w.r.t. the output activations.

        For ``C = 0.5 * ||a - y||^2`` this is simply ``a - y``.
        """
        return output_activations - y


# Loading MNIST dataset

In [25]:
import sklearn
from sklearn.datasets import fetch_openml
# Download (or load from scikit-learn's local cache) version 1 of the
# 'mnist_784' dataset from OpenML.
mnist = fetch_openml(name='mnist_784', version=1)

In [83]:

# Pull the feature table and the label column out of the fetched dataset.
x = mnist['data']
y = mnist['target']

In [86]:
# Convert the feature table to a NumPy array. The guard keeps this cell
# idempotent: once x is already an ndarray (which has no .to_numpy), running
# the cell again is a no-op instead of an AttributeError.
if hasattr(x, "to_numpy"):
    x = x.to_numpy()

In [88]:
def one_hot_encode(y, num_classes=10):
    """One-hot encode a 1-D collection of integer class labels.

    Args:
        y: Array-like of labels (ints, or values convertible to int such as
            numeric strings). A pandas Series is read positionally, whatever
            its index.
        num_classes: Width of the encoding; every label must index into
            ``range(num_classes)``.

    Returns:
        A float array of shape ``(len(y), num_classes)`` with a single 1 per
        row at the column given by that row's label.

    Raises:
        IndexError: If a label is out of bounds for ``num_classes``.
    """
    labels = np.asarray(y).astype(int)
    # Flatten (n,) or (n, 1) input to exactly n labels.
    labels = labels.reshape(len(labels))
    y_encoded = np.zeros((len(labels), num_classes))
    # Vectorised assignment: row i gets a 1 in column labels[i].
    y_encoded[np.arange(len(labels)), labels] = 1
    return y_encoded

In [89]:
# Scale the pixel values by 1/255 as float32 and keep three decimals.
x = np.round(x.astype(np.float32) / 255.0, 3)
# Turn the label column into int32 class ids, then into one-hot rows.
y = one_hot_encode(y.to_numpy().astype(np.int32))

In [91]:
# Spot-check the first sample after scaling.
# NOTE: only the last expression of a cell is displayed, so the argmax result
# on the next line is computed but never shown.
np.argmax(x[0])
# Value of pixel 161 of the first sample (presumably the argmax found above -- confirm).
x[0, 161]

np.float32(1.0)

In [92]:
# Display the label matrix returned by one_hot_encode (one row per sample).
y

array([[0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], shape=(70000, 10))

In [93]:
# Pair every sample with its label, both reshaped to column vectors.
data = [
    (features.reshape(-1, 1), y[idx].reshape(-1, 1))
    for idx, features in enumerate(x)
]
# The first 60000 pairs are used for training, the remainder for testing.
train_data = data[:60000]
test_data = data[60000:]

In [94]:
# 784 inputs, two hidden layers of 20 units each, and 10 outputs.
network = Network(sizes=[784, 20, 20, 10])
# Train for 50 epochs, printing the test-set score after every epoch.
network.SGD(
    train_data=train_data,
    epochs=50,
    batch_size=10,
    eta=3,
    test_data=test_data,
)

Epoch 0: 9080 / 10000
Epoch 1: 9241 / 10000
Epoch 2: 9254 / 10000
Epoch 3: 9300 / 10000
Epoch 4: 9350 / 10000
Epoch 5: 9354 / 10000
Epoch 6: 9363 / 10000
Epoch 7: 9372 / 10000
Epoch 8: 9435 / 10000
Epoch 9: 9406 / 10000
Epoch 10: 9394 / 10000
Epoch 11: 9402 / 10000
Epoch 12: 9451 / 10000
Epoch 13: 9410 / 10000
Epoch 14: 9454 / 10000
Epoch 15: 9419 / 10000
Epoch 16: 9418 / 10000
Epoch 17: 9458 / 10000
Epoch 18: 9460 / 10000
Epoch 19: 9467 / 10000
Epoch 20: 9418 / 10000
Epoch 21: 9485 / 10000
Epoch 22: 9458 / 10000
Epoch 23: 9466 / 10000
Epoch 24: 9479 / 10000
Epoch 25: 9494 / 10000
Epoch 26: 9484 / 10000
Epoch 27: 9482 / 10000
Epoch 28: 9498 / 10000
Epoch 29: 9434 / 10000
Epoch 30: 9470 / 10000
Epoch 31: 9477 / 10000
Epoch 32: 9490 / 10000
Epoch 33: 9491 / 10000
Epoch 34: 9476 / 10000
Epoch 35: 9471 / 10000
Epoch 36: 9490 / 10000
Epoch 37: 9503 / 10000
Epoch 38: 9491 / 10000
Epoch 39: 9510 / 10000
Epoch 40: 9520 / 10000
Epoch 41: 9498 / 10000
Epoch 42: 9479 / 10000
Epoch 43: 9514 / 1000