<a href="https://colab.research.google.com/github/datapirate09/Neural-Network-Builder/blob/main/NeuralNetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [45]:
import numpy as np
import time
import random

In [46]:
class NeuralNetwork:
  """Fully connected feed-forward network trained with mini-batch SGD.

  Every layer applies the same activation (only 'sigmoid' is implemented)
  and the gradients correspond to a quadratic cost (see cost_derivative).
  Single samples and labels are handled as column vectors of shape (n, 1);
  1-D samples and labels are reshaped to columns where they are consumed.
  """

  # Activation names that activation_function / its derivative implement.
  _SUPPORTED_ACTIVATIONS = ('sigmoid',)

  def __init__(self, layers, activation='sigmoid'):
    """Create a network with standard-normal random weights and biases.

    Args:
      layers: sequence of ints; layers[i] is the number of neurons in
        layer i (input layer first, output layer last).
      activation: name of the activation function; only 'sigmoid' is
        supported.

    Raises:
      ValueError: if activation is not a supported name.
    """
    if activation not in self._SUPPORTED_ACTIVATIONS:
      raise ValueError(
          f"Unsupported activation {activation!r}; "
          f"expected one of {self._SUPPORTED_ACTIVATIONS}")
    self.no_of_layers = len(layers)
    self.sizes = layers
    # weights[i] has shape (layers[i+1], layers[i]) so that
    # np.dot(weights[i], a) maps layer i activations to layer i+1.
    self.weights = [np.random.randn(y, x) for x, y in zip(layers[:-1], layers[1:])]
    self.biases = [np.random.randn(y, 1) for y in layers[1:]]
    self.activation = activation

  def get_shape_of_weights(self):
    """Print the shape of every weight matrix, one line per layer."""
    for i, layer in enumerate(self.weights):
      print(f"Layer {i} has shape {layer.shape}")

  def get_shape_of_bias(self):
    """Print the shape of every bias vector, one line per layer."""
    for i, layer in enumerate(self.biases):
      print(f"Layer {i} has shape {layer.shape}")

  def sigmoid(self, z):
    """Return the element-wise logistic function 1 / (1 + exp(-z))."""
    return 1.0/(1.0+np.exp(-z))

  def sigmoid_derivative(self, z):
    """Return the element-wise derivative of the sigmoid at z."""
    s = self.sigmoid(z)  # evaluate once instead of twice
    return s*(1-s)

  def activation_function(self, activation_input):
    """Apply the configured activation element-wise.

    Raises:
      ValueError: if self.activation is not a supported name (previously
        this silently returned None).
    """
    if self.activation == 'sigmoid':
      return self.sigmoid(activation_input)
    raise ValueError(f"Unsupported activation {self.activation!r}")

  def activation_function_derivative(self, activation_input):
    """Apply the derivative of the configured activation element-wise.

    Raises:
      ValueError: if self.activation is not a supported name.
    """
    if self.activation == 'sigmoid':
      return self.sigmoid_derivative(activation_input)
    raise ValueError(f"Unsupported activation {self.activation!r}")

  def forward_propagation(self, a):
    """Return the output-layer activation for input a.

    Args:
      a: input activation. A 1-D array is treated as one sample and
        reshaped to a column; 2-D input is used as given.
    """
    a = np.asarray(a)
    if a.ndim == 1:
      # Without this, (n,) + (n, 1) would broadcast to an (n, n) matrix.
      a = a.reshape(-1, 1)
    for w, b in zip(self.weights, self.biases):
      a = self.activation_function(np.dot(w, a) + b)
    return a

  def update_weights_and_biases(self, input_data, batch_size=10, learning_rate=3.0):
    """Run one epoch of mini-batch gradient descent.

    Args:
      input_data: sequence of (input, label) pairs. It is not modified;
        a shuffled copy is used to form the mini-batches.
      batch_size: number of samples per mini-batch (the last batch may
        be smaller).
      learning_rate: step size passed to update_mini_batch.

    Raises:
      ValueError: if batch_size is smaller than 1.
    """
    if batch_size < 1:
      raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    shuffled = list(input_data)  # copy so the caller's order is preserved
    random.shuffle(shuffled)
    for k in range(0, len(shuffled), batch_size):
      self.update_mini_batch(shuffled[k:k+batch_size], learning_rate)

  def update_mini_batch(self, mini_batch, learning_rate=3.0):
    """Apply one gradient step averaged over mini_batch.

    Args:
      mini_batch: sequence of (input, label) pairs; an empty batch is a
        no-op.
      learning_rate: step size; the summed gradient is scaled by
        learning_rate / len(mini_batch).
    """
    if len(mini_batch) == 0:
      return  # nothing to average over; avoids dividing by zero
    b_diff = [np.zeros(b.shape) for b in self.biases]
    w_diff = [np.zeros(w.shape) for w in self.weights]
    for x, y in mini_batch:  # training input and label
      activations, z_vector = self.get_activations(x)
      b_error, w_error = self.back_propagation(activations, z_vector, y)
      b_diff = [bd + be for bd, be in zip(b_diff, b_error)]
      w_diff = [wd + we for wd, we in zip(w_diff, w_error)]

    step = learning_rate / len(mini_batch)
    self.weights = [w - step * nw for w, nw in zip(self.weights, w_diff)]
    self.biases = [b - step * nb for b, nb in zip(self.biases, b_diff)]

  def get_activations(self, input_data):
    """Forward pass for one sample that records intermediate values.

    Args:
      input_data: a single sample; it is reshaped to a column vector.

    Returns:
      (activations, z_vector): activations holds the input followed by
      each layer's output; z_vector holds each layer's pre-activation.
    """
    activation = np.asarray(input_data).reshape(-1, 1)  # Ensure it's a column vector
    activations = [activation]
    z_vector = []

    for b, w in zip(self.biases, self.weights):
      z = np.dot(w, activation) + b
      z_vector.append(z)
      activation = self.activation_function(z)
      activations.append(activation)

    return activations, z_vector

  def cost_derivative(self, output_activations, y):
    """Return output_activations - y, the gradient of the quadratic cost
    0.5 * ||output_activations - y||^2 with respect to the output."""
    return (output_activations-y)

  def back_propagation(self, activations, z_vector, y):
    """Compute per-layer gradients for one sample.

    Args:
      activations: list returned by get_activations.
      z_vector: pre-activations returned by get_activations.
      y: label with as many elements as the output layer; it is reshaped
        to the output activation's shape so that 1-D or row labels do
        not broadcast into a matrix.

    Returns:
      (b_error, w_error): lists shaped like self.biases / self.weights.
    """
    b_error = [np.zeros(b.shape) for b in self.biases]
    w_error = [np.zeros(w.shape) for w in self.weights]
    y = np.asarray(y).reshape(activations[-1].shape)
    # Error at the output layer.
    delta = self.cost_derivative(activations[-1], y) * self.activation_function_derivative(z_vector[-1])
    b_error[-1] = delta
    w_error[-1] = np.dot(delta, activations[-2].transpose())
    # Walk backwards: l = 2 is the second-to-last layer, and so on.
    for l in range(2, self.no_of_layers):
      z = z_vector[-l]
      sp = self.activation_function_derivative(z)
      delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
      b_error[-l] = delta
      w_error[-l] = np.dot(delta, activations[-l-1].transpose())
    return (b_error, w_error)

  def fit(self, training_data, training_labels, epochs=20, batch_size = 10, learning_rate = 3.0):
    """Train the network and print accuracy/loss after every epoch.

    Accuracy and loss are measured on the training set itself, and the
    printed duration covers both the update and that evaluation.

    Args:
      training_data: sequence of input samples.
      training_labels: sequence of labels, one per sample.
      epochs: number of passes over the training set.
      batch_size: mini-batch size.
      learning_rate: gradient-descent step size.

    Raises:
      ValueError: if the two sequences differ in length or are empty.
    """
    n = len(training_data)
    if n != len(training_labels):
      # zip() would silently drop the unmatched tail otherwise.
      raise ValueError(
          f"training_data has {n} samples but training_labels has "
          f"{len(training_labels)}")
    if n == 0:
      raise ValueError("fit() needs at least one training sample")
    training_data_combined = list(zip(training_data, training_labels))
    for _ in range(epochs):
      start_time = time.perf_counter()  # monotonic clock for durations
      self.update_weights_and_biases(training_data_combined, batch_size, learning_rate)
      accuracy, loss = self.evaluate(training_data_combined)
      duration = time.perf_counter() - start_time
      print(f"{n}/{n} ━━━━━━━━━━━━━━━━━━━━ {duration:.0f}s  - accuracy: {accuracy:.4f} - loss: {loss:.4f}")

  def evaluate(self, data):
    """Return (accuracy, average squared-error loss) over data.

    Args:
      data: sequence of (input, label) pairs. A prediction counts as
        correct when argmax of the output equals argmax of the label.

    Raises:
      ValueError: if data is empty.
    """
    if len(data) == 0:
      raise ValueError("evaluate() needs at least one (input, label) pair")
    correct = 0
    total_loss = 0
    for x, y in data:
      output = self.forward_propagation(x)
      # Align the label with the output so the subtraction is element-wise.
      y = np.asarray(y).reshape(output.shape)
      if np.argmax(output) == np.argmax(y):
        correct += 1
      total_loss += np.sum((output - y) ** 2)
    accuracy = correct / len(data)
    avg_loss = total_loss / len(data)
    return accuracy, avg_loss


In [47]:
!wget https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz -O mnist.pkl.gz


--2025-04-06 12:54:52--  https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz
Resolving github.com (github.com)... 140.82.121.4
Connecting to github.com (github.com)|140.82.121.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/mnielsen/neural-networks-and-deep-learning/master/data/mnist.pkl.gz [following]
--2025-04-06 12:54:53--  https://raw.githubusercontent.com/mnielsen/neural-networks-and-deep-learning/master/data/mnist.pkl.gz
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 17051982 (16M) [application/octet-stream]
Saving to: ‘mnist.pkl.gz’


2025-04-06 12:54:53 (183 MB/s) - ‘mnist.pkl.gz’ saved [17051982/17051982]



In [48]:
import gzip
import pickle

# Path of the gzip-compressed MNIST pickle in the working directory.
file_path = "mnist.pkl.gz"

# NOTE(review): pickle.load can execute arbitrary code while unpickling, and
# this file comes from a network download -- only load it from a trusted source.
# encoding='latin1' is presumably needed because the pickle was written by
# Python 2 -- TODO confirm.
with gzip.open(file_path, 'rb') as f:
    mnist_data = pickle.load(f, encoding='latin1')

# The pickle unpacks into three splits; each split is indexed below as
# [0] for the inputs and [1] for the labels.
training_data, validation_data, test_data = mnist_data
print(training_data[0].shape)
print(training_data[1].shape)

def vectorized_label(j, num_classes=10):
    """Return a one-hot column vector for class index j.

    Args:
        j: integer class index, 0 <= j < num_classes.
        num_classes: length of the vector (defaults to 10).

    Returns:
        A float array of shape (num_classes, 1) that is 1.0 at row j and
        0.0 elsewhere.

    Raises:
        IndexError: if j is outside [0, num_classes). Negative indices are
            rejected explicitly because NumPy would otherwise wrap them
            around and silently encode the wrong class.
    """
    if not 0 <= j < num_classes:
        raise IndexError(
            f"label {j} is out of range for {num_classes} classes")
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e

# Turn every training image into a column vector and every integer label into
# its one-hot column, giving two parallel lists.
X_train = [np.reshape(image, (-1, 1)) for image in training_data[0]]
y_train = list(map(vectorized_label, training_data[1]))

(50000, 784)
(50000,)


In [49]:
# 784 inputs -> 30 hidden neurons -> 10 outputs, sigmoid activation.
model = NeuralNetwork(layers=[784, 30, 10], activation='sigmoid')

In [50]:
# Train with the hyperparameters spelled out (these equal fit()'s defaults).
model.fit(X_train, y_train, epochs=20, batch_size=10, learning_rate=3.0)


50000/50000 ━━━━━━━━━━━━━━━━━━━━ 10s  - accuracy: 0.9054 - loss: 0.1541
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 10s  - accuracy: 0.9245 - loss: 0.1258
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 9s  - accuracy: 0.9336 - loss: 0.1101
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 10s  - accuracy: 0.9407 - loss: 0.1010
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 8s  - accuracy: 0.9420 - loss: 0.0989
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 10s  - accuracy: 0.9472 - loss: 0.0920
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 9s  - accuracy: 0.9497 - loss: 0.0853
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 8s  - accuracy: 0.9500 - loss: 0.0867
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 9s  - accuracy: 0.9549 - loss: 0.0793
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 9s  - accuracy: 0.9575 - loss: 0.0734
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 8s  - accuracy: 0.9568 - loss: 0.0742
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 9s  - accuracy: 0.9562 - loss: 0.0759
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 9s  - accuracy: 0.9581 - loss: 0.0727
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 9s  - accuracy: 0.9602 - loss: 0.0682
50