<a href="https://colab.research.google.com/github/datapirate09/Neural-Network-Builder/blob/main/NeuralNetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import time
import random

In [None]:
class NeuralNetwork:
  """Fully connected feed-forward network trained with mini-batch gradient descent.

  The network is described by a list of layer objects. The first entry is the
  input layer and only needs ``no_of_neurons``; every later entry must also
  expose ``activation`` and receives freshly drawn ``weights`` / ``biases``
  attributes from the constructor. Gradients are those of the quadratic cost
  ``0.5 * ||a - y||^2`` per sample (see ``cost_derivative``).

  Samples are handled one at a time as column vectors; 1-D samples and labels
  are reshaped to columns so that rows of a 2-D NumPy array can be fed directly.
  """

  def __init__(self, layers, activation='sigmoid'):
    """Store the layer objects and initialise their parameters.

    Args:
      layers: list of layer objects, input layer first.
      activation: unused; kept for backward compatibility. Each layer's own
        ``activation`` attribute decides which function is applied.
    """
    self.layer_info = layers
    self.no_of_layers = len(layers)
    self.sizes = [layer.no_of_neurons for layer in layers]
    self.layer_activations = [layer.activation for layer in layers[1:]]
    # Weights map the previous layer's outputs to this layer's neurons, hence
    # the (current, previous) shape; biases are column vectors.
    for i in range(1, self.no_of_layers):
      self.layer_info[i].weights = np.random.randn(self.layer_info[i].no_of_neurons, self.layer_info[i-1].no_of_neurons)
      self.layer_info[i].biases = np.random.randn(self.layer_info[i].no_of_neurons, 1)

  def layers(self):
    """Return the list of layer objects (input layer included)."""
    return self.layer_info

  def summary(self):
    """Print the parameter count of every non-input layer and the total."""
    total_params = 0
    for i in range(1,self.no_of_layers):
      layer_params_count = self.layer_info[i].weights.size + self.layer_info[i].biases.size
      print(f"L{i+1} params: {layer_params_count}")
      total_params += layer_params_count
    print(f"Total no of parameters:{total_params}")

  def sigmoid(self, z):
    """Element-wise logistic function."""
    return 1.0/(1.0+np.exp(-z))

  def sigmoid_derivative(self, z):
    """Element-wise derivative of ``sigmoid`` evaluated at ``z``."""
    s = self.sigmoid(z)  # evaluate once instead of twice
    return s*(1-s)

  def softmax(self, z):
    """Softmax over axis 0; the maximum is subtracted first to avoid overflow."""
    exp_z = np.exp(z - np.max(z))
    return exp_z / np.sum(exp_z, axis=0, keepdims=True)

  def activation_function(self,activation_input, activation_fn_name='sigmoid'):
    """Apply the named activation (``'sigmoid'`` or ``'softmax'``).

    Raises:
      ValueError: if the name is not supported (previously ``None`` was
        returned silently and the failure surfaced far from its cause).
    """
    if activation_fn_name == 'sigmoid':
      return self.sigmoid(activation_input)
    elif activation_fn_name == 'softmax':
      return self.softmax(activation_input)
    raise ValueError(f"Unsupported activation function: {activation_fn_name!r}")

  def activation_function_derivative(self, activation_input, activation_fn_name='sigmoid'):
    """Return the element-wise derivative of the named activation.

    Only ``'sigmoid'`` has an element-wise derivative here; softmax couples all
    outputs and is handled by ``_activation_backward`` instead.

    Raises:
      ValueError: for any other activation name.
    """
    if activation_fn_name == 'sigmoid':
      return self.sigmoid_derivative(activation_input)
    raise ValueError(f"No element-wise derivative available for activation: {activation_fn_name!r}")

  def _activation_backward(self, upstream, z, activation_fn_name):
    """Convert the gradient w.r.t. a layer's output into the gradient w.r.t. ``z``."""
    if activation_fn_name == 'softmax':
      # Jacobian-vector product of softmax: s_i * (g_i - sum_j g_j * s_j).
      s = self.softmax(z)
      return s * (upstream - np.sum(upstream * s, axis=0, keepdims=True))
    return upstream * self.activation_function_derivative(z, activation_fn_name)

  def forward_propagation(self, a):
    """Run the network on ``a`` and return the output layer's activations.

    A 1-D input is treated as a single sample and reshaped to a column so that
    adding the column-vector biases cannot broadcast into a square matrix.
    """
    a = np.asarray(a)
    if a.ndim == 1:
      a = a.reshape(-1, 1)
    for layer, activation_fn_name in zip(self.layer_info[1:], self.layer_activations):
      a = self.activation_function(np.dot(layer.weights, a) + layer.biases, activation_fn_name=activation_fn_name)
    return a

  def update_weights_and_biases(self, input_data, batch_size=10, learning_rate=3.0):
    """Run one epoch: shuffle ``input_data`` in place and train on each mini-batch.

    Args:
      input_data: mutable sequence of ``(x, y)`` pairs; its order is changed.
      batch_size: number of samples per mini-batch (the last one may be smaller).
      learning_rate: step size passed to ``update_mini_batch``.
    """
    random.shuffle(input_data)
    mini_batches = [input_data[k:k+batch_size] for k in range(0, len(input_data), batch_size)]
    for batch in mini_batches:
      self.update_mini_batch(batch, learning_rate)

  def update_mini_batch(self, mini_batch, learning_rate=3.0):
    """Apply one gradient step using the gradient averaged over ``mini_batch``."""
    if len(mini_batch) == 0:
      return  # nothing to average; avoids dividing by zero
    b_diff = [np.zeros(layer.biases.shape) for layer in self.layer_info[1:]]
    w_diff = [np.zeros(layer.weights.shape) for layer in self.layer_info[1:]]
    for x, y in mini_batch:
      # Labels must be columns like the activations, otherwise `a - y`
      # broadcasts to a matrix.
      y = np.asarray(y).reshape(-1, 1)
      activations, z_vector = self.get_activations(x)
      b_error, w_error = self.back_propagation(activations, z_vector, y)
      b_diff = [bd + be for bd, be in zip(b_diff, b_error)]
      w_diff = [wd + we for wd, we in zip(w_diff, w_error)]
    step = learning_rate / len(mini_batch)
    for i in range(1, self.no_of_layers):
      self.layer_info[i].weights -= step * w_diff[i-1]
      self.layer_info[i].biases -= step * b_diff[i-1]

  def get_activations(self, input_data):
    """Forward pass for one sample that keeps every intermediate value.

    Returns:
      ``(activations, z_vector)`` where ``activations[0]`` is the input column
      and ``z_vector[k]`` is the pre-activation of layer ``k + 1``.
    """
    activation = np.asarray(input_data).reshape(-1, 1)
    activations = [activation]
    z_vector = []

    for i in range(1,self.no_of_layers):
      layer = self.layer_info[i]
      z = np.dot(layer.weights, activation) + layer.biases
      z_vector.append(z)
      activation = self.activation_function(z, self.layer_activations[i-1])
      activations.append(activation)

    return activations, z_vector

  def cost_derivative(self, output_activations, y):
    """Gradient of the quadratic cost ``0.5 * ||a - y||^2`` w.r.t. the output."""
    return (output_activations-y)

  def back_propagation(self, activations, z_vector, y):
    """Compute per-layer gradients for one sample.

    Args:
      activations, z_vector: values returned by ``get_activations``.
      y: target as a column vector.

    Returns:
      ``(b_error, w_error)``: lists of bias and weight gradients, one entry per
      non-input layer, in layer order.
    """
    b_error = [np.zeros(layer.biases.shape) for layer in self.layer_info[1:]]
    w_error = [np.zeros(layer.weights.shape) for layer in self.layer_info[1:]]
    delta = self._activation_backward(self.cost_derivative(activations[-1], y), z_vector[-1], self.layer_activations[-1])
    b_error[-1] = delta
    w_error[-1] = np.dot(delta, activations[-2].transpose())
    # Walk backwards: l = 2 is the last hidden layer.
    for l in range(2, self.no_of_layers):
      upstream = np.dot(self.layer_info[-l+1].weights.transpose(), delta)
      delta = self._activation_backward(upstream, z_vector[-l], self.layer_activations[-l])
      b_error[-l] = delta
      w_error[-l] = np.dot(delta, activations[-l-1].transpose())
    return (b_error, w_error)

  @staticmethod
  def _format_metrics(logs):
    """Render the metrics present in ``logs`` as ``name: value`` pairs joined by `` - ``."""
    return " - ".join(f"{key}: {logs[key]:.4f}" for key in ('accuracy', 'loss', 'val_accuracy', 'val_loss') if key in logs)

  def fit(self, training_data, training_labels, validation_data=None, validation_labels=None, epochs=20, batch_size = 10, learning_rate = 3.0, callbacks=None):
    """Train for up to ``epochs`` epochs, printing metrics after each one.

    Args:
      training_data, training_labels: parallel sequences of samples and targets.
      validation_data, validation_labels: optional parallel sequences; both
        must be given for validation metrics to be computed.
      epochs: maximum number of passes over the training data.
      batch_size: mini-batch size.
      learning_rate: initial step size; a ``ReduceLROnPlateau`` callback may
        lower it between epochs.
      callbacks: optional list containing ``EarlyStopping``,
        ``ModelCheckPoint`` and/or ``ReduceLROnPlateau`` instances. If several
        of one kind are supplied the last one is used.
    """
    n = len(training_data)
    training_data_combined = list(zip(training_data, training_labels))
    # `is not None` rather than `!= None`: comparing a NumPy array with None is
    # element-wise and cannot be used as a condition.
    has_validation = validation_data is not None and validation_labels is not None
    if has_validation:
      validation_data_combined = list(zip(validation_data, validation_labels))
    early_stopping = None
    model_check_point = None
    reduce_lr_on_plateau = None
    if callbacks is None:
      callbacks = []
    for cb in callbacks:
      if isinstance(cb, EarlyStopping):
        early_stopping = cb
      if isinstance(cb, ModelCheckPoint):
        model_check_point = cb
      if isinstance(cb, ReduceLROnPlateau):
        reduce_lr_on_plateau = cb
    for iteration in range(epochs):
      start_time = time.time()
      self.update_weights_and_biases(training_data_combined, batch_size, learning_rate)
      accuracy, loss = self.evaluate(training_data_combined)
      logs = {"accuracy": accuracy, "loss": loss}
      duration = time.time() - start_time
      if has_validation:
        val_accuracy, val_loss = self.evaluate(validation_data_combined)
        logs["val_accuracy"] = val_accuracy
        logs["val_loss"] = val_loss
      print(f"{n}/{n} ━━━━━━━━━━━━━━━━━━━━ {duration:.0f}s  - {self._format_metrics(logs)}")
      if early_stopping:
        early_stopping.on_epoch_end(iteration, logs)
        if early_stopping.stop_training:
          break
      if model_check_point:
        model_check_point.on_epoch_end(iteration, logs)
        if model_check_point.show_check_point:
          # Only the metrics that were actually recorded are printed, so a
          # run without validation data no longer fails on the val_* keys.
          print(self._format_metrics(model_check_point.get_best_epoch_details()))
      if reduce_lr_on_plateau:
        factor = reduce_lr_on_plateau.on_epoch_end(iteration, logs)
        if factor is not None:
          new_lr = max(learning_rate * factor, reduce_lr_on_plateau.min_lr)
          print(f"Reducing learning rate from {learning_rate:.5f} to {new_lr:.5f}")
          learning_rate = new_lr

  def evaluate(self, data):
    """Return ``(accuracy, average squared-error loss)`` over ``(x, y)`` pairs.

    A prediction counts as correct when the arg-max of the output equals the
    arg-max of the target. ``data`` must be non-empty.
    """
    correct = 0
    total_loss = 0
    for x, y in data:
      output = self.forward_propagation(x)
      # Align the target with the column-shaped output before subtracting.
      target = np.reshape(y, output.shape)
      if np.argmax(output) == np.argmax(target):
        correct += 1
      total_loss += np.sum((output - target) ** 2)
    accuracy = correct / len(data)
    avg_loss = total_loss / len(data)
    return accuracy, avg_loss

  def predict(self, data):
    """Return the list of output activations, one per sample in ``data``."""
    return [self.forward_propagation(item) for item in data]

class EarlyStopping:
  """Signal that training should stop once a monitored metric stops improving.

  Args:
    monitor: key looked up in the per-epoch metrics dict.
    patience: training is flagged to stop once the number of consecutive
      non-improving epochs exceeds this value.
    mode: ``'min'`` when lower values are better (default, the original
      behaviour) or ``'max'`` when higher values are better, e.g. accuracy.

  Raises:
    ValueError: if ``mode`` is neither ``'min'`` nor ``'max'``.
  """

  def __init__(self, monitor='val_loss', patience=0, mode='min'):
    if mode not in ('min', 'max'):
      raise ValueError(f"mode must be 'min' or 'max', got {mode!r}")
    self.patience = patience
    self.current_count = 0
    self.monitor = monitor
    self.mode = mode
    self.best_value = None
    self.stop_training = False

  def on_epoch_end(self, epoch, epoch_info):
    """Update the stagnation counter from ``epoch_info``.

    Epochs whose metrics do not contain the monitored key are ignored.
    ``stop_training`` becomes ``True`` when the counter exceeds ``patience``.
    """
    current = epoch_info.get(self.monitor)
    if current is None:
      return
    if self.best_value is None:
      improved = True
    elif self.mode == 'min':
      improved = current < self.best_value
    else:
      improved = current > self.best_value
    if improved:
      self.best_value = current
      self.current_count = 0
      return
    self.current_count += 1
    if self.current_count > self.patience:
      self.stop_training = True


class ModelCheckPoint:
  """Remember the metrics of every epoch that improved (lowered) a monitored value.

  ``show_check_point`` tells the caller whether the most recent epoch was an
  improvement; ``get_best_epoch_details`` returns the metrics of the latest
  improving epoch.
  """

  def __init__(self, monitor='val_loss'):
    self.monitor = monitor
    self.show_check_point = False
    self.previous_best = None
    self.improved_epoch_details = []

  def on_epoch_end(self, epoch, epoch_info):
    """Record ``epoch_info`` if the monitored value is lower than the best so far."""
    current = epoch_info.get(self.monitor)
    if current is None:
      # The metric was not reported (e.g. no validation data). Treating this
      # as an improvement would later compare None with None and crash.
      self.show_check_point = False
      return
    if self.previous_best is None or current < self.previous_best:
      self.previous_best = current
      self.store_epoch_details(epoch_info)
      self.show_check_point = True
    else:
      self.show_check_point = False

  def store_epoch_details(self, epoch_info):
    """Append a snapshot of ``epoch_info`` to the history of improving epochs."""
    # Copy so later mutation of the caller's dict cannot alter the record.
    self.improved_epoch_details.append(dict(epoch_info))

  def get_best_epoch_details(self):
    """Return the metrics of the latest improving epoch.

    Raises:
      IndexError: if no improving epoch has been recorded yet.
    """
    return self.improved_epoch_details[-1]


class ReduceLROnPlateau:
  """Tell the training loop to shrink the learning rate when a metric plateaus.

  Args:
    monitor: key looked up in the per-epoch metrics dict.
    factor: multiplier returned to the caller when a reduction is due.
    patience: a reduction is signalled once the number of consecutive
      non-improving epochs exceeds this value.
    mode: ``'min'`` (lower is better) or ``'max'`` (higher is better). Any
      other value never counts as an improvement.
    min_lr: lower bound; stored here and applied by the caller.
  """

  def __init__(self, monitor='val_loss', factor=0.1, patience=10, mode='min', min_lr=0.0):
    self.monitor = monitor
    self.factor = factor
    self.patience = patience
    self.mode = mode
    self.min_lr = min_lr
    self.current_count = 0
    self.optimal_val = None

  def on_epoch_end(self, epoch, epoch_info):
    """Return ``factor`` when the learning rate should be reduced, else ``None``.

    Epochs whose metrics do not contain the monitored key are ignored.
    """
    current = epoch_info.get(self.monitor)
    if current is None:
      return None
    if self.optimal_val is None or self.check_optimal_condition(current):
      self.optimal_val = current
      self.current_count = 0
      return None
    self.current_count += 1
    if self.current_count > self.patience:
      # Start a fresh patience window. Without this reset the factor would be
      # returned on every following stagnant epoch, collapsing the learning
      # rate geometrically.
      self.current_count = 0
      return self.factor
    return None

  def check_optimal_condition(self, value):
    """Return ``True`` if ``value`` beats ``optimal_val`` under the configured mode."""
    if self.mode == 'min':
      return bool(value < self.optimal_val)
    if self.mode == 'max':
      return bool(value > self.optimal_val)
    return False


In [None]:
class Dense:
  """Description of a fully connected layer: its width and activation name.

  ``weights`` and ``biases`` start as ``None`` and are filled in by the
  network that owns the layer.
  """

  def __init__(self, no_of_neurons, activation='sigmoid'):
    self.no_of_neurons = no_of_neurons
    self.activation = activation
    # Parameters are unset until a network allocates them.
    self.weights = self.biases = None

  def get_weights(self):
    """Return the ``(weights, biases)`` pair currently stored on the layer."""
    parameters = (self.weights, self.biases)
    return parameters


In [None]:
class Input:
  """Input layer placeholder; it only records how many features a sample has."""

  def __init__(self, no_of_neurons):
    # The input layer has no parameters and no activation of its own.
    self.no_of_neurons = no_of_neurons

In [4]:
class RNN:
  """Single-layer recurrent network trained with back-propagation through time.

  At every time step the hidden state is ``h = tanh(Wxh x + Whh h_prev + bh)``
  and the output is the linear read-out ``y = Wyh h + by``. Gradients are those
  of the squared error ``0.5 * ||y_pred - y||^2`` summed over time steps.
  """

  def __init__(self, input_size, hidden_size, output_size, activation = 'tanh'):
    """Draw all parameters from a standard normal distribution.

    ``activation`` is stored but not consulted: the forward and backward
    passes always use tanh.
    """
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.activation = activation
    self.Wxh = np.random.randn(self.hidden_size, self.input_size)
    self.Whh = np.random.randn(self.hidden_size, self.hidden_size)
    self.Wyh = np.random.randn(self.output_size, self.hidden_size)
    self.bh = np.random.randn(self.hidden_size, 1)
    self.by = np.random.randn(self.output_size, 1)

  # Declared static so that both ``self.tanh(a)`` and ``RNN.tanh(a)`` work; the
  # original definitions had no ``self`` parameter, so calling them through an
  # instance raised TypeError.
  @staticmethod
  def tanh(x):
    """Element-wise hyperbolic tangent."""
    return np.tanh(x)

  @staticmethod
  def tanh_derivative(h):
    """Derivative of tanh expressed through its output ``h = tanh(a)``."""
    return 1 - h ** 2

  def update_weights_and_biases(self, input_data, batch_size=10, learning_rate=3.0):
    """Run one epoch: shuffle ``input_data`` in place and train on each mini-batch."""
    random.shuffle(input_data)
    mini_batches = [input_data[k:k+batch_size] for k in range(0, len(input_data), batch_size)]

    for batch in mini_batches:
      self.update_mini_batch(batch, learning_rate)

  def update_mini_batch(self, mini_batch, learning_rate=3.0):
    """Apply one gradient step using the gradient averaged over ``mini_batch``."""
    if len(mini_batch) == 0:
      # Dividing the zero gradient arrays by 0 would write NaN into every
      # parameter, so an empty batch is a no-op.
      return

    totals = {
        'dWxh': np.zeros_like(self.Wxh),
        'dWhh': np.zeros_like(self.Whh),
        'dWyh': np.zeros_like(self.Wyh),
        'dbh': np.zeros_like(self.bh),
        'dby': np.zeros_like(self.by),
    }

    for x, y in mini_batch:
      ys, hs = self.forward_prop(x)
      grads = self.backward_prop(x, y, hs, ys)
      for key in totals:
        totals[key] += grads[key]

    batch_len = len(mini_batch)
    self.Wxh -= learning_rate * totals['dWxh'] / batch_len
    self.Whh -= learning_rate * totals['dWhh'] / batch_len
    self.Wyh -= learning_rate * totals['dWyh'] / batch_len
    self.bh -= learning_rate * totals['dbh'] / batch_len
    self.by -= learning_rate * totals['dby'] / batch_len

  def fit(self, sequences, targets, epochs=20, batch_size=10, learning_rate=3.0):
    """Train for ``epochs`` epochs on parallel ``sequences`` / ``targets``.

    Each target must provide one vector per time step of its sequence.
    """
    for epoch in range(epochs):
      self.update_weights_and_biases(list(zip(sequences, targets)), batch_size, learning_rate)
      print(f"Epoch {epoch + 1}/{epochs} completed.")

  def forward_prop(self, sequence):
    """Run the network over ``sequence`` starting from a zero hidden state.

    Returns:
      ``(ys, hs)``: the outputs and hidden states, one column vector per time
      step (the initial zero state is not included in ``hs``).
    """
    h = np.zeros((self.hidden_size, 1))
    hs = []
    ys = []

    for x_t in sequence:
      x_t = x_t.reshape(self.input_size, 1)
      a = np.dot(self.Wxh, x_t) + np.dot(self.Whh, h) + self.bh
      h = self.tanh(a)
      hs.append(h)
      ys.append(np.dot(self.Wyh, h) + self.by)
    return ys, hs

  def backward_prop(self, sequence, targets, hs, ys):
    """Back-propagate through time for one sequence.

    Args:
      sequence: the inputs passed to ``forward_prop``.
      targets: one target vector per time step.
      hs, ys: values returned by ``forward_prop`` for ``sequence``.

    Returns:
      dict with keys ``dWxh``, ``dWhh``, ``dWyh``, ``dbh`` and ``dby``.
    """
    dWxh = np.zeros_like(self.Wxh)
    dWhh = np.zeros_like(self.Whh)
    dWyh = np.zeros_like(self.Wyh)
    dbh = np.zeros_like(self.bh)
    dby = np.zeros_like(self.by)

    # Re-attach the zero initial state so hs_full[t] is the state *before*
    # step t and hs_full[t + 1] the state after it.
    h0 = np.zeros_like(hs[0])
    hs_full = [h0] + hs
    dh_next = np.zeros_like(hs[0])

    for t in reversed(range(len(sequence))):
      x_t = sequence[t].reshape(self.input_size, 1)
      y_t = targets[t].reshape(self.output_size, 1)
      h_t = hs_full[t + 1]
      h_prev = hs_full[t]
      y_pred = ys[t]

      dy = y_pred - y_t
      dWyh += np.dot(dy, h_t.T)
      dby += dy

      # Gradient reaching h_t: from this step's output plus from step t + 1.
      dh = np.dot(self.Wyh.T, dy) + dh_next
      da = dh * self.tanh_derivative(h_t)

      dWxh += np.dot(da, x_t.T)
      dWhh += np.dot(da, h_prev.T)
      dbh += da

      dh_next = np.dot(self.Whh.T, da)

    return {
        'dWxh': dWxh,
        'dWhh': dWhh,
        'dWyh': dWyh,
        'dbh': dbh,
        'dby': dby
    }



In [None]:
from collections import defaultdict
class Tokenizer:
  """Whitespace tokenizer that maps the most frequent words to integer ids.

  Ids start at 1 for the most frequent word; 0 is used both for unknown words
  and for padding.

  Args:
    num_words: size of the vocabulary kept in ``word_index``.
    lower: lower-case every word before counting and before lookup.
  """

  def __init__(self, num_words = 100, lower=True):
    self.num_words = num_words
    self.lower = lower
    self.word_count = defaultdict(int)
    self.word_index = {}

  def _split(self, text):
    """Split ``text`` on whitespace, lower-casing the words if configured."""
    words = text.split()
    if self.lower:
      words = [word.lower() for word in words]
    return words

  def fit_on_texts(self, inputs):
    """Count the words of every text in ``inputs`` and rebuild ``word_index``.

    Counts accumulate across calls. Words with equal counts keep the order in
    which they were first seen.
    """
    for text in inputs:
      for word in self._split(text):
        self.word_count[word] += 1
    sorted_words = sorted(self.word_count.items(), key=lambda item: -item[1])
    # Rebuild in place: entries from an earlier fit that fell out of the top
    # `num_words` must not linger with an id that now belongs to another word.
    self.word_index.clear()
    for rank, (word, _) in enumerate(sorted_words[:self.num_words], start=1):
      self.word_index[word] = rank

  def _encode(self, text):
    """Return the id sequence of one text (0 for out-of-vocabulary words)."""
    return [self.word_index.get(word, 0) for word in self._split(text)]

  def texts_to_sequences(self, input):
    """Convert text to word ids using the same normalisation as fitting.

    Args:
      input: a single string, or an iterable of strings.

    Returns:
      A list of ids for a single string; a list of such lists otherwise.
    """
    if isinstance(input, str):
      return self._encode(input)
    return [self._encode(text) for text in input]

  def pad_sequences(self, seq, truncating='post', padding='post', maxlen=100):
    """Return ``seq`` cut or zero-padded to exactly ``maxlen`` items.

    Args:
      seq: list of ids.
      truncating: ``'post'`` drops items from the end; anything else drops
        them from the start.
      padding: ``'post'`` appends zeros; anything else prepends them.
      maxlen: target length.
    """
    if len(seq) > maxlen:
      if truncating == 'post':
        seq = seq[:maxlen]
      else:
        # Explicit start index: seq[-maxlen:] returns everything for maxlen 0.
        seq = seq[len(seq) - maxlen:]
    elif len(seq) < maxlen:
      pad_length = maxlen - len(seq)
      if padding == 'post':
        seq = seq + [0] * pad_length
      else:
        seq = [0] * pad_length + seq
    return seq



In [None]:
# !pip install nlp
# !pip install datasets

# from datasets import load_dataset

# dataset = load_dataset('emotion')
# print(dataset.shape)
# dataset
# train = dataset['train']
# validation = dataset['validation']
# test = dataset['test']
# train_tweets = [tweet['text'] for tweet in train]
# train_labels = [tweet['label'] for tweet in train]
# print(train_tweets[0])
# print(train_labels[0])

In [None]:
# Fit a 10,000-word vocabulary on the training tweets and encode the first one.
# NOTE: `train_tweets` is only assigned in the commented-out dataset cell above,
# so that cell has to be enabled and run before this one.
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(train_tweets)
word_index = tokenizer.word_index

# print("Word Index:", word_index)
print("First Tweet:", train_tweets[0])
# texts_to_sequences is given a single string here and returns one id list.
tokenized_seq = tokenizer.texts_to_sequences(train_tweets[0])
print("Tokenized Sequence:", tokenized_seq)


First Tweet: i didnt feel humiliated
Tokenized Sequence: [1, 138, 2, 678]


In [None]:
# Encode the first tweet and zero-pad it at the end to a fixed length of 50 ids.
maxlen = 50
sequences = tokenizer.texts_to_sequences(train_tweets[0])
padded = tokenizer.pad_sequences(sequences, truncating='post', padding='post', maxlen=maxlen)
print(padded)

[1, 138, 2, 678, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [None]:
!wget https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz -O mnist.pkl.gz


--2025-04-09 15:20:08--  https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz
Resolving github.com (github.com)... 20.27.177.113
Connecting to github.com (github.com)|20.27.177.113|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/mnielsen/neural-networks-and-deep-learning/master/data/mnist.pkl.gz [following]
--2025-04-09 15:20:09--  https://raw.githubusercontent.com/mnielsen/neural-networks-and-deep-learning/master/data/mnist.pkl.gz
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 17051982 (16M) [application/octet-stream]
Saving to: ‘mnist.pkl.gz’


2025-04-09 15:20:10 (33.3 MB/s) - ‘mnist.pkl.gz’ saved [17051982/17051982]



In [None]:
import gzip
import pickle

# Archive written by the wget cell above.
file_path = "mnist.pkl.gz"

# SECURITY NOTE: unpickling executes code embedded in the file. This archive is
# downloaded from the network, so only run this with a source you trust.
with gzip.open(file_path, 'rb') as f:
    mnist_data = pickle.load(f, encoding='latin1')

# The pickle unpacks into three splits; index 0 of a split holds the images and
# index 1 the labels (shapes are printed below).
training_data, validation_data, test_data = mnist_data
print(training_data[0].shape)
print(training_data[1].shape)

def vectorized_label(j, num_classes=10):
    """Return a one-hot column vector of shape ``(num_classes, 1)`` with a 1.0 at row ``j``.

    ``num_classes`` defaults to 10, the value that was previously hard-coded.
    ``j`` is used as a NumPy index, so a value of ``num_classes`` or more raises
    ``IndexError`` and a negative value counts from the end.
    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e

# Turn every image into a column vector and every integer label into a one-hot
# column, the per-sample format the network's fit/evaluate methods iterate over.
X_train = [x.reshape(-1, 1) for x in training_data[0]]
y_train = [vectorized_label(y) for y in training_data[1]]

X_val = [x.reshape(-1, 1) for x in validation_data[0]]
y_val = [vectorized_label(y) for y in validation_data[1]]

(50000, 784)
(50000,)


In [None]:
# 784 inputs -> 30 sigmoid units -> 10 sigmoid outputs.
layer1 = Input(784)
layer2 = Dense(30, activation='sigmoid')
layer3 = Dense(10,activation='sigmoid')
model = NeuralNetwork([layer1, layer2, layer3])
model.summary()
# layers() returns the same layer objects, input layer first, so index 1 and 2
# are the two Dense layers whose parameters the constructor just initialised.
layers = model.layers()
W1,b1 = layers[1].get_weights()
W2,b2 = layers[2].get_weights()
print(f"W1 shape = {W1.shape}, b1 shape = {b1.shape}")
print(f"W2 shape = {W2.shape}, b2 shape = {b2.shape}")

L2 params: 23550
L3 params: 310
Total no of parameters:23860
W1 shape = (30, 784), b1 shape = (30, 1)
W2 shape = (10, 30), b2 shape = (10, 1)


In [None]:
# Train with the default epochs / batch size / learning rate of fit(), watching
# validation loss for both early stopping and checkpoint reporting.
early_stopping = EarlyStopping(monitor='val_loss', patience=3)
model_check_point = ModelCheckPoint(monitor='val_loss')
model.fit(X_train, y_train, X_val, y_val, callbacks=[early_stopping,model_check_point])


50000/50000 ━━━━━━━━━━━━━━━━━━━━ 9s  - accuracy: 0.9087 - loss: 0.1493 - val_accuracy: 0.9176 - val_loss: 0.1367
accuracy: 0.9087 - loss: 0.1493 - val_accuracy: 0.9176 - val_loss: 0.1367
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 9s  - accuracy: 0.9245 - loss: 0.1252 - val_accuracy: 0.9239 - val_loss: 0.1244
accuracy: 0.9245 - loss: 0.1252 - val_accuracy: 0.9239 - val_loss: 0.1244
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 8s  - accuracy: 0.9371 - loss: 0.1054 - val_accuracy: 0.9363 - val_loss: 0.1045
accuracy: 0.9371 - loss: 0.1054 - val_accuracy: 0.9363 - val_loss: 0.1045
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 9s  - accuracy: 0.9393 - loss: 0.1003 - val_accuracy: 0.9365 - val_loss: 0.1033
accuracy: 0.9393 - loss: 0.1003 - val_accuracy: 0.9365 - val_loss: 0.1033
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 9s  - accuracy: 0.9455 - loss: 0.0919 - val_accuracy: 0.9428 - val_loss: 0.0949
accuracy: 0.9455 - loss: 0.0919 - val_accuracy: 0.9428 - val_loss: 0.0949
50000/50000 ━━━━━━━━━━━━━━━━━━━━ 8s  - accuracy: 0.9420 - loss: 0

In [None]:
# Prepare the held-out test split in the same column-vector / one-hot format.
X_test = [x.reshape(-1, 1) for x in test_data[0]]
y_test = [vectorized_label(y) for y in test_data[1]]
testing_data_combined = list(zip(X_test, y_test))

# evaluate() takes (x, y) pairs and returns (accuracy, average loss).
accuracy, avg_loss = model.evaluate(testing_data_combined)
print(accuracy)
print(avg_loss)

# predict() returns one output column per sample; show the first one.
predictions = model.predict(X_test)
print(predictions[0])


0.9441
0.09584845820981042
[[5.90443612e-06]
 [5.66600827e-04]
 [5.70669822e-05]
 [9.34571025e-04]
 [1.01822058e-07]
 [6.74290551e-06]
 [4.64898698e-11]
 [9.99465235e-01]
 [1.13471439e-08]
 [2.94557186e-08]]


In [None]:
import tensorflow
from tensorflow.keras.datasets import fashion_mnist
import numpy as np

def vectorized_label(j, num_classes=10):
    """Return a one-hot column vector of shape ``(num_classes, 1)`` with a 1.0 at row ``j``.

    ``num_classes`` defaults to 10, the value that was previously hard-coded.
    ``j`` is used as a NumPy index, so a value of ``num_classes`` or more raises
    ``IndexError`` and a negative value counts from the end.
    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e

(X_train_tf, y_train_tf), (X_test_tf, y_test_tf) = fashion_mnist.load_data()

# Flatten each image to a column vector and scale pixel values to [0, 1];
# labels become one-hot columns.
X_train_tf = [x.reshape(-1, 1) / 255.0 for x in X_train_tf]
y_train_tf = [vectorized_label(y) for y in y_train_tf]

X_test_tf = [x.reshape(-1, 1) / 255.0 for x in X_test_tf]
# Stored as `y_test` because the evaluation cell below pairs X_test_tf with
# `y_test`; this replaces the MNIST `y_test` defined in an earlier cell.
y_test = [vectorized_label(y) for y in y_test_tf]

# NeuralNetwork reads `no_of_neurons` / `activation` from layer objects, so the
# architecture has to be given as Input/Dense layers; bare integers raise
# AttributeError. Dense defaults to the sigmoid activation.
model = NeuralNetwork([Input(784), Dense(64), Dense(10)])
model.fit(X_train_tf, y_train_tf, epochs=10, batch_size=32, learning_rate=3.0)





60000/60000 ━━━━━━━━━━━━━━━━━━━━ 17s  - accuracy: 0.7493 - loss: 0.3547
60000/60000 ━━━━━━━━━━━━━━━━━━━━ 17s  - accuracy: 0.7744 - loss: 0.3132
60000/60000 ━━━━━━━━━━━━━━━━━━━━ 17s  - accuracy: 0.7856 - loss: 0.3009
60000/60000 ━━━━━━━━━━━━━━━━━━━━ 17s  - accuracy: 0.7882 - loss: 0.2916
60000/60000 ━━━━━━━━━━━━━━━━━━━━ 17s  - accuracy: 0.7971 - loss: 0.2780
60000/60000 ━━━━━━━━━━━━━━━━━━━━ 17s  - accuracy: 0.7972 - loss: 0.2803
60000/60000 ━━━━━━━━━━━━━━━━━━━━ 17s  - accuracy: 0.8072 - loss: 0.2646
60000/60000 ━━━━━━━━━━━━━━━━━━━━ 17s  - accuracy: 0.8101 - loss: 0.2587
60000/60000 ━━━━━━━━━━━━━━━━━━━━ 17s  - accuracy: 0.8092 - loss: 0.2620
60000/60000 ━━━━━━━━━━━━━━━━━━━━ 17s  - accuracy: 0.8123 - loss: 0.2603


In [None]:
# Score the Fashion-MNIST model on its test split. `y_test` here is the one-hot
# label list built in the Fashion-MNIST cell above, not the MNIST one.
testing_data_combined = list(zip(X_test_tf, y_test))

accuracy, avg_loss = model.evaluate(testing_data_combined)
print(accuracy)
print(avg_loss)

0.7957
0.28854674926965623
