<a href="https://colab.research.google.com/github/aminaalisheva/NN-from-scratch-and-DL-Regularization-and-Optimization/blob/main/NN_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

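# Neural Network from scratch

This notebook builds a small scalar reverse-mode automatic-differentiation engine (`Value`), assembles a multilayer perceptron from it (`Neuron`, `Layer`, `MLP`), and trains a softmax classifier on the Iris dataset with full-batch gradient descent.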



In [None]:
import numpy as np

class Value:
    """Scalar node of a dynamically built computation graph with reverse-mode autodiff.

    Every arithmetic operation returns a new ``Value`` that remembers its operands
    (``_prev``), the operation name (``_op``) and a closure (``_backward``) that
    adds the local gradient contribution into the operands' ``grad``.

    Args:
        data: The scalar held by this node.
        _prev: Tuple of operand nodes this node was computed from (empty for leaves).
        _op: Short name of the operation that produced this node.
        requires_grad: Whether gradients should be accumulated into this node.
            Results of operations require grad if any operand does.
        label: Optional human-readable name.
    """

    def __init__(self, data, _prev=(), _op='', requires_grad=False, label=''):
        self.data = data
        self._prev = _prev
        self._op = _op
        self.label = label
        self.grad = 0.0
        self.requires_grad = requires_grad
        self._backward = lambda: None

    def __add__(self, other):
        """Return ``self + other``; plain numbers are wrapped as constant Values."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+', self.requires_grad or other.requires_grad)

        def _backward():
            if self.requires_grad:
                self.grad += out.grad
            if other.requires_grad:
                other.grad += out.grad
        out._backward = _backward

        return out

    def __radd__(self, other):
        """Return ``other + self`` for a non-Value left operand (e.g. ``1 + v``)."""
        return self + other

    def __mul__(self, other):
        """Return ``self * other``; plain numbers are wrapped as constant Values."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*', self.requires_grad or other.requires_grad)

        def _backward():
            if self.requires_grad:
                self.grad += other.data * out.grad
            if other.requires_grad:
                other.grad += self.data * out.grad
        out._backward = _backward

        return out

    def __rmul__(self, other):
        """Return ``other * self`` for a non-Value left operand (e.g. ``2 * v``)."""
        return self * other

    def __neg__(self):
        """Return ``-self``."""
        return self * -1

    def __pow__(self, power):
        """Return ``self ** power`` for a constant int/float exponent."""
        assert isinstance(power, (int, float)), "Only supports scalar powers."
        out = Value(self.data**power, (self,), f"**{power}", self.requires_grad)

        def _backward():
            if self.requires_grad:
                # x**0 is constant, so its derivative is 0. Handling it explicitly
                # avoids evaluating 0 * x**(-1), which divides by zero at x == 0.
                local = 0.0 if power == 0 else power * self.data**(power - 1)
                self.grad += local * out.grad
        out._backward = _backward
        return out

    def exp(self):
        """Return ``e ** self``."""
        out = Value(np.exp(self.data), (self,), "exp", self.requires_grad)

        def _backward():
            if self.requires_grad:
                # d/dx exp(x) = exp(x), which is already stored in out.data.
                self.grad += out.data * out.grad
        out._backward = _backward
        return out

    def log(self):
        """Return the natural logarithm of this node.

        Raises:
            ValueError: If ``self.data`` is zero or negative.
        """
        if self.data <= 0:
            raise ValueError("Logarithm is not defined for non-positive values.")
        out = Value(np.log(self.data), (self,), 'log', self.requires_grad)

        def _backward():
            if self.requires_grad:
                self.grad += (1.0 / self.data) * out.grad
        out._backward = _backward

        return out

    def __sub__(self, other):
        """Return ``self - other``, built as ``self + (-1 * other)``."""
        return self + (Value(-1) * other)

    def __rsub__(self, other):
        """Return ``other - self`` for a non-Value left operand (e.g. ``5 - v``)."""
        return Value(other) - self

    def __truediv__(self, other):
        """Return ``self / other``; plain numbers are wrapped as constant Values."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data / other.data, (self, other), '/', self.requires_grad or other.requires_grad)

        def _backward():
            if self.requires_grad:
                self.grad += (1.0 / other.data) * out.grad
            if other.requires_grad:
                other.grad += (-self.data / (other.data ** 2)) * out.grad
        out._backward = _backward

        return out

    def __rtruediv__(self, other):
        """Return ``other / self`` for a non-Value left operand (e.g. ``6 / v``)."""
        return Value(other) / self

    def sigmoid(self):
        """Return the logistic sigmoid ``1 / (1 + exp(-self))``."""
        x = self.data
        # Only ever exponentiate a non-positive number so exp() cannot overflow
        # for inputs of large magnitude; both branches are the same function.
        if x >= 0:
            s = 1.0 / (1.0 + np.exp(-x))
        else:
            z = np.exp(x)
            s = z / (1.0 + z)
        out = Value(s, (self,), 'sigmoid', self.requires_grad)

        def _backward():
            if self.requires_grad:
                self.grad += s * (1.0 - s) * out.grad
        out._backward = _backward

        return out

    def backward(self):
        """Backpropagate from this node, treating it as the scalar output.

        Sets ``self.grad`` to 1.0 and runs every node's ``_backward`` in reverse
        topological order. Gradients are accumulated with ``+=``, so call
        ``zero_grad()`` between passes that should not add up.
        """
        topo = self.build_topo()
        self.grad = 1.0

        for node in reversed(topo):
            node._backward()

    def build_topo(self):
        """Return all nodes reachable from this one, operands before results.

        This is a depth-first post-order traversal done with an explicit stack,
        so the depth of the graph is not limited by the interpreter's recursion
        limit (long chains such as a sum over many terms are deep).
        """
        topo = []
        visited = {self}
        stack = [(self, iter(self._prev))]

        while stack:
            node, children = stack[-1]
            for child in children:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child._prev)))
                    break
            else:
                # Every operand of `node` has been emitted; emit the node itself.
                topo.append(node)
                stack.pop()

        return topo

    def optimize(self, learning_rate=0.01):
        """Apply one gradient-descent step to the trainable leaves of the graph.

        Only nodes with ``requires_grad`` and no operands are updated. Intermediate
        results (including this output node, whose grad is 1.0 after ``backward``)
        are derived quantities, so their ``data`` is left untouched.
        """
        for node in self.build_topo():
            if node.requires_grad and not node._prev:
                node.data -= learning_rate * node.grad

    def zero_grad(self):
        """Reset ``grad`` to 0.0 on every node reachable from this one."""
        for node in self.build_topo():
            node.grad = 0.0

    def __repr__(self):
        return f'Value({self.data})'

In [None]:
class Neuron:
  """A single unit computing ``b + sum(w_i * x_i)``, optionally passed through a sigmoid.

  Args:
      N: Number of inputs, i.e. number of weights to create.
      activation: If True the weighted sum is passed through ``sigmoid()``;
          if False the raw weighted sum is returned.
  """

  def __init__(self, N, activation=True):
    # Weights are drawn uniformly from [-1, 1); the bias starts at zero.
    self.W = [Value(np.random.uniform(-1, 1), label=f'w{i}', requires_grad=True) for i in range(N)]
    self.b = Value(0, label='b', requires_grad=True)
    self.activation = activation

  def __call__(self, X):
    """Evaluate the neuron on one sample.

    Args:
        X: Iterable of inputs paired position-by-position with the weights.
            Elements may be ``Value`` objects or plain numbers.

    Returns:
        The weighted sum plus bias, after the sigmoid when ``activation`` is set.
    """
    out = self.b
    for x, w in zip(X, self.W):
      # Keep the weight on the left so the product is dispatched to the weight's
      # own __mul__, which also accepts plain numbers as the right operand.
      out = out + (w * x)
    return out.sigmoid() if self.activation else out


class Layer:
  """A group of ``count`` neurons that all receive the same ``N`` inputs."""

  def __init__(self, N, count, activation=True):
    # Build the units one by one; each gets its own independent parameters.
    units = []
    for _ in range(count):
      units.append(Neuron(N, activation))
    self.neurons = units

  def __call__(self, X):
    """Feed ``X`` to every neuron and return their outputs as a list, in order."""
    outputs = []
    for unit in self.neurons:
      outputs.append(unit(X))
    return outputs


class MLP:
  """Stack of layers: ``N`` inputs followed by one layer per entry of ``counts``."""

  def __init__(self, N, counts):
    dims = [N] + counts
    final_index = len(dims) - 2  # index of the last layer in the stack
    stack = []
    for index, (fan_in, fan_out) in enumerate(zip(dims, dims[1:])):
      # Every layer except the final one applies its activation.
      stack.append(Layer(fan_in, fan_out, activation=(index < final_index)))
    self.layers = stack

  def __call__(self, X):
    """Pass ``X`` through each layer in order and return the last layer's output."""
    signal = X
    for stage in self.layers:
      signal = stage(signal)
    return signal

In [None]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Fixed seed so the train/test split, and any later draws from NumPy's global
# random generator, come out the same on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

iris = datasets.load_iris()

X = iris.data    # 150 samples (50 per class), 4 features each
y = iris.target  # 3 classes (0, 1, 2)

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

print(f'Train data shape: {X_train.shape}, {y_train.shape}')
print(f'Test data shape: {X_test.shape}, {y_test.shape}')
print(f'Input Samples:\n {X_train[:5]}')
print(f'Labels:\n {y_train[:5]}')

Train data shape: (120, 4), (120,)
Test data shape: (30, 4), (30,)
Input Samples:
 [[6.3 2.8 5.1 1.5]
 [5.2 4.1 1.5 0.1]
 [4.4 3.  1.3 0.2]
 [5.  3.3 1.4 0.2]
 [4.8 3.4 1.9 0.2]]
Labels:
 [2 0 0 0 0]


In [None]:
# converting numpy float arrays into Value lists
# Elements that are already Value objects are kept as they are, so running this
# cell a second time does not wrap a Value inside another Value.
X_train = [[x if isinstance(x, Value) else Value(x) for x in row] for row in X_train]
X_test = [[x if isinstance(x, Value) else Value(x) for x in row] for row in X_test]
y_train = [label if isinstance(label, Value) else Value(label) for label in y_train]
y_test = [label if isinstance(label, Value) else Value(label) for label in y_test]

# Training Custom MLP Classifier

In [None]:
class Classifier:
    """Softmax classifier built on ``MLP`` and trained with full-batch gradient descent.

    Args:
        layer_sizes: Output size of each layer; the last entry is the number of
            classes. Defaults to ``[2, 3, 3]``. The network itself is created
            lazily in ``fit`` once the input dimension is known.
    """

    def __init__(self, layer_sizes=None):
        # A None sentinel replaces a list literal default, which would be one
        # shared object across all instances. The sizes are copied so later
        # changes to the caller's sequence do not affect this model.
        self.layer_sizes = [2, 3, 3] if layer_sizes is None else list(layer_sizes)
        self.nn = None
        self.L = Value(0.0)  # most recent loss node; placeholder until the first step
        self.iterations = 0  # number of completed training epochs

    def one_hot_encode(self, y, num_classes):
        """Turn each label in ``y`` into a list of ``num_classes`` 0.0/1.0 Values."""
        return [[Value(1.0) if i == label.data else Value(0.0) for i in range(num_classes)] for label in y]

    def softmax(self, logits):
        """Return the softmax probabilities for one sample's logits.

        The largest logit is subtracted from every logit before exponentiating.
        Softmax is unchanged by adding a constant to all logits, so the result
        and its gradients are the same, but ``exp`` can no longer overflow.
        """
        if not logits:
            return []
        peak = max(logit.data for logit in logits)
        exp_logits = [(logit + (-peak)).exp() for logit in logits]
        exp_sum = exp_logits[0]
        for term in exp_logits[1:]:
            exp_sum = exp_sum + term
        return [term / exp_sum for term in exp_logits]

    def argmax(self, values):
        """Return the index of the element with the largest ``data``."""
        return max(range(len(values)), key=lambda i: values[i].data)

    def forward(self, Xs):
        """Run the network on every sample and return per-sample class probabilities."""
        raw_outputs = [self.nn(X) for X in Xs]
        return [self.softmax(logits) for logits in raw_outputs]

    def predict(self, Xs):
        """Return the most probable class index for every sample in ``Xs``."""
        return [self.argmax(probs) for probs in self.forward(Xs)]

    def train(self, X_train, y_train, learning_rate=0.01):
        """Run one full-batch gradient-descent step and print the loss.

        Returns:
            The loss computed in this step, before the parameters were updated.
        """
        # Clear the gradients left on the previous step's graph, which contains
        # the network parameters.
        self.L.zero_grad()
        preds = self.forward(X_train)
        y_train_one_hot = self.one_hot_encode(y_train, num_classes=self.layer_sizes[-1])
        self.L = self.cross_entropy_loss(y_train_one_hot, preds)
        self.L.backward()
        # Snapshot the loss before the parameter step and restore it afterwards,
        # so the reported value is the loss that was actually computed even if
        # the optimizer step adjusts the data stored on graph nodes.
        loss_value = self.L.data
        self.L.optimize(learning_rate)
        self.L.data = loss_value
        print(f'Loss: {loss_value:.4f}')
        return loss_value

    def fit(self, X_train, y_train, learning_rate=0.01, num_epochs=50):
        """Train for ``num_epochs`` epochs, creating the network on first use.

        Calling ``fit`` again continues training the same network and continues
        the epoch numbering.
        """
        if self.nn is None:
            self.nn = MLP(len(X_train[0]), self.layer_sizes)
        for _ in range(num_epochs):
            print(f'Training epoch {self.iterations + 1}')
            self.train(X_train, y_train, learning_rate)
            # Count each epoch as it completes so an interrupted run still
            # leaves an accurate total.
            self.iterations += 1

    def cross_entropy_loss(self, y_true, y_pred):
        """Return the mean negative log-probability assigned to the true classes.

        Args:
            y_true: One-hot encoded labels, one list of Values per sample.
            y_pred: Predicted class probabilities, one list of Values per sample.
        """
        losses = []
        for true, pred in zip(y_true, y_pred):
            correct_class_index = self.argmax(true)
            # No clipping is applied: log() raises ValueError if this
            # probability is not strictly positive.
            true_class_prob = pred[correct_class_index]
            losses.append(true_class_prob.log() * Value(-1))
        total_loss = sum(losses, Value(0.0))
        return total_loss / Value(len(losses))

    def accuracy_score(self, y_test, preds):
        """Return the fraction of ``preds`` equal to the labels in ``y_test``."""
        y_test_values = np.array([y.data for y in y_test])
        preds_values = np.array(preds)
        correct = np.sum(y_test_values == preds_values)
        return correct / len(y_test)

In [None]:
# A single layer with 3 outputs and no hidden layer.
model = Classifier(layer_sizes=[3])
# Alternative with a hidden layer of 4 units before the 3 outputs:
# model = Classifier(layer_sizes=[4, 3])

In [None]:
# Every epoch is one gradient step computed on the whole training set.
model.fit(
    X_train,
    y_train,
    learning_rate=0.1,
    num_epochs=500,
)

Training epoch 1
Loss: 2.3298
Training epoch 2
Loss: 1.4518
Training epoch 3
Loss: 0.7925
Training epoch 4
Loss: 0.7252
Training epoch 5
Loss: 0.6637
Training epoch 6
Loss: 0.6410
Training epoch 7
Loss: 0.6225
Training epoch 8
Loss: 0.6078
Training epoch 9
Loss: 0.5949
Training epoch 10
Loss: 0.5872
Training epoch 11
Loss: 0.5796
Training epoch 12
Loss: 0.5820
Training epoch 13
Loss: 0.5782
Training epoch 14
Loss: 0.5951
Training epoch 15
Loss: 0.5869
Training epoch 16
Loss: 0.6182
Training epoch 17
Loss: 0.5926
Training epoch 18
Loss: 0.6318
Training epoch 19
Loss: 0.5886
Training epoch 20
Loss: 0.6310
Training epoch 21
Loss: 0.5800
Training epoch 22
Loss: 0.6242
Training epoch 23
Loss: 0.5707
Training epoch 24
Loss: 0.6166
Training epoch 25
Loss: 0.5620
Training epoch 26
Loss: 0.6091
Training epoch 27
Loss: 0.5537
Training epoch 28
Loss: 0.6020
Training epoch 29
Loss: 0.5458
Training epoch 30
Loss: 0.5950
Training epoch 31
Loss: 0.5384
Training epoch 32
Loss: 0.5883
Training epoch 33

In [None]:
# Accuracy on the samples the model was trained on.
preds = model.predict(X_train)
train_accuracy = model.accuracy_score(y_train, preds)
print(f'Custom MLP classifier accuracy on train Data: {train_accuracy:.2f}')

Custom MLP classifier accuracy on train Data: 0.97


In [None]:
# Accuracy on the held-out samples.
preds = model.predict(X_test)
test_accuracy = model.accuracy_score(y_test, preds)
print(f'Custom MLP classifier accuracy on test Data: {test_accuracy:.2f}')

Custom MLP classifier accuracy on test Data: 1.00
