<a href="https://colab.research.google.com/github/jalaneunos/neural_network_architectures/blob/main/mlp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [95]:
import torch
import torch.nn as nn
import numpy as np

# 2 layer ANN without PyTorch nn modules

## Activation functions

In [96]:
def relu(x):
    return torch.maximum(torch.tensor(0.0), x)

def sigmoid(x):
    return 1 / (1 + torch.exp(-x))

In [196]:
class Layer:
  def __init__(self, num_neurons, input_dimensions):
    self.weights = torch.rand((input_dimensions, num_neurons), dtype=torch.float64)
    self.bias = torch.zeros(num_neurons)
    self.most_recent_input = None # used to calculate backprop
    self.most_recent_u = None # before activation function

class LinearLayer(Layer):
  def __init__(self, num_neurons, input_dimensions):
    super().__init__(num_neurons, input_dimensions)
    self.activation = lambda x: x

  def forward(self, x):
    self.most_recent_input = x
    u = torch.transpose(self.weights, 0, 1) @ x + self.bias
    y = self.activation(u)
    return y

  def derivative_activation(self, x): return 1

  def __repr__(self):
    return f"Linear Layer with {len(self.bias)} neurons"


class ReLULayer(Layer):
  def __init__(self, num_neurons, input_dimensions):
    super().__init__(num_neurons, input_dimensions)
    self.activation = relu

  def forward(self, x):
    self.most_recent_input = x
    u = torch.transpose(self.weights, 0, 1) @ x + self.bias
    self.most_recent_u = u
    y = self.activation(u)
    return y

  def derivative_activation(self, x): return torch.gt(x, 0).type(x.dtype)

  def __repr__(self):
    return f"ReLU Layer with {len(self.bias)} neurons"



class ANN:
  def __init__(self, num_of_layers, input_dimensions, output_dimensions, layer_width, lr=0.001):
    self.layers = []
    for i in range(num_of_layers):
      if i == 0:
        layer = ReLULayer(layer_width, input_dimensions)
      elif i == num_of_layers - 1: # output layer
        layer = LinearLayer(output_dimensions, layer_width)
      else:
        layer = ReLULayer(layer_width, layer_width)
      self.layers.append(layer)
    self.lr = lr

  def forward(self, x):
    for layer in self.layers:
      x = layer.forward(x)
    return x

  def loss(self, d, y):
    return -(d - y)

  def backward(self, d, y):
    loss = self.loss(d, y)

    out = self.layers[-1]
    hidden = self.layers[1]
    hidden_loss = out.weights @ loss * hidden.derivative_activation(hidden.most_recent_u)

    out.bias -= self.lr * loss
    out.weights -= self.lr * loss * out.most_recent_input.view(-1, 1)

    hidden.bias -= self.lr * hidden_loss
    hidden.weights -= self.lr * hidden.most_recent_input @ hidden_loss







In [197]:
ann = ANN(3, 3, 1, 4)

In [198]:
x = torch.tensor([1, 2, 3], dtype=torch.float64)

In [199]:
out = ann.forward(x)

In [200]:
ann.layers

[ReLU Layer with 4 neurons,
 ReLU Layer with 4 neurons,
 Linear Layer with 1 neurons]

In [201]:
X = torch.tensor([
    [1.5, 2.3, 0.7],
    [0.6, 1.1, 3.4],
    [2.2, 0.5, 1.1],
    [3.3, 2.1, 0.9],
    [1.0, 1.5, 2.0],
    [2.5, 0.3, 2.2],
    [1.8, 2.4, 1.5],
    [0.9, 0.8, 2.3]
], dtype=torch.float64)

D = torch.tensor([4.2, 2.8, 3.5, 5.7, 3.0, 3.8, 4.1, 2.5],  dtype=torch.float64)

## Training the neural network

In [202]:
for epoch in range(20):
  mse = 0
  for i in range(len(X)):
    x, d = X[i], D[i]
    y = ann.forward(x)
    mse += ann.loss(d, y) ** 2
    ann.backward(d, y)
  print(f'Epoch {epoch}: MSE: {mse / len(X)}')


Epoch 0: MSE: tensor([8.9002], dtype=torch.float64)
Epoch 1: MSE: tensor([1.5796], dtype=torch.float64)
Epoch 2: MSE: tensor([1.2452], dtype=torch.float64)
Epoch 3: MSE: tensor([1.2212], dtype=torch.float64)
Epoch 4: MSE: tensor([1.2207], dtype=torch.float64)
Epoch 5: MSE: tensor([1.2210], dtype=torch.float64)
Epoch 6: MSE: tensor([1.2207], dtype=torch.float64)
Epoch 7: MSE: tensor([1.2202], dtype=torch.float64)
Epoch 8: MSE: tensor([1.2195], dtype=torch.float64)
Epoch 9: MSE: tensor([1.2189], dtype=torch.float64)
Epoch 10: MSE: tensor([1.2182], dtype=torch.float64)
Epoch 11: MSE: tensor([1.2175], dtype=torch.float64)
Epoch 12: MSE: tensor([1.2168], dtype=torch.float64)
Epoch 13: MSE: tensor([1.2162], dtype=torch.float64)
Epoch 14: MSE: tensor([1.2155], dtype=torch.float64)
Epoch 15: MSE: tensor([1.2148], dtype=torch.float64)
Epoch 16: MSE: tensor([1.2141], dtype=torch.float64)
Epoch 17: MSE: tensor([1.2135], dtype=torch.float64)
Epoch 18: MSE: tensor([1.2128], dtype=torch.float64)
Epo