<a href="https://colab.research.google.com/github/codingiamnot/Neural-Networks-2024/blob/main/Lab03/tema3_rn_drop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import math
from torchvision.datasets import MNIST

def download_mnist(is_train: bool):
    dataset = MNIST(root="./data",
        transform=lambda x: np.array(x).flatten(),
        download=True,
        train=is_train)

    mnist_data = []
    mnist_labels = []

    for image, label in dataset:
        mnist_data.append(image)
        mnist_labels.append(label)

    return mnist_data, mnist_labels


train_X, train_Y = download_mnist(True)
test_X, test_Y = download_mnist(False)

train_X = np.array(train_X)
train_Y = np.array(train_Y)
test_X = np.array(test_X)
test_Y = np.array(test_Y)


#normalize the input data

train_X = np.divide(train_X, 256)
test_X = np.divide(test_X, 256)

#conver ouput to one hot encoding

def oneHot(value):
  ans = np.zeros(10,)
  ans[value] = 1
  return ans

train_Y = np.array([oneHot(value) for value in train_Y])
test_Y = np.array([oneHot(value) for value in test_Y])

In [12]:
import sys

def sigmoid(x):
  if x >= 100:
    return 1

  if x <= -100:
    return 0

  return 1 / (1 + math.exp(-x))

sigmoid_np = np.vectorize(sigmoid)

def sigmoid_deriv(x):
  value = sigmoid(x)
  return value * (1-value)

sigmoid_deriv_np = np.vectorize(sigmoid_deriv)



def softmax(x):
  return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)


class NeuralNetwork:

  def __init__(self, hidden_nodes=100, learning_rate=0.01):
    self.w12 = np.random.randn(784, hidden_nodes)
    self.w23 = np.random.randn(hidden_nodes, 10)
    self.b2 = np.random.randn(hidden_nodes, )
    self.b3 = np.random.randn(10, )
    self.learning_rate = learning_rate

  def forward(self, x):
    x = np.dot(x, self.w12) + self.b2
    x = sigmoid_np(x)
    x = np.dot(x, self.w23) + self.b3
    x = softmax(x)
    return x

  def backpropagation(self, x, y, p_on):
    active_hidden = np.random.choice([0, 1],
                                     size=(self.w12.shape[1],),
                                     p=[1-p_on, p_on])

    active_hidden = np.tile(active_hidden, (x.shape[0], 1))

    #values of the hidden layer
    values2a = np.dot(x, self.w12) + self.b2
    values2z = sigmoid_np(values2a)

    #deactivate nodes
    values2z = np.multiply(values2z, active_hidden)


    #values of the output layer
    values3 = np.dot(values2z, self.w23) + self.b3
    values3 = softmax(values3)

    gradient_values3 = y - values3
    gradient_w23 = np.dot(values2z.T, gradient_values3)
    gradient_b3 = np.sum(gradient_values3, axis=0)

    #gradient after activation function
    gradient_values2 = np.dot(gradient_values3, self.w23.T)
    #drop deactivated nodes
    gradient_values2 = np.multiply(gradient_values2, active_hidden)
    #gradient before activation function
    gradient_values2 = np.multiply(sigmoid_deriv_np(values2a), gradient_values2)

    gradient_w12 = np.dot(x.T, gradient_values2)
    gradient_b2 = np.sum(gradient_values2, axis=0)


    self.w23 += self.learning_rate * gradient_w23
    self.b3 += self.learning_rate * gradient_b3

    self.w12 += self.learning_rate * gradient_w12
    self.b2 += self.learning_rate * gradient_b2


  def train(self, x, y, epochs=1, batch_size=100, p_on=0.5):

    while epochs > 0:
      epochs -= 1

      print(epochs, "epochs left")

      permutation = np.random.permutation(x.shape[0])
      x = x[permutation]
      y = y[permutation]

      for i in range(0, x.shape[0], batch_size):
        batch_x = x[i:i+batch_size]
        batch_y = y[i:i+batch_size]

        self.backpropagation(batch_x, batch_y, p_on)


  def accuracy(self, x, y):
    pred_y = self.forward(x)

    labels = np.argmax(y, axis=1)
    predictions = np.argmax(pred_y, axis=1)
    return np.mean(predictions == labels)

In [17]:
import time

model = NeuralNetwork(learning_rate=0.02)
print("before training", model.accuracy(train_X, train_Y), model.accuracy(test_X, test_Y))

time_start = time.time()
model.train(train_X, train_Y, epochs=20, p_on=0.8)
time_end = time.time()

print((time_end - time_start) / 60)

print("after training", model.accuracy(train_X, train_Y), model.accuracy(test_X, test_Y))



before training 0.09636666666666667 0.0949
19 epochs left
18 epochs left
17 epochs left
16 epochs left
15 epochs left
14 epochs left
13 epochs left
12 epochs left
11 epochs left
10 epochs left
9 epochs left
8 epochs left
7 epochs left
6 epochs left
5 epochs left
4 epochs left
3 epochs left
2 epochs left
1 epochs left
0 epochs left
4.97994441986084
after training 0.9801 0.9637
