In [1]:
import numpy as np

class nn:
  """Small fully connected neural network trained with full-batch gradient descent.

  Data is laid out column-wise: inputs are (features, samples) and one-hot
  labels are (classes, samples). ``predict`` applies a softmax to the output
  of the last layer, and ``loss`` / ``lossDerivative`` implement categorical
  cross-entropy on those probabilities.

  Attributes:
    layerDim: layer sizes, input layer first.
    activations: activation name for each non-input layer. 'sigmoid', 'relu'
      and 'tanh' are recognised; any other value means identity.
    layer: number of layers, including the input layer.
    warr: weight matrices; warr[i] has shape (layerDim[i+1], layerDim[i]).
    barr: bias column vectors; barr[i] has shape (layerDim[i+1], 1).
    cachesZ: pre-activations stored by the most recent forward pass.
    cacheA: activations stored by the most recent forward pass; cacheA[0]
      is the network input.
  """

  def __init__ (self, layerDim, activations):
    """Store the architecture and initialise weights and biases."""
    self.layerDim = layerDim
    self.activations = activations
    self.layer = len(layerDim)
    self.warr = []
    self.barr = []
    self.cachesZ = []
    self.cacheA = []
    self.initParams()

  def initParams (self):
    """Append one weight matrix and one zero bias vector per non-input layer.

    Weights are standard-normal draws scaled by sqrt(2 / fan_in) for 'relu'
    layers and sqrt(1 / fan_in) for every other activation.
    """
    for i in range(1, self.layer):
      fan_in = self.layerDim[i-1]
      fan_out = self.layerDim[i]
      if self.activations[i-1] == 'relu':
        scale = np.sqrt(2. / fan_in)
      else:  # for sigmoid or tanh
        scale = np.sqrt(1. / fan_in)
      self.warr.append(np.random.randn(fan_out, fan_in) * scale)
      self.barr.append(np.zeros((fan_out, 1)))

  def sigmoid (self, Z):
    """Element-wise logistic function; Z is clipped to [-500, 500] so exp cannot overflow."""
    Z = np.clip(Z, -500, 500)
    return 1/(1+np.exp(-Z))

  def relu (self, Z):
    """Element-wise max(0, Z)."""
    return np.maximum(0, Z)

  def tanh (self, Z):
    """Element-wise hyperbolic tangent."""
    return np.tanh(Z)

  def activationsfunc (self, Z, activation):
    """Apply the named activation to Z; unrecognised names return Z unchanged."""
    if activation == 'sigmoid':
      return self.sigmoid(Z)
    elif activation == 'relu':
      return self.relu(Z)
    elif activation == 'tanh':
      return self.tanh(Z)
    else:
      return Z

  def activationsDerivative(self, Z, activation):
    """Derivative of the named activation evaluated at pre-activation Z.

    Unrecognised names are treated as identity, so the scalar 1 is returned
    and broadcasts against the upstream gradient.
    """
    if activation == 'sigmoid':
      sig = self.sigmoid(Z)
      return sig * (1 - sig)
    elif activation == 'relu':
      return (Z > 0).astype(Z.dtype)
    elif activation == 'tanh':
      return 1 - np.power(self.tanh(Z), 2)
    else:
      return 1

  def singleForward (self, A_prev, W, b, activation):
    """Run one layer forward, record Z and A in the caches, and return A."""
    Z = W @ A_prev + b
    A = self.activationsfunc(Z, activation)

    self.cachesZ.append(Z)
    self.cacheA.append(A)

    return A

  def forwardProp (self, X):
    """Reset the caches and push X through every layer.

    Returns the last layer's activation (softmax is NOT applied here; see
    ``predict``).
    """
    A = X
    self.cachesZ = []
    self.cacheA = []
    self.cacheA.append(A)

    for i in range(self.layer-1):
      A = self.singleForward(A, self.warr[i], self.barr[i], self.activations[i])

    return A

  def loss (self, Y, A):
    """Mean categorical cross-entropy of probabilities A against one-hot Y.

    A is clipped to [1e-10, 1 - 1e-10] so that log never sees 0.
    """
    m = Y.shape[1]
    A = np.clip(A, 1e-10, 1 - 1e-10)
    return - np.sum(Y * np.log(A)) / m

  def lossDerivative (self, Y, A):
    """Return A - Y, the cross-entropy gradient with respect to the softmax input."""
    return A - Y

  def singleBackward (self, dA, W, b, Z, A_prev, activation):
    """Back-propagate through one layer.

    Returns (dA_prev, dW, db), where dW and db are averaged over the samples
    (columns of A_prev) and dA_prev is the gradient passed to the layer below.
    """
    m = A_prev.shape[1]

    dZ = dA * self.activationsDerivative(Z, activation)
    dW = 1/m * (dZ @ A_prev.T)
    db = 1/m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = W.T @ dZ

    return dA_prev, dW, db

  def backwardProp (self, Y, A, learning_rate):
    """Run one gradient-descent step using the caches of the latest forward pass.

    The starting gradient is A - Y, which is then multiplied by the last
    layer's activation derivative. That is the exact softmax/cross-entropy
    gradient only when the last layer's activation is identity.
    """
    dA = self.lossDerivative(Y, A)

    for i in range(self.layer-1, 0, -1):
      # singleBackward computes dA for the layer below from the current
      # weights before they are updated in place on the next two lines.
      dA, dW, db = self.singleBackward(dA, self.warr[i-1], self.barr[i-1], self.cachesZ[i-1], self.cacheA[i-1], self.activations[i-1])
      self.warr[i-1] -= learning_rate * dW
      self.barr[i-1] -= learning_rate * db

  def train (self, X, Y, learning_rate, iterations, print_loss=False):
    """Run `iterations` full-batch gradient-descent steps on (X, Y).

    When print_loss is true, the loss of the predictions made before the
    update is printed every 100 iterations.
    """
    for i in range(iterations):
      A = self.predict(X)
      self.backwardProp(Y, A, learning_rate)
      if i % 100 == 0 and print_loss:
        print(f'Loss after {i} iterations: {self.loss(Y, A)}')

  def softmax (self, Z):
    """Column-wise softmax of Z (each column is one sample).

    The maximum is subtracted per column. Subtracting a single global
    maximum lets a column with much smaller logits underflow to all zeros,
    which turns its normalisation into 0/0 = NaN.
    """
    expZ = np.exp(Z - np.max(Z, axis=0, keepdims=True))
    return expZ / expZ.sum(axis=0, keepdims=True)

  def predict (self, X):
    """Return class probabilities for X: a forward pass followed by softmax."""
    return self.softmax(self.forwardProp(X))

  def accuracy (self, X, Y):
    """Fraction of samples whose arg-max prediction matches the arg-max of Y."""
    A = self.predict(X)
    return np.mean(np.argmax(Y, axis=0) == np.argmax(A, axis=0))

  def precision (self, X, Y):
    """Precision pooled over all classes (true positives / predicted positives).

    Every entry equal to its column maximum counts as a predicted positive,
    so a tie yields more than one positive for that sample. With one-hot Y
    and no ties the value equals ``accuracy``.
    """
    A = self.predict(X)
    A = (A == A.max(axis=0, keepdims=True)).astype(int)
    true_positive = np.sum((Y == 1) & (A == 1))
    predicted_positive = np.sum(A == 1)
    return true_positive / predicted_positive if predicted_positive > 0 else 0
  
      

In [2]:
import cv2
import tensorflow as tf
import matplotlib.pyplot as plt

%matplotlib inline

In [3]:
def one_hot(labels, num_classes=10):
  """Return a (num_classes, n) float matrix with a single 1 in each column.

  Column k has its 1 in row labels[k]. Integer-array indexing sets all
  entries in one vectorised assignment instead of a per-sample Python loop.
  """
  n_samples = labels.shape[0]
  encoded = np.zeros((num_classes, n_samples))
  encoded[labels, np.arange(n_samples)] = 1
  return encoded

mnist = tf.keras.datasets.mnist
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# One-hot labels laid out as (classes, samples).
Y_train_mod = one_hot(Y_train)
Y_test_mod = one_hot(Y_test)

# Flatten each image into one column and divide the pixel values by 255.
X_train_mod = X_train.reshape(X_train.shape[0], -1).T / 255
X_test_mod = X_test.reshape(X_test.shape[0], -1).T / 255

print("X shape:", X_train.shape)

X shape: (60000, 28, 28)


In [4]:
# Two ReLU hidden layers; the output layer is left linear ('') because
# nn.predict applies the softmax itself.
layer_sizes = [28 * 28, 128, 64, 10]
layer_activations = ['relu', 'relu', '']
model = nn(layer_sizes, layer_activations)
model.train(X_train_mod, Y_train_mod, learning_rate=0.1, iterations=1000, print_loss=True)

# Sanity check: predicted distribution vs. one-hot label for the first image.
first_image = X_train_mod[:, 0:1]
first_label = Y_train_mod[:, 0:1]
print(model.predict(first_image))
print(first_label)

Loss after 0 iterations: 2.3215299556773163
Loss after 100 iterations: 0.42139840993879174
Loss after 200 iterations: 0.32535695949093707
Loss after 300 iterations: 0.2845333127264297
Loss after 400 iterations: 0.25629660240643926
Loss after 500 iterations: 0.23370530404867382
Loss after 600 iterations: 0.2146618997189257
Loss after 700 iterations: 0.19820013372335835
Loss after 800 iterations: 0.1839268663914595
Loss after 900 iterations: 0.17148610487039417
[[2.29575172e-04]
 [4.59109633e-06]
 [1.10395291e-03]
 [1.24591314e-01]
 [2.74319382e-07]
 [8.73777113e-01]
 [5.15325900e-07]
 [1.29430721e-04]
 [9.78447375e-05]
 [6.53879835e-05]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]]


In [5]:
# Evaluate in the same order as the report below, then print the four numbers.
train_accuracy = model.accuracy(X_train_mod, Y_train_mod)
test_accuracy = model.accuracy(X_test_mod, Y_test_mod)
train_precision = model.precision(X_train_mod, Y_train_mod)
test_precision = model.precision(X_test_mod, Y_test_mod)

print("Train accuracy:", train_accuracy)
print("Test accuracy:", test_accuracy)

print("Train precision:", train_precision)
print("Test precision:", test_precision)

Train accuracy: 0.95465
Test accuracy: 0.9506
Train precision: 0.95465
Test precision: 0.9506


In [6]:
# Show the predicted probabilities and the one-hot label of the first training image.
first_column = slice(0, 1)
print(model.predict(X_train_mod[:, first_column]))
print(Y_train_mod[:, first_column])

[[2.29575172e-04]
 [4.59109633e-06]
 [1.10395291e-03]
 [1.24591314e-01]
 [2.74319382e-07]
 [8.73777113e-01]
 [5.15325900e-07]
 [1.29430721e-04]
 [9.78447375e-05]
 [6.53879835e-05]]
[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]]


In [7]:
# Probabilities for the first training image, kept as a single column.
p = model.predict(X_train_mod[:, :1])
# Turn the flat arg-max position back into (row, column); the row is the predicted digit.
flat_best = np.argmax(p)
i, j = np.unravel_index(flat_best, p.shape)
print(i)
print(Y_train[0])

5
5
