In [1]:
import numpy as np

In [2]:
def activation(x,type):

  if type == 'sigmoid':
    return 1/(1 + np.exp(-x))

  elif type == 'tanh':
    return (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))

  elif type == 'ReLU':
    return np.maximum(0,x)

In [3]:
import numpy as np

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

In [4]:
x = np.array([1, 2, 3, 4])
x = np.reshape(x,(4,1))
print(softmax(x))

[[0.0320586 ]
 [0.08714432]
 [0.23688282]
 [0.64391426]]


In [5]:
class Neural_Network():


  #INITIALISING FUNCTION
  def __init__(self,input_nodes,layers,output_nodes):
    self.input_nodes = input_nodes  #dimension of the input vector
    self.layers = layers   #number of layers that we want in netork
    self.output_nodes = output_nodes  #number of output classes

    #convert to small_named variables for comfort
    self.n = self.input_nodes
    self.L = self.layers
    self.k = self.output_nodes

    self.W = []  # W will be a list of matrices of size nxn
    self.B = []  # B will be a list of matrices of size 1xn

    #weights & biases initialisation for layers 1 to L-1
    for i in range(0, self.L-1):
        W_i = np.random.rand(self.n, self.n)/np.sqrt(self.n) # creates nxn matrix
        B_i = np.random.rand(1, self.n)  # creates 1xn matrix
        self.W.append(W_i)
        self.B.append(B_i)

    #weights & biases initialisation for last layer
    W_i = np.random.rand(self.n,self.k)/np.sqrt(self.n) # creates nxk matrix
    B_i = np.random.rand(1,self.k) # creates 1xk matrix
    self.W.append(W_i)
    self.B.append(B_i)

  #FORWARD PROPAGATION THROUGH THE LAYERS
  def forward(self,X):
    Y_hat = []
    # Initialize lists A and H dynamically based on the value of L
    A = [[] for _ in range(self.layers)]
    H = [[] for _ in range(self.layers)]

    for x in X:


      #input layer
      a0 = np.dot(self.W[0],x.T)  + self.B[0].T   #a1 = W1.x + b1
      A[0].append(a0)
      h0 = activation(a0,type='sigmoid')  #h1 = activation(a1)
      H[0].append(h0)

      #middle layers
      for i in range(1,self.L-1):
          # Access the value in H[i-1] corresponding to the current x
          h_prev = H[i - 1][-1]  # Get the last element added to H[i-1]
          a = np.dot(self.W[i], h_prev) + self.B[i].T  # a = Wi.h(i-1) + Bi
          A[i].append(a)
          h = activation(a, type='sigmoid')
          H[i].append(h)

      #output layer
      a_L = np.dot(self.W[self.L-1].T,H[self.L-2][-1]) + self.B[self.L-1].T
      A[self.L-1].append(a_L)
      y_hat = softmax(a_L)
      Y_hat.append(y_hat)

    return Y_hat, A, H    #each y_hat is a kx1 matrix



  #BACKPROPAGATION THROUGH THE LAYERS
  def backward(self,X,Y,lr,max_iter):

    for i in range(max_iter):
      print(f"Iteration: {i}")
      dW = []
      dB = []
      dA = [[] for _ in range(self.layers)]
      dH = [[] for _ in range(self.layers)]

      for i in range(0, self.L-1):
        dW_i = np.zeros((self.n, self.n))  # creates nxn matrix
        dB_i = np.zeros((1, self.n))  # creates 1xn matrix
        dW.append(dW_i)
        dB.append(dB_i)

      dW_i = np.zeros((self.n, self.k))  # creates nxk matrix for last layer
      dB_i = np.zeros((1, self.k))  # creates 1xk matrix for last layer
      dW.append(dW_i)
      dB.append(dB_i)

      Y_hat, A, H  = self.forward(X)

      for x, y, y_hat in zip(X, Y, Y_hat):

        #backpropagate through output layer
        grad_A_L_minus_1 = (y_hat - y)
        dA[self.L-1].append(grad_A_L_minus_1)

        #backpropagate through middle layers
        for j in range(self.L-1, 0, -1):
          # dw = np.dot(dA[j][-1], H[j-1][-1].T)
          # dw = np.dot(dA[j][-1], H[j-1][-1])
          dw = np.dot(H[j-1][-1],dA[j][-1].T)
          dW[j] += dw
          dB[j] += dA[j][-1].T
          grad_H_j_minus_1 = np.dot(self.W[j], dA[j][-1])
          dH[j-1].append(grad_H_j_minus_1)
          grad_A_j_minus_1 = grad_H_j_minus_1 * (activation(A[j-1][-1], type='sigmoid') * (1 - activation(A[j-1][-1], type='sigmoid')))
          dA[j-1].append(grad_A_j_minus_1)

        #backpropagate through input layer
        dw = np.dot(grad_A_j_minus_1, x)
        dW[0] += dw
        dB[0] += grad_A_j_minus_1.T

      #update the weights and biases
      for j in range(self.L):
        self.W[j] -= lr * dW[j]
        self.B[j] -= lr * dB[j]

    return self.W,self.B



In [6]:
model = Neural_Network(5, 3, 4)

In [7]:
for w in model.W:
  print(w.shape)

(5, 5)
(5, 5)
(5, 4)


In [8]:
for b in model.B:
  print(b.shape)

(1, 5)
(1, 5)
(1, 4)


In [9]:
# Define the dimensions
m = 10  # number of data points
n = 5   # dimension of input vectors
k = 4   # dimension of output vectors

# Prepare X list with random values
X = [np.random.rand(1, n) for _ in range(m)]  # Change this line

# Prepare Y list with one-hot encoding
Y = []
for _ in range(m):
    y = np.zeros((k, 1))  # Change this line
    index = np.random.randint(0, k)
    y[index, 0] = 1
    Y.append(y)



In [10]:
for x in X:
  print(x.shape)

(1, 5)
(1, 5)
(1, 5)
(1, 5)
(1, 5)
(1, 5)
(1, 5)
(1, 5)
(1, 5)
(1, 5)


In [11]:
for y in Y:
  print(y.shape)

(4, 1)
(4, 1)
(4, 1)
(4, 1)
(4, 1)
(4, 1)
(4, 1)
(4, 1)
(4, 1)
(4, 1)


In [12]:
# Print X and Y for verification
print("X:")
for x in X:
    print(x)

print("\nY:")
for y in Y:
    print(y)

X:
[[0.83540027 0.98790886 0.35249253 0.52903989 0.26622984]]
[[0.65888177 0.6920244  0.67041196 0.60053716 0.17148742]]
[[0.1765761  0.54905878 0.35372272 0.293912   0.56889457]]
[[0.13615811 0.17715232 0.78572611 0.11217067 0.76849085]]
[[0.66534995 0.87819419 0.57387038 0.45826797 0.17966043]]
[[0.43590001 0.38706228 0.01796358 0.46848686 0.35800629]]
[[0.4265563  0.66600119 0.09332317 0.48203121 0.02181804]]
[[0.63559331 0.82699398 0.92358515 0.17818485 0.83126402]]
[[0.13081486 0.30320537 0.9574772  0.66262477 0.90789369]]
[[0.65900853 0.13923932 0.77301342 0.99977872 0.54865561]]

Y:
[[1.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [1.]]
[[0.]
 [0.]
 [0.]
 [1.]]
[[0.]
 [1.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [1.]]
[[0.]
 [0.]
 [0.]
 [1.]]
[[0.]
 [1.]
 [0.]
 [0.]]
[[0.]
 [0.]
 [0.]
 [1.]]
[[1.]
 [0.]
 [0.]
 [0.]]
[[0.]
 [1.]
 [0.]
 [0.]]


In [13]:
Y_pred, A1, H1 = model.forward(X)
print("Y_pred:")
for y_pred in Y_pred:
    print(y_pred)

Y_pred:
[[0.24790788]
 [0.20198106]
 [0.23489555]
 [0.3152155 ]]
[[0.24787174]
 [0.20200263]
 [0.23491931]
 [0.31520632]]
[[0.24803052]
 [0.20213385]
 [0.23462348]
 [0.31521215]]
[[0.24787193]
 [0.20217422]
 [0.23481256]
 [0.31514129]]
[[0.24789646]
 [0.20200246]
 [0.23488936]
 [0.31521172]]
[[0.24824686]
 [0.20214184]
 [0.23430712]
 [0.31530419]]
[[0.24827781]
 [0.202094  ]
 [0.23428774]
 [0.31534045]]
[[0.24758216]
 [0.20201491]
 [0.23533756]
 [0.31506537]]
[[0.24763764]
 [0.20206397]
 [0.23522332]
 [0.31507508]]
[[0.24774453]
 [0.20201481]
 [0.23509262]
 [0.31514804]]


In [14]:
# Convert Y and Y_hat from lists to numpy arrays
Y = np.array(Y)
Y_hat = np.array(Y_pred)

# Compute the loss using -Y * log(Y_hat)
loss = -Y * np.log(Y_hat)

# Calculate the mean loss
mean_loss = np.mean(loss)

print("Mean loss:", mean_loss)

Mean loss: 0.33400723427829193


In [15]:
for y_pred in Y_pred:
  print(np.sum(y_pred))

1.0
1.0
1.0
1.0
0.9999999999999999
1.0
1.0
1.0
0.9999999999999999
1.0


In [16]:
for i in model.W:
  print(i.shape)

(5, 5)
(5, 5)
(5, 4)


In [17]:
for i in model.B:
  print(i.shape)

(1, 5)
(1, 5)
(1, 4)


In [22]:
lr = 0.01
max_iter = 100
model.backward(X,Y,lr,max_iter)

Iteration: 0
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
Iteration: 11
Iteration: 12
Iteration: 13
Iteration: 14
Iteration: 15
Iteration: 16
Iteration: 17
Iteration: 18
Iteration: 19
Iteration: 20
Iteration: 21
Iteration: 22
Iteration: 23
Iteration: 24
Iteration: 25
Iteration: 26
Iteration: 27
Iteration: 28
Iteration: 29
Iteration: 30
Iteration: 31
Iteration: 32
Iteration: 33
Iteration: 34
Iteration: 35
Iteration: 36
Iteration: 37
Iteration: 38
Iteration: 39
Iteration: 40
Iteration: 41
Iteration: 42
Iteration: 43
Iteration: 44
Iteration: 45
Iteration: 46
Iteration: 47
Iteration: 48
Iteration: 49
Iteration: 50
Iteration: 51
Iteration: 52
Iteration: 53
Iteration: 54
Iteration: 55
Iteration: 56
Iteration: 57
Iteration: 58
Iteration: 59
Iteration: 60
Iteration: 61
Iteration: 62
Iteration: 63
Iteration: 64
Iteration: 65
Iteration: 66
Iteration: 67
Iteration: 68
Iteration: 69
Iteration: 70
Iteration: 71
It

([array([[0.39540001, 0.28638299, 0.37015259, 0.17353702, 0.28080354],
         [0.03157553, 0.03968496, 0.19541294, 0.31352642, 0.23864056],
         [0.07633042, 0.3314542 , 0.25604676, 0.38146242, 0.1382166 ],
         [0.09454837, 0.16324795, 0.41935755, 0.01732612, 0.32372174],
         [0.07765604, 0.28472473, 0.13219726, 0.02414974, 0.44348914]]),
  array([[0.0296419 , 0.3166244 , 0.04556888, 0.41921947, 0.0976139 ],
         [0.17843047, 0.27801242, 0.39837184, 0.33284873, 0.23595605],
         [0.27629108, 0.09276567, 0.42218493, 0.34138581, 0.03001548],
         [0.08027111, 0.4662519 , 0.37524392, 0.0836895 , 0.33887921],
         [0.11584244, 0.3602577 , 0.41393688, 0.2831725 , 0.13403562]]),
  array([[ 0.12629952,  0.62337349, -0.12584381,  0.37727448],
         [ 0.30026364,  0.42367905, -0.32811272,  0.30126615],
         [ 0.17951591,  0.25449799, -0.12080963,  0.43166108],
         [ 0.22550803,  0.31864396, -0.40051052,  0.35284575],
         [ 0.26331982,  0.27377844

In [23]:
pred,A1,H1 = model.forward(X)

In [24]:
print(pred)

[array([[0.19699723],
       [0.2972117 ],
       [0.00830176],
       [0.49748932]]), array([[0.19697306],
       [0.2972438 ],
       [0.00830229],
       [0.49748086]]), array([[0.19746006],
       [0.29718606],
       [0.00841792],
       [0.49693596]]), array([[0.19715333],
       [0.29736897],
       [0.00836041],
       [0.4971173 ]]), array([[0.1970311 ],
       [0.29722047],
       [0.00831466],
       [0.49743377]]), array([[0.19803234],
       [0.29693955],
       [0.00854568],
       [0.49648243]]), array([[0.19809738],
       [0.29685357],
       [0.00855771],
       [0.49649134]]), array([[0.19621605],
       [0.29759881],
       [0.00813934],
       [0.4980458 ]]), array([[0.19635626],
       [0.29758764],
       [0.00816907],
       [0.49788702]]), array([[0.19660992],
       [0.29741435],
       [0.00822067],
       [0.49775506]])]


In [25]:
# Convert Y and Y_hat from lists to numpy arrays
Y = np.array(Y)
pred = np.array(pred)

# Compute the loss using -Y * log(Y_hat)
loss = -Y * np.log(pred)

# Calculate the mean loss
mean_loss = np.mean(loss)

print("Mean loss:", mean_loss)

Mean loss: 0.25963431799549147
