In [15]:
# Multiclass Classification

import numpy as np

x = np.array([0.8, 0.6, 0.7])
y = np.array([0, 1, 0])
w1 = np.array([[0.2, 0.4, 0.1],
              [0.5, 0.3, 0.2],
              [0.3, 0.7, 0.8]])
w2 = np.array([[0.6, 0.4, 0.5],
              [0.1, 0.2, 0.3],
              [0.3, 0.7, 0.2]])
b1 = np.array([0.1, 0.2, 0.3])
b2 = np.array([0.1, 0.2, 0.3])
print(f"\nOld weights (w1):\n{w1}")
print(f"\nOld biases (b1):\n{b1}")
print(f"\nOld weights (w2):\n{w2}")
print(f"\nold biases (b2):\n{b2}")

# alpha
learning_rate = 0.01
epochs = 10000

def relu(x):
  return np.maximum(0,x)

def relu_derivative(x):
  return 1*(x>0)

def softmax(A):
    exps = np.exp(A - np.max(A, axis=-1, keepdims=True))
    return exps / np.sum(exps, axis=-1, keepdims=True)

# Loss = summation(y_true * log(y_pred))
def lossFunction(y_true,y_pred):
  return -np.sum(y_true*np.log(y_pred))

for epoch in range(epochs):

  # Forward propagation

  # h1 = w1(x)+b1
  h1 = np.dot(x, w1)+b1
  # a1 = sig(h1)
  a1 = relu(h1)

  # h2 = w2(a1)+b2
  h2 = np.dot(a1, w2)+b2
  # a2 = softmax(h2)
  a2 = softmax(h2)

  # L = (h2-y)^2
  loss = lossFunction(y, a2)
  #error = a2 - y
  #Error = (a2 - y)
  error = a2 - y

print(error)
print(a2)
print(loss)



Old weights (w1):
[[0.2 0.4 0.1]
 [0.5 0.3 0.2]
 [0.3 0.7 0.8]]

Old biases (b1):
[0.1 0.2 0.3]

Old weights (w2):
[[0.6 0.4 0.5]
 [0.1 0.2 0.3]
 [0.3 0.7 0.2]]

old biases (b2):
[0.1 0.2 0.3]
[ 0.25502746 -0.58413061  0.32910315]
[0.25502746 0.41586939 0.32910315]
0.8773840448515435


In [16]:
  # d(L)/d(w2) = Error * a1
  d_L_d_w2 = error.dot(a1.T)

  # d(L)/d(b2) = Error
  d_L_d_b2 = np.mean(error, axis=0)

  d_L_d_hidden = np.dot(error, w2.T)
  d_hidden_d_input = relu_derivative(h1)

  # d(L)/d(w1) = Error * w2 * x
  d_L_d_w1 = np.dot(x.T, d_hidden_d_input * d_L_d_hidden)

  # d(L)/d(b1) = Error * w2
  d_L_d_b1 = np.mean(d_hidden_d_input * d_L_d_hidden, axis=0)


  # Update weights
  w2 -= learning_rate * d_L_d_w2
  b2 -= learning_rate * d_L_d_b2

  w1 -= learning_rate *d_L_d_w1
  b1 -= learning_rate * d_L_d_b1

  if (epoch + 1) % 1000 == 0:
    print(f"Epoch:{epoch + 1}, Loss: {loss:.4f}")


# Print final outputs
print("\nFinal outputs (a2):\n", a2)
print("Final Loss:\n", loss)

results = []
# for input_pair in inputs:
#           hidden_input = np.dot(input_pair, weights_input_hidden)+bias_hidden
#           hidden_output = sigmoid(hidden_input)
#           final_input = np.dot(hidden_output, weights_hidden_output)+bias_output
#           final_output = sigmoid(final_input)
#           results.append((input_pair,np.round(final_output[0],2)))
# print(results)

print(f"\nUpdated weights (w1):\n{w1}")
print(f"\nUpdated biases (b1):\n{b1}")
print(f"\nUpdated weights (w2):\n{w2}")
print(f"\nUpdated biases (b2):\n{b2}")
print(f"\nOutput (a2) after epoch {epoch + 1}:\n{a2}")


Epoch:10000, Loss: 0.8774

Final outputs (a2):
 [0.25502746 0.41586939 0.32910315]
Final Loss:
 0.8773840448515435

Updated weights (w1):
[[0.20115017 0.40115017 0.10115017]
 [0.50115017 0.30115017 0.20115017]
 [0.30115017 0.70115017 0.80115017]]

Updated biases (b1):
[0.10058413 0.20058413 0.30058413]

Updated weights (w2):
[[0.60149895 0.40149895 0.50149895]
 [0.10149895 0.20149895 0.30149895]
 [0.30149895 0.70149895 0.20149895]]

Updated biases (b2):
[0.1 0.2 0.3]

Output (a2) after epoch 10000:
[0.25502746 0.41586939 0.32910315]
