In [1]:
#Binary Classification
import numpy as np

def sigmoid(x):
    return 1/ (1+np.exp(-x))

def sigmoid_derivative(x):
    return x*(1-x)

def mean_square_error_loss(y_true,y_pred):
  return np.mean((y_true - y_pred)**2) # Use subtraction for MSE

input = np.array([[0,0],[0,1],[1,0],[1,1]]) # Correct the input array format
output = np.array([[0],[1],[1],[0]]) # Correct the output array format

input_size = 2
hidden_size = 2
output_size = 1

weights_input_hidden = np.random.rand(input_size,hidden_size)
bias_hidden = np.random.rand(hidden_size)
weights_hidden_output = np.random.randn(hidden_size,output_size)
bias_output = np.random.rand(output_size)

learning_rate = 0.1
epochs = 20000

for epoch in range(epochs):
  hidden_input = np.dot(input,weights_input_hidden)+bias_hidden
  hidden_output = sigmoid(hidden_input)

  final_input = np.dot(hidden_output,weights_hidden_output)+bias_output
  final_output = sigmoid(final_input)

  loss = mean_square_error_loss(output,final_output)

  error_output = final_output - output # Fix variable name 'outputs'
  gradient_output = error_output*sigmoid_derivative(final_output)

  error_hidden = np.dot(gradient_output,weights_hidden_output.T)
  gradient_hidden = error_hidden*sigmoid_derivative(hidden_output)

  weights_hidden_output -= learning_rate*np.dot(hidden_output.T,gradient_output)
  bias_output -= learning_rate*np.sum(gradient_output,axis=0)
  weights_input_hidden -= learning_rate*np.dot(input.T,gradient_hidden)
  bias_hidden -= learning_rate*np.sum(gradient_hidden,axis=0)

  if (epoch + 1)%2000 == 0: # Correct indentation
    print(f"Epoch:{epoch + 1},loss:{loss:.6f}")

results = [] # Fix variable name 'result'
for input_pair in input: # Fix variable name 'inputs', iterate over 'input'
  hidden_input = np.dot(input_pair,weights_input_hidden)+bias_hidden
  hidden_output = sigmoid(hidden_input)
  final_input = np.dot(hidden_output,weights_hidden_output)+bias_output
  final_output = sigmoid(final_input)
  results.append((input_pair,np.round(final_output[0],2))) # Append a tuple
print(results)

Epoch:2000,loss:0.188099
Epoch:4000,loss:0.138928
Epoch:6000,loss:0.131366
Epoch:8000,loss:0.128966
Epoch:10000,loss:0.127840
Epoch:12000,loss:0.127197
Epoch:14000,loss:0.126785
Epoch:16000,loss:0.126500
Epoch:18000,loss:0.126291
Epoch:20000,loss:0.126131
[(array([0, 0]), 0.03), (array([0, 1]), 0.5), (array([1, 0]), 0.97), (array([1, 1]), 0.5)]


In [2]:
# Multiclass Classification

import numpy as np

x = np.array([0.8, 0.6, 0.7])
y = np.array([0, 1, 0])
w1 = np.array([[0.2, 0.4, 0.1],
              [0.5, 0.3, 0.2],
              [0.3, 0.7, 0.8]])
w2 = np.array([[0.6, 0.4, 0.5],
              [0.1, 0.2, 0.3],
              [0.3, 0.7, 0.2]])
b1 = np.array([0.1, 0.2, 0.3])
b2 = np.array([0.1, 0.2, 0.3])
print(f"\nOld w1:\n{w1}")
print(f"\nOld b1:\n{b1}")
print(f"\nOld w2:\n{w2}")
print(f"\nOld b2:\n{b2}")

# alpha
learning_rate = 0.01
epochs = 1000

def relu(x):
  return np.maximum(0,x)

def relu_derivative(x):
  return 1*(x>0)

def softmax(x):
  e_x = np.exp(x - np.max(x))
  return e_x / e_x.sum()

# Loss = summation(y_true * log(y_pred))
def lossFunction(y_true,y_pred):
  return -np.sum(y_true*np.log(y_pred))

for epoch in range(epochs):

  # Forward propagation

  # h1 = w1(x)+b1
  h1 = np.dot(x, w1)+b1
  # a1 = relu(h1)
  a1 = relu(h1)

  # h2 = w2(a1)+b2
  h2 = np.dot(a1, w2)+b2
  # a2 = softmax(h2)
  a2 = softmax(h2)

  loss = lossFunction(y, a2)

  # Error = (y_pred - y)
  error = a2 - y


Old w1:
[[0.2 0.4 0.1]
 [0.5 0.3 0.2]
 [0.3 0.7 0.8]]

Old b1:
[0.1 0.2 0.3]

Old w2:
[[0.6 0.4 0.5]
 [0.1 0.2 0.3]
 [0.3 0.7 0.2]]

Old b2:
[0.1 0.2 0.3]


In [3]:

  # Backward propagation

  # d(L)/d(w2) = Error * a1
  d_L_d_w2 = error.dot(a1.T)

  # d(L)/d(b2) = Error
  d_L_d_b2 = error

  d_L_d_hidden = np.dot(error, w2.T)
  d_hidden_d_input = relu_derivative(h1)

  # d(L)/d(w1) = Error * w2 * h1 * x
  d_L_d_w1 = np.dot(x.T, d_hidden_d_input * d_L_d_hidden)

  # d(L)/d(b1) = Error * w2 * h1
  d_L_d_b1 = d_hidden_d_input * d_L_d_hidden


  # Update weights
  w2 -= learning_rate * d_L_d_w2
  b2 -= learning_rate * d_L_d_b2

  w1 -= learning_rate *d_L_d_w1
  b1 -= learning_rate * d_L_d_b1

  if (epoch + 1) % 1000 == 0:
    print(f"Epoch:{epoch + 1}, Loss: {loss:.4f}")


# Print final outputs
print("\nFinal outputs (a2):\n", a2)
print("Final Loss:\n", loss)

#results = []
# for input_pair in inputs:
#           hidden_input = np.dot(input_pair, weights_input_hidden)+bias_hidden
#           hidden_output = sigmoid(hidden_input)
#           final_input = np.dot(hidden_output, weights_hidden_output)+bias_output
#           final_output = sigmoid(final_input)
#           results.append((input_pair,np.round(final_output[0],2)))
# print(results)

print(f"\nUpdated w1:\n{w1}")
print(f"\nUpdated b1:\n{b1}")
print(f"\nUpdated w2:\n{w2}")
print(f"\nUpdated b2:\n{b2}")
print(f"\nOutput (a2) after epoch {epoch + 1}:\n{a2}")


Epoch:1000, Loss: 0.8774

Final outputs (a2):
 [0.25502746 0.41586939 0.32910315]
Final Loss:
 0.8773840448515435

Updated w1:
[[0.20115017 0.40115017 0.10115017]
 [0.50115017 0.30115017 0.20115017]
 [0.30115017 0.70115017 0.80115017]]

Updated b1:
[0.09916084 0.19992592 0.30266563]

Updated w2:
[[0.60149895 0.40149895 0.50149895]
 [0.10149895 0.20149895 0.30149895]
 [0.30149895 0.70149895 0.20149895]]

Updated b2:
[0.09744973 0.20584131 0.29670897]

Output (a2) after epoch 1000:
[0.25502746 0.41586939 0.32910315]
