In [13]:
import numpy as np

# Worked example: one forward pass, one backward pass and one gradient-descent
# step for a tiny 2 -> 3 -> 3 network trained with softmax + cross-entropy.
# All vectors are kept as column vectors (shape (n, 1)).


def stable_softmax(logits):
    """Return the softmax of *logits*, normalised over all of its elements.

    The largest logit is subtracted before exponentiating. This does not
    change the mathematical result, but it keeps ``np.exp`` from overflowing
    to ``inf`` (and the ratio from turning into ``nan``) for large logits.
    """
    exp_shifted = np.exp(logits - np.max(logits))
    return exp_shifted / np.sum(exp_shifted)


x_input = np.array([[1], [1]])  # [1, 1] input
W_hidden = np.array([[0.1, 0.1], [0.1, 0.2], [0.2, 0.1]])  # 3 neurons, 2 inputs
b_hidden = np.array([[0.1], [0], [0.1]])     # 3 biases
W_output = np.array([[1, 2, 2], [1, 2, 1], [1, 1, 1]])  # 3 neurons, 3 inputs from hidden layer
b_output = np.array([[1], [1], [1]])     # 3 biases

# One-hot target. Defined up front so that the loss and the gradient below
# are both derived from the same label instead of a separately hard-coded
# class index.
y_true = np.array([[0], [1], [0]])
true_class_index = int(np.argmax(y_true))

# Forward pass. No activation function is applied to the hidden layer, so
# its pre-activations are fed straight into the output layer.
z_hidden = np.dot(W_hidden, x_input) + b_hidden
z_output = np.dot(W_output, z_hidden) + b_output
print(f"Hidden neurons: {z_hidden}\n")
print(f"Output neurons: {z_output}\n")

softmax_probs = stable_softmax(z_output)
print(f"softmax_probs: {softmax_probs}")

# Cross-entropy for a one-hot target is -log of the probability assigned to
# the true class. Indexing a (3, 1) array with an int keeps a shape-(1,)
# array, so the loss is a one-element array rather than a Python float.
p_true_class = softmax_probs[true_class_index]
cross_entropy_loss = -np.log(p_true_class)
print(f"cross_entropy_loss: {cross_entropy_loss}\n")

# Gradient of softmax + cross-entropy with respect to the output logits.
grad_logits = softmax_probs - y_true
print(f"grad_logits: {grad_logits}\n")

# Backward pass. np.outer flattens its arguments, so the (n, 1) column
# vectors produce weight gradients with the same shape as the weights.
grad_W_output = np.outer(grad_logits, z_hidden)  # Gradient w.r.t. W_output
grad_b_output = grad_logits                      # Gradient w.r.t. b_output
grad_hidden = np.dot(W_output.T, grad_logits)    # Gradient w.r.t. hidden layer z
grad_W_hidden = np.outer(grad_hidden, x_input)   # Gradient w.r.t. W_hidden
grad_b_hidden = grad_hidden                      # Gradient w.r.t. b_hidden

print("Gradientes direita:\n", grad_W_output)
print("Bias direita:\n", grad_b_output)
print("Gradientes esquerda:\n", grad_W_hidden)
print("Bias esquerda:\n", grad_b_hidden, "\n")


# Single plain gradient-descent update of every parameter.
learning_rate = 0.1
W_new_direita = W_output - learning_rate * grad_W_output
B_new_direita = b_output - learning_rate * grad_b_output
W_new_esquerda = W_hidden - learning_rate * grad_W_hidden
b_new_esquerda = b_hidden - learning_rate * grad_b_hidden

print("Pesos novos direita:\n", W_new_direita)
print("Bias novos direita:\n", B_new_direita)
print("Pesos novos esquerda:\n", W_new_esquerda)
print("Bias novos esquerda:\n", b_new_esquerda, "\n")

Hidden neurons: [[0.3]
 [0.3]
 [0.4]]

Output neurons: [[2.7]
 [2.3]
 [2. ]]

softmax_probs: [[0.46148762]
 [0.3093444 ]
 [0.22916797]]
cross_entropy_loss: [1.17330004]

grad_logits: [[ 0.46148762]
 [-0.6906556 ]
 [ 0.22916797]]

Gradientes direita:
 [[ 0.13844629  0.13844629  0.18459505]
 [-0.20719668 -0.20719668 -0.27626224]
 [ 0.06875039  0.06875039  0.09166719]]
Bias direita:
 [[ 0.46148762]
 [-0.6906556 ]
 [ 0.22916797]]
Gradientes esquerda:
 [[ 5.55111512e-17  5.55111512e-17]
 [-2.29167972e-01 -2.29167972e-01]
 [ 4.61487623e-01  4.61487623e-01]]
Bias esquerda:
 [[ 5.55111512e-17]
 [-2.29167972e-01]
 [ 4.61487623e-01]] 

Pesos novos direita:
 [[0.98615537 1.98615537 1.9815405 ]
 [1.02071967 2.02071967 1.02762622]
 [0.99312496 0.99312496 0.99083328]]
Bias novos direita:
 [[0.95385124]
 [1.06906556]
 [0.9770832 ]]
Pesos novos esquerda:
 [[0.1        0.1       ]
 [0.1229168  0.2229168 ]
 [0.15385124 0.05385124]]
Bias novos esquerda:
 [[0.1       ]
 [0.0229168 ]
 [0.05385124]] 

