In [10]:
import numpy as np

In [11]:
def relu(z):
  """Apply the rectified linear unit (ReLU) element-wise.

  Args:
    z: Scalar or array of pre-activation values.

  Returns:
    The element-wise maximum of ``z`` and 0, i.e. negatives become zero.
  """
  rectified = np.maximum(0, z)
  return rectified

def forward(w, x, b):
  """Compute the affine pre-activation of a layer: ``dot(w, x) + b``.

  Args:
    w: Weight matrix of the layer.
    x: Input to the layer.
    b: Bias added to the weighted input.

  Returns:
    The pre-activation values (before any non-linearity is applied).
  """
  weighted_input = np.dot(w, x)
  return weighted_input + b

def cross_entropy(y_pred, y):
  """Cross-entropy between predicted probabilities and target weights.

  Args:
    y_pred: Predicted probabilities.
    y: Target values (e.g. a one-hot vector) with a shape compatible
      with ``y_pred``.

  Returns:
    The scalar ``-sum(y * log(y_pred + 1e-12))`` over all elements.
  """
  eps = 1e-12  # small offset so log() is never evaluated at exactly 0
  log_probs = np.log(y_pred + eps)
  return -np.sum(y * log_probs)

def softmax(z):
  """Numerically stable softmax over all elements of ``z``.

  The largest logit is subtracted before exponentiating. This leaves the
  result mathematically unchanged but keeps ``exp`` from overflowing to
  ``inf`` (which would produce NaN) when the logits are large.

  Args:
    z: Array of logits. Normalization runs over every element, so this is
      meant for a single vector (row, column or flat), not a batch.

  Returns:
    Array with the same shape as ``z`` whose entries sum to 1.
  """
  if np.size(z) == 0:
    # Nothing to normalize; an empty input yields an empty float array.
    return np.exp(z)
  exp_shifted = np.exp(z - np.max(z))  # largest exponent is exactly 0
  return exp_shifted / np.sum(exp_shifted)

def relu_derivada(z):
  """Derivative of ReLU evaluated element-wise at ``z``.

  Args:
    z: Array of pre-activation values.

  Returns:
    Float array of the same shape: 1.0 where ``z > 0`` and 0.0 elsewhere
    (the derivative at exactly 0 is taken to be 0).
  """
  positive_mask = z > 0
  return positive_mask.astype(float)

In [12]:
def fit(x, y, W1, b1, W2, b2):
  """Run one forward and backward pass for a single sample.

  The network is input -> ReLU hidden layer -> softmax output. This function
  only computes gradients; it does not modify any of the parameters.

  Args:
    x: Input column vector.
    y: Target column vector (same shape as the network output).
    W1: Hidden-layer weight matrix.
    b1: Hidden-layer bias.
    W2: Output-layer weight matrix.
    b2: Output-layer bias.

  Returns:
    Tuple ``(dW1, db1, dW2, db2, y_pred)``: the gradients for each parameter
    followed by the softmax prediction for ``x``.
  """
  # Forward pass: hidden layer, then softmax output layer.
  hidden_pre = forward(W1, x, b1)
  hidden_act = relu(hidden_pre)
  y_pred = softmax(forward(W2, hidden_act, b2))

  # Output layer: for softmax trained with cross-entropy, the gradient with
  # respect to the output pre-activation is simply (prediction - target).
  delta_out = y_pred - y
  grad_W2 = np.dot(delta_out, hidden_act.T)

  # Hidden layer: push the error back through W2 and gate it with the ReLU
  # derivative so that inactive units receive no gradient.
  delta_hidden = np.dot(W2.T, delta_out) * relu_derivada(hidden_pre)
  grad_W1 = np.dot(delta_hidden, x.T)

  # The bias gradients are the layer deltas themselves.
  return grad_W1, delta_hidden, grad_W2, delta_out, y_pred

In [13]:
import tensorflow as tf

# Load MNIST through Keras; the result unpacks into (images, labels) pairs
# for the training split and the test split.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() # MNIST handwritten digits

In [14]:
# Normalization: rescale pixel values from [0, 255] to [0, 1] as float32.
x_train, x_test = (
    pixels.astype(np.float32) / 255.0 for pixels in (x_train, x_test)
)

In [15]:
# One-hot encode the labels: label k selects row k of the 10x10 identity
# matrix, so y_train becomes (60000, 10).
y_train, y_test = (np.eye(10)[labels] for labels in (y_train, y_test))

In [16]:
ciclos = 5  # number of full passes (epochs) over the training set
learning_rate = 0.01  # step size applied to every gradient update

input_size = 784  # length of one flattened 28x28 image
hidden_size = 128  # number of units in the hidden layer
output_size = 10  # one output per class (matches the one-hot width)

In [17]:
# Initialize weights and biases.
# Fixed seed for reproducibility; W1 is drawn before W2, so keep this order
# if the exact same initial values are needed.
np.random.seed(0)
# Standard-normal weights scaled by sqrt(2 / fan_in) (He-style initialization).
W1 = np.random.randn(hidden_size, input_size) * np.sqrt(2. / input_size)
W2 = np.random.randn(output_size, hidden_size) * np.sqrt(2. / hidden_size)
# Biases start at zero and are stored as column vectors.
b1 = np.zeros((hidden_size, 1))
b2 = np.zeros((output_size, 1))

In [18]:
for ciclo in range(ciclos):
  total_loss = 0

  # Plain SGD: one parameter update per training sample, in dataset order.
  for image, label in zip(x_train, y_train):
    x = image.reshape(-1, 1)  # (28, 28) image -> (784, 1) column vector
    y = label.reshape(-1, 1)  # one-hot label -> column vector

    dW1, db1, dW2, db2, y_pred = fit(x, y, W1, b1, W2, b2)

    # `-=` on an ndarray works in place, so this updates W1, b1, W2 and b2
    # themselves even though they are reached through the loop variable.
    for param, grad in ((W1, dW1), (b1, db1), (W2, dW2), (b2, db2)):
      param -= learning_rate * grad

    # The loss is measured on the prediction made before this update.
    total_loss += cross_entropy(y_pred, y)

  avg_loss = total_loss / len(x_train)
  print(f"Época {ciclo + 1}/{ciclos} - Pérdida promedio: {avg_loss:.4f}")

Época 1/5 - Pérdida promedio: 0.2115
Época 2/5 - Pérdida promedio: 0.0968
Época 3/5 - Pérdida promedio: 0.0658
Época 4/5 - Pérdida promedio: 0.0482
Época 5/5 - Pérdida promedio: 0.0365


In [19]:
# Count how many test images get the right class from the trained network.
correct = 0
for image, label in zip(x_test, y_test):
    x = image.reshape(-1, 1)  # flatten the image into a column vector

    # Forward pass only (no gradients needed for evaluation).
    a1 = relu(forward(W1, x, b1))
    y_pred = softmax(forward(W2, a1, b2))

    # A prediction is correct when the most probable class matches the
    # position of the 1 in the one-hot label.
    correct += int(np.argmax(y_pred) == np.argmax(label))

accuracy = correct / len(x_test)
print(f"Precisión en test: {accuracy * 100:.2f}%")

Precisión en test: 97.08%
