In [1]:
import numpy as np

Funciones de activación

In [2]:
def activation(h):
    """Hyperbolic-tangent activation, applied element-wise to `h`."""
    return np.tanh(h)


def derivate_act(h):
    """Derivative of the tanh activation evaluated at `h`: 1 - tanh(h)**2."""
    squashed = np.tanh(h)
    return 1 - squashed ** 2

In [3]:
# Network dimensions and gradient-descent step size
input_dim = 2
hidden_dim = 1
learning_rate = 0.1

# Gate inputs: every combination of two binary values, one pattern per row
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
# Target output for each input pattern (one column)
Y = np.array([
    [0],
    [1],
    [1],
    [0],
])

# Fixed seed so the random initial parameters are reproducible
np.random.seed(42)

# NOTE: the order of the four draws below determines the initial values.
# Input -> hidden weights, shape (input_dim, hidden_dim)
weights_input_hidden = np.random.rand(input_dim, hidden_dim)
# Output-layer weights: the output unit sees the raw inputs AND the hidden
# activations, hence shape (input_dim + hidden_dim, 1)
weights_hidden_output = np.random.rand(hidden_dim + input_dim, 1)

# Biases of the hidden layer and of the output unit
b0 = np.random.rand(hidden_dim)
b1 = np.random.rand(1)

In [4]:
def NN(x, w_ih, w_ho, b1, b0):
    """Forward pass of the network for a batch of input patterns.

    The hidden layer is computed from the inputs; the output layer then
    receives the raw inputs concatenated with the hidden activations.

    Parameters
    ----------
    x : array-like, shape (n_samples, input_dim)
        Input patterns, one per row.
    w_ih : ndarray, shape (input_dim, hidden_dim)
        Input -> hidden weights.
    w_ho : ndarray, shape (input_dim + hidden_dim, n_outputs)
        Weights applied to the concatenation [inputs, hidden activations].
    b1 : ndarray
        Output-layer bias (note: it comes BEFORE ``b0`` in the signature).
    b0 : ndarray
        Hidden-layer bias.

    Returns
    -------
    ndarray, shape (n_samples, n_outputs)
        Network output for every row of ``x``.
    """
    # Use the argument that was passed in; the previous version read the
    # notebook-global ``X`` here, so any other input was silently ignored.
    x = np.asarray(x)

    # input -> hidden
    hidden = activation(np.dot(x, w_ih) + b0)
    # concatenate the inputs with the hidden-layer output, sample by sample
    concatenated_input = np.concatenate((x, hidden), axis=1)
    # [inputs, hidden] -> output
    return activation(np.dot(concatenated_input, w_ho) + b1)

In [19]:
epochs = 10000

# Full-batch gradient descent: every epoch processes all rows of X at once.
# NOTE: the weights and biases are updated in place, so re-running this cell
# continues training from their current values.
for i in range(epochs):
    # ---- Forward pass for all examples ----
    hj = np.dot(X, weights_input_hidden) + b0            # (num_samples, hidden_dim)
    Vj = activation(hj)

    # The output layer sees the raw inputs next to the hidden activations
    concatenate_input = np.concatenate((X, Vj), axis=1)  # (num_samples, input_dim + hidden_dim)

    hi = np.dot(concatenate_input, weights_hidden_output) + b1  # (num_samples, output_dim)
    oi = activation(hi)

    # ---- Backpropagation ----
    # Output-layer error term
    delta_i_mu = (Y - oi) * derivate_act(hi)             # (num_samples, output_dim)
    # Hidden-layer error term: only the rows of the output weights that
    # multiply hidden activations (those after the first input_dim rows)
    # carry error back to the hidden layer.
    delta_j_mu = (
        np.dot(delta_i_mu, weights_hidden_output[input_dim:].T) * derivate_act(hj)
    )                                                    # (num_samples, hidden_dim)

    # Gradients summed over all samples
    grad_w_1_2 = np.dot(concatenate_input.T, delta_i_mu)  # (input_dim + hidden_dim, output_dim)
    grad_b1 = delta_i_mu.sum(axis=0)
    grad_w_0_1 = np.dot(X.T, delta_j_mu)                  # (input_dim, hidden_dim)
    grad_b0 = delta_j_mu.sum(axis=0)

    # ---- Update the weights and biases ----
    weights_input_hidden += learning_rate * grad_w_0_1
    weights_hidden_output += learning_rate * grad_w_1_2
    b0 += learning_rate * grad_b0
    b1 += learning_rate * grad_b1


In [103]:
epochs = 1000

# Batch gradient descent written sample by sample: gradients are accumulated
# over every example and applied once per epoch (in place).
for i in range(epochs):
    # Fresh accumulators for this epoch
    grad_w_0_1, grad_b0 = np.zeros_like(weights_input_hidden), np.zeros_like(b0)
    grad_w_1_2, grad_b1 = np.zeros_like(weights_hidden_output), np.zeros_like(b1)

    for mu, x in enumerate(X):  # iterate over the examples
        # ---- Forward pass for one example ----
        hj = np.dot(x, weights_input_hidden) + b0
        Vj = activation(hj)

        # Raw inputs followed by the hidden activations
        concatenate_input = np.concatenate((x, Vj))

        hi = np.dot(concatenate_input, weights_hidden_output) + b1
        oi = activation(hi)

        # ---- Backpropagation ----
        # Output-layer error term
        delta_i_mu = (Y[mu] - oi) * derivate_act(hi)
        # Hidden-layer error term, through the output weights that multiply
        # the hidden activations (rows after the first input_dim)
        delta_j_mu = (
            np.dot(weights_hidden_output[input_dim:], delta_i_mu) * derivate_act(hj)
        )

        # Accumulate this example's contribution to every gradient
        grad_w_1_2 += np.outer(concatenate_input, delta_i_mu)
        grad_b1 += delta_i_mu.ravel()
        grad_w_0_1 += np.outer(x, delta_j_mu)
        grad_b0 += delta_j_mu.ravel()

    # ---- Update the weights and biases ----
    weights_input_hidden += learning_rate * grad_w_0_1
    weights_hidden_output += learning_rate * grad_w_1_2
    b0 += learning_rate * grad_b0
    b1 += learning_rate * grad_b1

In [6]:
# Number of epochs passed to the train_model call at the end of this cell
epochs = 1000


# Mini-batch gradient-descent training
def train_model(X, Y, weights_input_hidden, weights_hidden_output, b0, b1, learning_rate, epochs, batch_size=32):
    """Train the network in place with mini-batch gradient descent.

    Each epoch draws a fresh permutation of the samples with
    ``np.random.permutation`` and walks through it in consecutive slices of
    at most ``batch_size`` rows. For every slice the parameters are updated
    with the gradients summed over that slice.

    Parameters
    ----------
    X : ndarray, shape (num_samples, input_dim)
        Input patterns.
    Y : ndarray, shape (num_samples, output_dim)
        Targets.
    weights_input_hidden : ndarray, shape (input_dim, hidden_dim)
        Input -> hidden weights. Modified in place.
    weights_hidden_output : ndarray, shape (input_dim + hidden_dim, output_dim)
        Weights applied to [inputs, hidden activations]. Modified in place.
    b0, b1 : ndarray
        Hidden-layer and output-layer biases. Modified in place.
    learning_rate : float
        Step size applied to every gradient.
    epochs : int
        Number of passes over the shuffled data.
    batch_size : int, default 32
        Maximum number of rows per mini-batch.

    Returns
    -------
    None
        The trained parameters are available through the arrays passed in.
    """
    total = X.shape[0]

    for _ in range(epochs):
        # New random ordering of the data for this epoch
        order = np.random.permutation(total)
        inputs_perm, targets_perm = X[order], Y[order]

        for lo in range(0, total, batch_size):
            # Slicing clips at the end of the array, so the last batch may be short
            xb = inputs_perm[lo:lo + batch_size]
            yb = targets_perm[lo:lo + batch_size]

            # ---- Forward pass for the mini-batch ----
            pre_hidden = np.dot(xb, weights_input_hidden) + b0      # (batch, hidden_dim)
            hidden = activation(pre_hidden)
            augmented = np.concatenate((xb, hidden), axis=1)        # (batch, input_dim + hidden_dim)
            pre_out = np.dot(augmented, weights_hidden_output) + b1  # (batch, output_dim)
            out = activation(pre_out)

            # ---- Backpropagation ----
            delta_out = derivate_act(pre_out) * (yb - out)          # (batch, output_dim)
            # Only the rows that multiply hidden activations propagate error
            # to the hidden layer; both deltas are computed BEFORE any update.
            hidden_rows = weights_hidden_output[xb.shape[1]:]
            delta_hidden = derivate_act(pre_hidden) * np.dot(delta_out, hidden_rows.T)

            # ---- In-place parameter update (gradients summed over the batch) ----
            weights_input_hidden += learning_rate * np.dot(xb.T, delta_hidden)
            weights_hidden_output += learning_rate * np.dot(augmented.T, delta_out)
            b0 += learning_rate * delta_hidden.sum(axis=0)
            b1 += learning_rate * delta_out.sum(axis=0)

# Train the current parameters (in place) using mini-batches of two examples
train_model(
    X, Y,
    weights_input_hidden, weights_hidden_output,
    b0, b1,
    learning_rate, epochs,
    batch_size=2,
)


In [7]:
# Print the network output for the input patterns using the current weights
# and biases (NN takes the output bias b1 before the hidden bias b0).
print(NN(X, weights_input_hidden, weights_hidden_output, b1, b0))

[[0.00238793]
 [0.92260247]
 [0.9228231 ]
 [0.01504687]]
