In [1]:
import numpy as np

def random_initialization(layers_dims):
    """
    Initialize weights and biases for any number of layers.

    Args:
    layers_dims -- List containing the dimensions of each layer, including input and output.

    Returns:
    parameters -- Dictionary containing weights and biases for all layers.
    """
    parameters = {}
    for l in range(1, len(layers_dims)):
        parameters[f'W{l}'] = np.random.randn(layers_dims[l-1], layers_dims[l]) * 0.01
        parameters[f'b{l}'] = np.zeros((1, layers_dims[l]))
    return parameters

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))  # Stability improvement
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def forward_propagation(X, parameters):
    """
    Perform forward propagation through the network.

    Args:
    X -- Input data.
    parameters -- Dictionary containing weights and biases.

    Returns:
    activations -- Dictionary containing activations for each layer.
    caches -- Dictionary containing linear outputs for each layer (useful for backpropagation).
    """
    activations = {0: X}
    caches = {}
    L = len(parameters) // 2

    for l in range(1, L + 1):
        W = parameters[f'W{l}']
        b = parameters[f'b{l}']
        Z = np.dot(activations[l-1], W) + b
        caches[l] = Z

        if l == L:
            activations[l] = softmax(Z)  # Output layer with softmax
        else:
            activations[l] = relu(Z)

    return activations, caches

def backward_propagation(X, Y, parameters, activations, caches):
    """
    Perform backward propagation to calculate gradients.

    Args:
    X -- Input data.
    Y -- True labels (one-hot encoded).
    parameters -- Dictionary containing weights and biases.
    activations -- Dictionary containing activations from forward propagation.
    caches -- Dictionary containing linear outputs from forward propagation.

    Returns:
    gradients -- Dictionary containing gradients of weights and biases.
    """
    gradients = {}
    L = len(parameters) // 2
    m = X.shape[0]

    # Gradient for output layer
    dZ = activations[L] - Y
    for l in reversed(range(1, L + 1)):
        W = parameters[f'W{l}']
        dW = np.dot(activations[l-1].T, dZ) / m
        db = np.sum(dZ, axis=0, keepdims=True) / m

        gradients[f'dW{l}'] = dW
        gradients[f'db{l}'] = db

        if l > 1:
            dZ = np.dot(dZ, W.T) * relu_derivative(caches[l-1])

    return gradients

def update_parameters(parameters, gradients, learning_rate):
    """
    Update parameters using gradient descent.

    Args:
    parameters -- Dictionary containing weights and biases.
    gradients -- Dictionary containing gradients of weights and biases.
    learning_rate -- Learning rate for gradient descent.

    Returns:
    parameters -- Updated parameters.
    """
    for l in range(1, len(parameters) // 2 + 1):
        parameters[f'W{l}'] -= learning_rate * gradients[f'dW{l}']
        parameters[f'b{l}'] -= learning_rate * gradients[f'db{l}']
    return parameters

def compute_loss(Y_pred, Y_true):
    """
    Compute the cross-entropy loss.

    Args:
    Y_pred -- Predicted probabilities from the model.
    Y_true -- True labels (one-hot encoded).

    Returns:
    loss -- Cross-entropy loss.
    """
    m = Y_true.shape[0]
    loss = -np.sum(Y_true * np.log(Y_pred + 1e-8)) / m  # Add epsilon for numerical stability
    return loss

def train_model(X, Y, layers_dims, learning_rate=0.01, epochs=1000):
    """
    Train the neural network with the specified architecture and parameters.

    Args:
    X -- Input data.
    Y -- True labels (one-hot encoded).
    layers_dims -- List specifying the number of neurons in each layer.
    learning_rate -- Learning rate for gradient descent.
    epochs -- Number of iterations for training.

    Returns:
    parameters -- Trained weights and biases.
    losses -- List of loss values for each epoch.
    """
    parameters = random_initialization(layers_dims)
    losses = []

    for epoch in range(epochs):
        activations, caches = forward_propagation(X, parameters)
        loss = compute_loss(activations[len(layers_dims) - 1], Y)
        losses.append(loss)

        gradients = backward_propagation(X, Y, parameters, activations, caches)
        parameters = update_parameters(parameters, gradients, learning_rate)

        if epoch % 100 == 0:
            print(f"Epoch {epoch}/{epochs} - Loss: {loss:.4f}")

    return parameters, losses


In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Cargar el conjunto de datos Iris
data = load_iris()
X = data.data  # Características (4)
Y = data.target.reshape(-1, 1)  # Etiquetas (0, 1, 2)

# Normalizar las características
X = X / X.max(axis=0)

# Codificar las etiquetas como one-hot
encoder = OneHotEncoder(sparse=False)
Y_onehot = encoder.fit_transform(Y)

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, Y_train, Y_test = train_test_split(X, Y_onehot, test_size=0.2, random_state=42)

# Definir la arquitectura de la red neuronal
layers_dims = [X_train.shape[1], 10, 3]  # 4 (entrada), 10 (capa oculta), 3 (salida)

# Entrenar el modelo
parameters, losses = train_model(X_train, Y_train, layers_dims, learning_rate=0.01, epochs=1000)

# Validar en el conjunto de prueba
activations, _ = forward_propagation(X_test, parameters)
Y_pred = np.argmax(activations[len(layers_dims) - 1], axis=1)
Y_true = np.argmax(Y_test, axis=1)

# Evaluar la precisión
accuracy = np.mean(Y_pred == Y_true)
print(f"Precisión en el conjunto de prueba: {accuracy * 100:.2f}%")


TypeError: OneHotEncoder.__init__() got an unexpected keyword argument 'sparse'