In [None]:
Codigo red neuronal con Dropout y con capas de pooling

In [8]:
from keras.datasets import mnist
import random
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# No necesitamos tantos datos.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
#random.seed(123) # Vamos a controlar la aleatoriedad en adelante. 
X, y = zip(*random.sample(list(zip(X_train, y_train)), 2000))

# Sí necesitamos que la forma de X sea la de un vector, en lugar de una matriz. 
X, y = np.array(X, dtype='float64'), np.array(y, dtype='float64')
X = np.reshape(X, (X.shape[0], -1))

# Normalizamos Min-Max
X= MinMaxScaler().fit_transform(X)

# Dividimos la muestra en dos, una para entrenar y otra para testing, como tenemos 
# muestra de sobra nos damos el lujo de testear con la misma cantidad que entrenamos.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=123)

# Necesitamos que y_train sea un valor categórico, en lugar de un dígito entero.
y_train_value = y_train # Guardaremos y_train como valor para un observación más abajo.
from keras.utils import to_categorical
y_train = to_categorical(y_train)

In [9]:
class DropoutLayer(Layer):
    def init(self, rate):
        # 'rate' es la fracción de neuronas que se apagan, p.ej., 0.2 significa que el 20% se apagarán
        self.rate = rate
        self.mask = None

    def forward_propagation(self, input_data, training=True):
        if training:
            # Crear una máscara binaria aleatoria de la misma forma que los datos de entrada
            self.mask = np.random.binomial(1, 1 - self.rate, size=input_data.shape)
            # Apagar las neuronas según la máscara y escalar los valores restantes
            self.output = input_data * self.mask / (1 - self.rate)
        else:
            # En la fase de evaluación, no se usa Dropout, simplemente pasamos los datos
            self.output = input_data
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        # La máscara también se aplica en la retropropagación
        return output_error * self.mask / (1 - self.rate)


In [10]:
import numpy as np

# Clase base para Capa
class Layer:
    def __init__(self):
        self.input = None
        self.output = None

    # Computes the output Y of a layer for a given input X
    def forward_propagation(self, input):
        raise NotImplementedError

    # Computes dE/dX for a given dE/dY (and update parameters if any)
    def backward_propagation(self, output_error, learning_rate):
        raise NotImplementedError

# Clase para capas densas (fully connected)
class FCLayer(Layer):
    def __init__(self, input_size, output_size):
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)

        # Actualizar los parámetros sin regularización L2
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * output_error
        return input_error

# Clase para Capa de Activación
class ActivationLayer(Layer):
    def __init__(self, activation, activation_prime):
        self.activation = activation
        self.activation_prime = activation_prime

    def forward_propagation(self, input_data):
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        return self.activation_prime(self.input) * output_error

# Clase para Dropout
class DropoutLayer(Layer):
    def __init__(self, rate):
        self.rate = rate
        self.mask = None

    def forward_propagation(self, input_data, training=True):
        if training:
            self.mask = np.random.binomial(1, 1 - self.rate, size=input_data.shape)
            self.output = input_data * self.mask / (1 - self.rate)
        else:
            self.output = input_data
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        return output_error * self.mask / (1 - self.rate)

# Clase para Capa Convolucional
class Conv2D(Layer):
    def __init__(self, filters, kernel_size, activation):
        self.filters = filters
        self.kernel_size = kernel_size
        self.activation = activation
        self.weights = np.random.randn(filters, kernel_size[0], kernel_size[1]) * 0.01
        self.bias = np.zeros((filters, 1))

    def forward_propagation(self, input_data):
        self.input = input_data
        # Implementar la operación de convolución
        h, w = input_data.shape[1], input_data.shape[2]
        fh, fw = self.kernel_size
        self.output = np.zeros((self.filters, h - fh + 1, w - fw + 1))

        for f in range(self.filters):
            for i in range(h - fh + 1):
                for j in range(w - fw + 1):
                    self.output[f, i, j] = np.sum(input_data[:, i:i + fh, j:j + fw] * self.weights[f]) + self.bias[f]

        # Aplicar la función de activación
        self.output = self.activation(self.output)
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        # Implementar la retropropagación
        # Este es un ejemplo simplificado, necesitarás implementarlo adecuadamente
        input_error = np.zeros(self.input.shape)
        for f in range(self.filters):
            for i in range(self.output.shape[1]):
                for j in range(self.output.shape[2]):
                    input_error[:, i:i + self.kernel_size[0], j:j + self.kernel_size[1]] += (
                        output_error[f, i, j] * self.weights[f]
                    )
                    self.weights[f] -= learning_rate * output_error[f, i, j] * self.input[:, i:i + self.kernel_size[0], j:j + self.kernel_size[1]]
                    self.bias[f] -= learning_rate * output_error[f, i, j]
        return input_error

# Clase para MaxPooling
class MaxPooling2D(Layer):
    def __init__(self, pool_size):
        self.pool_size = pool_size

    def forward_propagation(self, input_data):
        self.input = input_data
        h, w = input_data.shape[1], input_data.shape[2]
        ph, pw = self.pool_size
        self.output = np.zeros((input_data.shape[0], h // ph, w // pw))

        for i in range(0, h, ph):
            for j in range(0, w, pw):
                self.output[:, i // ph, j // pw] = np.max(input_data[:, i:i + ph, j:j + pw], axis=(1, 2))

        return self.output

    def backward_propagation(self, output_error, learning_rate):
        input_error = np.zeros(self.input.shape)
        h, w = self.input.shape[1], self.input.shape[2]
        ph, pw = self.pool_size

        for i in range(0, h, ph):
            for j in range(0, w, pw):
                input_error[:, i:i + ph, j:j + pw] += (
                    (output_error[:, i // ph, j // pw][:, np.newaxis, np.newaxis] *
                     (self.input[:, i:i + ph, j:j + pw] == np.max(self.input[:, i:i + ph, j:j + pw], axis=(1, 2), keepdims=True)))
                )

        return input_error

# Clase para AveragePooling
class AveragePooling2D(Layer):
    def __init__(self, pool_size):
        self.pool_size = pool_size

    def forward_propagation(self, input_data):
        self.input = input_data
        h, w = input_data.shape[1], input_data.shape[2]
        ph, pw = self.pool_size
        self.output = np.zeros((input_data.shape[0], h // ph, w // pw))

        for i in range(0, h, ph):
            for j in range(0, w, pw):
                self.output[:, i // ph, j // pw] = np.mean(input_data[:, i:i + ph, j:j + pw], axis=(1, 2))

        return self.output

    def backward_propagation(self, output_error, learning_rate):
        input_error = np.zeros(self.input.shape)
        h, w = self.input.shape[1], self.input.shape[2]
        ph, pw = self.pool_size

        for i in range(0, h, ph):
            for j in range(0, w, pw):
                input_error[:, i:i + ph, j:j + pw] += output_error[:, i // ph, j // pw][:, np.newaxis, np.newaxis] / (ph * pw)

        return input_error

# Clase Red
class Network:
    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    # add layer to network
    def add(self, layer):
        self.layers.append(layer)

    # set loss to use
    def use(self, loss, loss_prime):
        self.loss = loss
        self.loss_prime = loss_prime

    # predict output for given input
    def predict(self, input_data):
        if input_data.ndim == 1:
            input_data = np.array([[x] for x in input_data])
        samples = len(input_data)
        result = []

        for i in range(samples):
            output = input_data[i]
            for layer in self.layers:
                output = layer.forward_propagation(output)
            result.append(output)
        return result

    # train the network
    def fit(self, x_train, y_train, epochs, learning_rate, x_val=None, y_val=None):
        if x_train[0].ndim == 1:
            x_train = np.array([[x] for x in x_train])
        samples = len(x_train)

        for i in range(epochs):
            err = 0
            for j in range(samples):
                output = x_train[j]
                for layer in self.layers:
                    output = layer.forward_propagation(output)

                err += self.loss(y_train[j], output)

                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            err /= samples
            err = np.mean(err)
            print('epoch %d/%d   error=%f' % (i + 1, epochs, err))

# Funciones de Activación
def tanh(x):
    return np.tanh(x)

def tanh_prime(x):
    return 1 - np.tanh(x) ** 2

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(x):
    sig = sigmoid(x)
    return sig * (1 - sig)

def relu(x):
    return np.maximum(0, x)

def relu_prime(x):
    return np.where(x > 0, 1, 0)


# Funciones de pérdida
def mse(y_true, y_hat):
    return (y_true - y_hat) ** 2

def mse_prime(y_true, y_hat):
    return 2 * (y_hat - y_true)

def bce(y_true, y_hat):
    epsilon = 1e-15
    y_hat = np.clip(y_hat, epsilon, 1 - epsilon)
    return -(y_true * np.log(y_hat) + (1 - y_true) * np.log(1 - y_hat))

def bce_prime(y_true, y_hat):
    epsilon = 1e-15
    y_hat = np.clip(y_hat, epsilon, 1 - epsilon)
    return -(y_true / y_hat) + (1 - y_true) / (1 - y_hat)

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score

# Suponiendo que tienes tus datos X y y
# Dividimos los datos en entrenamiento y validación
X_train_full, X_val, y_train_full, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Crear instancia de Network
model = Network()

# Necesitamos identificar cuántos nodos tiene nuestra entrada, y eso depende del tamaño de X.
entrada_dim = len(X_train_full[0])

# Agregamos capas al modelo
model.add(FCLayer(entrada_dim, 128))
model.add(ActivationLayer(relu, relu_prime))
model.add(DropoutLayer(rate=0.2))  # Añadiendo Dropout con un 20% de tasa
model.add(FCLayer(128, 64))
model.add(ActivationLayer(sigmoid, sigmoid_prime))
model.add(DropoutLayer(rate=0.2))  # Añadiendo Dropout
model.add(FCLayer(64, 10))
model.add(ActivationLayer(sigmoid, sigmoid_prime))

# Asignamos función de pérdida
model.use(bce, bce_prime)

# Entrenamos el modelo con datos de entrenamiento y validación
model.fit(X_train_full, y_train_full, epochs=30, learning_rate=0.1, x_val=X_val, y_val=y_val)

# Usamos el modelo para predecir sobre los datos de prueba (X_test)
y_hat = model.predict(X_test)

# Transformamos la salida en un vector one-hot encoded, es decir, obtenemos la clase predicha
for i in range(len(y_hat)):
    y_hat[i] = np.argmax(y_hat[i])

# Reportamos los resultados del modelo
matriz_conf = confusion_matrix(y_test, y_hat)
print('MATRIZ DE CONFUSIÓN para modelo ANN')
print(matriz_conf, '\n')
print('La exactitud de testeo del modelo ANN es: {:.3f}'.format(accuracy_score(y_test, y_hat)))


epoch 1/30   error=0.280083
epoch 2/30   error=0.178386
epoch 3/30   error=0.143613
epoch 4/30   error=0.131579
epoch 5/30   error=0.108898
epoch 6/30   error=0.097367
epoch 7/30   error=0.092943
epoch 8/30   error=0.075254
epoch 9/30   error=0.070132
epoch 10/30   error=0.072033
epoch 11/30   error=0.063394
epoch 12/30   error=0.085374
epoch 13/30   error=0.064705
epoch 14/30   error=0.072877
epoch 15/30   error=0.058864
epoch 16/30   error=0.059161
epoch 17/30   error=0.047232
epoch 18/30   error=0.045643
epoch 19/30   error=0.047172
epoch 20/30   error=0.046038
epoch 21/30   error=0.041085
epoch 22/30   error=0.039939
epoch 23/30   error=0.043294
epoch 24/30   error=0.039943
epoch 25/30   error=0.030989
epoch 26/30   error=0.039445
epoch 27/30   error=0.026245
epoch 28/30   error=0.021470
epoch 29/30   error=0.028014
epoch 30/30   error=0.024740
MATRIZ DE CONFUSIÓN para modelo ANN
[[ 98   0   0   2   0   1   0   1   0   2]
 [  0  99   0   0   0   0   0   0   1   0]
 [  1   1  74   7