<h1>Importamos las librerías y los datos de MNIST</h1>

In [16]:
from keras.datasets import mnist
import random
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Load MNIST through keras; only a random subset of the training split is used below.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Draw the subset from a dedicated, seeded generator so that Restart & Run All
# always selects the same (image, label) pairs. The global `random` state is
# deliberately left untouched.
SAMPLE_SIZE = 2000
SAMPLE_SEED = 123
X, y = zip(*random.Random(SAMPLE_SEED).sample(list(zip(X_train, y_train)), SAMPLE_SIZE))

X, y = np.array(X, dtype='float64'), np.array(y, dtype='float64')
# Flatten every image into a single feature row: (n_samples, n_features).
X = np.reshape(X, (X.shape[0], -1))

# Scale every feature to [0, 1].
# NOTE(review): the scaler is fitted on the whole subset before the split below,
# so test rows contribute to the min/max statistics -- confirm this is acceptable.
X = MinMaxScaler().fit_transform(X)
from sklearn.model_selection import train_test_split
# From here on X_train/X_test/y_train/y_test refer to the 50/50 split of the
# subset, not to the arrays returned by mnist.load_data().
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=123)

# Keep the labels in their original (non one-hot) form before encoding them.
y_train_value = y_train
from keras.utils import to_categorical
y_train = to_categorical(y_train)

<h1>Declaramos Clase Base</h1>

In [17]:
# Base class shared by every layer
class Layer:
    """Abstract layer: stores the last input/output and declares the two passes."""

    def __init__(self):
        # Filled in by subclasses during the forward pass.
        self.input, self.output = None, None

    def forward_propagation(self, input):
        """Compute the output Y of the layer for an input X (must be overridden)."""
        raise NotImplementedError()

    def backward_propagation(self, output_error, learning_rate):
        """Compute dE/dX from dE/dY, updating parameters if any (must be overridden)."""
        raise NotImplementedError()


<h1>Pooling</h1>

In [None]:
# Build a CNN model that alternates convolution and pooling stages
def build_cnn_model_with_pooling():
    """Return a `Network` made of three conv/pool stages followed by a dense head.

    NOTE(review): `Flatten` is not defined in the visible part of this notebook;
    confirm it exists before calling this function.
    """
    model = Network()

    stack = (
        # Stage 1: 32 filters of 3x3, then 2x2 max pooling.
        Conv2D(32, (3, 3), activation=relu),
        MaxPooling2D((2, 2)),
        # Stage 2: 64 filters of 3x3, then 2x2 average pooling.
        Conv2D(64, (3, 3), activation=relu),
        AveragePooling2D((2, 2)),
        # Stage 3: 128 filters of 3x3, then 2x2 max pooling.
        Conv2D(128, (3, 3), activation=relu),
        MaxPooling2D((2, 2)),
        # Flatten the convolutional output so it can feed the dense layer.
        Flatten(),
        # Dense head: 128 inputs, a single output unit.
        FCLayer(128, 1),
    )
    for layer in stack:
        model.add(layer)

    return model

<h1>La clase Dropout</h1>

In [18]:
class DropoutLayer(Layer):
    """Inverted dropout: zero a random fraction of the activations while training.

    Surviving activations are divided by ``1 - rate`` during training, so the
    evaluation pass can forward the input unchanged.
    """

    def __init__(self, rate):
        """Create the layer.

        Args:
            rate: fraction of units to drop, e.g. 0.2 drops about 20% of them.

        Raises:
            ValueError: if `rate` is outside [0, 1); `rate == 1` would divide by zero.
        """
        if not 0 <= rate < 1:
            raise ValueError("rate must be in the interval [0, 1)")
        super().__init__()
        self.rate = rate
        self.mask = None

    def forward_propagation(self, input_data, training=True):
        """Apply dropout when `training` is true, otherwise pass the data through."""
        self.input = input_data
        if training:
            # Binary keep-mask with the same shape as the input.
            self.mask = np.random.binomial(1, 1 - self.rate, size=input_data.shape)
            # Drop the masked units and rescale the survivors.
            self.output = input_data * self.mask / (1 - self.rate)
        else:
            # Forget any earlier mask so backward cannot reuse a stale one.
            self.mask = None
            self.output = input_data
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Route the gradient through the units kept by the last forward pass.

        `learning_rate` is unused: the layer has no trainable parameters.
        """
        if self.mask is None:
            # Last forward pass was in evaluation mode (identity).
            return output_error
        return output_error * self.mask / (1 - self.rate)



<h1>Código integrado con Dropout</h1>

In [19]:
import numpy as np

# Fully connected (dense) layer
class FCLayer(Layer):
    """Affine layer computing ``Y = X @ W + b``."""

    def __init__(self, input_size, output_size):
        """Initialise weights and bias uniformly in [-0.5, 0.5).

        Args:
            input_size: number of input features.
            output_size: number of output units.
        """
        super().__init__()
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        """Store the input and return ``input_data @ weights + bias``."""
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient-descent step (no L2 regularisation) and return dE/dX.

        Args:
            output_error: dE/dY, one row per sample.
            learning_rate: step size of the update.
        """
        # Computed with the weights used in the forward pass, i.e. before the update.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # Sum over the sample axis so the result keeps the (1, output_size) bias
        # shape; for a single row this equals `output_error` itself.
        bias_error = np.atleast_2d(output_error).sum(axis=0, keepdims=True)

        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error

# Activation layer
class ActivationLayer(Layer):
    """Applies an activation function and, on the way back, its derivative."""

    def __init__(self, activation, activation_prime):
        self.activation, self.activation_prime = activation, activation_prime

    def forward_propagation(self, input_data):
        """Remember the input and return ``activation(input_data)``."""
        self.input = input_data
        activated = self.activation(input_data)
        self.output = activated
        return activated

    def backward_propagation(self, output_error, learning_rate):
        """Return dE/dX; `learning_rate` is unused because nothing is trainable."""
        local_gradient = self.activation_prime(self.input)
        return local_gradient * output_error

# Dropout layer
class DropoutLayer(Layer):
    """Inverted dropout: zero a random fraction of the activations while training.

    Surviving activations are divided by ``1 - rate`` during training, so the
    evaluation pass can forward the input unchanged.
    """

    def __init__(self, rate):
        """Create the layer.

        Args:
            rate: fraction of units to drop.

        Raises:
            ValueError: if `rate` is outside [0, 1); `rate == 1` would divide by zero.
        """
        if not 0 <= rate < 1:
            raise ValueError("rate must be in the interval [0, 1)")
        super().__init__()
        self.rate = rate
        self.mask = None

    def forward_propagation(self, input_data, training=True):
        """Apply dropout when `training` is true, otherwise pass the data through."""
        self.input = input_data
        if training:
            self.mask = np.random.binomial(1, 1 - self.rate, size=input_data.shape)
            self.output = input_data * self.mask / (1 - self.rate)
        else:
            # Forget any earlier mask so backward cannot reuse a stale one.
            self.mask = None
            self.output = input_data
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Route the gradient through the units kept by the last forward pass."""
        if self.mask is None:
            # Last forward pass was in evaluation mode (identity).
            return output_error
        return output_error * self.mask / (1 - self.rate)

# Convolutional layer
class Conv2D(Layer):
    """Valid (no padding, stride 1) 2-D convolution with one kernel per filter.

    The input is indexed as ``input[:, rows, cols]``: the last two axes are the
    spatial ones, and axis 0 is summed over using the same kernel for every slice.
    The output has shape ``(filters, h - fh + 1, w - fw + 1)``.
    """

    def __init__(self, filters, kernel_size, activation, activation_prime=None):
        """Create the layer.

        Args:
            filters: number of kernels / output maps.
            kernel_size: ``(height, width)`` of every kernel.
            activation: element-wise function applied to the convolution result,
                or None for no activation.
            activation_prime: derivative of `activation`. When omitted, the
                backward pass estimates it with a central finite difference,
                which is only valid for element-wise activations.
        """
        super().__init__()
        self.filters = filters
        self.kernel_size = kernel_size
        self.activation = activation
        self.activation_prime = activation_prime
        self.weights = np.random.randn(filters, kernel_size[0], kernel_size[1]) * 0.01
        self.bias = np.zeros((filters, 1))
        # Convolution result before the activation; needed by the backward pass.
        self.pre_activation = None

    def forward_propagation(self, input_data):
        """Convolve `input_data` with every kernel and apply the activation."""
        self.input = input_data
        h, w = input_data.shape[1], input_data.shape[2]
        fh, fw = self.kernel_size
        out_h, out_w = h - fh + 1, w - fw + 1
        z = np.zeros((self.filters, out_h, out_w))

        for i in range(out_h):
            for j in range(out_w):
                # Every slice along axis 0 shares the kernel, so collapse that axis first.
                window = input_data[:, i:i + fh, j:j + fw].sum(axis=0)
                # One value per filter: sum(window * weights[f]) + bias[f].
                z[:, i, j] = np.tensordot(self.weights, window, axes=([1, 2], [0, 1])) + self.bias[:, 0]

        self.pre_activation = z
        self.output = z if self.activation is None else self.activation(z)
        return self.output

    def _activation_gradient(self):
        """Return d(activation)/dz evaluated at the stored pre-activation."""
        z = self.pre_activation
        if self.activation is None:
            return np.ones_like(z)
        if self.activation_prime is not None:
            return self.activation_prime(z)
        # Fallback when no derivative was supplied: central finite difference.
        eps = 1e-6
        return (self.activation(z + eps) - self.activation(z - eps)) / (2 * eps)

    def backward_propagation(self, output_error, learning_rate):
        """Update kernels and biases by gradient descent and return dE/dX.

        Args:
            output_error: dE/dY with the shape of the forward output.
            learning_rate: step size of the update.
        """
        fh, fw = self.kernel_size
        delta = output_error * self._activation_gradient()
        input_error = np.zeros(self.input.shape)
        weights_error = np.zeros(self.weights.shape)

        for i in range(delta.shape[1]):
            for j in range(delta.shape[2]):
                d = delta[:, i, j]
                # dE/dX of the window: sum_f d[f] * weights[f], identical for every
                # slice along axis 0 (broadcast by the in-place add).
                input_error[:, i:i + fh, j:j + fw] += np.tensordot(d, self.weights, axes=(0, 0))
                window = self.input[:, i:i + fh, j:j + fw].sum(axis=0)
                weights_error += d[:, np.newaxis, np.newaxis] * window

        # Parameters change only after dE/dX was computed with the forward-pass weights.
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * delta.sum(axis=(1, 2)).reshape(self.bias.shape)
        return input_error

 
# Max pooling layer
class MaxPooling2D(Layer):
    """Non-overlapping max pooling over the last two axes of the input.

    Rows/columns that do not fill a complete window are ignored, which matches
    the ``h // ph`` by ``w // pw`` size of the output.
    """

    def __init__(self, pool_size):
        """Store the ``(height, width)`` of the pooling window."""
        super().__init__()
        self.pool_size = pool_size

    def _grid(self):
        """Return the number of complete windows along each spatial axis."""
        ph, pw = self.pool_size
        return self.input.shape[1] // ph, self.input.shape[2] // pw

    def forward_propagation(self, input_data):
        """Return the per-window maximum, shape ``(n, h // ph, w // pw)``."""
        self.input = input_data
        ph, pw = self.pool_size
        out_h, out_w = self._grid()
        self.output = np.zeros((input_data.shape[0], out_h, out_w))

        # Iterate over output cells (not input offsets) so a trailing partial
        # window can never index past the output array.
        for oi in range(out_h):
            rows = slice(oi * ph, (oi + 1) * ph)
            for oj in range(out_w):
                cols = slice(oj * pw, (oj + 1) * pw)
                self.output[:, oi, oj] = np.max(input_data[:, rows, cols], axis=(1, 2))

        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Send each output gradient to the position(s) holding the window maximum.

        `learning_rate` is unused: the layer has no trainable parameters.
        """
        input_error = np.zeros(self.input.shape)
        ph, pw = self.pool_size
        out_h, out_w = self._grid()

        for oi in range(out_h):
            rows = slice(oi * ph, (oi + 1) * ph)
            for oj in range(out_w):
                cols = slice(oj * pw, (oj + 1) * pw)
                window = self.input[:, rows, cols]
                # Every element equal to the maximum receives the gradient.
                is_max = window == np.max(window, axis=(1, 2), keepdims=True)
                input_error[:, rows, cols] += (
                    output_error[:, oi, oj][:, np.newaxis, np.newaxis] * is_max
                )

        return input_error

# Average pooling layer
class AveragePooling2D(Layer):
    """Non-overlapping average pooling over the last two axes of the input.

    Rows/columns that do not fill a complete window are ignored, which matches
    the ``h // ph`` by ``w // pw`` size of the output.
    """

    def __init__(self, pool_size):
        """Store the ``(height, width)`` of the pooling window."""
        super().__init__()
        self.pool_size = pool_size

    def _grid(self):
        """Return the number of complete windows along each spatial axis."""
        ph, pw = self.pool_size
        return self.input.shape[1] // ph, self.input.shape[2] // pw

    def forward_propagation(self, input_data):
        """Return the per-window mean, shape ``(n, h // ph, w // pw)``."""
        self.input = input_data
        ph, pw = self.pool_size
        out_h, out_w = self._grid()
        self.output = np.zeros((input_data.shape[0], out_h, out_w))

        # Iterate over output cells (not input offsets) so a trailing partial
        # window can never index past the output array.
        for oi in range(out_h):
            rows = slice(oi * ph, (oi + 1) * ph)
            for oj in range(out_w):
                cols = slice(oj * pw, (oj + 1) * pw)
                self.output[:, oi, oj] = np.mean(input_data[:, rows, cols], axis=(1, 2))

        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Spread each output gradient evenly over the elements of its window.

        `learning_rate` is unused: the layer has no trainable parameters.
        """
        input_error = np.zeros(self.input.shape)
        ph, pw = self.pool_size
        out_h, out_w = self._grid()

        for oi in range(out_h):
            rows = slice(oi * ph, (oi + 1) * ph)
            for oj in range(out_w):
                cols = slice(oj * pw, (oj + 1) * pw)
                share = output_error[:, oi, oj][:, np.newaxis, np.newaxis] / (ph * pw)
                input_error[:, rows, cols] += share

        return input_error

# Network: an ordered stack of layers plus the loss used to train them
class Network:
    """Sequential model trained one sample at a time with gradient descent."""

    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append `layer` to the end of the stack."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Set the loss function and its derivative with respect to the prediction."""
        self.loss = loss
        self.loss_prime = loss_prime

    @staticmethod
    def _accepts_training_flag(layer):
        """Return True when the layer's forward pass has a `training` parameter."""
        import inspect

        try:
            parameters = inspect.signature(layer.forward_propagation).parameters
        except (TypeError, ValueError):
            return False
        return 'training' in parameters

    def _forward(self, sample, training, training_aware):
        """Run one sample through every layer and return the final output.

        `training_aware[k]` tells whether layer k takes the `training` keyword.
        """
        output = sample
        for layer, takes_flag in zip(self.layers, training_aware):
            if takes_flag:
                output = layer.forward_propagation(output, training=training)
            else:
                output = layer.forward_propagation(output)
        return output

    def predict(self, input_data):
        """Return a list with the network output for every sample.

        Layers that distinguish training from evaluation (e.g. dropout) are run
        in evaluation mode, so predictions are not randomly perturbed.
        """
        if input_data.ndim == 1:
            # A flat vector is treated as a batch of one-element samples.
            input_data = np.array([[x] for x in input_data])
        training_aware = [self._accepts_training_flag(layer) for layer in self.layers]
        return [self._forward(sample, False, training_aware) for sample in input_data]

    def fit(self, x_train, y_train, epochs, learning_rate, x_val=None, y_val=None):
        """Train with per-sample gradient descent, printing the mean loss per epoch.

        Args:
            x_train: training samples; 1-D samples are wrapped into single-row 2-D arrays.
            y_train: targets aligned with `x_train`.
            epochs: number of passes over the training data.
            learning_rate: step size handed to every layer.
            x_val, y_val: accepted for call compatibility; not used by this method.
        """
        if x_train[0].ndim == 1:
            x_train = np.array([[x] for x in x_train])
        samples = len(x_train)
        training_aware = [self._accepts_training_flag(layer) for layer in self.layers]

        for i in range(epochs):
            err = 0
            for j in range(samples):
                output = self._forward(x_train[j], True, training_aware)

                err += self.loss(y_train[j], output)

                # Back-propagate from the last layer to the first.
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            err /= samples
            err = np.mean(err)
            print('epoch %d/%d   error=%f' % (i + 1, epochs, err))

# Funciones de Activación
def tanh(x):
    """Hyperbolic tangent activation, applied element-wise."""
    activated = np.tanh(x)
    return activated

def tanh_prime(x):
    """Derivative of tanh: ``1 - tanh(x)^2``."""
    t = np.tanh(x)
    return 1 - t ** 2

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise.

    Evaluated through the identity ``sigmoid(x) = (1 + tanh(x / 2)) / 2``, which
    never overflows, whereas ``exp(-x)`` does for large negative `x`.
    """
    return 0.5 * (1.0 + np.tanh(0.5 * x))

def sigmoid_prime(x):
    """Derivative of the logistic function: ``s * (1 - s)`` with ``s = sigmoid(x)``."""
    s = sigmoid(x)
    complement = 1 - s
    return s * complement

def relu(x):
    """Rectified linear unit: element-wise ``max(0, x)``."""
    floor = 0
    return np.maximum(floor, x)

def relu_prime(x):
    """Derivative of ReLU: 1 where ``x > 0`` and 0 elsewhere (including ``x == 0``)."""
    is_positive = x > 0
    return np.where(is_positive, 1, 0)


# Funciones de pérdida
def mse(y_true, y_hat):
    """Element-wise squared error; the caller is responsible for averaging."""
    residual = y_true - y_hat
    return residual ** 2

def mse_prime(y_true, y_hat):
    """Derivative of the squared error with respect to `y_hat`."""
    residual = y_hat - y_true
    return 2 * residual

def bce(y_true, y_hat):
    epsilon = 1e-15
    y_hat = np.clip(y_hat, epsilon, 1 - epsilon)
    return -(y_true * np.log(y_hat) + (1 - y_true) * np.log(1 - y_hat))

def bce_prime(y_true, y_hat):
    """Derivative of binary cross-entropy with respect to the (clipped) prediction."""
    tiny = 1e-15
    p = np.clip(y_hat, tiny, 1 - tiny)
    positive_term = -(y_true / p)
    negative_term = (1 - y_true) / (1 - p)
    return positive_term + negative_term

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score

# Hold out 20% of the training split as a validation set (fixed seed).
X_train_full, X_val, y_train_full, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

model = Network()

# Number of input features, taken from the first training sample.
entrada_dim = len(X_train_full[0])

# Add the layers: two hidden dense blocks, each followed by dropout, and a 10-unit output.
model.add(FCLayer(entrada_dim, 128))
model.add(ActivationLayer(relu, relu_prime))
model.add(DropoutLayer(rate=0.2))  # Dropout with a 20% rate
model.add(FCLayer(128, 64))
model.add(ActivationLayer(sigmoid, sigmoid_prime))
model.add(DropoutLayer(rate=0.2))  # Dropout with a 20% rate
model.add(FCLayer(64, 10))
model.add(ActivationLayer(sigmoid, sigmoid_prime))

# Train with binary cross-entropy applied to each output unit.
model.use(bce, bce_prime)

# NOTE(review): the validation arrays are passed here, but the `fit` defined in
# this notebook section accepts them without reading them.
model.fit(X_train_full, y_train_full, epochs=30, learning_rate=0.1, x_val=X_val, y_val=y_val)

y_hat = model.predict(X_test)

# Replace each output vector, in place, by the index of its largest entry (the predicted class).
for i in range(len(y_hat)):
    y_hat[i] = np.argmax(y_hat[i])

# Report the results of the model
matriz_conf = confusion_matrix(y_test, y_hat)
print('MATRIZ DE CONFUSIÓN para modelo ANN')
print(matriz_conf, '\n')
print('La exactitud de testeo del modelo ANN es: {:.3f}'.format(accuracy_score(y_test, y_hat)))


epoch 1/30   error=0.275342
epoch 2/30   error=0.175796
epoch 3/30   error=0.143178
epoch 4/30   error=0.133792
epoch 5/30   error=0.103752
epoch 6/30   error=0.092134
epoch 7/30   error=0.087471
epoch 8/30   error=0.081053
epoch 9/30   error=0.070765
epoch 10/30   error=0.062393
epoch 11/30   error=0.060012
epoch 12/30   error=0.057110
epoch 13/30   error=0.045405
epoch 14/30   error=0.041346
epoch 15/30   error=0.045356
epoch 16/30   error=0.052242
epoch 17/30   error=0.060302
epoch 18/30   error=0.050757
epoch 19/30   error=0.040720
epoch 20/30   error=0.028774
epoch 21/30   error=0.032936
epoch 22/30   error=0.037235
epoch 23/30   error=0.026061
epoch 24/30   error=0.044376
epoch 25/30   error=0.039553
epoch 26/30   error=0.022985
epoch 27/30   error=0.031815
epoch 28/30   error=0.016302
epoch 29/30   error=0.013322
epoch 30/30   error=0.030399
MATRIZ DE CONFUSIÓN para modelo ANN
[[ 94   0   0   2   0   1   1   0   1   0]
 [  0 111   0   1   1   0   0   2   0   0]
 [  4   1  76   7

<h1>Ahora vamos con Early Stopping</h1>

In [21]:
# Provided EarlyStopping helper
class EarlyStopping:
    """Signals when the validation loss has stopped improving.

    An epoch counts as an improvement only when the loss drops by more than
    `min_delta` below the best value seen so far. After `patience` consecutive
    epochs without improvement, `should_stop` returns True.
    """

    def __init__(self, patience=5, min_delta=0):
        self.patience, self.min_delta = patience, min_delta
        self.best_loss = None
        self.wait = 0

    def should_stop(self, val_loss):
        """Record `val_loss` and report whether training should stop now."""
        # The first observation only establishes the baseline.
        if self.best_loss is None:
            self.best_loss = val_loss
            return False

        improvement = self.best_loss - val_loss
        stalled = not (improvement > self.min_delta)
        if stalled:
            self.wait += 1
        else:
            self.best_loss, self.wait = val_loss, 0
        return self.wait >= self.patience


<h1>Código implementado junto al pooling y el Early Stopping</h1>

In [23]:
import numpy as np

# Base class for layers
class Layer:
    """Common interface of all layers; concrete layers override both passes."""

    def __init__(self):
        # Last input seen and last output produced; set during the forward pass.
        self.input, self.output = None, None

    def forward_propagation(self, input):
        """Return the output Y of the layer for the input X (abstract)."""
        raise NotImplementedError()

    def backward_propagation(self, output_error, learning_rate):
        """Return dE/dX given dE/dY, updating any parameters (abstract)."""
        raise NotImplementedError()


# Fully connected (dense) layer
class FCLayer(Layer):
    """Affine layer computing ``Y = X @ W + b``."""

    def __init__(self, input_size, output_size):
        """Initialise weights and bias uniformly in [-0.5, 0.5).

        Args:
            input_size: number of input features.
            output_size: number of output units.
        """
        super().__init__()
        self.weights = np.random.rand(input_size, output_size) - 0.5
        self.bias = np.random.rand(1, output_size) - 0.5

    def forward_propagation(self, input_data):
        """Store the input and return ``input_data @ weights + bias``."""
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient-descent step and return dE/dX.

        Args:
            output_error: dE/dY, one row per sample.
            learning_rate: step size of the update.
        """
        # Computed with the weights used in the forward pass, i.e. before the update.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(self.input.T, output_error)
        # Sum over the sample axis so the result keeps the (1, output_size) bias
        # shape; for a single row this equals `output_error` itself.
        bias_error = np.atleast_2d(output_error).sum(axis=0, keepdims=True)
        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error


# Activation layer. Paired with a dense layer it forms a layer of perceptrons.
class ActivationLayer(Layer):
    """Wraps an element-wise activation together with its derivative."""

    def __init__(self, activation, activation_prime):
        self.activation, self.activation_prime = activation, activation_prime

    def forward_propagation(self, input_data):
        """Keep the input for the backward pass and return the activated values."""
        self.input = input_data
        result = self.activation(input_data)
        self.output = result
        return result

    def backward_propagation(self, output_error, learning_rate):
        """Scale dE/dY by the derivative at the stored input; nothing is trained."""
        slope = self.activation_prime(self.input)
        return slope * output_error


# Convolutional and pooling layer classes
class Conv2D(Layer):
    """Valid (no padding, stride 1) 2-D convolution on ``(rows, cols, depth)`` input.

    The kernel tensor has a depth axis of size 1, so the same kernel is applied
    to every slice along the last input axis and the results are summed. A 2-D
    input is treated as having a single slice along that axis.
    """

    def __init__(self, filters, kernel_size, activation=None, activation_prime=None):
        """Create the layer.

        Args:
            filters: number of kernels / output maps.
            kernel_size: ``(height, width)`` of every kernel.
            activation: optional element-wise function applied to the result.
            activation_prime: derivative of `activation`. When omitted, the
                backward pass estimates it with a central finite difference,
                which is only valid for element-wise activations.
        """
        super().__init__()
        self.filters = filters
        self.kernel_size = kernel_size
        self.activation = activation
        self.activation_prime = activation_prime
        self.weights = np.random.rand(kernel_size[0], kernel_size[1], 1, filters) - 0.5
        self.bias = np.random.rand(filters) - 0.5
        # Convolution result before the activation; needed by the backward pass.
        self.pre_activation = None

    def _as_3d(self, data):
        """Return `data` with an explicit trailing axis when it is 2-D."""
        return data[:, :, np.newaxis] if data.ndim == 2 else data

    def forward_propagation(self, input_data):
        """Convolve the input with every kernel and apply the activation, if any."""
        self.input = input_data
        # Without the trailing axis a 2-D window would broadcast against the
        # (kh, kw, 1) kernel into a (kh, kw, kw) product and give wrong sums.
        x = self._as_3d(input_data)
        kh, kw = self.kernel_size
        out_h, out_w = x.shape[0] - kh + 1, x.shape[1] - kw + 1
        kernels = self.weights[:, :, 0, :]
        z = np.zeros((out_h, out_w, self.filters))
        for i in range(out_h):
            for j in range(out_w):
                # The kernel is shared along the last axis, so collapse it first.
                window = x[i:i + kh, j:j + kw, :].sum(axis=2)
                z[i, j, :] = np.tensordot(window, kernels, axes=([0, 1], [0, 1])) + self.bias
        self.pre_activation = z
        self.output = z if self.activation is None else self.activation(z)
        return self.output

    def _activation_gradient(self):
        """Return d(activation)/dz evaluated at the stored pre-activation."""
        z = self.pre_activation
        if self.activation is None:
            return np.ones_like(z)
        if self.activation_prime is not None:
            return self.activation_prime(z)
        # Fallback when no derivative was supplied: central finite difference.
        eps = 1e-6
        return (self.activation(z + eps) - self.activation(z - eps)) / (2 * eps)

    def backward_propagation(self, output_error, learning_rate):
        """Update kernels and biases by gradient descent and return dE/dX.

        Returns None when `output_error` is None, i.e. when the layer after this
        one did not produce a gradient.
        """
        if output_error is None:
            return None
        x = self._as_3d(self.input)
        kh, kw = self.kernel_size
        delta = output_error * self._activation_gradient()
        kernels = self.weights[:, :, 0, :]
        input_error = np.zeros(x.shape)
        kernels_error = np.zeros(kernels.shape)
        for i in range(delta.shape[0]):
            for j in range(delta.shape[1]):
                d = delta[i, j, :]
                # dE/dX of the window is sum_f d[f] * kernel_f, identical for every
                # slice along the last axis.
                input_error[i:i + kh, j:j + kw, :] += np.tensordot(kernels, d, axes=(2, 0))[:, :, np.newaxis]
                window = x[i:i + kh, j:j + kw, :].sum(axis=2)
                kernels_error += window[:, :, np.newaxis] * d
        # Parameters change only after dE/dX was computed with the forward-pass kernels.
        self.weights[:, :, 0, :] -= learning_rate * kernels_error
        self.bias -= learning_rate * delta.sum(axis=(0, 1))
        return input_error.reshape(self.input.shape)


class MaxPooling2D(Layer):
    """Non-overlapping max pooling over the first two axes of a 3-D input.

    Rows/columns that do not fill a complete window are ignored.
    """

    def __init__(self, pool_size):
        """Store the ``(height, width)`` of the pooling window."""
        super().__init__()
        self.pool_size = pool_size

    def _windows(self, data):
        """Yield ``(i, j, rows, cols)`` for every complete pooling window of `data`."""
        ph, pw = self.pool_size
        for i in range(data.shape[0] // ph):
            rows = slice(i * ph, (i + 1) * ph)
            for j in range(data.shape[1] // pw):
                yield i, j, rows, slice(j * pw, (j + 1) * pw)

    def _empty_output(self, data):
        """Return a zero array with the pooled shape for `data`."""
        ph, pw = self.pool_size
        return np.zeros((data.shape[0] // ph, data.shape[1] // pw, data.shape[2]))

    def forward_propagation(self, input_data):
        """Return the maximum of every window, separately for each last-axis slice."""
        self.input = input_data
        self.output = self._empty_output(input_data)
        for i, j, rows, cols in self._windows(input_data):
            self.output[i, j] = np.max(input_data[rows, cols, :], axis=(0, 1))
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Send each output gradient to the position(s) holding the window maximum.

        `learning_rate` is unused: the layer has no trainable parameters.
        """
        input_error = np.zeros(self.input.shape)
        for i, j, rows, cols in self._windows(self.input):
            window = self.input[rows, cols, :]
            # Every element equal to the maximum receives the gradient.
            is_max = window == np.max(window, axis=(0, 1), keepdims=True)
            input_error[rows, cols, :] += is_max * output_error[i, j]
        return input_error

class AveragePooling2D(MaxPooling2D):
    """Non-overlapping average pooling; reuses the window logic of `MaxPooling2D`."""

    def forward_propagation(self, input_data):
        """Return the mean of every window, separately for each last-axis slice."""
        self.input = input_data
        self.output = self._empty_output(input_data)
        for i, j, rows, cols in self._windows(input_data):
            self.output[i, j] = np.mean(input_data[rows, cols, :], axis=(0, 1))
        return self.output

    def backward_propagation(self, output_error, learning_rate):
        """Spread each output gradient evenly over the elements of its window.

        Overridden so the max-routing rule of the parent class is not inherited.
        """
        ph, pw = self.pool_size
        input_error = np.zeros(self.input.shape)
        for i, j, rows, cols in self._windows(self.input):
            input_error[rows, cols, :] += output_error[i, j] / (ph * pw)
        return input_error



# Network class: connects multiple layers.
class Network:
    """Sequential model trained one sample at a time, with optional early stopping."""

    def __init__(self):
        self.layers = []
        self.loss = None
        self.loss_prime = None

    def add(self, layer):
        """Append `layer` to the end of the stack."""
        self.layers.append(layer)

    def use(self, loss, loss_prime):
        """Set the loss function and its derivative with respect to the prediction."""
        self.loss = loss
        self.loss_prime = loss_prime

    @staticmethod
    def _accepts_training_flag(layer):
        """Return True when the layer's forward pass has a `training` parameter."""
        import inspect

        try:
            parameters = inspect.signature(layer.forward_propagation).parameters
        except (TypeError, ValueError):
            return False
        return 'training' in parameters

    def _forward(self, sample, training, training_aware):
        """Run one sample through every layer and return the final output.

        `training_aware[k]` tells whether layer k takes the `training` keyword.
        """
        output = sample
        for layer, takes_flag in zip(self.layers, training_aware):
            if takes_flag:
                output = layer.forward_propagation(output, training=training)
            else:
                output = layer.forward_propagation(output)
        return output

    def predict(self, input_data):
        """Return a list with the network output for every sample.

        Layers that distinguish training from evaluation (e.g. dropout) are run
        in evaluation mode, so predictions are not randomly perturbed.
        """
        if input_data.ndim == 1:
            # A flat vector is treated as a batch of one-element samples.
            input_data = np.array([[x] for x in input_data])
        training_aware = [self._accepts_training_flag(layer) for layer in self.layers]
        return [self._forward(sample, False, training_aware) for sample in input_data]

    def fit(self, x_train, y_train, epochs, learning_rate, x_val=None, y_val=None, early_stopping=None):
        """Train with per-sample gradient descent and optional early stopping.

        Args:
            x_train: training samples; 1-D samples are wrapped into single-row 2-D arrays.
            y_train: targets aligned with `x_train`.
            epochs: maximum number of passes over the training data.
            learning_rate: step size handed to every layer.
            x_val, y_val: validation data; only used when `early_stopping` is given.
            early_stopping: object exposing ``should_stop(val_loss) -> bool``.
                Training ends at the first epoch for which it returns True.
        """
        if x_train[0].ndim == 1:
            x_train = np.array([[x] for x in x_train])
        samples = len(x_train)
        training_aware = [self._accepts_training_flag(layer) for layer in self.layers]

        for i in range(epochs):
            err = 0
            for j in range(samples):
                output = self._forward(x_train[j], True, training_aware)

                err += self.loss(y_train[j], output)

                # Back-propagate from the last layer to the first.
                error = self.loss_prime(y_train[j], output)
                for layer in reversed(self.layers):
                    error = layer.backward_propagation(error, learning_rate)

            err /= samples
            err = np.mean(err)

            print('epoch %d/%d   error=%f' % (i + 1, epochs, err))

            # Validation and early stopping
            if x_val is not None and y_val is not None and early_stopping is not None:
                val_err = 0
                for j in range(len(x_val)):
                    # Evaluation mode: the validation loss must not include dropout noise.
                    val_output = self._forward(x_val[j], False, training_aware)
                    val_err += self.loss(y_val[j], val_output)
                val_err /= len(x_val)
                val_err = np.mean(val_err)

                print('Validation error: %f' % val_err)

                if early_stopping.should_stop(val_err):
                    print('Early stopping at epoch %d' % (i + 1))
                    break


# Funciones de Activación
def tanh(x):
    """Element-wise hyperbolic tangent."""
    result = np.tanh(x)
    return result

def tanh_prime(x):
    """Element-wise derivative of tanh, ``1 - tanh(x)^2``."""
    squashed = np.tanh(x)
    return 1 - squashed ** 2

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise.

    Evaluated through the identity ``sigmoid(x) = (1 + tanh(x / 2)) / 2``, which
    never overflows, whereas ``exp(-x)`` does for large negative `x`.
    """
    return 0.5 * (1.0 + np.tanh(0.5 * x))

def sigmoid_prime(x):
    """Element-wise derivative of the logistic function."""
    value = sigmoid(x)
    remainder = 1 - value
    return value * remainder

def relu(x):
    """Element-wise ``max(0, x)``."""
    lower_bound = 0
    return np.maximum(lower_bound, x)

def relu_prime(x):
    """Element-wise ReLU derivative: 1 for strictly positive inputs, otherwise 0."""
    active = x > 0
    return np.where(active, 1, 0)


# Funciones de pérdida
def mse(y_true, y_hat):
    """Squared error per element (not averaged here)."""
    difference = y_true - y_hat
    return difference ** 2

def mse_prime(y_true, y_hat):
    """Gradient of the squared error with respect to the prediction."""
    difference = y_hat - y_true
    return 2 * difference

def bce(y_true, y_hat):
    """Binary cross-entropy per element, with predictions clipped away from 0 and 1."""
    margin = 1e-15
    clipped = np.clip(y_hat, margin, 1 - margin)
    log_hit = np.log(clipped)
    log_miss = np.log(1 - clipped)
    return -(y_true * log_hit + (1 - y_true) * log_miss)

def bce_prime(y_true, y_hat):
    """Gradient of binary cross-entropy with respect to the clipped prediction."""
    margin = 1e-15
    clipped = np.clip(y_hat, margin, 1 - margin)
    pull_up = -(y_true / clipped)
    push_down = (1 - y_true) / (1 - clipped)
    return pull_up + push_down


<h1>Entrenamos e imprimimos</h1>

In [24]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score

# Hold out 20% of the training split as a validation set (fixed seed).
X_train_full, X_val, y_train_full, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

model = Network()

# The size of the input layer is the number of features of one sample of X.
entrada_dim = len(X_train_full[0])

# Three dense layers (128 -> 64 -> 10 units), each followed by its activation; no dropout here.
model.add(FCLayer(entrada_dim, 128))
model.add(ActivationLayer(relu, relu_prime))
model.add(FCLayer(128, 64))
model.add(ActivationLayer(sigmoid, sigmoid_prime))
model.add(FCLayer(64, 10))
model.add(ActivationLayer(sigmoid, sigmoid_prime))

# Train with binary cross-entropy applied to each output unit.
model.use(bce, bce_prime)

# Stop after 5 consecutive epochs in which the validation loss does not
# improve by more than 0.001.
early_stopping = EarlyStopping(patience=5, min_delta=0.001)

model.fit(X_train_full, y_train_full, epochs=30, learning_rate=0.1, x_val=X_val, y_val=y_val, early_stopping=early_stopping)

# Use the model to predict on the test data (X_test)
y_hat = model.predict(X_test)

# Replace each output vector, in place, by the index of its largest entry,
# i.e. the predicted class (not a one-hot vector).
for i in range(len(y_hat)):
    y_hat[i] = np.argmax(y_hat[i])

# Report the results of the model
matriz_conf = confusion_matrix(y_test, y_hat)
print('MATRIZ DE CONFUSIÓN para modelo ANN')
print(matriz_conf, '\n')
print('La exactitud de testeo del modelo ANN es: {:.3f}'.format(accuracy_score(y_test, y_hat)))


epoch 1/30   error=0.228073
Validation error: 0.122745
epoch 2/30   error=0.121789
Validation error: 0.110490
epoch 3/30   error=0.081817
Validation error: 0.095770
epoch 4/30   error=0.054483
Validation error: 0.108022
epoch 5/30   error=0.040151
Validation error: 0.112454
epoch 6/30   error=0.026330
Validation error: 0.094817
epoch 7/30   error=0.013364
Validation error: 0.102930
epoch 8/30   error=0.006153
Validation error: 0.087987
epoch 9/30   error=0.002951
Validation error: 0.084543
epoch 10/30   error=0.001527
Validation error: 0.085199
epoch 11/30   error=0.001149
Validation error: 0.085808
epoch 12/30   error=0.000955
Validation error: 0.086645
epoch 13/30   error=0.000822
Validation error: 0.087280
epoch 14/30   error=0.000725
Validation error: 0.087963
Early stopping at epoch 14
MATRIZ DE CONFUSIÓN para modelo ANN
[[ 95   0   1   0   0   1   1   0   1   0]
 [  0 113   1   0   0   1   0   0   0   0]
 [  2   1  82   3   2   0   5   3   6   0]
 [  0   0   4  92   2   3   2   3