In [8]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys

class NeuralNetwork:
    def __init__(self, layer_sizes):
        self.weights = []

        # Inicialización de pesos y sesgos con valores aleatorios para cada capa
        for i in range(len(layer_sizes) - 1):
            weight = np.random.rand(layer_sizes[i], layer_sizes[i + 1])/20
            self.weights.append(weight)

    def lossFn(self, y, h):
        return np.mean((y - h) ** 2)

    def logistic(self, x):
        return 1 / (1 + np.exp(-x))

    def logistic_derivative(self, x):
        return x * (1 - x)

    def forward_propagation(self, inputs):
        activations = [inputs]  # Lista para almacenar las activaciones de cada capa
        #print(activations)
        # Calcula las activaciones pasando la entrada a través de las capas
        for i in range(len(self.weights)):
            inp = np.dot(activations[i], self.weights[i])
            activation = self.logistic(inp)
            activations.append(activation)
        return activations

    # Propagación hacia atrás: calcula los errores y los propaga de vuelta a través de la red
    def backward_propagation(self, activations, expected_output):
        # Calcula el error de la capa de salida
        mean_error = self.lossFn(expected_output, activations[-1])
        errors = [expected_output - activations[-1]]
        sys.stdout.write(f'\r{mean_error}')
        sys.stdout.flush()
        # Propaga el error hacia atrás y calcula el error para cada capa
        for i in range(len(activations) - 2, 0, -1):
            error = np.dot(errors[0], self.weights[i].T) * self.logistic_derivative(activations[i])
            errors.insert(0, error)
        return errors

    # Actualización de pesos y sesgos usando los errores calculados
    def update_weights(self, activations, errors, learning_rate):
        for i in range(len(self.weights)):
            # Actualiza los pesos con el gradiente del error y la tasa de aprendizaje
            self.weights[i] += np.dot(activations[i].T, errors[i]) * learning_rate

    # Función de entrenamiento: ejecuta la propagación hacia adelante y hacia atrás
    def train(self, inputs, expected_output, learning_rate, iterations):
        # Ejecuta el ciclo de entrenamiento para el número dado de iteraciones
        for i in range(iterations):
            activations = self.forward_propagation(inputs)
            errors = self.backward_propagation(activations, expected_output)
            self.update_weights(activations, errors, learning_rate)
            

    # Función de predicción: calcula las salidas de la red para las entradas dadas
    def predict(self, inputs):
        activations = self.forward_propagation(inputs)
        #print(activations)
        return activations[-1]

# Ejemplo de uso
# Configuración de tamaños de capa: 4 entradas, 5 neuronas en la primera capa oculta, y así sucesivamente hasta 3 salidas
layer_sizes = [57,100,2]
nn = NeuralNetwork(layer_sizes)

# Carga y preparación de datos de entrenamiento
df = pd.read_csv('../spambase.data', delimiter=",")
df = df.sample(frac = 1)

# Extracción y normalización de las entradas
inputs = df.drop('spam', axis=1)
normInputs = inputs.copy()
# Normaliza las características para que estén en una escala de 0 a 1
for column in inputs.columns[0:]:
    normInputs[column] = (inputs[column] - inputs[column].min()) / (inputs[column].max() - inputs[column].min())


# Convierte los datos normalizados y las salidas a arrays de numpy
normInputs = np.array(normInputs)
outputs = np.array(pd.get_dummies(df['spam']))

# Entrenamiento de la red con los datos
nn.train(normInputs, outputs, learning_rate=0.01, iterations=5000)


# Uso de la red para hacer una predicción con un ejemplo de entrada
#print(nn.predict(np.array([5.3,3.7,1.5,0.2])))
#print(nn.predict(np.array([6.1,2.8,4,1.3])))
#print(nn.predict(np.array([6.9,3.2,5.7,2.3])))

0.033234000194684865

In [3]:
def normal(df):
    normdf = df.copy()
    for li in df.columns[0:]:
        normdf[li] = (df[li] -  inputs[li].min()) / (inputs[li].max() - inputs[li].min())
    return normdf
df2 = pd.read_csv('../spambase.data', delimiter=",")

In [9]:
df3 = df2.drop("spam", axis = 1)
df3 = normal(df3)
spam = 0
nospam = 0
fp = 0
fn = 0
for it in df3.index:
    x = df3.loc[it].values
    x = nn.predict(x)
    if x[0] >= x[1]:
        nospam = nospam +1
        if df2.loc[it].spam == 1:
            fn = fn + 1
    else:
        spam = spam +1
        if df2.loc[it].spam == 0:
            fp = fp + 1
z = spam / (spam+nospam)
print("SPAM: %d, NO SPAM: %d  PERCENT = %2f  FP: %d Fn: %d"%(spam,nospam, z*100, fp,fn))


SPAM: 1918, NO SPAM: 2683  PERCENT = 41.686590  FP: 150 Fn: 45


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,48,49,50,51,52,53,54,55,56,spam
0,0.00,0.64,0.64,0.0,0.32,0.00,0.00,0.00,0.00,0.00,...,0.000,0.000,0.0,0.778,0.000,0.000,3.756,61,278,1
1,0.21,0.28,0.50,0.0,0.14,0.28,0.21,0.07,0.00,0.94,...,0.000,0.132,0.0,0.372,0.180,0.048,5.114,101,1028,1
2,0.06,0.00,0.71,0.0,1.23,0.19,0.19,0.12,0.64,0.25,...,0.010,0.143,0.0,0.276,0.184,0.010,9.821,485,2259,1
3,0.00,0.00,0.00,0.0,0.63,0.00,0.31,0.63,0.31,0.63,...,0.000,0.137,0.0,0.137,0.000,0.000,3.537,40,191,1
4,0.00,0.00,0.00,0.0,0.63,0.00,0.31,0.63,0.31,0.63,...,0.000,0.135,0.0,0.135,0.000,0.000,3.537,40,191,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4596,0.31,0.00,0.62,0.0,0.00,0.31,0.00,0.00,0.00,0.00,...,0.000,0.232,0.0,0.000,0.000,0.000,1.142,3,88,0
4597,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,...,0.000,0.000,0.0,0.353,0.000,0.000,1.555,4,14,0
4598,0.30,0.00,0.30,0.0,0.00,0.00,0.00,0.00,0.00,0.00,...,0.102,0.718,0.0,0.000,0.000,0.000,1.404,6,118,0
4599,0.96,0.00,0.00,0.0,0.32,0.00,0.00,0.00,0.00,0.00,...,0.000,0.057,0.0,0.000,0.000,0.000,1.147,5,78,0


In [43]:
df_split.reset_index()

1.0