In [None]:
import numpy as np
import pandas as pd
import scipy.io as sio
import scipy.optimize as opt
import sklearn.metrics

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of *z*, element-wise.

    The value is evaluated as ``exp(-log(1 + exp(-z)))`` through
    ``np.logaddexp`` so that large negative inputs do not overflow ``exp``
    (the naive formula emits a RuntimeWarning and relies on ``1 / inf``).

    Args:
        z: Real scalar or NumPy array (anything supporting unary minus).

    Returns:
        NumPy scalar or array shaped like *z*, with values in [0, 1].
    """
    # log(1 + exp(-z)) == logaddexp(0, -z), computed without overflow.
    return np.exp(-np.logaddexp(0, -z))

def holdout(X, y, percentage=0.6, random_state=None):
    """Randomly split *X* and *y* into training and test partitions.

    Rows are paired by position: the n-th row of *y* is treated as the target
    of the n-th row of *X*, whatever index labels the two objects carry.

    Args:
        X: pandas DataFrame (or Series) of samples. Its index must be unique
            so sampled rows can be mapped back to their positions.
        y: pandas Series/DataFrame with one row per row of *X*.
        percentage: Fraction of the rows placed in the training partition.
        random_state: Optional seed or random state forwarded to
            ``X.sample`` to make the split reproducible.

    Returns:
        Tuple ``(X_training, y_training, X_test, y_test)``; every element has
        its index reset to ``0..n-1``.
    """
    n_training = round(percentage * len(X))
    X_training = X.sample(n_training, random_state=random_state)

    # ``iloc`` is positional, so translate the sampled index labels into row
    # positions first; passing labels straight to ``iloc`` is only correct
    # when the index happens to be the default 0..n-1 range.
    training_positions = X.index.get_indexer(X_training.index)
    is_test_row = ~X.index.isin(X_training.index)

    y_training = y.iloc[training_positions]
    X_test = X.iloc[is_test_row]
    y_test = y.iloc[is_test_row]

    return (
        X_training.reset_index(drop=True),
        y_training.reset_index(drop=True),
        X_test.reset_index(drop=True),
        y_test.reset_index(drop=True),
    )

def normalize(X):
    """Standardize *X* to zero mean and unit standard deviation.

    The statistics are whatever ``X.mean()`` and ``X.std()`` return for the
    given container (e.g. per column for a pandas DataFrame, over all
    elements for a NumPy array).  A feature whose standard deviation is
    exactly zero is divided by 1 instead, so constant features map to 0
    rather than NaN.

    Args:
        X: Object exposing ``mean()`` and ``std()`` and supporting arithmetic
            with their results (pandas DataFrame/Series or NumPy array).

    Returns:
        The standardized data, in the same kind of container as *X*.
    """
    spread = X.std()
    # Adding the boolean mask turns every exact 0 into 1 and leaves all other
    # values untouched, while keeping the container type (and labels) intact.
    safe_spread = spread + (spread == 0)
    return (X - X.mean()) / safe_spread

def forward(thetas, X):
    """Propagate one sample through the network.

    Args:
        thetas: Sequence of weight matrices, one per layer, each expecting a
            leading bias entry in its input.
        X: Feature vector of a single sample (without the bias entry).

    Returns:
        Tuple ``(activations, h)``: ``activations`` lists the bias-augmented
        input followed by every layer's activation (bias-augmented for all
        but the last layer); ``h`` is the last entry of that list.
    """
    layer_input = np.hstack((1, X))
    activations = [layer_input]
    last_layer = len(thetas) - 1

    for layer, theta in enumerate(thetas):
        layer_output = sigmoid(np.dot(theta, layer_input))
        # Every layer but the output one feeds a bias unit to the next layer.
        if layer < last_layer:
            layer_output = np.hstack((1, layer_output))
        activations.append(layer_output)
        layer_input = layer_output

    return activations, activations[-1]

def nnCostFunction(nn_params, layer_sizes, X, y):
    """Return the unregularized cross-entropy cost of the network on (X, y).

    The whole data set is pushed through the network in one vectorized pass
    instead of one sample at a time.

    Args:
        nn_params: 1-D array holding every layer's weight matrix unrolled in
            column-major (Fortran) order and concatenated, first layer first.
        layer_sizes: Sequence with the number of units per layer (bias units
            excluded), from input layer to output layer.
        X: 2-D NumPy array with one sample per row.
        y: NumPy array of class labels, one per sample; it is flattened and
            one-hot encoded with ``pd.get_dummies``.

    Returns:
        The cost ``J`` averaged over ``len(y)`` samples.

    Raises:
        ValueError: If the number of distinct labels in *y* differs from the
            number of output units.
    """
    # Roll the flat parameter vector back into one weight matrix per layer.
    thetas = []
    start = 0
    for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
        end = start + n_out * (n_in + 1)
        thetas.append(np.reshape(nn_params[start:end], (n_out, n_in + 1), order='F'))
        start = end

    m = len(y)
    # One column per distinct label found in y.
    one_hot = pd.get_dummies(y.flatten()).values.astype(float)

    # Forward pass for all samples at once: prepend the bias column, then
    # apply the layer.
    bias = np.ones((X.shape[0], 1))
    h = X
    for theta in thetas:
        h = sigmoid(np.hstack((bias, h)) @ theta.T)

    if one_hot.shape[1] != h.shape[1]:
        raise ValueError(
            "y contains %d distinct labels but the network has %d outputs"
            % (one_hot.shape[1], h.shape[1])
        )

    # Keep saturated outputs strictly inside (0, 1) so log() never yields
    # -inf or NaN; unsaturated outputs are unaffected.
    eps = np.finfo(float).eps
    h = np.clip(h, eps, 1 - eps)

    J = -np.sum(one_hot * np.log(h) + (1 - one_hot) * np.log(1 - h)) / m
    return J

def nnGradFunction(nn_params, layer_sizes, X, y):
    """Return the gradient of the unregularized cross-entropy cost.

    Backpropagation is carried out for all samples at once with matrix
    products instead of one sample at a time.

    Args:
        nn_params: 1-D array holding every layer's weight matrix unrolled in
            column-major (Fortran) order and concatenated, first layer first.
        layer_sizes: Sequence with the number of units per layer (bias units
            excluded), from input layer to output layer.
        X: 2-D NumPy array with one sample per row.
        y: NumPy array of class labels, one per sample; it is flattened and
            one-hot encoded with ``pd.get_dummies``.

    Returns:
        1-D array with the per-layer gradients unrolled in the same
        column-major layout as *nn_params*.

    Raises:
        ValueError: If the number of distinct labels in *y* differs from the
            number of output units.
    """
    # Roll the flat parameter vector back into one weight matrix per layer.
    thetas = []
    start = 0
    for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
        end = start + (n_in + 1) * n_out
        thetas.append(np.reshape(nn_params[start:end], (n_out, n_in + 1), order='F'))
        start = end

    m = len(y)
    # One column per distinct label found in y.
    one_hot = pd.get_dummies(y.flatten()).values.astype(float)

    # Forward pass: layer_inputs[j] is the bias-augmented input of layer j.
    bias = np.ones((X.shape[0], 1))
    layer_inputs = []
    output = X
    for theta in thetas:
        augmented = np.hstack((bias, output))
        layer_inputs.append(augmented)
        output = sigmoid(augmented @ theta.T)

    if one_hot.shape[1] != output.shape[1]:
        raise ValueError(
            "y contains %d distinct labels but the network has %d outputs"
            % (one_hot.shape[1], output.shape[1])
        )

    # Backward pass: start from the output error and walk towards the input.
    error = output - one_hot
    gradients = [None] * len(thetas)
    for j in range(len(thetas) - 1, -1, -1):
        # Sum over samples of outer(error, layer input), averaged over m.
        gradients[j] = error.T @ layer_inputs[j] / m
        if j > 0:
            # Propagate through layer j's weights and the sigmoid derivative
            # a * (1 - a); the bias column receives no error and is dropped.
            propagated = (error @ thetas[j]) * layer_inputs[j] * (1 - layer_inputs[j])
            error = propagated[:, 1:]

    gradiente = np.hstack([gradient.ravel(order='F') for gradient in gradients])
    return gradiente

def training(initial_thetas, X_train, y_train, layer_sizes, maxiter=10, gtol=0.005):
    """Fit the network weights with nonlinear conjugate gradient.

    Args:
        initial_thetas: Sequence of initial weight matrices, one per layer.
        X_train: Training samples, passed through to the cost and gradient
            functions.
        y_train: NumPy array of training labels; it is flattened before being
            handed to the optimizer.
        layer_sizes: Sequence with the number of units per layer (bias units
            excluded), from input layer to output layer.
        maxiter: Maximum number of optimizer iterations.
        gtol: Gradient-norm tolerance at which the optimizer stops.

    Returns:
        List with the optimized weight matrix of every layer, each of shape
        ``(layer_sizes[i + 1], layer_sizes[i] + 1)``.
    """
    # The optimizer works on a single flat vector, so unroll every matrix in
    # column-major order (the layout the cost/gradient functions expect).
    nn_initial_params = np.hstack([theta.ravel(order='F') for theta in initial_thetas])

    nn_params = opt.fmin_cg(
        f=nnCostFunction,
        x0=nn_initial_params,
        fprime=nnGradFunction,
        args=(layer_sizes, X_train, y_train.flatten()),
        gtol=gtol,
        maxiter=maxiter,
    )

    # Roll the optimized vector back into one weight matrix per layer.
    thetas = []
    start = 0
    for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
        end = start + n_out * (n_in + 1)
        thetas.append(np.reshape(nn_params[start:end], (n_out, n_in + 1), order='F'))
        start = end

    return thetas

def thetasInicial(nlayers, epsilon=0.12):
    """Create random initial weight matrices for a fully connected network.

    Args:
        nlayers: Sequence with the number of units per layer (bias units
            excluded), from input layer to output layer.
        epsilon: Half-width of the interval the weights are drawn from.

    Returns:
        List with one matrix per pair of consecutive layers, of shape
        ``(units_out, units_in + 1)``, with entries drawn uniformly from
        ``[-epsilon, epsilon)`` using NumPy's global random state.
    """
    return [
        np.random.rand(units_out, units_in + 1) * 2 * epsilon - epsilon
        for units_in, units_out in zip(nlayers, nlayers[1:])
    ]

# Load the data set from the MATLAB file; 'X' holds the samples and 'y' the labels.
data = sio.loadmat('datasets/ex4data1.mat')
X = data['X']
y = data['y']

# Reduce the labels modulo 10, so a label of 10 becomes 0.
# NOTE(review): presumably the data set encodes digit 0 as 10 — confirm.
y = y % 10

# Number of units per layer: input, hidden, output.
# NOTE(review): assumes X has 400 feature columns and y has 10 distinct labels — confirm.
nlayer = [400, 200, 10]
thetas = thetasInicial(nlayer)
# `display` is not imported in this file; presumably the IPython/Jupyter built-in.
display(thetas)
# Train the neural network on the full data set.
trained_thetas = training(thetas, X, y, nlayer)

def predict(thetas, X):
    """Predict a class index for every row of *X*.

    Args:
        thetas: Sequence of weight matrices, one per layer, each expecting a
            leading bias column in its input.
        X: 2-D array with one sample per row (without the bias column).

    Returns:
        1-D array with, for each sample, the index of the output unit with
        the highest activation.
    """
    bias_column = np.ones((len(X), 1))
    activation = np.hstack((bias_column, X))

    # Hidden layers: apply the layer, then prepend the bias column again.
    for hidden_theta in thetas[:-1]:
        hidden_output = sigmoid(activation @ hidden_theta.T)
        activation = np.hstack((bias_column, hidden_output))

    # Output layer: the hypothesis, one score per class.
    scores = sigmoid(activation @ thetas[-1].T)

    # The prediction is the index of the class with the highest score.
    return np.argmax(scores, axis=1)

# Run predictions on X and print the fraction of labels in y that they match.
# NOTE(review): X is the same data passed to training() above, so this is
# training accuracy rather than a held-out estimate.
predictions = predict(trained_thetas, X)
print(sklearn.metrics.accuracy_score(y, predictions))

print("Predicciones: \n", predictions)


[array([[-0.07825672, -0.09973021, -0.0365219 , ...,  0.05358921,
          0.07731829, -0.05953887],
        [ 0.10232424,  0.00458265, -0.05456071, ..., -0.08395068,
          0.07963235, -0.09261419],
        [ 0.02274563, -0.04224518, -0.05519225, ...,  0.06423434,
          0.01742372, -0.03705605],
        ...,
        [ 0.08415173, -0.02694921,  0.1104775 , ..., -0.03496844,
          0.11977541, -0.09278696],
        [ 0.05606542,  0.02197399,  0.10943548, ..., -0.07770881,
         -0.06790782,  0.06090263],
        [-0.03833635, -0.0801408 ,  0.0412591 , ..., -0.09481721,
         -0.02753908,  0.0483412 ]]),
 array([[ 0.05408523,  0.02094522, -0.09604994, ..., -0.06459794,
          0.08963507, -0.0357565 ],
        [-0.04857243,  0.03833608, -0.03684097, ...,  0.02233409,
          0.05385499,  0.0801044 ],
        [ 0.09904466,  0.0411483 ,  0.10960868, ...,  0.11355427,
         -0.09996735,  0.08570318],
        ...,
        [ 0.07813201,  0.04091359,  0.01456675, ...,  

         Current function value: 1.104335
         Iterations: 10
         Function evaluations: 18
         Gradient evaluations: 18
0.8444
Predicciones: 
 [0 0 0 ... 9 9 0]


  res = _minimize_cg(f, x0, args, fprime, callback=callback, c1=c1, c2=c2,
