# Feed Forward Multiclass Classification Neural Networks

In [1]:
from typing import List, Callable, Any # para docstrings y typing
import numpy as np

In [236]:
# funcion sigmoide
def h_teta(matriz_t, matriz_x):
    """Sigmoid activation of the weighted inputs.

    matriz_t: weight matrix applied to the biased inputs.
    matriz_x: inputs WITHOUT the bias row; ``transformar_bias`` adds the row
        of ones before the weights are applied.

    Returns the element-wise sigmoid of ``matriz_t @ [ones; matriz_x]``.
    """
    # transformar_bias returns the biased inputs transposed, so transpose them
    # back before multiplying by the weights.
    entradas_con_bias = transformar_bias(matriz_x, 1).T
    zeta_teta = np.matmul(matriz_t, entradas_con_bias)

    # sigmoid(z) = 1 / (1 + e^(-z))
    denominador = 1 + np.power(np.e, -1 * zeta_teta)
    return denominador ** -1

def jota_teta(x, y, hipotesis, m, tetas):
    """Cross-entropy (logistic) cost of the predictions ``hipotesis``.

    Computes ``-(1/m) * (y.T @ log(h) + (1 - y).T @ log(1 - h))``.

    x: not used; kept so existing callers keep working.
    y: NumPy array of 0/1 labels whose transpose is matmul-compatible with
        ``hipotesis``.
    hipotesis: NumPy array of predicted probabilities (values of exactly 0 or
        1 make ``np.log`` return ``-inf``).
    m: number of observations the cost is averaged over.
    tetas: not used; kept so existing callers keep working.

    Returns the scaled sum of the two matrix products.
    """
    # The two class terms must be computed separately and then added. The
    # previous expression closed a parenthesis too late, which nested the
    # negative-class term inside the positive-class product.
    costo_positivos = np.matmul(y.T, np.log(hipotesis))
    costo_negativos = np.matmul((1 - y).T, np.log(1 - hipotesis))
    return (-1 / float(m)) * (costo_positivos + costo_negativos)

def gradiente(x, y, h, m, tetas):
    """Average gradient of the cost with respect to the parameters.

    Computes ``((h - y).T @ x).T / m``.

    x: design matrix multiplied by the prediction error.
    y: expected values.
    h: predicted values, same shape as ``y``.
    m: number of observations used for the average.
    tetas: accepted for signature compatibility; not used.
    """
    residuos = h - y
    suma_por_parametro = np.matmul(residuos.T, x).T
    return suma_por_parametro / float(m)

def descenso_gradiente(
        x_set: List[List[float]],
        y_set: List[float],
        # tetas_iniciales: List[float],  (disabled: tetas are initialised randomly below)
        hipotesis: Callable[[Any], Any],# intended: Callable[[List[float], List[float]], List[float]] -> Callable[[parameters], result]
        gradiente: Callable[[Any], Any],# intended: Callable[[List[float], List[float], List[float], float], List[float]] -> Callable[[parameters], result]
        max_iters: int = 10000,
        alpha: float = 0.0001,
        _lambda: float = 0.0,
        grado: int = 1
    ) -> List[float]:
    
    """Run gradient descent and return the fitted tetas.

    x_set: inputs; must expose ``.shape`` (a NumPy array in practice, despite
        the annotation).
    y_set: expected outputs, forwarded unchanged to ``gradiente``.
    hipotesis: called as ``hipotesis(X, tetas)`` on every iteration.
    gradiente: called as ``gradiente(X, y_set, h, m, tetas)`` on every
        iteration.
    max_iters: number of update steps performed.
    alpha: learning rate that scales every gradient step.
    _lambda: accepted but not used anywhere in the body.
    grado: forwarded to ``transformar_arreglo``.

    Returns the ``(n, 1)`` array of tetas after ``max_iters`` updates, where
    ``n`` is the number of columns of the transformed design matrix.

    NOTE(review): ``transformar_arreglo`` is not defined in the visible part of
    this file -- presumably an earlier name of ``transformar_bias``; confirm.
    """
    
    unos = np.ones(x_set.shape[1]) # shape[1] because X arrives feature-per-row, so each column is one observation. NOTE: `unos` is never used afterwards.
    
    X = transformar_arreglo(x_set, grado)
    
    m, n = X.shape
    #y_set = y_set.reshape(m,1) # convert to a column vector.
    #print(y_set[-10:])
    tetas = np.random.rand(n,1) # random starting point, one teta per column of X

    for i in range(max_iters):
        h = hipotesis(X, tetas) # current predictions
        #print((h - ys).shape) # (100,1) - (100,1)
        tetas -= alpha * gradiente(X, y_set, h, m, tetas) # in-place gradient step
    
    #costo = jota_teta(y_set, h, m)
    #y_pred = np.matmul(X,tetas)

    #return y_pred, tetas, costo.sum()
    return tetas # only the fitted tetas are returned (X, with its column of ones, is not).

def cross_validate(x_train, y_train, x_test, y_test, tetas):
    """Evaluate the fitted ``tetas`` on the train set and on the test set.

    x_train, y_train: inputs and expected outputs used for fitting.
    x_test, y_test: held-out inputs and expected outputs.
    tetas: parameters obtained from the train set.

    Returns ``[(costo_train, costo_test), (h_train, h_test)]``.

    NOTE(review): ``h_teta`` is declared as ``h_teta(matriz_t, matriz_x)`` but
    is called here with the data first and ``tetas`` second; the call order is
    kept as found -- confirm which one is intended.
    """
    m_train = x_train.shape[0]
    m_test = x_test.shape[0]

    h_train = h_teta(x_train, tetas)
    h_test = h_teta(x_test, tetas)

    costo_train = jota_teta(x_train, y_train, h_train, m_train, tetas)
    # The test cost is computed from the test inputs and averaged over the
    # test-set size; reusing the train-set values mis-scales it whenever the
    # two sets differ in size.
    costo_test = jota_teta(x_test, y_test, h_test, m_test, tetas)

    return [(costo_train, costo_test), (h_train, h_test)]

def transformar_bias(x_set, grado):
    """Stack a bias row of ones on top of ``x_set`` and return the transpose.

    x_set: 2-D array with one row per feature and one column per observation.
    grado: selects the rows stacked under the bias row:
        1  -> ``x_set``
        2  -> ``x_set`` and ``x_set**2``
        -2 -> ``x_set`` and ``-(x_set**2)``

    Returns the stacked matrix transposed, so that the first column is the
    column of ones used for the bias term.

    Raises ValueError for any other ``grado`` (previously this fell through to
    the ``return`` and failed with an UnboundLocalError).
    """
    # shape[1] because every column of x_set is one observation.
    unos = np.ones(x_set.shape[1])

    if grado == 1:
        filas = (unos, x_set)
    elif grado == 2:
        filas = (unos, x_set, x_set**2)
    elif grado == -2:
        filas = (unos, x_set, -x_set**2)
    else:
        raise ValueError(
            "grado must be 1, 2 or -2, got {!r}".format(grado)
        )

    # Transposed so that the bias ends up as the first column.
    return np.vstack(filas).T

In [278]:
# Earlier layout with one row per observation (kept for reference):
#X = np.array([[1,2,3],[4,5,6],[7,8,9], [1,4,7]])
X = np.array([
    [1,4,7,1,0], # each row holds the values of one feature across all observations
    [2,5,8,4,0],
    [3,6,9,7,0],
])
#Y = np.array([[1,0,0], [0,1,0],[0,0,1], [0,0,0]])
Y = np.array([
    [1,0,0,0,0], # each row is one output neuron, each column one observation; the last two columns have no output set
    [0,1,0,0,0],
    [0,0,1,0,0],
])
# Minimal single-feature example (disabled):
#X = np.array([[1],[2],[3]])
#Y = np.array([[1,0,0]])

In [174]:
X.shape # (3, 5): 3 features (rows) and 5 observations (columns).
Y.shape

(3, 5)

In [175]:
NN_HIDDEN_LAYERS = 2
NN_ARCH = [X.shape[0], 4, 3, Y.shape[0]] # X and Y are stored one row per feature/output, so shape[0] (not shape[1]) gives the input and output layer sizes
NN_ARCH # Number of neurons wanted in each layer.

[3, 4, 3, 3]

In [162]:
np.random.rand(4,4)

array([[0.30975814, 0.7022761 , 0.67219334, 0.16710222],
       [0.29845537, 0.84663045, 0.5778933 , 0.56890888],
       [0.24952919, 0.34340312, 0.44557254, 0.80503592],
       [0.90758975, 0.15634901, 0.34210286, 0.34536396]])

In [185]:
# One randomly initialised weight matrix per pair of consecutive layers.
# The last layer has no outgoing weights because it is the hypothesis.
TETAS = [
    np.random.rand(
        n_salida,       # neurons in the next layer
        n_entrada + 1,  # neurons feeding in, plus one for the bias neuron
    )
    for n_entrada, n_salida in zip(NN_ARCH[:-1], NN_ARCH[1:])
]
TETAS

[array([[0.00990474, 0.64827585, 0.63373462, 0.0757352 ],
        [0.89126963, 0.37618299, 0.26208752, 0.66950716],
        [0.85141703, 0.27772932, 0.03914973, 0.96008971],
        [0.52861848, 0.635508  , 0.57933768, 0.8462869 ]]),
 array([[0.41818099, 0.42775091, 0.00427627, 0.89723039, 0.02509069],
        [0.87210105, 0.11351693, 0.82855083, 0.32620398, 0.20191474],
        [0.0334966 , 0.47473493, 0.89216888, 0.79920142, 0.21185361]]),
 array([[0.94771751, 0.68469799, 0.1752419 , 0.65394532],
        [0.41925688, 0.06778615, 0.43341854, 0.56638286],
        [0.14866472, 0.68260659, 0.42323183, 0.28512791]])]

In [186]:
print('Neuronas de salida | Neuronas de entrada (con bias)')
for tetas in TETAS:
    print(tetas.shape)

Neuronas de salida | Neuronas de entrada (con bias)
(4, 4)
(3, 5)
(3, 4)


In [187]:
z0 = transformar_bias(X,1).T
z0

array([[1., 1., 1., 1., 1.],
       [1., 4., 7., 1., 0.],
       [2., 5., 8., 4., 0.],
       [3., 6., 9., 7., 0.]])

In [189]:
TETAS[0].shape, TETAS[1].shape

((4, 4), (3, 5))

In [190]:
z1 = transformar_bias(np.matmul(TETAS[0], z0),1).T
z1

array([[1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00],
       [2.15285544e+00, 6.22609247e+00, 1.02993295e+01, 3.72326549e+00,
        9.90474421e-03],
       [3.80014913e+00, 7.72348214e+00, 1.16468152e+01, 7.00235280e+00,
        8.91269631e-01],
       [4.08771492e+00, 7.91862119e+00, 1.17495275e+01, 8.00637320e+00,
        8.51417028e-01],
       [4.86166252e+00, 1.10450602e+01, 1.72284580e+01, 9.40548545e+00,
        5.28618475e-01]])

In [191]:
z2 = transformar_bias(np.matmul(TETAS[1], z1),1).T
z2

array([[ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ],
       [ 5.14492184, 10.49638115, 15.84784046,  9.4603066 ,  1.2034097 ],
       [ 6.58017348, 12.79141164, 19.0026498 , 11.60737683,  1.99615908],
       [ 8.7427754 , 18.54839975, 28.35402411, 16.4396329 ,  1.62580519]])

In [192]:
#z3 = transformar_arreglo(np.matmul(TETAS[2], z2),1).T
# la ultima capa no se le agrega bias porque es la salida, ya no sera entrada de ninguna otra capa mas.
z3 = np.matmul(TETAS[2], z2)
z3

array([[11.34085437, 22.50579921, 33.67074404, 20.2098903 ,  3.18468813],
       [ 8.57173863, 17.18029678, 25.78885494, 15.40251324,  2.28683194],
       [ 8.9383704 , 18.01596266, 27.09355491, 16.20634184,  2.2785206 ]])

In [421]:
def feed_forward(T, X):
    """Return the activations of every layer of the network.

    T: sequence of weight matrices, one per transition between layers.
    X: input matrix WITHOUT the bias row (``h_teta`` adds it).

    Returns a list ``A`` where ``A[0]`` is ``X`` and ``A[i + 1]`` is
    ``h_teta(T[i], A[i])``.
    """
    A = [X]  # the activation of layer 0 is the input itself
    # Walk the weights that were passed in. Looping over the global TETAS
    # would break for any T with a different number of matrices.
    for pesos in T:
        # h_teta is the activation function (sigmoid); it adds the bias row
        # to the previous activations before applying the weights.
        A.append(h_teta(pesos, A[-1]))
    return A

In [256]:
# feed_forward takes only the weights and the inputs; Y is not part of the
# forward pass.
A = feed_forward(TETAS, X)
A

capa  0
capa  1
capa  2


[array([[1, 4, 7, 1, 0],
        [2, 5, 8, 4, 0],
        [3, 6, 9, 7, 0]]),
 array([[0.89593531, 0.99802674, 0.99996635, 0.97641474, 0.50247617],
        [0.97812192, 0.99955788, 0.99999125, 0.99909109, 0.70915211],
        [0.98349931, 0.99963623, 0.99999211, 0.99966678, 0.70086431],
        [0.9923218 , 0.99998403, 0.99999997, 0.99991774, 0.62916084]]),
 array([[0.8472152 , 0.85462628, 0.85476926, 0.85347694, 0.78258099],
        [0.90932446, 0.91226226, 0.91231815, 0.91203439, 0.86673235],
        [0.91114687, 0.91757383, 0.9176944 , 0.91676354, 0.83175893]]),
 array([[0.90745895, 0.90827799, 0.90829353, 0.90816489, 0.89838083],
        [0.80009087, 0.80095569, 0.80097198, 0.80085434, 0.78902476],
        [0.79764303, 0.79895287, 0.79897786, 0.79877419, 0.78361287]])]

In [254]:
A[3]

array([[0.90745895, 0.90827799, 0.90829353, 0.90816489, 0.89838083],
       [0.80009087, 0.80095569, 0.80097198, 0.80085434, 0.78902476],
       [0.79764303, 0.79895287, 0.79897786, 0.79877419, 0.78361287]])

In [260]:
n_obs = X.shape[1]
n_obs

5

In [357]:
TETAS

[array([[0.00990474, 0.64827585, 0.63373462, 0.0757352 ],
        [0.89126963, 0.37618299, 0.26208752, 0.66950716],
        [0.85141703, 0.27772932, 0.03914973, 0.96008971],
        [0.52861848, 0.635508  , 0.57933768, 0.8462869 ]]),
 array([[0.41818099, 0.42775091, 0.00427627, 0.89723039, 0.02509069],
        [0.87210105, 0.11351693, 0.82855083, 0.32620398, 0.20191474],
        [0.0334966 , 0.47473493, 0.89216888, 0.79920142, 0.21185361]]),
 array([[0.94771751, 0.68469799, 0.1752419 , 0.65394532],
        [0.41925688, 0.06778615, 0.43341854, 0.56638286],
        [0.14866472, 0.68260659, 0.42323183, 0.28512791]])]

In [358]:
# flatten all arrays
flat = np.concatenate([TETAS[i].flatten() for i in range(len(TETAS))])
flat

array([0.00990474, 0.64827585, 0.63373462, 0.0757352 , 0.89126963,
       0.37618299, 0.26208752, 0.66950716, 0.85141703, 0.27772932,
       0.03914973, 0.96008971, 0.52861848, 0.635508  , 0.57933768,
       0.8462869 , 0.41818099, 0.42775091, 0.00427627, 0.89723039,
       0.02509069, 0.87210105, 0.11351693, 0.82855083, 0.32620398,
       0.20191474, 0.0334966 , 0.47473493, 0.89216888, 0.79920142,
       0.21185361, 0.94771751, 0.68469799, 0.1752419 , 0.65394532,
       0.41925688, 0.06778615, 0.43341854, 0.56638286, 0.14866472,
       0.68260659, 0.42323183, 0.28512791])

In [363]:
# construir shapes de cada matriz de tetas
shapes = [(NN_ARCH[i+1], NN_ARCH[i]+1) for i in range(len(NN_ARCH)-1)]
shapes

[(4, 4), (3, 5), (3, 4)]

In [369]:
# Number of weights in each matrix. np.prod replaces the deprecated alias
# np.product, which no longer exists in NumPy 2.0.
block_size = [np.prod(shape) for shape in shapes]
block_size

[16, 15, 12]

In [368]:
accum_block_size = np.zeros(len(NN_ARCH), dtype=int)
accum_block_size

array([0, 0, 0, 0])

In [370]:
for i in range(len(block_size)):
    accum_block_size[i+1] = accum_block_size[i] + block_size[i]
accum_block_size

array([ 0, 16, 31, 43])

In [371]:
transform_tetas = [
    flat[accum_block_size[i]:accum_block_size[i+1]].reshape(*shapes[i])
    for i in range(len(shapes))
]

In [372]:
transform_tetas

[array([[0.00990474, 0.64827585, 0.63373462, 0.0757352 ],
        [0.89126963, 0.37618299, 0.26208752, 0.66950716],
        [0.85141703, 0.27772932, 0.03914973, 0.96008971],
        [0.52861848, 0.635508  , 0.57933768, 0.8462869 ]]),
 array([[0.41818099, 0.42775091, 0.00427627, 0.89723039, 0.02509069],
        [0.87210105, 0.11351693, 0.82855083, 0.32620398, 0.20191474],
        [0.0334966 , 0.47473493, 0.89216888, 0.79920142, 0.21185361]]),
 array([[0.94771751, 0.68469799, 0.1752419 , 0.65394532],
        [0.41925688, 0.06778615, 0.43341854, 0.56638286],
        [0.14866472, 0.68260659, 0.42323183, 0.28512791]])]

In [377]:
def flat_tetas(tetas_matrixes):
    """Flatten every weight matrix and join them into one 1-D array.

    The matrices are flattened row by row and concatenated in the order in
    which they appear in ``tetas_matrixes``.
    """
    aplanadas = [matriz.flatten() for matriz in tetas_matrixes]
    return np.concatenate(aplanadas)

In [378]:
flat_tetas(TETAS)

array([0.00990474, 0.64827585, 0.63373462, 0.0757352 , 0.89126963,
       0.37618299, 0.26208752, 0.66950716, 0.85141703, 0.27772932,
       0.03914973, 0.96008971, 0.52861848, 0.635508  , 0.57933768,
       0.8462869 , 0.41818099, 0.42775091, 0.00427627, 0.89723039,
       0.02509069, 0.87210105, 0.11351693, 0.82855083, 0.32620398,
       0.20191474, 0.0334966 , 0.47473493, 0.89216888, 0.79920142,
       0.21185361, 0.94771751, 0.68469799, 0.1752419 , 0.65394532,
       0.41925688, 0.06778615, 0.43341854, 0.56638286, 0.14866472,
       0.68260659, 0.42323183, 0.28512791])

In [379]:
def build_tetas_matrix(flat_tetas_array, nn_arch):
    """Rebuild the per-layer weight matrices from a flat array of weights.

    flat_tetas_array: one-dimensional array holding every weight, laid out
        matrix after matrix, each matrix row by row.
    nn_arch: number of neurons in each layer of the network.

    Returns a list with one matrix per pair of consecutive layers, each of
    shape ``(neurons in the next layer, neurons in this layer + 1)``; the
    extra column belongs to the bias neuron.

    Raises ValueError (from ``reshape``) when the flat array is too short for
    the requested architecture.
    """
    flat_tetas_array = np.asarray(flat_tetas_array)

    # The shapes come from the architecture that was passed in, not from the
    # global NN_ARCH, so any network size can be rebuilt.
    shapes = [
        (n_salida, n_entrada + 1)
        for n_entrada, n_salida in zip(nn_arch[:-1], nn_arch[1:])
    ]

    matrices = []
    inicio = 0
    for filas, columnas in shapes:
        fin = inicio + filas * columnas  # end of this matrix's segment
        matrices.append(flat_tetas_array[inicio:fin].reshape(filas, columnas))
        inicio = fin
    return matrices
    

In [380]:
build_tetas_matrix(flat, NN_ARCH)

[array([[0.00990474, 0.64827585, 0.63373462, 0.0757352 ],
        [0.89126963, 0.37618299, 0.26208752, 0.66950716],
        [0.85141703, 0.27772932, 0.03914973, 0.96008971],
        [0.52861848, 0.635508  , 0.57933768, 0.8462869 ]]),
 array([[0.41818099, 0.42775091, 0.00427627, 0.89723039, 0.02509069],
        [0.87210105, 0.11351693, 0.82855083, 0.32620398, 0.20191474],
        [0.0334966 , 0.47473493, 0.89216888, 0.79920142, 0.21185361]]),
 array([[0.94771751, 0.68469799, 0.1752419 , 0.65394532],
        [0.41925688, 0.06778615, 0.43341854, 0.56638286],
        [0.14866472, 0.68260659, 0.42323183, 0.28512791]])]

In [350]:
t = []
for i in range(len(TETAS)): # parecido a inflate matrix
    print('Shape ', TETAS[i].shape)
    t = np.append(t, TETAS[i])
    #print(TETAS[i].flatten().reshape(
    #    NN_ARCH[i+1],
    #    NN_ARCH[i]+1))
t



Shape  (4, 4)
Shape  (3, 5)
Shape  (3, 4)


array([0.00990474, 0.64827585, 0.63373462, 0.0757352 , 0.89126963,
       0.37618299, 0.26208752, 0.66950716, 0.85141703, 0.27772932,
       0.03914973, 0.96008971, 0.52861848, 0.635508  , 0.57933768,
       0.8462869 , 0.41818099, 0.42775091, 0.00427627, 0.89723039,
       0.02509069, 0.87210105, 0.11351693, 0.82855083, 0.32620398,
       0.20191474, 0.0334966 , 0.47473493, 0.89216888, 0.79920142,
       0.21185361, 0.94771751, 0.68469799, 0.1752419 , 0.65394532,
       0.41925688, 0.06778615, 0.43341854, 0.56638286, 0.14866472,
       0.68260659, 0.42323183, 0.28512791])

In [275]:
y = np.random.randint(0,10, 20).reshape(20,1)
y

array([[5],
       [5],
       [6],
       [3],
       [4],
       [0],
       [7],
       [2],
       [2],
       [0],
       [7],
       [0],
       [6],
       [4],
       [2],
       [3],
       [1],
       [8],
       [7],
       [1]])

In [276]:
Y = (y == np.arange(10)).astype(int)
Y.transpose()

array([[0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
       [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [277]:
X.shape

(3, 5)

In [300]:
A[-1]

array([[0.90745895, 0.90827799, 0.90829353, 0.90816489, 0.89838083],
       [0.80009087, 0.80095569, 0.80097198, 0.80085434, 0.78902476],
       [0.79764303, 0.79895287, 0.79897786, 0.79877419, 0.78361287]])

In [294]:
NN_LAYERS = len(NN_ARCH)
NN_LAYERS

4

In [299]:
Y

array([[1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0]])

In [390]:
d = [*range(NN_LAYERS-1), A[-1] - Y]
d

[0,
 1,
 2,
 array([[-0.09254105,  0.90827799,  0.90829353,  0.90816489,  0.89838083],
        [ 0.80009087, -0.19904431,  0.80097198,  0.80085434,  0.78902476],
        [ 0.79764303,  0.79895287, -0.20102214,  0.79877419,  0.78361287]])]

In [320]:
TETAS[2][:,1:].shape

(3, 3)

In [327]:
A[2].shape

(3, 5)

In [437]:
for i in range(NN_LAYERS-2, -1, -1):
    print(i)

2
1
0


In [342]:
for i in range(NN_LAYERS-2, 0, -1):
    print('Shape TETAS[i][:,1:] ', TETAS[i][:, 1:].T.shape)
    print('Shape Delta[i+1] ', D[i+1].shape)
    print('Shape A[i] ', A[i].shape)
    print('\n')
    D[i] = np.matmul(
        TETAS[i][:, 1:].T, # (3, 3) Excluyo la neurona del bias.
        D[i+1] # (3, m) m -> cantidad obs.
    ) * A[i] * (1 - A[i])

Shape TETAS[i][:,1:]  (3, 3)
Shape Delta[i+1]  (3, 5)
Shape A[i]  (3, 5)


Shape TETAS[i][:,1:]  (4, 3)
Shape Delta[i+1]  (3, 5)
Shape A[i]  (4, 5)




In [389]:
D

[0, 0, 0]

In [307]:
TETAS[0][:,1:]

array([[0.64827585, 0.63373462, 0.0757352 ],
       [0.37618299, 0.26208752, 0.66950716],
       [0.27772932, 0.03914973, 0.96008971],
       [0.635508  , 0.57933768, 0.8462869 ]])

In [381]:
# One accumulator slot per weight matrix. `tetas` is only the leftover loop
# variable of an earlier cell (the last matrix), so iterate over TETAS.
D = [0 for _ in TETAS]
D

[0, 0, 0]

In [387]:
TETAS

[array([[0.00990474, 0.64827585, 0.63373462, 0.0757352 ],
        [0.89126963, 0.37618299, 0.26208752, 0.66950716],
        [0.85141703, 0.27772932, 0.03914973, 0.96008971],
        [0.52861848, 0.635508  , 0.57933768, 0.8462869 ]]),
 array([[0.41818099, 0.42775091, 0.00427627, 0.89723039, 0.02509069],
        [0.87210105, 0.11351693, 0.82855083, 0.32620398, 0.20191474],
        [0.0334966 , 0.47473493, 0.89216888, 0.79920142, 0.21185361]]),
 array([[0.94771751, 0.68469799, 0.1752419 , 0.65394532],
        [0.41925688, 0.06778615, 0.43341854, 0.56638286],
        [0.14866472, 0.68260659, 0.42323183, 0.28512791]])]

In [408]:
np.vstack((np.ones((1,m), float),A[2])).shape

(4, 5)

In [404]:
m = X.shape[1]
m

5

In [393]:
d[3].shape, A[2].shape, TETAS[2].shape

((3, 5), (3, 5), (3, 4))

In [410]:
np.matmul(
            d[3], # diferencia capa siguiente (3,5)
            np.vstack((np.ones((1,m), float), A[2])).T # ()
        )

array([[3.53057619, 2.952371  , 3.18002391, 3.16243826],
       [2.99189764, 2.49337336, 2.69098286, 2.6718824 ],
       [2.97796082, 2.48172977, 2.67846689, 2.65945536]])

In [415]:
d

[0,
 1,
 2,
 array([[-0.09254105,  0.90827799,  0.90829353,  0.90816489,  0.89838083],
        [ 0.80009087, -0.19904431,  0.80097198,  0.80085434,  0.78902476],
        [ 0.79764303,  0.79895287, -0.20102214,  0.79877419,  0.78361287]])]

In [414]:
np.divide(d, m)

array([0.0, 0.2, 0.4,
       array([[-0.01850821,  0.1816556 ,  0.18165871,  0.18163298,  0.17967617],
       [ 0.16001817, -0.03980886,  0.1601944 ,  0.16017087,  0.15780495],
       [ 0.15952861,  0.15979057, -0.04020443,  0.15975484,  0.15672257]])],
      dtype=object)

In [453]:
layers = len(NN_ARCH)
d = [*range(layers - 1), A[-1] - Y] # -1 porque no tomo en cuenta la primera capa
d

[0,
 1,
 2,
 array([[-0.09254105,  0.90827799,  0.90829353,  0.90816489,  0.89838083],
        [ 0.80009087, -0.19904431,  0.80097198,  0.80085434,  0.78902476],
        [ 0.79764303,  0.79895287, -0.20102214,  0.79877419,  0.78361287]])]

In [460]:
def back_propagation(T, nn_arch, X, Y):
    """Compute the gradient of the cost for the weights of every layer.

    T: flat array with all the weights of the network.
    nn_arch: number of neurons in each layer.
    X: inputs without the bias row; each column is one observation.
    Y: expected outputs, same shape as the activations of the last layer.

    Returns a 1-D object array with one gradient matrix per weight matrix,
    each with the same shape as the weight matrix it belongs to.
    """
    m = X.shape[1]  # each column is one observation
    layers = len(nn_arch)
    pesos = build_tetas_matrix(T, nn_arch)

    # Step 2.2: activations of every neuron in every layer.
    A = feed_forward(pesos, X)

    # d[l] is the error of layer l; only the output error is known up front.
    d = [None] * (layers - 1) + [A[-1] - Y]

    # Step 1: one gradient slot per weight matrix, sized from the matrices
    # built above rather than from the unrelated global `tetas`. An explicit
    # object array is used because the matrices have different shapes, and
    # recent NumPy versions refuse to build an array from such a ragged list.
    D = np.empty(len(pesos), dtype=object)

    for l in range(layers - 2, -1, -1):
        # Steps 2.5, 2.6 and 3: error of the next layer times the activations
        # of this layer (with the bias row), averaged over the observations.
        activaciones_con_bias = np.vstack((np.ones((1, m), float), A[l]))
        D[l] = np.matmul(d[l + 1], activaciones_con_bias.T) / m

        # Step 2.4: propagate the error one layer back. The input layer
        # (l == 0) has no error term, so nothing is computed for it.
        if l > 0:
            d[l] = np.matmul(
                pesos[l][:, 1:].T,  # bias column excluded
                d[l + 1],
            ) * A[l] * (1 - A[l])

    return D
    

In [462]:
# Previo a cambios
back_propagation(flat, NN_ARCH, X, Y)

array([array([[1.08011471e-02, 1.80862741e-03, 4.60575958e-03, 7.40289175e-03],
       [1.02161033e-02, 4.42318820e-04, 9.15723448e-04, 1.38912808e-03],
       [1.53348720e-02, 4.60577264e-04, 9.11728106e-04, 1.36287895e-03],
       [2.92075022e-03, 3.67106846e-05, 7.44679322e-05, 1.12225180e-04]]),
       array([[0.12741184, 0.10481613, 0.11515646, 0.11491141, 0.11211483],
       [0.05708388, 0.0460527 , 0.05124362, 0.05115208, 0.04987841],
       [0.09038983, 0.07135019, 0.079909  , 0.0796833 , 0.07725646]]),
       array([[0.70611524, 0.5904742 , 0.63600478, 0.63248765],
       [0.59837953, 0.49867467, 0.53819657, 0.53437648],
       [0.59559216, 0.49634595, 0.53569338, 0.53189107]])], dtype=object)

In [None]:
# Luego de cambios
back_propagation(flat, NN_ARCH, X, Y)

In [461]:
for tetas_matrix in back_propagation(flat, NN_ARCH, X, Y):
    print(tetas_matrix.shape)

(4, 4)
(3, 5)
(3, 4)


In [441]:
for tetas_matrix in TETAS:
    print(tetas_matrix.shape)

(4, 4)
(3, 5)
(3, 4)


In [424]:
# Update every weight matrix with its own gradient. TETAS is a Python list, so
# the subtraction is done matrix by matrix instead of in place on the list.
TETAS = [
    teta - gradiente_capa
    for teta, gradiente_capa in zip(TETAS, back_propagation(flat, NN_ARCH, X, Y))
]

array([[0.70611524, 0.5904742 , 0.63600478, 0.63248765],
       [0.59837953, 0.49867467, 0.53819657, 0.53437648],
       [0.59559216, 0.49634595, 0.53569338, 0.53189107]])

# Resumen de funciones

In [None]:
def flattenize_tetas(tetas_matrixes):
    """Flatten every weight matrix and join them into one 1-D array.

    The matrices are flattened row by row and concatenated in the order in
    which they appear in ``tetas_matrixes``.
    """
    aplanadas = [matriz.flatten() for matriz in tetas_matrixes]
    return np.concatenate(aplanadas)

In [None]:
def build_tetas_matrix(flat_tetas_array, nn_arch):
    """Rebuild the per-layer weight matrices from a flat array of weights.

    flat_tetas_array: one-dimensional array holding every weight, laid out
        matrix after matrix, each matrix row by row.
    nn_arch: number of neurons in each layer of the network.

    Returns a list with one matrix per pair of consecutive layers, each of
    shape ``(neurons in the next layer, neurons in this layer + 1)``; the
    extra column belongs to the bias neuron.

    Raises ValueError (from ``reshape``) when the flat array is too short for
    the requested architecture.
    """
    flat_tetas_array = np.asarray(flat_tetas_array)

    # The shapes come from the architecture that was passed in, not from the
    # global NN_ARCH, so any network size can be rebuilt.
    shapes = [
        (n_salida, n_entrada + 1)
        for n_entrada, n_salida in zip(nn_arch[:-1], nn_arch[1:])
    ]

    matrices = []
    inicio = 0
    for filas, columnas in shapes:
        fin = inicio + filas * columnas  # end of this matrix's segment
        matrices.append(flat_tetas_array[inicio:fin].reshape(filas, columnas))
        inicio = fin
    return matrices
    

In [None]:
def feed_forward(T, X):
    """Return the activations of every layer of the network.

    T: sequence of weight matrices, one per transition between layers.
    X: input matrix WITHOUT the bias row (``h_teta`` adds it).

    Returns a list ``A`` where ``A[0]`` is ``X`` and ``A[i + 1]`` is
    ``h_teta(T[i], A[i])``.
    """
    A = [X]  # a0 = z0: the activation of layer 0 is the input itself
    # Walk the weights that were passed in. Looping over the global TETAS
    # would break for any T with a different number of matrices.
    for pesos in T:
        # h_teta is the activation function (sigmoid); it adds the bias row
        # to the previous activations before applying the weights.
        A.append(h_teta(pesos, A[-1]))
    return A

In [None]:
def back_propagation(T, nn_arch, X, Y):
    """Compute the gradient of the cost for the weights of every layer.

    T: flat array with all the weights of the network.
    nn_arch: number of neurons in each layer.
    X: inputs without the bias row; each column is one observation.
    Y: expected outputs, same shape as the activations of the last layer.

    Returns a 1-D object array with one gradient matrix per weight matrix,
    each with the same shape as the weight matrix it belongs to.
    """
    m = X.shape[1]  # each column is one observation
    layers = len(nn_arch)
    pesos = build_tetas_matrix(T, nn_arch)

    # Step 2.2: activations of every neuron in every layer.
    A = feed_forward(pesos, X)

    # d[l] is the error of layer l; only the output error is known up front.
    d = [None] * (layers - 1) + [A[-1] - Y]

    # Step 1: one gradient slot per weight matrix, sized from the matrices
    # built above rather than from the unrelated global `tetas`. An explicit
    # object array is used because the matrices have different shapes, and
    # recent NumPy versions refuse to build an array from such a ragged list.
    D = np.empty(len(pesos), dtype=object)

    # The loop runs down to l == 0 so that the weights leaving the input layer
    # also get their gradient; stopping at l == 1 leaves that entry empty.
    for l in range(layers - 2, -1, -1):
        # Steps 2.5, 2.6 and 3: error of the next layer times the activations
        # of this layer (with the bias row), averaged over the observations.
        activaciones_con_bias = np.vstack((np.ones((1, m), float), A[l]))
        D[l] = np.matmul(d[l + 1], activaciones_con_bias.T) / m

        # Step 2.4: propagate the error one layer back. The input layer
        # (l == 0) has no error term, so nothing is computed for it.
        if l > 0:
            d[l] = np.matmul(
                pesos[l][:, 1:].T,  # bias column excluded
                d[l + 1],
            ) * A[l] * (1 - A[l])

    return D