# Feed Forward Multiclass Classification Neural Networks

In [1]:
from typing import List, Callable, Any # para docstrings y typing
import numpy as np

In [212]:
# funcion sigmoide
def h_teta(matriz_t, matriz_x): # esta funcion es sigmoide(h_zeta)
    #zeta_teta = np.matmul(vector_t.T, vector_x)
    zeta_teta = np.matmul(matriz_t, matriz_x)
    #print('Zeta teta: ', zeta_teta) # si zeta >= 0; entonces clase positiva.
    #[5,4,-1] = 5 + 4x1-x2 = 0
    # x2 =  5 + 4x1

    return ((1 + np.power(np.e, -1*zeta_teta))**-1) # (n,1)

def jota_teta(x, y, hipotesis, m, tetas):
    #h = hipotesis(tetas, x)
    #print('hipotesis: {}\n'.format(-np.log(h)))
    #print('y shape: {}\n'.format(y.shape))
    #print('h shape: {}\n'.format(h.shape))
    return (-1/float(m)) * (np.matmul(y.T, np.log(hipotesis) + np.matmul((1-y.T), np.log(1 - hipotesis))))

def gradiente(
        x,
        y,
        h,
        m,
        tetas,
    ):
    return (((np.matmul((h - y).T, x).T) / float(m)))

def descenso_gradiente(
        x_set: List[List[float]],
        y_set: List[float],
        #tetas_iniciales: List[float],
        hipotesis: Callable[[Any], Any],#[[List[float], List[float]], List[float]], # Callable[[parametros], resultado]
        gradiente: Callable[[Any], Any],#[[List[float], List[float], List[float], float], List[float]], # Callable[[parametros], resultado]
        max_iters: int = 10000,
        alpha: float = 0.0001,
        _lambda: float = 0.0,
        grado: int = 1
    ) -> List[float]:
    
    """Esta función ejecuta el descenso en gradiente para encontrar las tetas que minimizan el costo."""
    
    unos = np.ones(x_set.shape[1]) # [1] ya que X viene en formato de filas, por lo que cada columna es una observacion.
    
    X = transformar_arreglo(x_set, grado)
    
    m, n = X.shape
    #y_set = y_set.reshape(m,1) # convertir a vector columna.
    #print(y_set[-10:])
    tetas = np.random.rand(n,1)

    for i in range(max_iters):
        h = hipotesis(X, tetas) # vector solucion (100,1)
        #print((h - ys).shape) # (100,1) - (100,1)
        tetas -= alpha * gradiente(X, y_set, h, m, tetas) 
    
    #costo = jota_teta(y_set, h, m)
    #y_pred = np.matmul(X,tetas)

    #return y_pred, tetas, costo.sum()
    return tetas # retorno X, ya que incluye la col de uno's, la cual será útil en cross validation.

def cross_validate(x_train, y_train, x_test, y_test, tetas):
    """ Calculo la validación cruzada para los tetas resultantes del train set sobre el test set."""
    
    m = x_train.shape[0]
    h_train = h_teta(x_train, tetas)
    h_test = h_teta(x_test, tetas)
    costo_train = jota_teta(x_train, y_train, h_train, m, tetas)
    costo_test = jota_teta(x_train, y_test, h_test, m, tetas)
    
    return [(costo_train, costo_test), (h_train, h_test)]

def transformar_bias(x_set, grado):
    
    unos = np.ones(x_set.shape[1]) # [1] ya que X viene en formato de filas, por lo que cada columna es una observacion.
    
    if grado == 1:
        X = np.vstack(
            (
            unos,
            x_set,
            #-x_set**2
            )
        ).T # Se transpuso la matriz para tener la columna de unos y asi calcular teta_0
    elif grado == 2:
        X = np.vstack(
            (
            unos,
            x_set,
            x_set**2
            )
        ).T
    elif grado == -2:
        X = np.vstack(
            (
            unos,
            x_set,
            -x_set**2
            )
        ).T
        
    return X

In [184]:
#X = np.array([[1,2,3],[4,5,6],[7,8,9], [1,4,7]])
X = np.array([
    [1,4,7,1,0], # cada fila son los valores para todas las observaciones de una feature
    [2,5,8,4,0],
    [3,6,9,7,0],
])
#Y = np.array([[1,0,0], [0,1,0],[0,0,1], [0,0,0]])
Y = np.array([
    [1,0,0,0,0], # cada fila son los valores para todas las observaciones de una feature
    [0,1,0,0,0],
    [0,0,1,0,0],
])
#X = np.array([[1],[2],[3]])
#Y = np.array([[1,0,0]])

In [174]:
X.shape # 4 observaciones, 3 features.
Y.shape

(3, 5)

In [175]:
NN_HIDDEN_LAYERS = 2
NN_ARCH = [X.shape[0], 4, 3, Y.shape[0]]
NN_ARCH # Cuantas neuronas deseo que tenga cada capa.

[3, 4, 3, 3]

In [162]:
np.random.rand(4,4)

array([[0.30975814, 0.7022761 , 0.67219334, 0.16710222],
       [0.29845537, 0.84663045, 0.5778933 , 0.56890888],
       [0.24952919, 0.34340312, 0.44557254, 0.80503592],
       [0.90758975, 0.15634901, 0.34210286, 0.34536396]])

In [185]:
TETAS = [
    np.random.rand(
        NN_ARCH[i+1], # cantidad de neuronas en siguiente capa.
        NN_ARCH[i]+1 # + 1 por la neurona del bias
    ) for i in range(len(NN_ARCH)-1) # No considero la última capa.
]
TETAS

[array([[0.00990474, 0.64827585, 0.63373462, 0.0757352 ],
        [0.89126963, 0.37618299, 0.26208752, 0.66950716],
        [0.85141703, 0.27772932, 0.03914973, 0.96008971],
        [0.52861848, 0.635508  , 0.57933768, 0.8462869 ]]),
 array([[0.41818099, 0.42775091, 0.00427627, 0.89723039, 0.02509069],
        [0.87210105, 0.11351693, 0.82855083, 0.32620398, 0.20191474],
        [0.0334966 , 0.47473493, 0.89216888, 0.79920142, 0.21185361]]),
 array([[0.94771751, 0.68469799, 0.1752419 , 0.65394532],
        [0.41925688, 0.06778615, 0.43341854, 0.56638286],
        [0.14866472, 0.68260659, 0.42323183, 0.28512791]])]

In [186]:
print('Neuronas de salida | Neuronas de entrada (con bias)')
for tetas in TETAS:
    print(tetas.shape)

Neuronas de salida | Neuronas de entrada (con bias)
(4, 4)
(3, 5)
(3, 4)


In [187]:
z0 = transformar_bias(X,1).T
z0

array([[1., 1., 1., 1., 1.],
       [1., 4., 7., 1., 0.],
       [2., 5., 8., 4., 0.],
       [3., 6., 9., 7., 0.]])

In [189]:
TETAS[0].shape, TETAS[1].shape

((4, 4), (3, 5))

In [190]:
z1 = transformar_bias(np.matmul(TETAS[0], z0),1).T
z1

array([[1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00],
       [2.15285544e+00, 6.22609247e+00, 1.02993295e+01, 3.72326549e+00,
        9.90474421e-03],
       [3.80014913e+00, 7.72348214e+00, 1.16468152e+01, 7.00235280e+00,
        8.91269631e-01],
       [4.08771492e+00, 7.91862119e+00, 1.17495275e+01, 8.00637320e+00,
        8.51417028e-01],
       [4.86166252e+00, 1.10450602e+01, 1.72284580e+01, 9.40548545e+00,
        5.28618475e-01]])

In [191]:
z2 = transformar_bias(np.matmul(TETAS[1], z1),1).T
z2

array([[ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ],
       [ 5.14492184, 10.49638115, 15.84784046,  9.4603066 ,  1.2034097 ],
       [ 6.58017348, 12.79141164, 19.0026498 , 11.60737683,  1.99615908],
       [ 8.7427754 , 18.54839975, 28.35402411, 16.4396329 ,  1.62580519]])

In [192]:
#z3 = transformar_arreglo(np.matmul(TETAS[2], z2),1).T
# la ultima capa no se le agrega bias porque es la salida, ya no sera entrada de ninguna otra capa mas.
z3 = np.matmul(TETAS[2], z2)
z3

array([[11.34085437, 22.50579921, 33.67074404, 20.2098903 ,  3.18468813],
       [ 8.57173863, 17.18029678, 25.78885494, 15.40251324,  2.28683194],
       [ 8.9383704 , 18.01596266, 27.09355491, 16.20634184,  2.2785206 ]])

In [227]:
def feed_forward(T, X, Y):
    """ Voy a devolver una matriz con los valores de activación para cada capa."""
    # X no tiene que entrar con bias.
    A = [X] # z1 es igual al vector de entradas
    for i in range(len(TETAS)):
        A.append(
            h_teta(
                T[i],
                A[i]
                
            )
        )
    return A
    

In [228]:
feed_forward(TETAS, X, Y)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 5 is different from 4)

In [232]:
input_x = np.matmul(TETAS[0], transformar_bias(X,1).T)
input_x

array([[2.15285544e+00, 6.22609247e+00, 1.02993295e+01, 3.72326549e+00,
        9.90474421e-03],
       [3.80014913e+00, 7.72348214e+00, 1.16468152e+01, 7.00235280e+00,
        8.91269631e-01],
       [4.08771492e+00, 7.91862119e+00, 1.17495275e+01, 8.00637320e+00,
        8.51417028e-01],
       [4.86166252e+00, 1.10450602e+01, 1.72284580e+01, 9.40548545e+00,
        5.28618475e-01]])

In [234]:
h_z1 = np.matmul(TETAS[0], input_x)
h_z1

array([[ 5.44359386, 10.92341909, 16.40324432, 10.36257645,  1.15749416],
       [ 7.67458307, 17.92468844, 28.17479382, 14.34800981,  0.92116791],
       [ 7.71605569, 18.36029911, 29.00454253, 14.45836742,  0.79681868],
       [10.03559289, 22.13440756, 34.23322223, 19.01637089,  1.51226567]])

In [235]:
((1 + np.power(np.e, -1*h_z1))**-1)

array([[0.9956947 , 0.99998197, 0.99999992, 0.99996841, 0.76087709],
       [0.99953573, 0.99999998, 1.        , 0.99999941, 0.71528001],
       [0.99955458, 0.99999999, 1.        , 0.99999947, 0.68929355],
       [0.99995619, 1.        , 1.        , 0.99999999, 0.81939674]])