In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import numpy as np
import numpy.random
import math
import sys

In [2]:
np.random.seed(12)

In [3]:
def make_classification(r0=1,r1=3,k=1000):
    """
    Creaci ́on de los datos
    """
    X1 = [np.array([r0*np.cos(t),r0*np.sin(t)]) for t in range(0,k)]
    X2 = [np.array([r1*np.cos(t),r1*np.sin(t)]) for t in range(0,k)]
    X = np.concatenate((X1,X2))
    n,d = X.shape
    Y = np.zeros((2*k,2))
    Y[0:k] += [1,0]
    Y[k:] += [0,1]
    noise = np.array([np.random.normal(0,1,2) for i in range(n)])
    X += 0.5*noise
    return X,Y

In [4]:
X, Y = make_classification(k=10)
x_train, x_eval, y_train, y_eval = train_test_split(X, Y, test_size=0.3)
x_train,y_train


(array([[ 0.3607577 ,  0.53242367],
        [-1.82562063,  1.62122554],
        [-0.93510202,  2.6111466 ],
        [-1.42589537,  1.01701671],
        [ 1.42508044,  0.5959117 ],
        [-0.98742896,  0.08100617],
        [ 0.5867844 , -2.89637741],
        [ 2.13475472,  2.00758583],
        [-0.01524927, -0.72269578],
        [ 2.98799883, -1.0304259 ],
        [ 1.23649292, -0.34071294],
        [-1.05713456,  0.6791072 ],
        [ 1.50814835, -0.8869999 ],
        [-0.03957542,  0.14193676]]),
 array([[1., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.]]))

In [5]:
class Node():
    """Nodo super clase con funciones generales"""
    def __init__(self, values):
    # Agrega los par ́ametros necesarios
        self.values = values
        self.grads = None
        return
        
    def __call__(self, *kwargs):
        return self.forward(*kwargs)

    def __str__(self):
        return str(self.values) #Valor n ́um del nodo
        
    def backward(self, consumer_grad=1):
        self.grads = consumer_grad
# Agrega otros m ́etodos necesarios

In [139]:
##Clases basicas

class PreActivation(Node):
    # Pre-activación wx+b
    def __init__(self, input_size, output_size, parent = None):
        # Generamos una matriz aleatoria de tamaño input x output 
        # Y la trasponemos para usarla como matriz
        self.w = np.random.uniform(0,1,(input_size, output_size)).T
        
        # Generamos el vector de sesgo del tamaño de salida
        self.b = np.random.uniform(0,1, output_size).T # Bias
        print(self.w)
        print(self.b)
        # Guardamos estos parámetros
        self.input_size = input_size
        self.output_size = output_size
        self.parent = parent

        return None

    def forward(self):
        self.values = np.dot(self.w, self.parent.values) + self.b
        
        return self

    def backward(self, consumer_grad=1):
        # La función backward recibe el gradiente de los nodos hijos y regresa el gradiente
        self.grad_w = np.dot(consumer_grad, self.values.T)
        self.grad_b = consumer_grad

        self.update()
        return self  

    def update(self):
        
        return

In [7]:
class Tanh(Node):
    # Activación ReLU

    def __init__(self, preactivation_node):
        # Usualmente un nodo de preactivación
        self.parent = preactivation_node
        return None

    def function(self, x):
        return np.tanh(x)

    def forward(self):
        self.values =  self.function(self.parent.values)

        return self

    def backward(self, consumer_grad):
        m = self.preactivation.output_size
        
        dh_da = np.zeros(m)
        for i in range(m):
            dh_da[i] = 1 - self.value[i]**2

        d_k = dh_da * consumer_grad
        self.grad = d_k
        print(f"Tanh d_k = {d_k}")
        # Calculamos la suma d_k+1 * W_q,i para pasarla al siguiente nodo
        n = self.preactivation.input_size
        d_kW = np.zeros(m)
        for i in range(n):
            for q in range(m):
                d_kW[q] += d_k[q] * self.preactivation.w[q][i]

        self.parent.backward(d_kW)

        return self
        

In [8]:
class ReLU(Node):
    # Activación ReLU

    def __init__(self, preactivation_node):
        # Nodo de preactivación
        self.parent = preactivation_node
        return None

    def function(self, x):
            return x * (x >= 0)

    def forward(self):
        self.values = self.function(self.parent.values)
        
        return self

    def backward(self, consumer_grad):
        m = self.preactivation.output_size

        dh_da = np.zeros(m)
        for i in range(m):
            # Calculamos la derivada de ReLu respecto a su preactivación a
            if self.preactivation.value[i] >= 0:
                dh_da[i] = 1
            else:
                dh_da[i] = 0
        
        d_k = dh_da * consumer_grad
        self.grad = d_k
        print(f"ReLu d_k = {d_k}")
        # Calculamos la suma d_k+1 * W_q,i para pasarla al siguiente nodo

        n = self.preactivation.input_size
        d_kW = np.zeros(n)
        for i in range(n):
            for q in range(m):
                d_kW[q] += d_k[q] * self.preactivation.w[q][i]
            
        self.parent.backward(d_kW)
        return self
        

In [140]:
def kronecker_delta(x,y):
    if x == y:
        return 1
    else:
        return 0


class Softmax(Node):
    # Activación ReLU

    def __init__(self, preactivation_node):
        # Nodo de preactivación
        self.parent = preactivation_node
        return None

    def function(self, x):
        # Recibimos sólo un arreglo x = [x_1, x_2] para evaluar en la softmax
        S = sum( [np.exp(x_i) for x_i in x] )
        return np.exp(x) / S

    def forward(self):
        # self.parent = x
        # Aplicamos softmax a cada renglón de la matriz de preactivación values
        # Esto nos da una matriz de n x m siendo n el número de datos y m las capas ocultas
        self.values = self.function(self.parent.values)
        return self

    def derivative(self, X):
        # En este caso X son todos los datos

        # Tomamos la dimensión del problema
        classes = len(X[0]) 

        # inicializamos el arreglo de derivadas
        derivatives = []

        # Calculamos la derivada de la softmax para cada valor s
        for s in X:
            DS = np.zeros((classes, classes)) # Inicializamos la matriz derivada
            for i in range(classes):
                for j in range(classes):
                    # Recordemos que s ya son softmax
                    DS[i][j] = s[i] * (kronecker_delta(i,j) - s[j])

            # Agregamos esta matriz al arreglo
            derivatives.append( DS )

        # Las derivadas es un arreglo de matrices cuadradas del tamaño de las salidas de la capa
        derivatives = np.array(derivatives) # Lo hacemos tipo numpy
        return derivatives


    def backward(self, consumer_grad):
        d_k = np.multiply(consumer_grad, self.derivative(self.values))
        

        # dh_da = self.derivative(self.values)
        # print(f"dh_da = {dh_da}")
    
        # d_k = []
        # for i in range(len(dh_da)):
        #     d_k.append( np.dot(dh_da[i], consumer_grad[i]) )

        # d_k = np.array(d_k)
        # # d_k = np.dot(dh_da.T, consumer_grad)
        # self.grad = d_k
        print(f"d_k = {d_k}")


        # Calculamos la suma d_k+1 * W_q,i para pasarla al siguiente nodo

        # n = self.parent.input_size
        # d_kW = np.zeros((n,len(d_k)))
        # for i in range(n):
        #     for q in range(len(d_k)):
        #         d_kW[q][:] += np.dote(d_k[q], self.parent.w[q][i])

        # print(d_kW)
        # self.parent.backward(d_kW)

        return self

In [141]:
class CrossEntropy(Node):
    # Error de clasificación binario
    def __init__(self, output_node, classes = [0,1]):
        self.parent = output_node
        self.classes = classes
        return None

    def forward(self, Y_real):
        # Definido por casos para evitar infinitos innecesarios
        epsilon = sys.float_info.epsilon

        self.real_outputs = Y_real
        add = 0
        
        # E = Σ_c y_c log(y_pred_c), donde c son las clases

        # Sumamos sobre todos los datos
        for y_pred, y_real in zip(self.parent.values, Y_real):
            # Sumamos sobre todas las clases
            for c in self.classes:
                add -= y_real[c]*np.log(y_pred[c] + epsilon)
        
        self.value = add
        
        return self
    
    def backward(self, consumer_grad = 1):
        # Esto hace la división -y_real/y_pred pero a cada valor,
        # Esto da una matriz de n x 2 donde n es el número de datos
        
        dL_df = - self.real_outputs / self.parent.values
        # dL_df = - self.real_outputs + self.parent.values

        self.grad = dL_df * consumer_grad
        # print(f"dL_df = {self.grad}")
        self.parent.backward(self.grad)
        return

In [142]:
# Arquitectura de la Red
initial_node = Node(x_train)

pre_tanh = PreActivation(2, 4, initial_node)
tanh_layer = Tanh( pre_tanh )
pre_relu = PreActivation(4, 3, tanh_layer)
relu_layer = ReLU(pre_relu)
pre_soft = PreActivation(3, 2, relu_layer)
softmax_layer = Softmax(pre_soft)
error = CrossEntropy(softmax_layer)


[[0.81465939 0.56870448]
 [0.84753015 0.36016433]
 [0.75472427 0.6222704 ]
 [0.20482329 0.9801781 ]]
[0.79918599 0.92758313 0.37418154 0.58505709]
[[1.31800684e-01 1.92974832e-01 1.47200029e-01 8.96373524e-01]
 [4.44618356e-04 9.03643223e-01 9.42721860e-01 1.44522347e-01]
 [7.07492575e-01 3.19596741e-01 1.90922764e-01 4.38372896e-01]]
[0.86187837 0.85397894 0.71046179]
[[0.91556179 0.80581815 0.62590033]
 [0.79514374 0.4433964  0.93225286]]
[0.16343504 0.7490742 ]


In [143]:
p_0 = pre_tanh()
l_0 = tanh_layer()
print(l_0)

p_1 = pre_relu()
l_1 = relu_layer()
print(l_1)

p_2 = pre_soft()
l_2= softmax_layer()
print(l_2)





error(y_train).value

[[ 0.88445618  0.89065725  0.75209704  0.82770959]
 [ 0.22974972 -0.03576252  0.00518195  0.94682851]
 [ 0.90910929  0.79152356  0.85998227  0.99456776]
 [ 0.2126535   0.08518005 -0.06900706  0.85908935]
 [ 0.98005848  0.98197369  0.94889249  0.89785563]
 [ 0.0408136   0.11931182 -0.31009211  0.431883  ]
 [-0.35396321  0.36420787 -0.75534591 -0.97235239]
 [ 0.99872843  0.99802595  0.99690383  0.99495607]
 [ 0.35902196  0.57460425 -0.0868205  -0.12576742]
 [ 0.99001446  0.99585845  0.96317663  0.18491594]
 [ 0.92356414  0.95201228  0.79883234  0.46553936]
 [ 0.31329304  0.26940292 -0.00107526  0.77557831]
 [ 0.90928356  0.95505074  0.74448373  0.02453819]
 [ 0.68984794  0.73758508  0.40752194  0.61447182]]
[[ 2.0029704   2.48784942  2.12729702]
 [ 1.73473306  0.96348755  1.27763173]
 [ 2.15253734  2.5240994   2.20679995]
 [ 1.66625096  0.99014901  1.2515623 ]
 [ 2.12503797  2.76607045  2.29244223]
 [ 1.23176472  0.73189854  0.90759093]
 [-0.          0.33032778  0.00596943]
 [ 2.2247019

np.float64(46.84968892406513)

In [144]:
x_train[0], y_train[0]

(array([0.3607577 , 0.53242367]), array([1., 0.]))

In [145]:
error.backward()

ValueError: operands could not be broadcast together with shapes (14,2) (14,2,2) 