In [420]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import numpy as np
import numpy.random
import math
import sys

In [421]:
def make_classification(r0=1,r1=3,k=1000):
    """
    Creaci ́on de los datos
    """
    X1 = [np.array([r0*np.cos(t),r0*np.sin(t)]) for t in range(0,k)]
    X2 = [np.array([r1*np.cos(t),r1*np.sin(t)]) for t in range(0,k)]
    X = np.concatenate((X1,X2))
    n,d = X.shape
    Y = np.zeros((2*k,2))
    Y[0:k] += [1,0]
    Y[k:] += [0,1]
    noise = np.array([np.random.normal(0,1,2) for i in range(n)])
    X += 0.5*noise
    return X,Y

In [422]:
X, Y = make_classification(k=10)
x_train, x_eval, y_train, y_eval = train_test_split(X, Y, test_size=0.3)


In [423]:
class Node():
    """Nodo super clase con funciones generales"""
    def __init__(self, value):
    # Agrega los par ́ametros necesarios
        self.value = value
        self.grad = None
        return
        
    def __call__(self, *kwargs):
        return self.forward(*kwargs)

    def __str__(self):
        return str(self.value) #Valor n ́um del nodo
        
    def backward(self, consumer_grad=1):
        self.grad = consumer_grad
# Agrega otros m ́etodos necesarios

In [424]:
##Clases basicas

class PreActivation(Node):
    # Pre-activación wx+b
    def __init__(self, input_size, output_size):
        # Generamos una matriz aleatoria de tamaño input x output 
        # Y la trasponemos para usarla como matriz
        self.w = np.random.uniform(0,1,(input_size, output_size)).T
        
        # Generamos el vector de sesgo del tamaño de salida
        self.b = np.random.uniform(0,1, output_size).T # Bias
        print(self.w)
        print(self.b)
        # Guardamos estos parámetros
        self.input_size = input_size
        self.output_size = output_size

        return None

    def forward(self, x):
        # Función que activa el nodo
        self.parent = x
        self.value = np.dot(self.w, x.value) + self.b # Expresión lineal
        return self

    def backward(self, consumer_grad=1):
        # La función backward recibe el gradiente de los nodos hijos y regresa el gradiente
        self.grad = self.parent.value
        self.parent.backward(self.grad)
        return self  

    def update(self, consumer_grad):
        
        return

In [425]:
class Tanh(Node):
    # Activación ReLU

    def __init__(self,node):
        self.preactivation = node
        return None

    def function(self, x):
        return np.tanh(x)

    def forward(self, x):
        self.parent = x
        self.value = self.function(self.preactivation(self.parent).value)
        return self

    def backward(self, consumer_grad):
        m = self.preactivation.output_size
        
        dh_da = np.zeros(m)
        for i in range(m):
            dh_da[i] = 1 - self.value[i]**2

        d_k = dh_da * consumer_grad
        self.grad = d_k
        print(f"Tanh d_k = {d_k}")
        # Calculamos la suma d_k+1 * W_q,i para pasarla al siguiente nodo
        n = self.preactivation.input_size
        d_kW = np.zeros(m)
        for i in range(n):
            for q in range(m):
                d_kW[q] += d_k[q] * self.preactivation.w[q][i]

        self.parent.backward(d_kW)

        return self
        

In [426]:
class ReLU(Node):
    # Activación ReLU

    def __init__(self,node):
        self.preactivation = node
        return None

    def function(self, x):
        return x * (x > 0)

    def forward(self, x):
        self.parent = x
        self.value = self.function(self.preactivation(self.parent).value)
        return self

    def backward(self, consumer_grad):
        m = self.preactivation.output_size

        dh_da = np.zeros(m)
        for i in range(m):
            # Calculamos la derivada de ReLu respecto a su preactivación a
            if self.preactivation.value[i] >= 0:
                dh_da[i] = 1
            else:
                dh_da[i] = 0
        
        d_k = dh_da * consumer_grad
        self.grad = d_k
        print(f"ReLu d_k = {d_k}")
        # Calculamos la suma d_k+1 * W_q,i para pasarla al siguiente nodo

        n = self.preactivation.input_size
        d_kW = np.zeros(n)
        for i in range(n):
            for q in range(m):
                d_kW[q] += d_k[q] * self.preactivation.w[q][i]
            
        self.parent.backward(d_kW)
        return self
        

In [427]:
class Softmax(Node):
    # Activación ReLU

    def __init__(self,node):
        self.preactivation = node
        return None

    def function(self, x):
        S = sum( [np.exp(x_i) for x_i in x] )
        return np.exp(x) / S

    def forward(self, x):
        self.parent = x
        self.value = self.function(self.preactivation(self.parent).value)
        return self

    def backward(self, consumer_grad):
        m = self.preactivation.output_size

        df_da = np.zeros(m)
        for i in range(m):
            x = self.value[i]
            df_da[i] =  x*(1-x)

        d_k = consumer_grad * df_da
        print(f"softmax d_k = {d_k}")
        self.grad = d_k

        # Calculamos la suma d_k+1 * W_q,i para pasarla al siguiente nodo
        n = self.preactivation.input_size
        d_kW = np.zeros(n)
        for i in range(n):
            for q in range(m):
                d_kW[q] += d_k[q] * self.preactivation.w[q][i]

        self.parent.backward(d_kW)

        return self

In [428]:
class CrossEntropy(Node):
    # Error de clasificación binario
    def __init__(self, output_node):
        self.parent = output_node
        return None

    def forward(self, Y_real):
        # Definido por casos para evitar infinitos innecesarios
        epsilon = sys.float_info.epsilon
        self.real_output = Y_real
    
        add = 0
        for y_pred, y_real in zip(self.parent.value, Y_real):
            add -= y_real * np.log(y_pred + epsilon)
        
        self.value = add
        
        return self

    def backward(self, consumer_grad = 1):
        dL_df = - self.real_output / self.parent.value
        self.grad = dL_df * consumer_grad
        print(f"dL_df = {self.grad}")
        self.parent.backward(self.grad)
        return

In [429]:
# Arquitectura de la Red
tanh_layer = Tanh( PreActivation(2, 3) )
relu_layer = ReLU( PreActivation(3,2) )
softmax_layer = Softmax( PreActivation(2,2) )

error = CrossEntropy(softmax_layer)


[[0.45534135 0.27524145]
 [0.09271912 0.16742675]
 [0.16438683 0.36372621]]
[0.0639517  0.96479423 0.46107253]
[[0.42921124 0.58489117 0.50757214]
 [0.20038538 0.63435382 0.80153569]]
[0.60209484 0.0099548 ]
[[0.25553991 0.79513999]
 [0.1873873  0.62575499]]
[0.37176567 0.69626575]


In [430]:
x_train[0], y_train[0]

(array([-0.68599333, -1.56861369]), array([1., 0.]))

In [431]:
initial_node = Node(x_train[1])

l_0 = tanh_layer(initial_node)
print(l_0)

l_1 = relu_layer(l_0)
print(l_1)

l_2 = softmax_layer(l_1)
print(l_2)


error(y_train[0]).value

[0.88630298 0.84670913 0.74464401]
[1.85569929 1.32152888]
[0.5064541 0.4935459]


np.float64(0.6803215817064229)

In [432]:
error.backward()

dL_df = [-1.9745126 -0.       ]
softmax d_k = [-0.4935459 -0.       ]
ReLu d_k = [-0.51855876  0.        ]
Tanh d_k = [-0.16923114  0.          0.        ]


In [433]:
np.zeros((5,2))

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])