In [472]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import numpy as np
import numpy.random
import math
import sys

In [473]:
np.random.seed(12)

In [580]:
def make_classification(r0=1,r1=3,k=1000):
    """
    Creaci ́on de los datos
    """
    X1 = [np.array([r0*np.cos(t),r0*np.sin(t)]) for t in range(0,k)]
    X2 = [np.array([r1*np.cos(t),r1*np.sin(t)]) for t in range(0,k)]
    X = np.concatenate((X1,X2))
    n,d = X.shape
    Y = np.zeros((2*k,2))
    Y[0:k] += [1,0]
    Y[k:] += [0,1]
    noise = np.array([np.random.normal(0,1,2) for i in range(n)])
    X += 0.5*noise
    return X,Y

In [581]:
X, Y = make_classification(k=10)
x_train, x_eval, y_train, y_eval = train_test_split(X, Y, test_size=0.3)
x_train,y_train


(array([[-0.76426676,  0.16091609],
        [-0.61529766, -0.74541055],
        [-0.3287628 , -0.70257991],
        [ 1.01693482, -0.17293928],
        [-1.40265409,  2.40347388],
        [-3.0017048 ,  1.1205725 ],
        [-0.38794572,  1.7246724 ],
        [ 0.409174  ,  2.50235795],
        [ 0.07288647, -0.51099316],
        [ 0.87497902,  0.48022502],
        [ 3.01475757, -0.54734915],
        [ 2.7509809 , -0.27612794],
        [ 2.65890006,  2.04163995],
        [-2.51613602,  1.46012579]]),
 array([[1., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.],
        [0., 1.],
        [1., 0.],
        [1., 0.],
        [0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.]]))

In [527]:
class Node():
    """Nodo super clase con funciones generales"""
    def __init__(self, values):
    # Agrega los par ́ametros necesarios
        self.values = values
        self.grads = None
        return
        
    def __call__(self, *kwargs):
        return self.forward(*kwargs)

    def __str__(self):
        return str(self.values) #Valor n ́um del nodo
        
    def backward(self, consumer_grad=1):
        self.grads = consumer_grad
# Agrega otros m ́etodos necesarios

In [688]:
##Clases basicas

class PreActivation(Node):
    # Pre-activación wx+b
    def __init__(self, input_size, output_size, parent = None):
        # Generamos una matriz aleatoria de tamaño input x output 
        # Y la trasponemos para usarla como matriz
        self.w = np.random.uniform(0,1,(input_size, output_size)).T
        
        # Generamos el vector de sesgo del tamaño de salida
        self.b = np.random.uniform(0,1, output_size).T # Bias
        print(self.w)
        print(self.b)
        # Guardamos estos parámetros
        self.input_size = input_size
        self.output_size = output_size
        self.parent = parent

        return None

    def forward(self):
        # Función que activa el nodo

        # A cada valor x le aplicamos Wx + b
        # Tenemos una matriz donde cada renglón es un vector de preactivación a
        self.values = []
        for value in self.parent.values:
            # print(np.dot(self.w, value) + self.b)
            self.values.append(np.dot(self.w, value) + self.b) # Expresión lineal
        
        self.values = np.array(self.values)

        return self

    def backward(self, consumer_grad=1):
        # La función backward recibe el gradiente de los nodos hijos y regresa el gradiente
        self.grad_w = self.value * consumer_grad
        self.grad_b = consumer_grad

        self.update()
        return self  

    def update(self):
        
        return

In [666]:
class Tanh(Node):
    # Activación ReLU

    def __init__(self, preactivation_node):
        # Usualmente un nodo de preactivación
        self.parent = preactivation_node
        return None

    def function(self, x):
        return np.tanh(x)

    def forward(self):
        self.values =  self.function(self.parent.values)

        return self

    def backward(self, consumer_grad):
        m = self.preactivation.output_size
        
        dh_da = np.zeros(m)
        for i in range(m):
            dh_da[i] = 1 - self.value[i]**2

        d_k = dh_da * consumer_grad
        self.grad = d_k
        print(f"Tanh d_k = {d_k}")
        # Calculamos la suma d_k+1 * W_q,i para pasarla al siguiente nodo
        n = self.preactivation.input_size
        d_kW = np.zeros(m)
        for i in range(n):
            for q in range(m):
                d_kW[q] += d_k[q] * self.preactivation.w[q][i]

        self.parent.backward(d_kW)

        return self
        

In [692]:
class ReLU(Node):
    # Activación ReLU

    def __init__(self, preactivation_node):
        # Nodo de preactivación
        self.parent = preactivation_node
        return None

    def function(self, x):
            return x * (x >= 0)

    def forward(self):
        self.values = self.function(self.parent.values)
        
        return self

    def backward(self, consumer_grad):
        m = self.preactivation.output_size

        dh_da = np.zeros(m)
        for i in range(m):
            # Calculamos la derivada de ReLu respecto a su preactivación a
            if self.preactivation.value[i] >= 0:
                dh_da[i] = 1
            else:
                dh_da[i] = 0
        
        d_k = dh_da * consumer_grad
        self.grad = d_k
        print(f"ReLu d_k = {d_k}")
        # Calculamos la suma d_k+1 * W_q,i para pasarla al siguiente nodo

        n = self.preactivation.input_size
        d_kW = np.zeros(n)
        for i in range(n):
            for q in range(m):
                d_kW[q] += d_k[q] * self.preactivation.w[q][i]
            
        self.parent.backward(d_kW)
        return self
        

In [668]:
class Softmax(Node):
    # Activación ReLU

    def __init__(self, preactivation_node):
        # Nodo de preactivación
        self.parent = preactivation_node
        return None

    def function(self, x):
        S = sum( [np.exp(x_i) for x_i in x] )
        return np.exp(x) / S

    def forward(self):
        # self.parent = x
        #Aplicamos softmax a cada renglón de la matriz de preactivación values
        self.values = self.function(self.parent.values)
        return self

    def backward(self, consumer_grad):
        m = self.preactivation.output_size

        df_da = np.zeros(m)
        for i in range(m):
            x = self.value[i]
            df_da[i] =  x*(1-x)

        d_k = consumer_grad * df_da
        print(f"softmax d_k = {d_k}")
        self.grad = d_k

        # Calculamos la suma d_k+1 * W_q,i para pasarla al siguiente nodo
        n = self.preactivation.input_size
        d_kW = np.zeros(n)
        for i in range(n):
            for q in range(m):
                d_kW[q] += d_k[q] * self.preactivation.w[q][i]

        self.parent.backward(d_kW)

        return self

In [669]:
class CrossEntropy(Node):
    # Error de clasificación binario
    def __init__(self, output_node, classes = [0,1]):
        self.parent = output_node
        self.classes = classes
        return None

    def forward(self, Y_real):
        # Definido por casos para evitar infinitos innecesarios
        epsilon = sys.float_info.epsilon

        self.real_output = Y_real
        add = 0

        # E = 0*log(p_0) + 1*log(p_1)
        for y_pred, y_real in zip(self.parent.values, Y_real):
            for c in self.classes:
                add -= y_real[c]*np.log(y_pred[c] + epsilon)
        
        self.value = add
        
        return self

    def backward(self, consumer_grad = 1):
        dL_df = - self.real_output / self.parent.value
        self.grad = dL_df * consumer_grad
        print(f"dL_df = {self.grad}")
        self.parent.backward(self.grad)
        return

In [693]:
# Arquitectura de la Red
initial_node = Node(x_train)

pre_tanh = PreActivation(2, 4, initial_node)
tanh_layer = Tanh( pre_tanh )
pre_relu = PreActivation(4, 3, tanh_layer)
relu_layer = ReLU(pre_relu)
pre_soft = PreActivation(3, 2, relu_layer)
softmax_layer = Softmax(pre_soft)
error = CrossEntropy(softmax_layer)


[[0.39896034 0.13918484]
 [0.21362927 0.04570622]
 [0.14313454 0.49488973]
 [0.4766943  0.64167011]]
[0.40498166 0.19440978 0.15859723 0.31379152]
[[0.86654101 0.76444474 0.48707278 0.13560891]
 [0.23352597 0.28595968 0.71714878 0.65733534]
 [0.40333586 0.29580364 0.4945167  0.41556978]]
[0.83297728 0.10599079 0.16138678]
[[0.93759214 0.36640151 0.35875738]
 [0.69292549 0.56456855 0.88460132]]
[0.42355058 0.89331428]


In [694]:
p_0 = pre_tanh()
l_0 = tanh_layer()
print(l_0)

p_1 = pre_relu()
l_1 = relu_layer()
print(l_1)

# p_2 = pre_soft()
# l_2= softmax_layer()
# print(l_2)





error(y_train).value

[[ 0.12185801  0.03847589  0.12813177  0.05267616]
 [ 0.05569475  0.02888624 -0.28981946 -0.4283099 ]
 [ 0.17423394  0.09180492 -0.23186513 -0.28558482]
 [ 0.65649444  0.38315509  0.21515454  0.59643063]
 [ 0.1779893   0.00461548  0.81685216  0.82976817]
 [-0.56258883 -0.37619943  0.27614999 -0.37829308]
 [ 0.45441876  0.18809491  0.74275262  0.84417612]
 [ 0.72424512  0.37668826  0.89678592  0.97128614]
 [ 0.34779925  0.18448801 -0.08365947  0.02064412]
 [ 0.6755614   0.38275211  0.47885333  0.77750674]
 [ 0.91069248  0.67147971  0.30881568  0.88528499]
 [ 0.89844233  0.64662643  0.39330575  0.8952938 ]
 [ 0.94136901  0.69405853  0.91371358  0.99385794]
 [-0.37620315 -0.26954555  0.47851193  0.051238  ]]
[[ 1.03753778  0.27196579  0.30717171]
 [ 0.7040752  -0.36212964 -0.12891809]
 [ 0.90247488 -0.18107535  0.0254762 ]
 [ 1.88043476  0.91521894  0.89376926]
 [ 1.50113099  1.28011622  0.98331513]
 [ 0.14109255 -0.18359096 -0.19745302]
 [ 1.84679031  1.35346785  1.11842697]
 [ 2.3170377

AttributeError: 'Softmax' object has no attribute 'values'

In [675]:
x_train[0], y_train[0]

(array([-0.76426676,  0.16091609]), array([1., 0.]))

In [560]:
error.backward()

AttributeError: 'Tanh' object has no attribute 'value'

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])