1. Implemente un perceptrón simple que aprenda la función lógica $AND$ y la función lógica $OR$, de $2$ y de $4$ entradas. Muestre la evolución del error durante el entrenamiento. Para el caso de $2$ dimensiones, grafique la recta discriminadora y todos los vectores de entrada de la red

![](img/perceptrón-simple1.png)

$AND$ de $2$ entradas:
| $x_1$ | $0$ | $0$ | $1$ | $1$ |
|-------|-----|-----|-----|-----|
| $x_2$ | $0$ | $1$ | $0$ | $1$ |
| $y$   | $0$ | $0$ | $0$ | $1$ |

In [12]:
import numpy as np
from matplotlib import pyplot as plt
# Seed NumPy's global random generator so the random weight draws made by the
# networks defined below are reproducible from run to run.
np.random.seed(2002)

In [13]:
def AND(X):
    """Return True when every element of ``X`` is truthy (logical AND).

    An empty ``X`` yields True.
    """
    # De Morgan: "all are true" is the same as "none is false".
    return not any(not value for value in X)

def OR(X):
    """Return True when at least one element of ``X`` is truthy (logical OR).

    An empty ``X`` yields False.
    """
    # De Morgan: "some are true" is the same as "not all are false".
    return not all(not value for value in X)

In [14]:
class PerceptronSimple:
    """Single-layer perceptron with a step activation and a learned threshold.

    ``W`` stores one weight per input followed by the threshold weight. The
    threshold is applied through a constant ``-1`` input that is appended to
    every sample before the dot product.

    Args:
        n_inputs: Number of input features. Defaults to 2, which gives the
            three weights used for the two-input logic gates.
    """

    def __init__(self, n_inputs=2):
        # One weight per input plus the threshold, drawn from N(0, 1).
        self.W = np.random.randn(n_inputs + 1)

    def train(self, X, Y, alpha, iter_):
        """Fit the weights with the perceptron learning rule.

        Samples are visited in order and the weights are corrected right
        after every misclassified sample.

        Args:
            X: Sequence of input vectors, each with ``n_inputs`` values.
            Y: Sequence of target outputs (0/1 or bool), aligned with ``X``.
            alpha: Learning rate.
            iter_: Maximum number of passes over the data.

        Returns:
            A list with the number of misclassified samples found in each
            pass, which can be plotted as the training-error curve.
        """
        history = []
        for _ in range(iter_):
            mistakes = 0
            for x, y in zip(X, Y):
                a = self.predict(x)
                if a != y:
                    # Same rule for every weight; the threshold sees input -1.
                    self.W += alpha * (y - a) * np.append(x, -1)
                    mistakes += 1
            history.append(mistakes)
            if mistakes == 0:
                # A clean pass leaves W untouched, so further passes would
                # repeat it exactly; stopping here gives the same weights.
                break
        return history

    def predict(self, x):
        """Return 1 when the weighted sum reaches the threshold, else 0."""
        h = np.dot(np.append(x, -1), self.W)
        return 0 if h < 0 else 1

In [15]:
# Truth table of the 2-input AND gate: all four input pairs and their labels.
X_train = [[x1, x2] for x1 in (0, 1) for x2 in (0, 1)]
Y_train = list(map(AND, X_train))

# The set used for training drops the first sample ([0, 0]) and adds one extra
# copy of the only positive sample ([1, 1]).
train_inputs = X_train[1:] + [[1, 1]]
train_targets = Y_train[1:] + [True]

perceptron = PerceptronSimple()
perceptron.train(train_inputs, train_targets, 0.01, 10000)

# Evaluate on the complete truth table, including the sample left out above.
for x in X_train:
    print(x, perceptron.predict(x))

[0, 0] 0
[0, 1] 0
[1, 0] 0
[1, 1] 1


$AND$ de $4$ entradas:

| $x_1$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $1$ | $1$ | $1$ | $1$ | $1$ | $1$ | $1$ | $1$ |
|-------|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|
| $x_2$ | $0$ | $0$ | $0$ | $0$ | $1$ | $1$ | $1$ | $1$ | $0$ | $0$ | $0$ | $0$ | $1$ | $1$ | $1$ | $1$ |
| $x_3$ | $0$ | $0$ | $1$ | $1$ | $0$ | $0$ | $1$ | $1$ | $0$ | $0$ | $1$ | $1$ | $0$ | $0$ | $1$ | $1$ |
| $x_4$ | $0$ | $1$ | $0$ | $1$ | $0$ | $1$ | $0$ | $1$ | $0$ | $1$ | $0$ | $1$ | $0$ | $1$ | $0$ | $1$ |
| $y$   | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $1$ |

In [16]:
class PerceptronSimple:
    """Single-layer perceptron with a step activation and a learned threshold.

    ``W`` stores one weight per input followed by the threshold weight. The
    threshold is applied through a constant ``-1`` input that is appended to
    every sample before the dot product.

    Args:
        n_inputs: Number of input features. Defaults to 4, which gives the
            five weights used for the four-input logic gates.
    """

    def __init__(self, n_inputs=4):
        # One weight per input plus the threshold, drawn from N(0, 1).
        self.W = np.random.randn(n_inputs + 1)

    def train(self, X, Y, alpha, iter_):
        """Fit the weights with the perceptron learning rule.

        Samples are visited in order and the weights are corrected right
        after every misclassified sample.

        Args:
            X: Sequence of input vectors, each with ``n_inputs`` values.
            Y: Sequence of target outputs (0/1 or bool), aligned with ``X``.
            alpha: Learning rate.
            iter_: Maximum number of passes over the data.

        Returns:
            A list with the number of misclassified samples found in each
            pass, which can be plotted as the training-error curve.
        """
        history = []
        for _ in range(iter_):
            mistakes = 0
            for x, y in zip(X, Y):
                a = self.predict(x)
                if a != y:
                    # Same rule for every weight; the threshold sees input -1.
                    self.W += alpha * (y - a) * np.append(x, -1)
                    mistakes += 1
            history.append(mistakes)
            if mistakes == 0:
                # A clean pass leaves W untouched, so further passes would
                # repeat it exactly; stopping here gives the same weights.
                break
        return history

    def predict(self, x):
        """Return 1 when the weighted sum reaches the threshold, else 0."""
        h = np.dot(np.append(x, -1), self.W)
        return 0 if h < 0 else 1

In [17]:
# Truth table of the 4-input AND gate: all sixteen input vectors and labels.
X_train = [
    [x1, x2, x3, x4]
    for x1 in (0, 1)
    for x2 in (0, 1)
    for x3 in (0, 1)
    for x4 in (0, 1)
]
Y_train = list(map(AND, X_train))

# The set used for training drops the first five samples and adds five extra
# copies of the only positive sample ([1, 1, 1, 1]).
train_inputs = X_train[5:] + [[1, 1, 1, 1] for _ in range(5)]
train_targets = Y_train[5:] + [True] * 5

perceptron = PerceptronSimple()
perceptron.train(train_inputs, train_targets, 0.001, 10000)

# Evaluate on the complete truth table, including the samples left out above.
for x in X_train:
    print(x, perceptron.predict(x))

[0, 0, 0, 0] 0
[0, 0, 0, 1] 0
[0, 0, 1, 0] 0
[0, 0, 1, 1] 0
[0, 1, 0, 0] 0
[0, 1, 0, 1] 0
[0, 1, 1, 0] 0
[0, 1, 1, 1] 0
[1, 0, 0, 0] 0
[1, 0, 0, 1] 0
[1, 0, 1, 0] 0
[1, 0, 1, 1] 0
[1, 1, 0, 0] 0
[1, 1, 0, 1] 0
[1, 1, 1, 0] 0
[1, 1, 1, 1] 1


2. Implemente un perceptrón multicapa que aprenda la función lógica $XOR$ de $2$ y de $4$ entradas (utilizando el algoritmo Backpropagation y actualizando en batch). Muestre cómo evoluciona el error durante el entrenamiento.

![](img/perceptrón-multicapa1.png)

$XOR$ de $2$ entradas:
| $x_1$ | $0$ | $0$ | $1$ | $1$ |
|-------|-----|-----|-----|-----|
| $x_2$ | $0$ | $1$ | $0$ | $1$ |
| $y$   | $0$ | $1$ | $1$ | $0$ |

In [45]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated element-wise.

    The value is computed from ``exp(-|z|)``, which never exceeds 1, so very
    negative inputs no longer overflow inside ``np.exp``.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``z``.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))
    # Both branches are algebraically 1 / (1 + exp(-z)); each is only
    # selected on the side where it is numerically safe.
    stable = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return stable[()]  # unwrap 0-d results so scalars stay scalars


def d_sigmoid(z):
    """Derivative of the logistic function, ``sigmoid(z) * (1 - sigmoid(z))``."""
    s = sigmoid(z)  # evaluate once and reuse
    return s * (1 - s)


class PerceptronMulticapa:
    """Fully connected feed-forward network trained with backpropagation.

    Every layer, including the output layer, applies the sigmoid, so the
    network output always lies in (0, 1).

    Args:
        sizes: Number of units per layer, input layer first,
            e.g. ``[2, 3, 1]``.

    Attributes:
        L: Number of layers, counting the input layer.
        w: ``w[k]`` has shape ``(sizes[k + 1], sizes[k])``.
        b: ``b[k]`` has shape ``(sizes[k + 1],)``.
        a: Activations of the last forward pass; ``a[0]`` is the input.
        z: Pre-activations of the last forward pass; ``z[k]`` feeds ``a[k + 1]``.
    """

    def __init__(self, sizes):
        self.L = len(sizes)
        self.sizes = sizes
        # Placeholders, overwritten by the first call to predict().
        self.a = [np.zeros(s) for s in sizes]
        self.z = [np.zeros(s) for s in sizes[1:]]
        # All weight matrices are drawn before the biases, from N(0, 1).
        self.w = [np.random.randn(n, m) for n, m in zip(sizes[1:], sizes[:-1])]
        self.b = [np.random.randn(s) for s in sizes[1:]]

    def predict(self, x):
        """Run a forward pass on one sample and return the output activations.

        The intermediate values are stored in ``self.a`` and ``self.z`` because
        ``train`` reads them during backpropagation.
        """
        self.a[0] = np.asarray(x, dtype=float)
        for layer in range(1, self.L):
            self.z[layer - 1] = self.w[layer - 1] @ self.a[layer - 1] + self.b[layer - 1]
            self.a[layer] = sigmoid(self.z[layer - 1])
        return self.a[-1]

    def train(self, X, Y, lr=0.01, iters=1000, batch_size=1):
        """Train by gradient descent on the cost ``0.5 * ||output - y||**2``.

        Args:
            X: Sequence of input vectors.
            Y: Sequence of targets, aligned with ``X``.
            lr: Learning rate.
            iters: Number of passes (epochs) over the data.
            batch_size: Samples per parameter update. The default of 1 updates
                after every sample; ``len(X)`` gives one full-batch update per
                epoch. Gradients are averaged over each batch.

        Returns:
            A list with one value per epoch: the mean cost over the samples,
            measured during the forward passes of that epoch.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        samples = list(zip(X, Y))
        history = []
        for _ in range(iters):
            epoch_cost = 0.0
            for start in range(0, len(samples), batch_size):
                batch = samples[start:start + batch_size]
                grad_w = [np.zeros_like(w) for w in self.w]
                grad_b = [np.zeros_like(b) for b in self.b]

                for x, y in batch:
                    # Forward pass refreshes self.a / self.z for this sample.
                    self.predict(x)
                    error = self.a[-1] - y
                    epoch_cost += 0.5 * float(np.sum(error ** 2))

                    # Backward pass, from the output layer to the first one.
                    delta = error * d_sigmoid(self.z[-1])
                    for layer in range(self.L - 2, -1, -1):
                        grad_w[layer] += np.outer(delta, self.a[layer])
                        grad_b[layer] += delta
                        if layer > 0:
                            delta = (self.w[layer].T @ delta) * d_sigmoid(self.z[layer - 1])

                # Step against the batch-averaged gradient.
                step = lr / len(batch)
                self.w = [w - step * g for w, g in zip(self.w, grad_w)]
                self.b = [b - step * g for b, g in zip(self.b, grad_b)]

            history.append(epoch_cost / len(samples) if samples else 0.0)
        return history





In [22]:
# Truth table of the 2-input XOR: the label is the parity of the input bits.
X_train = [[x1, x2] for x1 in (0, 1) for x2 in (0, 1)]
Y_train = [sum(bits) % 2 for bits in X_train]

# 2 inputs, one hidden layer of 3 units, 1 output.
layer_sizes = [2, 3, 1]
perceptron = PerceptronMulticapa(layer_sizes)
perceptron.train(X_train, Y_train, lr=0.1, iters=10000)

# Show the network output for every row of the truth table.
for x in X_train:
    print(x, perceptron.predict(x))

[0, 0] 0
[0, 1] 1
[1, 0] 1
[1, 1] 0


$XOR$ de $4$ entradas:
| $x_1$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $0$ | $1$ | $1$ | $1$ | $1$ | $1$ | $1$ | $1$ | $1$ |
|-------|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|-----|
| $x_2$ | $0$ | $0$ | $0$ | $0$ | $1$ | $1$ | $1$ | $1$ | $0$ | $0$ | $0$ | $0$ | $1$ | $1$ | $1$ | $1$ |
| $x_3$ | $0$ | $0$ | $1$ | $1$ | $0$ | $0$ | $1$ | $1$ | $0$ | $0$ | $1$ | $1$ | $0$ | $0$ | $1$ | $1$ |
| $x_4$ | $0$ | $1$ | $0$ | $1$ | $0$ | $1$ | $0$ | $1$ | $0$ | $1$ | $0$ | $1$ | $0$ | $1$ | $0$ | $1$ |
| $y$   | $0$ | $1$ | $1$ | $0$ | $1$ | $0$ | $0$ | $1$ | $1$ | $0$ | $0$ | $1$ | $0$ | $1$ | $1$ | $0$ |

In [20]:
# Truth table of the 4-input XOR: the label is the parity of the input bits.
X_train = [
    [x1, x2, x3, x4]
    for x1 in (0, 1)
    for x2 in (0, 1)
    for x3 in (0, 1)
    for x4 in (0, 1)
]
Y_train = [sum(bits) % 2 for bits in X_train]

# 4 inputs, one hidden layer of 8 units, 1 output.
layer_sizes = [4, 8, 1]
perceptron = PerceptronMulticapa(layer_sizes)
perceptron.train(X_train, Y_train, lr=0.01, iters=100000)

# Show the network output for every row of the truth table.
for x in X_train:
    print(x, perceptron.predict(x))

[0, 0, 0, 0] 0
[0, 0, 0, 1] 1
[0, 0, 1, 0] 1
[0, 0, 1, 1] 0
[0, 1, 0, 0] 1
[0, 1, 0, 1] 0
[0, 1, 1, 0] 0
[0, 1, 1, 1] 1
[1, 0, 0, 0] 1
[1, 0, 0, 1] 0
[1, 0, 1, 0] 0
[1, 0, 1, 1] 1
[1, 1, 0, 0] 0
[1, 1, 0, 1] 1
[1, 1, 1, 0] 1
[1, 1, 1, 1] 0


4.
    a) Implemente una red con aprendizaje Backpropagation que aprenda la siguiente función:
    $$
    f(x, y, z) = \sin(x) + \cos(y) + z
    $$
    donde $x, y \in [0, 2\pi]$ y $z \in [-1, 1]$.  
    Para ello construya un conjunto de datos de entrenamiento y un conjunto de evaluación. Muestre la evolución del error de entrenamiento y de evaluación en función de las épocas de entrenamiento.

    b) Estudie la evolución de los errores durante el entrenamiento de una red con una capa oculta de $30$ neuronas cuando el conjunto de entrenamiento contiene $40$ muestras.  
    ¿Qué ocurre si el minibatch tiene tamaño 40? ¿Y si tiene tamaño 1?

In [28]:
def f(x, y, z):
    """Target function of the exercise: ``sin(x) + cos(y) + z``."""
    sine_term = np.sin(x)
    cosine_term = np.cos(y)
    return sine_term + cosine_term + z

# 100 samples built by pairing three evenly spaced grids position by position:
# x and y share the same grid over [0, 2*pi], z runs over [-1, 1].
# Every sample therefore has x == y.
angles = np.linspace(0, 2 * np.pi, 100)
offsets = np.linspace(-1, 1, 100)
X_train = [[angle, angle, offset] for angle, offset in zip(angles, offsets)]
Y_train = [f(*sample) for sample in X_train]

In [None]:
# The cost function will have to be switched to MSE;
# the architecture itself is probably not that far off.
perceptron = PerceptronMulticapa([3, 10, 10, 10, 1])
perceptron.train(X_train, Y_train, lr=0.01, iters=1000)

# One tab-separated row per sample: x, y, z, network output and target value,
# each rounded to two decimals.
for x, y, z in X_train:
    columns = [
        round(x, 2),
        round(y, 2),
        round(z, 2),
        round(perceptron.predict([x, y, z])[0], 2),
        round(f(x, y, z), 2),
    ]
    print("\t".join(map(str, columns)))

0.0	0.0	-1.0	0.51	0.0
0.06	0.06	-0.98	0.51	0.08
0.13	0.13	-0.96	0.51	0.16
0.19	0.19	-0.94	0.52	0.23
0.25	0.25	-0.92	0.52	0.3
0.32	0.32	-0.9	0.52	0.36
0.38	0.38	-0.88	0.52	0.42
0.44	0.44	-0.86	0.52	0.47
0.51	0.51	-0.84	0.52	0.52
0.57	0.57	-0.82	0.52	0.56
0.63	0.63	-0.8	0.51	0.6
0.7	0.7	-0.78	0.51	0.63
0.76	0.76	-0.76	0.51	0.66
0.83	0.83	-0.74	0.5	0.68
0.89	0.89	-0.72	0.5	0.69
0.95	0.95	-0.7	0.49	0.7
1.02	1.02	-0.68	0.48	0.7
1.08	1.08	-0.66	0.48	0.7
1.14	1.14	-0.64	0.46	0.69
1.21	1.21	-0.62	0.45	0.67
1.27	1.27	-0.6	0.44	0.66
1.33	1.33	-0.58	0.42	0.63
1.4	1.4	-0.56	0.4	0.6
1.46	1.46	-0.54	0.38	0.57
1.52	1.52	-0.52	0.35	0.53
1.59	1.59	-0.49	0.32	0.49
1.65	1.65	-0.47	0.29	0.44
1.71	1.71	-0.45	0.26	0.39
1.78	1.78	-0.43	0.23	0.34
1.84	1.84	-0.41	0.2	0.28
1.9	1.9	-0.39	0.17	0.22
1.97	1.97	-0.37	0.14	0.16
2.03	2.03	-0.35	0.12	0.1
2.09	2.09	-0.33	0.1	0.03
2.16	2.16	-0.31	0.08	-0.03
2.22	2.22	-0.29	0.06	-0.1
2.28	2.28	-0.27	0.05	-0.17
2.35	2.35	-0.25	0.05	-0.24
2.41	2.41	-0.23	0.04	-0.31
2.48	2.4

In [None]:
# Network output for every training sample.
Y = list(map(perceptron.predict, X_train))

# NOTE(review): Y is computed above but never plotted; presumably the plot was
# meant to compare the predictions with Y_train. Confirm the intent.
plt.plot(X_train, Y_train)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 100 is different from 3)