In [8]:
import numpy as np

# Scratch check of a single dense layer: two inputs, a 2x2 weight matrix, zero biases.
x = np.array([0, 1])  # input vector
w1 = np.transpose(np.array([[1, 2], [3, 4]]))  # transposed, so the matrix is [[1, 3], [2, 4]]
b1 = np.zeros(2, dtype=int)

# Only the last bare expression of a cell is rendered, so the layer output
# computed on the next line is evaluated and then discarded.
w1 @ x + b1
w1

array([[1, 3],
       [2, 4]])

In [114]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    The textbook form computes ``exp(-x)`` directly, which overflows to ``inf``
    (and emits a RuntimeWarning) once ``x`` is a large negative number.
    Here the value is computed as ``exp(-log(1 + exp(-x)))``, with the inner
    logarithm delegated to ``np.logaddexp(0, -x)``, which never forms the
    huge intermediate.

    Args:
        x: a number or NumPy array; applied element-wise.

    Returns:
        Value(s) in [0, 1] with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))

def random_weights(shape, min=-1, max=1):
    """Draw an array of uniform random values from the half-open range [min, max).

    Values come from NumPy's global random state via ``np.random.random``.

    Args:
        shape: shape passed straight to ``np.random.random``.
        min: lower bound (inclusive) of the range.
        max: upper bound (exclusive) of the range.

    Returns:
        np.ndarray of the requested shape.
    """
    # np.random.random yields samples in [0, 1); stretch by the width, then shift.
    span = max - min
    return span * np.random.random(shape) + min

def xor_net(inputs: np.ndarray, weights: np.ndarray):
    """Forward pass of a 2-2-1 sigmoid network whose parameters are packed in one vector.

    Layout of ``weights``:
        weights[0:6] -- hidden layer, viewed as a 2x3 matrix: one row per hidden
                        unit, columns are (input 0, input 1, bias).
        weights[6:]  -- output unit: (hidden 0, hidden 1, bias).

    A constant 1 is appended to the inputs and to the hidden activations so the
    bias terms are handled by the same dot products as the ordinary weights.

    Args:
        inputs (np.ndarray): the two input values.
        weights (np.ndarray): flat array of the nine weights and biases.

    Returns:
        The output unit's sigmoid activation: a float strictly between 0 and 1
        in exact arithmetic (not an int).
    """
    biased_inputs = np.append(inputs, 1)
    hidden_weights = weights[:6].reshape((2, 3))
    hidden_activations = sigmoid(np.dot(hidden_weights, biased_inputs))

    biased_hidden = np.append(hidden_activations, 1)
    output_weights = weights[6:]
    return sigmoid(np.dot(output_weights, biased_hidden))

def mse(weights, inputs, outputs, net=xor_net):
    """Total squared error of ``net`` over a set of samples.

    Despite the name, the squared errors are summed, not averaged: the result
    is not divided by the number of samples.

    Args:
        weights: parameter vector handed to ``net`` unchanged.
        inputs: iterable of input samples.
        outputs: iterable of target values, paired with ``inputs`` by position.
        net: callable ``net(sample, weights)`` producing a prediction.

    Returns:
        Sum over all samples of ``(prediction - target) ** 2``; 0 when there
        are no samples.
    """
    total = 0
    for sample, target in zip(inputs, outputs):
        residual = net(sample, weights) - target
        total = total + residual**2
    return total

def grdmse(weights, inputs, outputs, net=xor_net):
    """Analytic gradient of the summed squared error for the 2-2-1 sigmoid network.

    The forward pass is recomputed inline for each sample (same weight layout as
    ``xor_net``: six hidden weights as a 2x3 matrix, then three output weights),
    and the per-sample partial derivatives are accumulated.

    Args:
        weights: flat array of nine weights and biases.
        inputs: iterable of two-element input samples.
        outputs: iterable of target values, paired with ``inputs`` by position.
        net: accepted for signature parity with ``mse`` but not used; the
            derivative below is hard-wired to the 2-2-1 architecture.

    Returns:
        np.ndarray of length 9 holding d(error)/d(weights[i]).
    """
    hidden_weights = weights[:6].reshape((2, 3))
    output_weights = weights[6:]
    gradient = np.zeros((9))

    for sample, target in zip(inputs, outputs):
        biased_sample = np.append(sample, 1)
        activations = np.append(sigmoid(np.dot(hidden_weights, biased_sample)), 1)
        prediction = sigmoid(np.dot(output_weights, activations))

        # d(error)/d(output pre-activation): 2*(p - y) times the sigmoid slope p*(1 - p)
        delta = 2*(prediction - target) * prediction * (1 - prediction)

        # output unit: each weight's gradient is delta times the value feeding it
        gradient[6:] += delta * activations

        # hidden units: push delta back through the connecting output weight and
        # that unit's own sigmoid slope, then scale by the (bias-extended) inputs
        for unit in (1, 0):
            row = slice(3 * unit, 3 * unit + 3)
            gradient[row] += (delta * weights[6 + unit] * activations[unit]
                              * (1 - activations[unit]) * biased_sample)
    return gradient

def print_test(weights, inputs, outputs):
    """Print one line per sample comparing the network's output with the target.

    Each line shows the two input values, the ``xor_net`` prediction rounded to
    three decimals, and the expected output in square brackets.

    Args:
        weights: parameter vector passed to ``xor_net``.
        inputs: iterable of two-element input samples.
        outputs: iterable of expected values, paired with ``inputs`` by position.
    """
    samples = zip(inputs, outputs)
    for inpt, output in samples:
        line = f"{inpt[0]} | {inpt[1]} = {xor_net(inpt, weights):.3f} [{output}]"
        print(line)

def grad_desc(eta=0.01, n_loops: int = 1000, seed=None):
    """Train the 2-2-1 network on the XOR truth table with batch gradient descent.

    Nine initial weights are drawn with ``random_weights``; each iteration then
    moves them against the gradient returned by ``grdmse`` over all four XOR
    patterns. When training ends, the per-pattern predictions and the final
    weights (as a 3x3 grid: two hidden-unit rows, then the output-unit row) are
    printed. Nothing is returned.

    Args:
        eta: learning rate (step size of each update).
        n_loops (int): number of gradient-descent updates to perform.
        seed: optional seed. When not None it is passed to ``np.random.seed``
            before the initial weights are drawn, so a run can be repeated
            exactly. The default (None) leaves NumPy's global random state
            untouched, which is the original behaviour.
    """
    inputs = np.array([[0, 0],
                       [0, 1],
                       [1, 0],
                       [1, 1]])
    outputs = np.array([0, 1, 1, 0])

    if seed is not None:
        # random_weights draws from NumPy's global generator, so seeding that
        # generator is what makes the starting point reproducible.
        np.random.seed(seed)
    weights = random_weights((9))

    for _ in range(n_loops):
        weights -= eta * grdmse(weights, inputs, outputs)

    print_test(weights, inputs, outputs)
    print(weights.reshape((3,3)))

# Run 50,000 updates at the default learning rate; results are printed below.
grad_desc(n_loops=50_000)

0 | 0 = 0.049 [0]
0 | 1 = 0.941 [1]
1 | 0 = 0.940 [1]
1 | 1 = 0.060 [0]
[[-5.68576192 -5.80853074  2.07213717]
 [ 3.79787207  3.81827614 -5.91660222]
 [-7.57906046 -7.7311309   3.78604252]]
