In [1]:
import numpy as np

In [2]:
class Linear:
    def __init__(self, input_size, output_size):
        self.input_size = input_size
        self.output_size = output_size
    
    def init(self):
        self.W = np.random.uniform(-1, 1,(self.input_size,self.output_size))
    
    def forward(self, x):
        self.fw = x
        return np.dot(x, self.W)
    
    def backward(self, d, lr):
        d_w = np.dot(self.fw.T, d)
        d_e = np.dot(d, self.W.T)
        self.W -= lr * d_w / self.fw.shape[0]
        return d_e

In [3]:
class ReLU:
    def __init__(self):
        pass

    def init(self):
        pass

    def forward(self, x):
        self.fw = x
        return x * (x > 0)
    
    def backward(self, d, lr):
        return d * np.where(self.fw > 0, 1.0, 0.0)

In [4]:
class Sigmoid:
    def __init__(self):
        pass
    
    def init(self):
        pass
    
    def forward(self, x):
        self.fw = x
        self.out = 1.0 / (1.0 + np.exp(-x))
        return self.out
    
    def backward(self, d, lr):
        return d * (self.out * (1.0 - self.out))

In [5]:
class NeuralNetwork:
    def __init__(self, layers: list):
        self.layers = layers
        for layer in self.layers:
            layer.init()

    def forward_pass(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        return x
    
    def backward_pass(self, deriv, lr):
        for layer in reversed(self.layers):
            deriv = layer.backward(deriv, lr)
    

In [6]:
arch = [
    Linear(2, 8),
    ReLU(),
    Linear(8,1),
    Sigmoid()
]

In [7]:
nn = NeuralNetwork(arch)

In [8]:
print(nn.forward_pass([0,1]))

[0.58491881]


In [9]:
def binary_cross_entropy_loss(prediction, ground_truth):
    return -(ground_truth * np.log(prediction) + (1-ground_truth) * np.log(1-prediction))

In [10]:
def derivative_bcel(prediction, ground_truth):
    x =  np.where(ground_truth == 0, 1.0 / (1.0 - prediction), -1.0 / prediction)
    return x

In [44]:
num_epochs = 10_000
batch_size = 4
for i in range(num_epochs):
    batch = np.random.randint(0,2,size=(batch_size, 2))
    labels = np.array([[i[0] ^ i[1]]for i in batch]).astype(float)
    y_hat = nn.forward_pass(batch)
    d = derivative_bcel(y_hat, labels)
    nn.backward_pass(d, 0.01)
    loss = binary_cross_entropy_loss(y_hat, labels)
batch = np.array([[0,0],[0,1],[1,0],[1,1]])
print(nn.forward_pass(batch))

[[0.5       ]
 [0.99036961]
 [0.99047328]
 [0.01451786]]
