In [36]:
import numpy as np
# Find a small float to avoid division by zero
epsilon = np.finfo(float).eps
# Sigmoid function and its differentiation
def sigmoid(z):
    return 1/(1+np.exp(-z.clip(-500, 500)))
def dsigmoid(z):
    s = sigmoid(z)
    return 2 * s * (1-s)
# ReLU function and its differentiation
def relu(z):
    return np.maximum(0, z)
def drelu(z):
    return (z > 0).astype(float)




In [37]:
#  Loss function L(y, yhat) and its differentiation
def cross_entropy(y, yhat):

    ''' Binary cross entropy function
    L = - y log yhat - (1-y) log (1-yhat)
    Args:
    y, yhat (np.array): 1xn matrices which n are the number of data instances
    Returns:
    average cross entropy value of shape 1x1, averaging over the n instances
    '''
    return (-(y.T @ np.log(yhat.clip(epsilon)) +
          (1-y.T) @ np.log((1-yhat).clip(epsilon))
          ) / y.shape[1])


def d_cross_entropy(y, yhat):
    '''dL/dyhat '''
    return (- np.divide(y, yhat.clip(epsilon))
        + np.divide(1-y, (1-yhat).clip(epsilon)))


In [38]:
class mlp:
    def __init__(self, layersizes, activations, derivatives, lossderiv):
        self.layersizes = layersizes
        self.activations = activations
        self.derivatives = derivatives
        self.lossderiv = lossderiv
        L = len(self.layersizes)
        self.z = [None]*L
        self.w = [None]*L
        self.b = [None]*L
        self.a = [None]*L
        self.dz = [None]*L
        self.dw = [None]*L
        self.db = [None]*L
        self.da = [None]*L

    def initialize(self, seed=42):
        np.random.seed(seed)
        sigma = 0.1
        for l, (n_in, n_out) in enumerate(zip(self.layersizes, self.layersizes[1:]), 1):
            self.w[l] = np.random.randn(n_in, n_out) * sigma
            self.b[l] = np.random.randn(1, n_out) * sigma

    def forward(self, x):

        self.a[0] = x
        for l, func in enumerate(self.activations, 1):

            self.z[l] = (self.a[l-1] @ self.w[l]) + self.b[l]

            self.a[l] = func(self.z[l])
        return self.a[-1]

    def backward(self, y, yhat):
        self.da[-1] = self.lossderiv(y, yhat)
        for l, func in reversed(list(enumerate(self.derivatives, 1))):

            self.dz[l] = self.da[l]*func(self.z[l])
            self.dw[l] = self.a[l-1].T@self.dz[l]
            self.db[l] = np.mean(self.dz[l], axis=0, keepdims=True)
            self.da[l-1] = self.dz[l] @ self.w[l].T

    def update(self, eta):
        for l in range(1, len(self.w)):
            self.w[l] -= eta*self.dw[l]
            self.b[l] -= eta * self.db[l]


In [39]:
from sklearn.datasets import make_circles
from sklearn.metrics import accuracy_score
# Make data: Two circles on x-y plane as a classification problem
X, y = make_circles(n_samples=1000, factor=0.5, noise=0.1)
y = y.reshape(-1,1) # our model expects a 2D array of (n_sample, n_dim)

In [40]:
model = mlp(layersizes=[2, 4, 3, 1],
activations=[relu, relu, sigmoid],
derivatives=[drelu, drelu, dsigmoid],
lossderiv=d_cross_entropy)
model.initialize()
yhat = model.forward(X)
loss = cross_entropy(y, yhat)
score = accuracy_score(y, (yhat > 0.5))
print(f"Before training - loss value {loss} accuracy {score}")


Before training - loss value [[693.63164393]] accuracy 0.5


In [42]:
n_epochs = 50
learning_rate = 0.005
for n in range(n_epochs):
    model.forward(X)
    yhat = model.a[-1]
    model.backward(y, yhat)
    model.update(learning_rate)
    loss = cross_entropy(y, yhat)
    score = accuracy_score(y, (yhat > 0.5))
    print(f"Iteration {n} - loss value {loss} accuracy {score}")

Iteration 0 - loss value [[693.49898455]] accuracy 0.5
Iteration 1 - loss value [[693.49715475]] accuracy 0.5
Iteration 2 - loss value [[693.49532309]] accuracy 0.5
Iteration 3 - loss value [[693.49348719]] accuracy 0.5
Iteration 4 - loss value [[693.49164582]] accuracy 0.5
Iteration 5 - loss value [[693.48979572]] accuracy 0.5
Iteration 6 - loss value [[693.48793506]] accuracy 0.5
Iteration 7 - loss value [[693.4860605]] accuracy 0.5
Iteration 8 - loss value [[693.48416805]] accuracy 0.5
Iteration 9 - loss value [[693.48225403]] accuracy 0.5
Iteration 10 - loss value [[693.48031224]] accuracy 0.5
Iteration 11 - loss value [[693.47833433]] accuracy 0.5
Iteration 12 - loss value [[693.47631829]] accuracy 0.5
Iteration 13 - loss value [[693.47425103]] accuracy 0.5
Iteration 14 - loss value [[693.472122]] accuracy 0.5
Iteration 15 - loss value [[693.46991919]] accuracy 0.5
Iteration 16 - loss value [[693.46763382]] accuracy 0.5
Iteration 17 - loss value [[693.4652324]] accuracy 0.5
Iterat