In [67]:
import numpy as np
from tensorflow import keras

In [68]:
def CE(y, v, epsilon=1e-15):
    """Binary cross-entropy between a prediction ``y`` and a target ``v``.

    Args:
        y: predicted probability (scalar or NumPy array).
        v: target value; the formula weights ``log(y)`` by ``v`` and
            ``log(1 - y)`` by ``1 - v``.
        epsilon: ``y`` is clamped to ``[epsilon, 1 - epsilon]`` before the
            logarithms are taken, so neither log receives 0.

    Returns:
        ``-v*log(y) - (1-v)*log(1-y)`` evaluated on the clamped prediction.
    """
    p = np.clip(y, epsilon, 1 - epsilon)
    target_term = -v*np.log(p)
    complement_term = (1-v)*np.log(1-p)
    return target_term - complement_term


def sRel(x):
    """Soft-ReLU (softplus) activation: ``log(1 + exp(x))``.

    Evaluated as ``logaddexp(0, x)``, i.e. ``log(exp(0) + exp(x))``, which
    needs no input clipping: for large positive ``x`` the result follows ``x``
    instead of saturating, and for very negative ``x`` it stays a small
    positive number instead of rounding to exactly 0.

    Args:
        x: scalar or NumPy array of pre-activations.

    Returns:
        The softplus of ``x``, with the same shape as ``x``.
    """
    return np.logaddexp(0, x)

def sig(z:float):
    """Logistic sigmoid ``1 / (1 + exp(-z))`` with clamped input and output.

    Args:
        z: pre-activation value. Only NumPy operations are applied to it, so
            array inputs are processed element-wise as well.

    Returns:
        The sigmoid of ``z``, limited to ``[1e-15, 1 - 1e-15]`` so it never
        reaches exactly 0 or 1.
    """
    # Bound the exponent: exp() overflows float64 slightly above 709.
    bounded = np.clip(z, -709, 709)
    raw = 1/(1 + np.exp(-bounded))
    return np.clip(raw, 1e-15, 1-1e-15)

def sRelDer(x):
    """Derivative of the soft-ReLU: ``exp(x) / (1 + exp(x))``.

    Args:
        x: scalar or NumPy array of pre-activations.

    Returns:
        The derivative evaluated on ``x`` clamped to ``[-709, 709]``.
    """
    # Clamp first so the exponential stays finite in float64.
    grown = np.exp(np.clip(x, -709, 709))
    return grown/(1+grown)

def CEDer(y, v, epsilon=1e-15):
    """Derivative of the binary cross-entropy with respect to the prediction.

    Args:
        y: predicted probability (scalar or NumPy array).
        v: target value.
        epsilon: ``y`` is clamped to ``[epsilon, 1 - epsilon]`` so neither
            division is by zero. Exposed as a parameter (defaulting to the
            previously hard-coded 1e-15) so it can be kept in step with the
            ``epsilon`` used for the loss itself.

    Returns:
        ``-v/y + (1-v)/(1-y)`` evaluated on the clamped prediction.
    """
    y = np.clip(y, epsilon, 1 - epsilon)
    return (-v/y + (1-v)/(1-y))

def sigDer(x):
    """Derivative of the sigmoid, ``sig(x) * (1 - sig(x))``.

    Args:
        x: pre-activation value(s), forwarded unchanged to ``sig``.

    Returns:
        The product of ``sig(x)`` and its complement.
    """
    activated = sig(x)
    return activated*(1-activated)



In [69]:
class NN:
    """Small fully connected network with one output unit, trained by full-batch gradient descent.

    Each layer keeps its bias in column 0 of its weight matrix: a constant 1
    is prepended to every layer's input vector, so biases are learned together
    with the other weights.
    """

    def __init__(self,  loss_func,loss_der, backward_func, forward_funcs, layers=2, neurons_per_layer=2, initial_x=2):
        """Create the weight matrices and store the activation/loss callables.

        Args:
            loss_func: callable ``(prediction, target) -> loss``.
            loss_der: callable ``(prediction, target)`` returning the
                derivative of the loss with respect to the prediction.
            backward_func: list of activation derivatives, one per weight
                matrix, in layer order.
            forward_funcs: list of activations, one per weight matrix, in
                layer order.
            layers: layer count; ``layers - 2`` hidden-to-hidden matrices are
                created between the input matrix and the output matrix.
            neurons_per_layer: width of every non-output layer.
            initial_x: number of input features.
        """
        self.layers = layers
        # Input matrix: (bias + initial_x inputs) -> first layer.
        self.W = [np.random.randn(neurons_per_layer, initial_x+1)]
        # Hidden-to-hidden matrices.
        for _ in range(layers-2):
            self.W.append(np.random.randn(neurons_per_layer, neurons_per_layer+1))
        # The network has exactly one output unit.
        self.W.append(np.random.randn(1, neurons_per_layer+1))
        self.loss_func = loss_func
        self.forward_funcs = forward_funcs
        self.loss_der = loss_der
        self.backward_func = backward_func


    # -------------------------- forward prop
    def forward_prop(self,X, v, W):
        """Run one sample through the network.

        Args:
            X: input features of one sample.
            v: target value, passed to the loss function.
            W: list of weight matrices to use.

        Returns:
            Tuple ``(y, z, Loss)``. ``y`` holds the bias-augmented input
            followed by each layer's output (``len(W) + 1`` entries; every
            entry except the final one starts with the constant 1). ``z``
            holds each layer's pre-activation (``len(W)`` entries). ``Loss``
            is the loss of the final output against ``v``.
        """
        # Prepend the constant 1 that multiplies the bias column.
        y = [np.insert(X, 0, 1)]
        z = []

        final = len(W) - 1
        for i, weights in enumerate(W):
            zi = weights @ y[-1]
            z.append(zi)

            out = self.forward_funcs[i](zi)
            if i < final:
                # Hidden outputs feed another layer, so they need the bias input too.
                out = np.insert(out, 0, 1)
            y.append(out)

        Loss = self.loss_func(y[-1], v)
        return (y,z,Loss)


    # --------------------------------------- Backward Propagation
    def backward_prob(self, W, y, z, v):
        """Back-propagate the loss of one sample.

        Layer ``k`` is always differentiated with ``backward_func[k]``
        evaluated at ``z[k]``, so every layer uses the derivative of its own
        activation. Indices are derived from ``len(W)``. The ``y`` and ``z``
        lists are read only; they are not modified.

        Args:
            W: list of weight matrices used in the forward pass.
            y: layer outputs returned by ``forward_prop``.
            z: pre-activations returned by ``forward_prop``.
            v: target value.

        Returns:
            List of gradient matrices, one per entry of ``W`` and with the
            same shapes.
        """
        last = len(W) - 1

        # Output layer: dLoss/dy, then through the output activation.
        upstream = self.loss_der(y[last + 1], v)
        deltas = [self.backward_func[last](z[last]) * upstream]

        # Walk back through the remaining layers, last hidden layer first.
        for k in range(last - 1, -1, -1):
            # Drop entry 0: it belongs to the constant bias input, which has
            # no upstream layer.
            upstream = (W[k + 1].T @ deltas[-1])[1:]
            deltas.append(self.backward_func[k](z[k]) * upstream)

        deltas.reverse()

        # dW[k] is the outer product of layer k's delta and layer k's input.
        return [
            delta.reshape(-1, 1) @ np.asarray(y[k]).reshape(1, -1)
            for k, delta in enumerate(deltas)
        ]

    # Correctly spelled alias; the original name stays available.
    backward_prop = backward_prob


    def train(self, dataset, lr=0.01, tol=0.0000001, max_epochs=None):
        """Full-batch gradient descent until the summed loss stops changing.

        Args:
            dataset: iterable of samples; ``sample[0]`` is the input and
                ``sample[1]`` the target. It is materialised once so it can
                be traversed every epoch.
            lr: learning rate applied to the mean gradient.
            tol: training stops once the change in summed loss between two
                consecutive epochs is not greater than this value.
            max_epochs: optional upper bound on the number of epochs;
                ``None`` keeps iterating until the ``tol`` criterion is met.

        Raises:
            ValueError: if ``dataset`` is empty (averaging zero gradients
                would otherwise turn every weight into NaN).
        """
        samples = list(dataset)
        if not samples:
            raise ValueError("dataset must contain at least one sample")

        # The first epoch is compared against 0, so it always counts as a change.
        previous_loss = 0
        delta = float("inf")
        epoch = 0
        while delta > tol and (max_epochs is None or epoch < max_epochs):
            total_loss = 0
            grad_sums = [np.zeros_like(w) for w in self.W]

            for data in samples:
                y, z, Loss = self.forward_prop(data[0], data[1], self.W)
                total_loss += Loss
                dW = self.backward_prob(self.W, y, z, data[1])
                for layer in range(len(grad_sums)):
                    grad_sums[layer] += dW[layer]

            # Step along the mean gradient over the whole dataset.
            for i in range(len(self.W)):
                self.W[i] -= lr * (grad_sums[i] / len(samples))

            delta = float(np.max(np.abs(previous_loss - total_loss)))
            previous_loss = total_loss
            epoch += 1


    def test(self, dataset):
        """Print the input, the network output and the loss for every sample.

        Args:
            dataset: iterable of samples; ``sample[0]`` is the input and
                ``sample[1]`` the target.
        """
        for data in dataset :
            y,z,Loss = self.forward_prop(data[0], data[1], self.W)
            print(data[0],y[-1], Loss)

## AND

In [70]:
# Truth table for logical AND; every sample is [inputs, target].
dataset = [[[1,1],1], [[0,0],0], [[1,0],0], [[0,1],0]]

layers = 5

# Soft-ReLU on every layer except the last one, which uses the sigmoid.
forward_funcs = [sRel] * (layers - 1) + [sig]
# Matching derivatives, listed in the same layer order.
backward_funcs = [sRelDer] * (layers - 1) + [sigDer]

# NOTE: a disabled experiment used to sit here. It loaded MNIST with
# keras.datasets.mnist.load_data(), divided the images by 255.0 and
# flattened them with reshape(-1, 28*28).

andx = NN(
    loss_func=CE,
    loss_der=CEDer,
    backward_func=backward_funcs,
    forward_funcs=forward_funcs,
    layers=layers,
    neurons_per_layer=10,
    initial_x=len(dataset[0][0]),
)

andx.train(dataset, lr=0.1)

andx.test(dataset)


[1, 1] [0.99970096] [0.00029909]
[0, 0] [1.e-15] [9.99200722e-16]
[1, 0] [0.00011907] [0.00011908]
[0, 1] [0.00012688] [0.00012689]


## OR

In [71]:
# Truth table for logical OR; every sample is [inputs, target].
dataset = [[[1,1],1], [[0,0],0], [[1,0],1], [[0,1],1]]

layers = 5

# Soft-ReLU on every layer except the last one, which uses the sigmoid.
forward_funcs = [sRel] * (layers - 1) + [sig]
# Matching derivatives, listed in the same layer order.
backward_funcs = [sRelDer] * (layers - 1) + [sigDer]

# NOTE: a disabled experiment used to sit here. It loaded MNIST with
# keras.datasets.mnist.load_data(), divided the images by 255.0 and
# flattened them with reshape(-1, 28*28).

orx = NN(
    loss_func=CE,
    loss_der=CEDer,
    backward_func=backward_funcs,
    forward_funcs=forward_funcs,
    layers=layers,
    neurons_per_layer=10,
    initial_x=len(dataset[0][0]),
)

orx.train(dataset, lr=0.1)

orx.test(dataset)


[1, 1] [1.] [9.99200722e-16]
[0, 0] [0.00025011] [0.00025014]
[1, 0] [0.99986308] [0.00013693]
[0, 1] [0.99992284] [7.71624127e-05]


## XOR

In [72]:
# Truth table for logical XOR; every sample is [inputs, target].
dataset = [[[1,1],0], [[0,0],0], [[1,0],1], [[0,1],1]]

layers = 5

# Soft-ReLU on every layer except the last one, which uses the sigmoid.
forward_funcs = [sRel] * (layers - 1) + [sig]
# Matching derivatives, listed in the same layer order.
backward_funcs = [sRelDer] * (layers - 1) + [sigDer]

# NOTE: a disabled experiment used to sit here. It loaded MNIST with
# keras.datasets.mnist.load_data(), divided the images by 255.0 and
# flattened them with reshape(-1, 28*28).

xor = NN(
    loss_func=CE,
    loss_der=CEDer,
    backward_func=backward_funcs,
    forward_funcs=forward_funcs,
    layers=layers,
    neurons_per_layer=10,
    initial_x=len(dataset[0][0]),
)

xor.train(dataset, lr=0.1)

xor.test(dataset)


[1, 1] [0.00027077] [0.00027081]
[0, 0] [3.10789393e-05] [3.10794223e-05]
[1, 0] [1.] [9.99200722e-16]
[0, 1] [0.99947854] [0.0005216]


[1. 1. 1.]
z1 ---> [0.3 0.3]
y1 ---> [0.85435524 0.85435524]
z2 ---> [0.27087105]
y2 ---> [0.56730673]
Loss ---> [0.83772619]
y2p ---> [-2.31110599] ------ z2p ---> [-0.56730673]
y1p --> [-0.05673067 -0.05673067] ---- z1p ---> [-0.03258851 -0.03258851]
--------------dW----------------
 [array([[-0.03258851, -0.03258851, -0.03258851],
       [-0.03258851, -0.03258851, -0.03258851]]), array([[-0.56730673, -0.48468148, -0.48468148]])]