In [1]:
import numpy as np
import math
import random
import pandas as pd
import plotly.express as px
import multiprocessing
from sklearn.model_selection import train_test_split

np.set_printoptions(threshold=np.inf)


def sigmoid(z: np.array):
    return 1 / (1 + np.exp(-np.clip(z, -100, 100)))


def tanh(z: np.array):
    z = np.clip(z, -100, 100)
    expZ = np.exp(z)
    expZ_minus = np.exp(-z)
    return (expZ - expZ_minus) / (expZ + expZ_minus)



def reLU(z: np.array):
    return np.maximum(0.0, z)


def leakyReLU(z: np.array):
    return np.maximum(0.01 * z, z)

def softMax(z: np.array):
    z = np.clip(z, -100, 100)
    t = np.exp(z)
    return t / np.sum(t, axis=0, keepdims=True)

In [14]:
class Model:
    class Layer:
        def __init__(self, layerHeight, prevLayer, nextLayer, activationFunc="tanh"):
            self.layerHeight = layerHeight
            self.w = np.random.randn(layerHeight, prevLayer.layerHeight) * np.sqrt(1 / prevLayer.layerHeight)
            self.b = np.zeros((layerHeight, 1))
            self.v_dw = np.zeros((layerHeight, prevLayer.layerHeight))
            self.v_db = np.zeros((layerHeight, 1))
            self.s_dw = np.zeros((layerHeight, prevLayer.layerHeight))
            self.s_db = np.zeros((layerHeight, 1))
            self.t = 1
            self.a, self.z = None, None
            self.d = 1

            # self.dy_dz = lambda nextL, dz_Next, a, z: (nextL.w.T @ dz_Next)
            if activationFunc == "sigmoid":
                self.activation = sigmoid
                # self.da_dz = lambda a, z: np.multiply(a, (1 - a))
                self.dy_dz = lambda nextL, dz_Next, a, z: (nextL.w.T @ dz_Next) * (a - np.square(a))
            elif activationFunc == "tanh":
                self.activation = np.tanh
                # self.da_dz = lambda a, z: (1 - np.square(a))
                self.dy_dz = lambda nextL, dz_Next, a, z: (nextL.w.T @ dz_Next) * (1 - np.square(a))
            elif activationFunc == "relu":
                self.activation = reLU
                # self.da_dz = lambda a, z: np.where(z > 0, 1, 0)
                self.dy_dz = lambda nextL, dz_Next, a, z: (nextL.w.T @ dz_Next) * (np.where(z > 0, 1, 0))
            elif activationFunc == "leaky_relu":
                self.activation = leakyReLU
                # self.da_dz = lambda a, z: np.where(z > 0, 1, 0.01)
                self.dy_dz = lambda nextL, dz_Next, a, z: (nextL.w.T @ dz_Next) * (np.where(z > 0, 1, 0.01))
            elif activationFunc == "softmax":
                self.activation = softMax
                # self.da_dz = lambda a, z: a - np.square(a)
                self.dy_dz = lambda nextL, dz_Next, a, z: ValueError("softmax shouldn't be used for hidden layers")
            self.prevLayer = prevLayer if prevLayer else None
            self.nextLayer = nextLayer if nextLayer else None

        def predict(self, a_Prev: np.array, keepActiveProb: float):
            self.d = np.random.random((self.layerHeight, a_Prev.shape[1])) < keepActiveProb
            self.z = (self.w @ a_Prev) + self.b
            self.a = self.activation(self.z) * self.d
            self.a /= keepActiveProb
            # self.a = self.activation((self.w @ a_Prev) + self.b)
            return self.nextLayer.predict(self.a, keepActiveProb)

        def propagate(self, dz_Next: np.array, trainingStep: float, l2_lambda=0):
            # da = self.dy_da(self.a, dz_Next, self.nextLayer) * self.d
            # dz = self.da_dz(self.a, self.z) @ da
            dz = self.dy_dz(self.nextLayer, dz_Next, self.a, self.z) * self.d
            dw = ((dz @ self.prevLayer.a.T) / dz.shape[1]) + ((l2_lambda / dz.shape[1]) * self.w)
            db = np.mean(dz, axis=1, keepdims=True)

            beta1 = 0.9
            beta2 = 0.999
            self.v_dw = (beta1 * self.v_dw) + ((1 - beta1) * dw)
            self.v_db = (beta1 * self.v_db) + ((1 - beta1) * db)
            self.s_dw = (beta2 * self.s_dw) + ((1 - beta2) * np.square(dw))
            self.s_db = (beta2 * self.s_db) + ((1 - beta2) * np.square(db))
            v_dw_cor = self.v_dw / (1 - (beta1 ** self.t))
            v_db_cor = self.v_db / (1 - (beta1 ** self.t))
            s_dw_cor = self.s_dw / (1 - (beta2 ** self.t))
            s_db_cor = self.s_db / (1 - (beta2 ** self.t))

            self.prevLayer.propagate(dz, trainingStep, l2_lambda)
            self.w -= trainingStep * (v_dw_cor / (np.sqrt(s_dw_cor) + (10 ** -8)))
            self.b -= trainingStep * (v_db_cor / (np.sqrt(s_db_cor) + (10 ** -8)))
            self.t += 1

    class Head(Layer):
        def __init__(self, layerHeight, prevLayer, nextLayer, activationFunc="sigmoid-logistic"):
            super().__init__(layerHeight, prevLayer, nextLayer)
            if activationFunc == "sigmoid-logistic":
                self.activation = sigmoid
                self.dy_dz = lambda nextL, y, a, z: a - y
                self.error = lambda y_hat, y: np.sum(-np.mean(
                    (y * np.log(np.clip(y_hat, 0.001, 1))) + (
                            (1 - y) * np.log(np.clip(1 - y_hat, 0.001, 1))), axis=1, keepdims=True))
            elif activationFunc == "sigmoid-r2":
                self.activation = sigmoid
                self.dy_dz = lambda nextL, y, a, z: 2 * (a - y) * (a - np.square(a))
                self.error = lambda y_hat, y: np.mean(np.square(y - y_hat))
            elif activationFunc == "relu-r2":
                self.activation = reLU
                self.dy_dz = lambda nextL, y, a, z: 2 * (a - y) * (np.where(z > 0, 1, 0))
                self.error = lambda y_hat, y: np.mean(np.square(y - y_hat))
            elif activationFunc == "softmax-logistic":
                self.activation = softMax
                self.dy_dz = lambda nextL, y, a, z: a - y
                self.error = lambda y_hat, y: np.sum(-np.mean(
                    y * np.log(np.clip(y_hat, 0.001, 1)), axis=1, keepdims=True))

        def predict(self, a_Prev: np.array, keepActiveProb: float):
            self.z = (self.w @ a_Prev) + self.b
            self.a = self.activation(self.z)
            return self.a

    class Input(Layer):
        def __init__(self, layerHeight, nextLayer):
            self.layerHeight = layerHeight
            self.a = None
            self.w = np.identity(self.layerHeight)
            self.b = np.zeros((layerHeight, 1))
            self.nextLayer = nextLayer if nextLayer else None

        def predict(self, a_Prev: np.array, keepActiveProb: float):
            self.a = (self.w @ a_Prev) + self.b
            return self.nextLayer.predict(self.a, keepActiveProb)

        def propagate(self, realY, trainingStep, l2_lambda=0):
            return

    def __init__(self, xSize, ySize, layers, activationFunc="tanh", headActivation="sigmoid-logistic"):
        """
        activationFunc = "sigmoid" | "tanh" | "relu" | "leaky_relu" | "softmax" \n
        headActivation = "sigmoid-logistic" | "sigmoid-r2" | "relu-r2" | "softmax-logistic"
        """
        self.head = None

        def createLayer(prevLayer, depth):
            if depth >= len(layers):
                layer = self.Head(ySize, prevLayer, None, activationFunc=headActivation)
                self.head = layer
                return layer
            else:
                layer = self.Layer(layers[depth], prevLayer, None, activationFunc=activationFunc)
                layer.nextLayer = createLayer(layer, depth + 1)
                return layer

        self.input = self.Input(xSize, None)
        self.input.nextLayer = createLayer(self.input, 0)
        self.trainingData = pd.DataFrame([], columns=['Name', 'Iteration', 'Value'])

    def predict(self, a: np.array, keepActiveProb=1.0):
        return self.input.predict(a, keepActiveProb)

    def train(self, X: np.array, Y: np.array, trainingStep, trainingIterations,
              debugStep=100, x_dev=None, y_dev=None, epochSize=1024, keepActiveProb=1.0, l2_reg_lambda=0):
        """
        epochSize ~ 128 | 256 | 512 \n
        keepActiveProb ~ ratio of active neurons {>0.8} \n
        l2_reg_lambda ~ weight decay coefficient {<2.0} \n
        """
        trainSetMean = np.mean(X, axis=1, keepdims=True)
        trainSetR2 = np.mean(np.square(X - trainSetMean), axis=1, keepdims=True)
        self.input.w = self.input.w / np.sqrt(trainSetR2)
        self.input.b = -trainSetMean / np.sqrt(trainSetR2)
        epochs = X.shape[1] // epochSize
        display('epochs Amount: ' + str(epochs))
        for tr_i in range(trainingIterations):
            perm_indices = np.random.permutation(X.shape[1])
            x = X[:, perm_indices]
            y = Y[:, perm_indices]
            for e in range(epochs):
                self.predict(x[:, epochSize * e: epochSize * (e + 1)], keepActiveProb)
                self.head.propagate(y[:, epochSize * e: epochSize * (e + 1)], trainingStep, l2_reg_lambda)
            if x_dev is not None and tr_i % debugStep == 0:
                self.predict(x_dev)
                self.trainingData.loc[len(self.trainingData)] = (
                    'ErrorDev', tr_i // debugStep, self.head.error(self.head.a, y_dev))
                self.trainingData.loc[len(self.trainingData)] = (
                    'PrecisionDev', tr_i // debugStep, np.mean((y_dev == (self.head.a > 0.5)).astype(int)))
            if tr_i % debugStep == 0:
                self.predict(x)
                self.trainingData.loc[len(self.trainingData)] = (
                    'Error', tr_i // debugStep, self.head.error(self.head.a, y))
                self.trainingData.loc[len(self.trainingData)] = (
                    'Precision', tr_i // debugStep, np.mean((y == (self.head.a > 0.5)).astype(int)))


In [3]:
df = pd.read_csv('Datasets/Iris.csv')

# Define your features (X) and target variable (y)
X_df = df[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
y_df = pd.get_dummies(df['Species'])

# Split the dataset into training and test sets
x_train, x_dev, y_train, y_dev = train_test_split(X_df, y_df, test_size=0.2)
x_train = x_train.T.values
y_train = y_train.T.values
x_dev = x_dev.T.values
y_dev = y_dev.T.values
display(x_train)
display(y_train)

array([[6.8, 6.9, 5. , 5.8, 6.7, 5.6, 5.6, 6.3, 4.9, 6.7, 7.6, 7.2, 5. ,
        5.9, 6. , 5.6, 6.9, 5.4, 4.4, 6.1, 6.3, 5.2, 6.2, 4.9, 4.4, 6.3,
        6.1, 5.7, 4.5, 6.4, 6.7, 5.1, 6.1, 5.8, 4.8, 6.4, 5.7, 5.6, 5.2,
        4.6, 5.5, 7.3, 4.8, 5.9, 4.7, 4.9, 6.5, 5. , 5.7, 6.7, 5.1, 7.1,
        5.3, 5.2, 5.9, 6.7, 4.9, 5. , 6.9, 6.4, 6.9, 5.4, 7.4, 5.5, 6. ,
        6.3, 5.5, 7.9, 5.5, 5.1, 5.6, 4.7, 6.3, 6.4, 5.1, 5.4, 5. , 4.8,
        5.5, 7.2, 4.4, 6.3, 5.8, 4.6, 6.5, 5.4, 5.4, 7.7, 5.1, 7.7, 5.7,
        6.3, 6.8, 6. , 5. , 4.6, 6.7, 6.1, 6.4, 6. , 4.3, 6.2, 5.6, 5. ,
        5.5, 6.8, 6.1, 5.7, 4.6, 6.2, 5. , 5. , 4.9, 5.8, 5.7, 6.7, 6.3,
        6.6, 6.3, 5.7],
       [3. , 3.2, 3.4, 2.7, 3. , 2.7, 3. , 2.7, 2.4, 3.3, 3. , 3.6, 2.3,
        3. , 3.4, 2.8, 3.1, 3.9, 2.9, 2.8, 2.3, 3.5, 3.4, 3.1, 3.2, 3.3,
        3. , 2.8, 2.3, 2.8, 3.1, 3.3, 2.9, 2.7, 3. , 3.1, 2.8, 2.5, 3.4,
        3.6, 2.5, 2.9, 3.4, 3.2, 3.2, 3.1, 3. , 3. , 2.5, 3.3, 3.8, 3. ,
        3.7, 4.1, 3. , 3.1,

array([[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1,
        0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0,
        1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0,
        1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0,
        0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0,
        0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0,
        1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1,
        1, 0, 0, 1, 1, 0, 1, 1, 0, 1],
       [1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
        1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 1, 0, 1, 1, 0, 1, 0,

In [19]:
np.random.seed(69)
model = Model(4, 3, [25, 25, 25, 25], activationFunc="tanh", headActivation="softmax-logistic")
model.train(x_train, y_train, 0.001, 1001, x_dev=x_dev, y_dev=y_dev, epochSize=120, debugStep=50, l2_reg_lambda=1)

'epochs Amount: 1'

In [20]:
px.line(model.trainingData, x='Iteration', y='Value', color='Name')

In [13]:
model.predict(np.hstack((x_train, x_dev)))

array([[ 1.32636998e-02,  4.71371829e-03,  9.90022344e-01,
        -3.65493802e-03,  4.21292711e-02, -3.18050747e-02,
        -2.75519504e-04, -1.08431083e-02,  1.45487558e-01,
        -2.53748771e-02, -3.15864378e-03, -1.43969461e-02,
         7.51817392e-02, -5.53675477e-02, -1.36088365e-02,
         5.85228647e-03,  6.95468049e-03,  9.91083467e-01,
         9.89735373e-01, -2.97974987e-02, -1.20349268e-01,
         9.91515107e-01, -1.85276925e-02,  9.90985459e-01,
         9.90789908e-01, -7.56906502e-02, -2.49185980e-02,
        -3.91465900e-02,  9.80709297e-01, -1.92969028e-02,
        -1.39823548e-02,  9.88681561e-01,  2.12560271e-02,
         1.32198296e-02,  9.89752899e-01,  1.28152939e-02,
         2.06432960e-03, -2.80329140e-02,  9.91536225e-01,
         9.91979857e-01, -3.67986751e-02,  1.91183306e-02,
         9.90856524e-01,  2.20608362e-02,  9.90347464e-01,
         9.90985459e-01,  7.16731625e-03,  9.89882032e-01,
         7.21791753e-05,  1.27502010e-02,  9.91453754e-0