In [1]:
import numpy as np
import math
import random
import pandas as pd
import plotly.express as px
import multiprocessing
def sigmoid(z: np.array):
    return 1/(1+np.exp(-np.clip(z, -100, 100)))
def tanh(z: np.array):
    z = np.clip(z, -100, 100)
    expZ = np.exp(z)
    expZ_minus = np.exp(-z)
    return (expZ - expZ_minus) / (expZ + expZ_minus)
def reLU(z: np.array):
    return np.maximum(0.0, z)
def leakyReLU(z: np.array):
    return np.maximum(0.01*z, z)

In [2]:
class Model:
    class Layer:
        def __init__(self, layerHeight, prevLayer, nextLayer, activationFunc="sigmoid"):
            self.layerHeight = layerHeight
            self.w = np.random.randn(layerHeight, prevLayer.layerHeight) * 0.01
            self.b = np.zeros((layerHeight, 1))
            self.dw = np.zeros((layerHeight, prevLayer.layerHeight))
            self.db = np.zeros((layerHeight, 1))
            self.a = None
            if activationFunc == "sigmoid":
                self.activation = sigmoid
                self.da_dz = lambda a : np.multiply(a, (1 - a))
            elif activationFunc == "tanh":
                self.activation = np.tanh
                self.da_dz = lambda a: (1 - np.square(a))
            elif activationFunc == "relu":
                self.activation = reLU
                self.da_dz = lambda a: np.where(a > 0, 1, 0)#just taking advantage of the fact a = z for positive and a = 0 for negative
            elif activationFunc == "leaky_relu":
                self.activation = leakyReLU
                self.da_dz = lambda a: np.where(a > 0, 1, 0.01)
            self.prevLayer = prevLayer if prevLayer else None
            self.nextLayer = nextLayer if nextLayer else None
        def predict(self, a_Prev: np.array, keepActiveProb: float):
            self.a = (self.activation((self.w @ a_Prev) + self.b) *
                      (np.random.random((self.layerHeight, a_Prev.shape[1])) < keepActiveProb))
            self.a /= keepActiveProb
            # self.a = self.activation((self.w @ a_Prev) + self.b)
            return self.nextLayer.predict(self.a, keepActiveProb)
        def propagate(self, dz_Next: np.array, trainingStep):
            da = self.nextLayer.w.T @ dz_Next
            dz = self.da_dz(self.a) * da
            self.dw = (0.9 * self.dw) + 0.1*((dz @ self.prevLayer.a.T) / dz.shape[1])
            self.db = (0.9 * self.db) + 0.1*np.mean(dz, axis=1,keepdims=True)
            self.prevLayer.propagate(dz, trainingStep)
            self.w -= trainingStep * self.dw
            self.b -= trainingStep * self.db
    class Head(Layer):
        def predict(self, a_Prev: np.array, keepActiveProb: float):
            self.a = sigmoid((self.w @ a_Prev) + self.b)
            return self.a
        def propagate(self, realY, trainingStep):
            dz = self.a - realY
            self.dw = (0.9 * self.dw) + 0.1*((dz @ self.prevLayer.a.T) / dz.shape[1])
            self.db = (0.9 * self.db) + 0.1*np.mean(dz, axis=1,keepdims=True)
            self.prevLayer.propagate(dz, trainingStep)
            self.w -= trainingStep * self.dw
            self.b -= trainingStep * self.db

    class Input(Layer):
        def __init__(self, layerHeight, nextLayer):
            self.layerHeight = layerHeight
            self.a = None
            self.nextLayer = nextLayer if nextLayer else None
        def predict(self, a_Prev: np.array, keepActiveProb: float):
            self.a = a_Prev
            return self.nextLayer.predict(self.a, keepActiveProb)
        def propagate(self, realY, trainingStep):
            return
    """activationFunc = "sigmoid" | "tanh" | "relu" | "leaky_relu" """
    def __init__(self, xSize, layers, activationFunc="sigmoid"):
        self.head = None
        def createLayer(prevLayer, depth):
            if depth >= len(layers):
                layer = self.Head(1, prevLayer, None)
                self.head = layer
                return layer
            else:
                layer = self.Layer(layers[depth], prevLayer, None, activationFunc=activationFunc)
                layer.nextLayer = createLayer(layer, depth + 1)
                return layer


        self.input = self.Input(xSize, None)
        self.input.nextLayer = createLayer(self.input, 0)
        self.trainingData = pd.DataFrame([], columns=['Name', 'Iteration', 'Value'])
    def predict(self, a: np.array, keepActiveProb=1):
        return self.input.predict(a, keepActiveProb)
    def train(self, x: np.array, y: np.array, trainingStep, trainingIterations, keepActiveProb, debug=False, debugStep=100):
        if debug:
            lastW = list()
            lastB = list()
            currentLayer = self.input.nextLayer
            while currentLayer:
                lastW.append(currentLayer.w.copy())
                lastB.append(currentLayer.b.copy())
                currentLayer = currentLayer.nextLayer
        for tr_i in range(trainingIterations):
            self.predict(x, keepActiveProb)
            if tr_i % 100 == 0:
                self.trainingData.loc[len(self.trainingData)] = ('Error', tr_i//100, -np.mean((y * np.log(np.clip(self.head.a, 0.001, 1))) + ((1 - y) * np.log(np.clip(1 - self.head.a, 0.001, 1)))))
                self.trainingData.loc[len(self.trainingData)] = ('Precision', tr_i//100, np.mean((y == (self.head.a > 0.5)).astype(int)))

                if debug:
                    wChange, bChange = 0, 0
                    i = 0
                    currentLayer = self.input.nextLayer
                    while currentLayer:
                        wChange += np.sum(np.abs(currentLayer.w - lastW[i]))
                        bChange += np.sum(np.abs(currentLayer.b - lastB[i]))
                        lastW[i] = currentLayer.w.copy()
                        lastB[i] = currentLayer.b.copy()
                        i += 1
                        currentLayer = currentLayer.nextLayer
                    self.trainingData.loc[len(self.trainingData)] = ('w', tr_i // debugStep, wChange)
                    self.trainingData.loc[len(self.trainingData)] = ('b', tr_i // debugStep, bChange)
            self.head.propagate(y, trainingStep)


In [24]:
class Model_Improved:
    class Layer:
        def __init__(self, layerHeight, prevLayer, nextLayer, activationFunc="sigmoid"):
            self.layerHeight = layerHeight
            self.w = np.random.randn(layerHeight, prevLayer.layerHeight) * np.sqrt(1/prevLayer.layerHeight)
            self.b = np.zeros((layerHeight, 1))
            self.dw = np.zeros((layerHeight, prevLayer.layerHeight))
            self.db = np.zeros((layerHeight, 1))
            self.a = None
            self.d = None
            if activationFunc == "sigmoid":
                self.activation = sigmoid
                self.da_dz = lambda a: np.multiply(a, (1 - a))
            elif activationFunc == "tanh":
                self.activation = np.tanh
                self.da_dz = lambda a: (1 - np.square(a))
            elif activationFunc == "relu":
                self.activation = reLU
                self.da_dz = lambda a: np.where(a > 0, 1,
                                                0)  #just taking advantage of the fact a = z for positive and a = 0 for negative
            elif activationFunc == "leaky_relu":
                self.activation = leakyReLU
                self.da_dz = lambda a: np.where(a > 0, 1, 0.01)
            self.prevLayer = prevLayer if prevLayer else None
            self.nextLayer = nextLayer if nextLayer else None

        def predict(self, a_Prev: np.array, keepActiveProb: float):
            self.d = np.random.random((self.layerHeight, a_Prev.shape[1])) < keepActiveProb
            self.a = self.activation((self.w @ a_Prev) + self.b) * self.d
            self.a /= keepActiveProb
            # self.a = self.activation((self.w @ a_Prev) + self.b)
            return self.nextLayer.predict(self.a, keepActiveProb)

        def propagate(self, dz_Next: np.array, trainingStep):
            da = (self.nextLayer.w.T @ dz_Next) * self.d
            dz = self.da_dz(self.a) * da
            self.dw = (0.9 * self.dw) + 0.1*((dz @ self.prevLayer.a.T) / dz.shape[1])
            self.db = (0.9 * self.db) + 0.1*np.mean(dz, axis=1,keepdims=True)
            self.prevLayer.propagate(dz, trainingStep)
            self.w -= trainingStep * self.dw
            self.b -= trainingStep * self.db

    class Head(Layer):
        def predict(self, a_Prev: np.array, keepActiveProb: float):
            self.a = sigmoid((self.w @ a_Prev) + self.b)
            return self.a

        def propagate(self, realY, trainingStep):
            dz = self.a - realY
            self.dw = (0.9 * self.dw) + 0.1*((dz @ self.prevLayer.a.T) / dz.shape[1])
            self.db = (0.9 * self.db) + 0.1*np.mean(dz, axis=1,keepdims=True)
            self.prevLayer.propagate(dz, trainingStep)
            self.w -= trainingStep * self.dw
            self.b -= trainingStep * self.db

    class Input(Layer):
        def __init__(self, layerHeight, nextLayer):
            self.layerHeight = layerHeight
            self.a = None
            self.w = np.identity(self.layerHeight)
            self.b = np.zeros((layerHeight, 1))
            self.nextLayer = nextLayer if nextLayer else None

        def predict(self, a_Prev: np.array, keepActiveProb: float):
            self.a = (self.w @ a_Prev) + self.b
            return self.nextLayer.predict(self.a, keepActiveProb)

        def propagate(self, realY, trainingStep):
            return

    """activationFunc = "sigmoid" | "tanh" | "relu" | "leaky_relu" """
    def __init__(self, xSize, layers, activationFunc="sigmoid"):
        self.head = None

        def createLayer(prevLayer, depth):
            if depth >= len(layers):
                layer = self.Head(1, prevLayer, None)
                self.head = layer
                return layer
            else:
                layer = self.Layer(layers[depth], prevLayer, None, activationFunc=activationFunc)
                layer.nextLayer = createLayer(layer, depth + 1)
                return layer

        self.input = self.Input(xSize, None)
        self.input.nextLayer = createLayer(self.input, 0)
        self.trainingData = pd.DataFrame([], columns=['Name', 'Iteration', 'Value'])

    def predict(self, a: np.array, keepActiveProb=1.0):
        return self.input.predict(a, keepActiveProb)

    def train(self, x: np.array, y: np.array, trainingStep, trainingIterations, keepActiveProb, debug=False,
              debugStep=100, devSet=False, x_dev=None, y_dev=None, epochSize=1024):
        trainSetMean = np.mean(x, axis=1, keepdims=True)
        trainSetR2 = np.mean(np.square(x - trainSetMean), axis=1, keepdims=True)
        # trainSetMean = 4.5
        # trainSetR2 = 4.5**2
        self.input.w = self.input.w / np.sqrt(trainSetR2)
        self.input.b = -trainSetMean / np.sqrt(trainSetR2)
        epochs = x.shape[1] // epochSize
        display('epochs Amount: ' + str(epochs))
        for tr_i in range(trainingIterations):
            for e in range(epochs):
                self.predict(x[:, epochSize * e: epochSize * (e + 1)], keepActiveProb)
                self.head.propagate(y[:, epochSize * e: epochSize * (e + 1)], trainingStep)
            if devSet and tr_i % debugStep == 0:
                self.predict(x_dev)
                self.trainingData.loc[len(self.trainingData)] = ('ErrorDev', tr_i // 100, -np.mean(
                    (y_dev * np.log(np.clip(self.head.a, 0.001, 1))) + (
                            (1 - y_dev) * np.log(np.clip(1 - self.head.a, 0.001, 1)))))
                self.trainingData.loc[len(self.trainingData)] = (
                    'PrecisionDev', tr_i // 100, np.mean((y_dev == (self.head.a > 0.5)).astype(int)))
            if tr_i % debugStep == 0:
                self.predict(x)
                self.trainingData.loc[len(self.trainingData)] = ('Error', tr_i // 100, -np.mean(
                    (y * np.log(np.clip(self.head.a, 0.001, 1))) + (
                            (1 - y) * np.log(np.clip(1 - self.head.a, 0.001, 1)))))
                self.trainingData.loc[len(self.trainingData)] = (
                    'Precision', tr_i // 100, np.mean((y == (self.head.a > 0.5)).astype(int)))


In [21]:
#Prepare Training Set
random.seed(1337)
trainCasesAmount = 1000
x = np.zeros((1, trainCasesAmount))
y = np.zeros((1, trainCasesAmount))
for i in range(trainCasesAmount):
    testCase = 0 + random.random()*18
    x[:, i] = np.array([testCase/18])
    y[:, i] = np.array([int(3 < testCase < 6)])

In [22]:
random.seed(1337)
trainCasesAmount = 1000
x_unNorm = np.zeros((1, trainCasesAmount))
y_unNorm  = np.zeros((1, trainCasesAmount))
for i in range(trainCasesAmount):
    testCase = 0 + random.random()*18
    x_unNorm [:, i] = np.array([testCase])
    y_unNorm [:, i] = np.array([int(3 < testCase < 6)])

In [141]:
np.random.seed(69)
model0 = Model(1, [2], activationFunc="tanh")
model0.train(x, y, 1, 100000, 1)

In [174]:
np.random.seed(69)
model0_plus = Model_Improved(1, [2], activationFunc="tanh")
model0_plus.train(x_unNorm, y_unNorm, 0.1, 10000, 1, epochSize=64)

'epochs Amount: 15'

In [180]:
np.random.seed(69)
model0_plus_nobatch = Model_Improved(1, [2], activationFunc="tanh")
model0_plus_nobatch.train(x_unNorm, y_unNorm, 0.5, 100000, 1, epochSize=1000)

'epochs Amount: 1'

In [None]:
np.random.seed(69)
model2 = Model(1, [2], activationFunc="leaky_relu")
model2.train(x, y, 0.1, 20000)

In [143]:
px.line(model0.trainingData, x='Iteration', y = 'Value', color='Name')

In [175]:
px.line(model0_plus.trainingData, x='Iteration', y = 'Value', color='Name')

In [153]:
px.line(model0_plus_nobatch.trainingData, x='Iteration', y = 'Value', color='Name')

In [183]:
inp = input()
while inp != 'stop':
    print(inp, 'is', model0.predict(np.array([[float(inp)/18]])))
    inp = input()

3 is [[0.42238218]]
4 is [[1.]]
5 is [[1.]]
3.5 is [[0.99999999]]
5.1 is [[1.]]


ValueError: could not convert string to float: 'zhopa'

In [32]:
testCasesAmount = 1000000
x_test = np.zeros((1, testCasesAmount))
y_test  = np.zeros((1, testCasesAmount))
for i in range(testCasesAmount):
    testCase = 2 + random.random()*5
    x_test [:, i] = np.array([testCase])
    y_test [:, i] = np.array([int(3 < testCase < 6)])

In [179]:
model0.predict(x_test / 18)
display("Error:",-np.mean(
    (y_test * np.log(np.clip(model0.head.a, 0.001, 1))) + (
            (1 - y_test) * np.log(np.clip(1 - model0.head.a, 0.001, 1)))))
display('Precision:', np.mean((y_test == (model0.head.a > 0.5)).astype(int)))

'Error:'

0.017947648987895557

'Precision:'

0.998345

In [176]:
model0_plus.predict(x_test)
display("Error:",-np.mean(
    (y_test * np.log(np.clip(model0_plus.head.a, 0.001, 1))) + (
            (1 - y_test) * np.log(np.clip(1 - model0_plus.head.a, 0.001, 1)))))
display('Precision:', np.mean((y_test == (model0_plus.head.a > 0.5)).astype(int)))

'Error:'

0.02736318196562356

'Precision:'

0.998292

In [181]:
model0_plus_nobatch.predict(x_test)
display("Error:",-np.mean(
    (y_test * np.log(np.clip(model0_plus_nobatch.head.a, 0.001, 1))) + (
            (1 - y_test) * np.log(np.clip(1 - model0_plus_nobatch.head.a, 0.001, 1)))))
display('Precision:', np.mean((y_test == (model0_plus_nobatch.head.a > 0.5)).astype(int)))

'Error:'

0.015564963620753464

'Precision:'

0.998178