In [1]:
import numpy as np
import math
import random
import pandas as pd
import plotly.express as px
import multiprocessing

np.set_printoptions(threshold=np.inf)


def sigmoid(z: np.array):
    return 1 / (1 + np.exp(-np.clip(z, -100, 100)))


def tanh(z: np.array):
    z = np.clip(z, -100, 100)
    expZ = np.exp(z)
    expZ_minus = np.exp(-z)
    return (expZ - expZ_minus) / (expZ + expZ_minus)


def reLU(z: np.array):
    return np.maximum(0.0, z)


def leakyReLU(z: np.array):
    return np.maximum(0.01 * z, z)

In [2]:
class Model:
    class Layer:
        def __init__(self, layerHeight, prevLayer, nextLayer, activationFunc="sigmoid"):
            self.layerHeight = layerHeight
            self.w = np.random.randn(layerHeight, prevLayer.layerHeight) * np.sqrt(1 / prevLayer.layerHeight)
            self.b = np.zeros((layerHeight, 1))
            self.v_dw = np.zeros((layerHeight, prevLayer.layerHeight))
            self.v_db = np.zeros((layerHeight, 1))
            self.s_dw = np.zeros((layerHeight, prevLayer.layerHeight))
            self.s_db = np.zeros((layerHeight, 1))
            self.t = 1
            self.a = None
            self.d = None
            if activationFunc == "sigmoid":
                self.activation = sigmoid
                self.da_dz = lambda a: np.multiply(a, (1 - a))
            elif activationFunc == "tanh":
                self.activation = np.tanh
                self.da_dz = lambda a: (1 - np.square(a))
            elif activationFunc == "relu":
                self.activation = reLU
                self.da_dz = lambda a: np.where(a > 0, 1,
                                                0)  #just taking advantage of the fact a = z for positive and a = 0 for negative
            elif activationFunc == "leaky_relu":
                self.activation = leakyReLU
                self.da_dz = lambda a: np.where(a > 0, 1, 0.01)
            self.prevLayer = prevLayer if prevLayer else None
            self.nextLayer = nextLayer if nextLayer else None

        def predict(self, a_Prev: np.array, keepActiveProb: float):
            self.d = np.random.random((self.layerHeight, a_Prev.shape[1])) < keepActiveProb
            self.a = self.activation((self.w @ a_Prev) + self.b) * self.d
            self.a /= keepActiveProb
            # self.a = self.activation((self.w @ a_Prev) + self.b)
            return self.nextLayer.predict(self.a, keepActiveProb)

        def propagate(self, dz_Next: np.array, trainingStep: float, l2_lambda=0):
            da = (self.nextLayer.w.T @ dz_Next) * self.d
            dz = self.da_dz(self.a) * da
            dw = ((dz @ self.prevLayer.a.T) / dz.shape[1]) + ((l2_lambda / dz.shape[1]) * self.w)
            db = np.mean(dz, axis=1, keepdims=True)

            beta1 = 0.9
            beta2 = 0.99
            self.v_dw = (beta1 * self.v_dw) + ((1 - beta1) * dw)
            self.v_db = (beta1 * self.v_db) + ((1 - beta1) * db)
            self.s_dw = (beta2 * self.s_dw) + ((1 - beta2) * np.square(dw))
            self.s_db = (beta2 * self.s_db) + ((1 - beta2) * np.square(db))
            v_dw_cor = self.v_dw / (1 - (beta1 ** self.t))
            v_db_cor = self.v_db / (1 - (beta1 ** self.t))
            s_dw_cor = self.s_dw / (1 - (beta2 ** self.t))
            s_db_cor = self.s_db / (1 - (beta2 ** self.t))

            self.prevLayer.propagate(dz, trainingStep, l2_lambda)
            self.w -= trainingStep * (v_dw_cor / (np.sqrt(s_dw_cor) + (10 ** -8)))
            self.b -= trainingStep * (v_db_cor / (np.sqrt(s_db_cor) + (10 ** -8)))
            self.t += 1

    class Head(Layer):
        def __init__(self, layerHeight, prevLayer, nextLayer, activationFunc="sigmoid", costFunc="logistic"):
            super().__init__(layerHeight, prevLayer, nextLayer, activationFunc)
            if costFunc == "logistic":
                self.dy_hat = lambda y_hat, y: ((1 - y) / (1 - y_hat + 10 ** -8)) - (y / (y_hat + 10 ** -8))
                self.error = lambda y_hat, y: -np.mean(
                    (y * np.log(np.clip(y_hat, 0.001, 1))) + (
                            (1 - y) * np.log(np.clip(1 - y_hat, 0.001, 1))))
            if costFunc == "r2":
                self.dy_hat = lambda y_hat, y: 2 * (y_hat - y)
                self.error = lambda y_hat, y: np.mean(np.square(y - y_hat))

        def predict(self, a_Prev: np.array, keepActiveProb: float):
            self.a = self.activation((self.w @ a_Prev) + self.b)
            return self.a

        def propagate(self, realY, trainingStep, l2_lambda=0):
            da = self.dy_hat(self.a, realY)
            dz = self.da_dz(self.a) * da
            dw = ((dz @ self.prevLayer.a.T) / dz.shape[1]) + ((l2_lambda / dz.shape[1]) * self.w)
            db = np.mean(dz, axis=1, keepdims=True)

            beta1 = 0.9
            beta2 = 0.999
            self.v_dw = (beta1 * self.v_dw) + ((1 - beta1) * dw)
            self.v_db = (beta1 * self.v_db) + ((1 - beta1) * db)
            self.s_dw = (beta2 * self.s_dw) + ((1 - beta2) * np.square(dw))
            self.s_db = (beta2 * self.s_db) + ((1 - beta2) * np.square(db))
            v_dw_cor = self.v_dw / (1 - (beta1 ** self.t))
            v_db_cor = self.v_db / (1 - (beta1 ** self.t))
            s_dw_cor = self.s_dw / (1 - (beta2 ** self.t))
            s_db_cor = self.s_db / (1 - (beta2 ** self.t))

            self.prevLayer.propagate(dz, trainingStep)
            self.w -= trainingStep * (v_dw_cor / (np.sqrt(s_dw_cor) + (10 ** -8)))
            self.b -= trainingStep * (v_db_cor / (np.sqrt(s_db_cor) + (10 ** -8)))
            self.t += 1

    class Input(Layer):
        def __init__(self, layerHeight, nextLayer):
            self.layerHeight = layerHeight
            self.a = None
            self.w = np.identity(self.layerHeight)
            self.b = np.zeros((layerHeight, 1))
            self.nextLayer = nextLayer if nextLayer else None

        def predict(self, a_Prev: np.array, keepActiveProb: float):
            self.a = (self.w @ a_Prev) + self.b
            return self.nextLayer.predict(self.a, keepActiveProb)

        def propagate(self, realY, trainingStep, l2_lambda=0):
            return

    def __init__(self, xSize, layers, activationFunc="tanh", headActivation="sigmoid", costFunc="logistic"):
        """
        activationFunc = "sigmoid" | "tanh" | "relu" | "leaky_relu" \n
        costFunc = "logistic" | "r2"
        """
        self.head = None

        def createLayer(prevLayer, depth):
            if depth >= len(layers):
                layer = self.Head(1, prevLayer, None, activationFunc=headActivation, costFunc=costFunc)
                self.head = layer
                return layer
            else:
                layer = self.Layer(layers[depth], prevLayer, None, activationFunc=activationFunc)
                layer.nextLayer = createLayer(layer, depth + 1)
                return layer

        self.input = self.Input(xSize, None)
        self.input.nextLayer = createLayer(self.input, 0)
        self.trainingData = pd.DataFrame([], columns=['Name', 'Iteration', 'Value'])

    def predict(self, a: np.array, keepActiveProb=1.0):
        return self.input.predict(a, keepActiveProb)

    def train(self, X: np.array, Y: np.array, trainingStep, trainingIterations,
              debugStep=100, x_dev=None, y_dev=None, epochSize=1024, keepActiveProb=1.0, l2_reg_lambda=0):
        """
        epochSize ~ 128 | 256 | 512 \n
        keepActiveProb ~ ratio of active neurons {>0.8} \n
        l2_reg_lambda ~ weight decay coefficient {<2.0} \n
        """
        trainSetMean = np.mean(X, axis=1, keepdims=True)
        trainSetR2 = np.mean(np.square(X - trainSetMean), axis=1, keepdims=True)
        self.input.w = self.input.w / np.sqrt(trainSetR2)
        self.input.b = -trainSetMean / np.sqrt(trainSetR2)
        epochs = X.shape[1] // epochSize
        display('epochs Amount: ' + str(epochs))
        for tr_i in range(trainingIterations):
            perm_indices = np.random.permutation(X.shape[1])
            x = X[:, perm_indices]
            y = Y[:, perm_indices]
            for e in range(epochs):
                self.predict(x[:, epochSize * e: epochSize * (e + 1)], keepActiveProb)
                self.head.propagate(y[:, epochSize * e: epochSize * (e + 1)], trainingStep, l2_reg_lambda)
            if x_dev is not None and tr_i % debugStep == 0:
                self.predict(x_dev)
                self.trainingData.loc[len(self.trainingData)] = (
                    'ErrorDev', tr_i // debugStep, self.head.error(self.head.a, y_dev))
                # self.trainingData.loc[len(self.trainingData)] = (
                #     'PrecisionDev', tr_i // debugStep, np.mean((y_dev == (self.head.a > 0.5)).astype(int)))
            if tr_i % debugStep == 0:
                self.predict(x)
                self.trainingData.loc[len(self.trainingData)] = (
                    'Error', tr_i // debugStep, self.head.error(self.head.a, y))
                # self.trainingData.loc[len(self.trainingData)] = (
                #     'Precision', tr_i // debugStep, np.mean((y == (self.head.a > 0.5)).astype(int)))


In [3]:
class Model_BatchNorm:
    class Layer:
        def __init__(self, layerHeight, prevLayer, nextLayer, activationFunc="sigmoid"):
            self.layerHeight = layerHeight
            self.w = np.random.randn(layerHeight, prevLayer.layerHeight) * np.sqrt(1 / prevLayer.layerHeight)
            self.gamma = np.ones((layerHeight, 1))
            self.meanB = np.zeros((layerHeight, 1))
            self.v_dw = np.zeros((layerHeight, prevLayer.layerHeight))
            self.v_dgamma = np.zeros((layerHeight, 1))
            self.v_dmeanB = np.zeros((layerHeight, 1))
            self.s_dw = np.zeros((layerHeight, prevLayer.layerHeight))
            self.s_dgamma = np.zeros((layerHeight, 1))
            self.s_dmeanB = np.zeros((layerHeight, 1))
            self.t = 1
            self.a = None
            self.z_norm = None
            self.d = None
            self.mu, self.r2 = 0, 0
            self.mu_avg, self.r2_avg = 0, 0
            if activationFunc == "sigmoid":
                self.activation = sigmoid
                self.da_dz = lambda a: np.multiply(a, (1 - a))
            elif activationFunc == "tanh":
                self.activation = np.tanh
                self.da_dz = lambda a: (1 - np.square(a))
            elif activationFunc == "relu":
                self.activation = reLU
                self.da_dz = lambda a: np.where(a > 0, 1,
                                                0)  #just taking advantage of the fact a = z for positive and a = 0 for negative
            elif activationFunc == "leaky_relu":
                self.activation = leakyReLU
                self.da_dz = lambda a: np.where(a > 0, 1, 0.01)
            self.prevLayer = prevLayer if prevLayer else None
            self.nextLayer = nextLayer if nextLayer else None

        def predict(self, a_Prev: np.array, keepActiveProb: float):
            z = (self.w @ a_Prev)
            z_proc = z - self.mu_avg
            z_proc = z_proc / np.sqrt(self.r2_avg + 10 ** -8)
            self.z_norm = self.gamma * z_proc + self.meanB
            self.a = self.activation(self.z_norm)
            return self.nextLayer.predict(self.a, keepActiveProb)

        def forw_propagate(self, a_Prev: np.array, keepActiveProb: float):
            self.d = np.random.random((self.layerHeight, a_Prev.shape[1])) < keepActiveProb
            z = (self.w @ a_Prev)
            self.mu = np.mean(z, axis=1, keepdims=True)
            self.mu_avg = 0.99 * self.mu_avg + 0.01 * self.mu
            z_proc = z - self.mu
            self.r2 = np.mean(np.square(z_proc), axis=1, keepdims=True)
            self.r2_avg = 0.99 * self.r2_avg + 0.01 * self.r2
            z_proc = z_proc / np.sqrt(self.r2 + 10 ** -8)
            self.z_norm = self.gamma * z_proc + self.meanB
            self.a = self.activation(self.z_norm) * self.d
            self.a /= keepActiveProb
            return self.nextLayer.forw_propagate(self.a, keepActiveProb)

        def back_propagate(self, dz_Next: np.array, trainingStep: float, l2_lambda=0):
            da = (self.nextLayer.w.T @ dz_Next) * self.d
            dz_norm = self.da_dz(self.a) * da
            dr2 = np.mean(dz_norm * self.z_norm, axis=1, keepdims=True)
            dmeanB = np.mean(dz_norm, axis=1, keepdims=True)
            dz_proc = self.gamma * dz_norm
            dz = dz_proc / np.sqrt(self.r2 + 10 ** -8)
            dw = ((dz @ self.prevLayer.a.T) / dz.shape[1]) + ((l2_lambda / dz.shape[1]) * self.w)

            beta1 = 0.9
            beta2 = 0.99
            self.v_dw = (beta1 * self.v_dw) + ((1 - beta1) * dw)
            self.v_dgamma = (beta1 * self.v_dgamma) + ((1 - beta1) * dr2)
            self.v_dmeanB = (beta1 * self.v_dmeanB) + ((1 - beta1) * dmeanB)
            self.s_dw = (beta2 * self.s_dw) + ((1 - beta2) * np.square(dw))
            self.s_dgamma = (beta2 * self.s_dgamma) + ((1 - beta2) * np.square(dr2))
            self.s_dmeanB = (beta2 * self.s_dmeanB) + ((1 - beta2) * np.square(dmeanB))
            v_dw_cor = self.v_dw / (1 - (beta1 ** self.t))
            v_dgamma_cor = self.v_dgamma / (1 - (beta1 ** self.t))
            v_dmeanB_cor = self.v_dmeanB / (1 - (beta1 ** self.t))
            s_dw_cor = self.s_dw / (1 - (beta2 ** self.t))
            s_dgamma_cor = self.s_dgamma / (1 - (beta2 ** self.t))
            s_dmeanB_cor = self.s_dmeanB / (1 - (beta2 ** self.t))

            self.prevLayer.back_propagate(dz, trainingStep, l2_lambda)
            self.w -= trainingStep * (v_dw_cor / (np.sqrt(s_dw_cor) + (10 ** -8)))
            self.gamma -= trainingStep * (v_dgamma_cor / (np.sqrt(s_dgamma_cor) + (10 ** -8)))
            self.meanB -= trainingStep * (v_dmeanB_cor / (np.sqrt(s_dmeanB_cor) + (10 ** -8)))
            self.t += 1

    class Head(Layer):
        def __init__(self, layerHeight, prevLayer, nextLayer, activationFunc="sigmoid", costFunc="logistic"):
            super().__init__(layerHeight, prevLayer, nextLayer, activationFunc)
            if costFunc == "logistic":
                self.dy_hat = lambda y_hat, y: ((1 - y) / (1 - y_hat + 10 ** -8)) - (y / (y_hat + 10 ** -8))
                self.error = lambda y_hat, y: -np.mean(
                    (y * np.log(np.clip(y_hat, 0.001, 1))) + (
                            (1 - y) * np.log(np.clip(1 - y_hat, 0.001, 1))))
            if costFunc == "r2":
                self.dy_hat = lambda y_hat, y: 2 * (y_hat - y)
                self.error = lambda y_hat, y: np.mean(np.square(y - y_hat))

        def predict(self, a_Prev: np.array, keepActiveProb: float):
            z = (self.w @ a_Prev)
            z_proc = z - self.mu_avg
            z_proc = z_proc / np.sqrt(self.r2_avg + 10 ** -8)
            self.z_norm = self.gamma * z_proc + self.meanB
            self.a = self.activation(self.z_norm)
            return self.a

        def forw_propagate(self, a_Prev: np.array, keepActiveProb: float):
            z = (self.w @ a_Prev)
            self.mu = np.mean(z, axis=1, keepdims=True)
            self.mu_avg = 0.99 * self.mu_avg + 0.01 * self.mu
            z_proc = z - self.mu
            self.r2 = np.mean(np.square(z_proc), axis=1, keepdims=True)
            self.r2_avg = 0.99 * self.r2_avg + 0.01 * self.r2
            z_proc = z_proc / np.sqrt(self.r2 + 10 ** -8)
            self.z_norm = self.gamma * z_proc + self.meanB
            self.a = self.activation(self.z_norm)
            return self.a

        def back_propagate(self, realY, trainingStep, l2_lambda=0):
            da = self.dy_hat(self.a, realY)
            dz_norm = self.da_dz(self.a) * da
            dgamma = np.mean(dz_norm * self.z_norm, axis=1, keepdims=True)
            dmeanB = np.mean(dz_norm, axis=1, keepdims=True)
            dz_proc = self.gamma * dz_norm
            dz = dz_proc / np.sqrt(self.r2 + 10 ** -8)
            dw = ((dz @ self.prevLayer.a.T) / dz.shape[1]) + ((l2_lambda / dz.shape[1]) * self.w)

            beta1 = 0.9
            beta2 = 0.99
            self.v_dw = (beta1 * self.v_dw) + ((1 - beta1) * dw)
            self.v_dgamma = (beta1 * self.v_dgamma) + ((1 - beta1) * dgamma)
            self.v_dmeanB = (beta1 * self.v_dmeanB) + ((1 - beta1) * dmeanB)
            self.s_dw = (beta2 * self.s_dw) + ((1 - beta2) * np.square(dw))
            self.s_dgamma = (beta2 * self.s_dgamma) + ((1 - beta2) * np.square(dgamma))
            self.s_dmeanB = (beta2 * self.s_dmeanB) + ((1 - beta2) * np.square(dmeanB))
            v_dw_cor = self.v_dw / (1 - (beta1 ** self.t))
            v_dgamma_cor = self.v_dgamma / (1 - (beta1 ** self.t))
            v_dmeanB_cor = self.v_dmeanB / (1 - (beta1 ** self.t))
            s_dw_cor = self.s_dw / (1 - (beta2 ** self.t))
            s_dgamma_cor = self.s_dgamma / (1 - (beta2 ** self.t))
            s_dmeanB_cor = self.s_dmeanB / (1 - (beta2 ** self.t))

            self.prevLayer.back_propagate(dz, trainingStep, l2_lambda)
            self.w -= trainingStep * (v_dw_cor / (np.sqrt(s_dw_cor) + (10 ** -8)))
            self.gamma -= trainingStep * (v_dgamma_cor / (np.sqrt(s_dgamma_cor) + (10 ** -8)))
            self.meanB -= trainingStep * (v_dmeanB_cor / (np.sqrt(s_dmeanB_cor) + (10 ** -8)))
            self.t += 1

    class Input(Layer):
        def __init__(self, layerHeight, nextLayer):
            self.layerHeight = layerHeight
            self.a = None
            self.w = np.identity(self.layerHeight)
            self.b = np.zeros((layerHeight, 1))
            self.nextLayer = nextLayer if nextLayer else None

        def predict(self, a_Prev: np.array, keepActiveProb: float):
            self.a = (self.w @ a_Prev) + self.b
            return self.nextLayer.predict(self.a, keepActiveProb)

        def forw_propagate(self, a_Prev: np.array, keepActiveProb: float):
            self.a = (self.w @ a_Prev) + self.b
            return self.nextLayer.forw_propagate(self.a, keepActiveProb)

        def back_propagate(self, realY, trainingStep, l2_lambda=0):
            return

    def __init__(self, xSize, layers, activationFunc="tanh", headActivation="sigmoid", costFunc="logistic"):
        """
        activationFunc = "sigmoid" | "tanh" | "relu" | "leaky_relu" \n
        costFunc = "logistic" | "r2"
        """
        self.head = None

        def createLayer(prevLayer, depth):
            if depth >= len(layers):
                layer = self.Head(1, prevLayer, None, activationFunc=headActivation, costFunc=costFunc)
                self.head = layer
                return layer
            else:
                layer = self.Layer(layers[depth], prevLayer, None, activationFunc=activationFunc)
                layer.nextLayer = createLayer(layer, depth + 1)
                return layer

        self.input = self.Input(xSize, None)
        self.input.nextLayer = createLayer(self.input, 0)
        self.trainingData = pd.DataFrame([], columns=['Name', 'Iteration', 'Value'])

    def predict(self, a: np.array, keepActiveProb=1.0):
        return self.input.predict(a, keepActiveProb)

    def train(self, X: np.array, Y: np.array, trainingStep, trainingIterations,
              debugStep=100, x_dev=None, y_dev=None, epochSize=1024, keepActiveProb=1.0, l2_reg_lambda=0):
        """
        epochSize ~ 128 | 256 | 512 \n
        keepActiveProb ~ ratio of active neurons {>0.8} \n
        l2_reg_lambda ~ weight decay coefficient {<2.0} \n
        """
        trainSetMean = np.mean(X, axis=1, keepdims=True)
        trainSetR2 = np.mean(np.square(X - trainSetMean), axis=1, keepdims=True)
        self.input.w = self.input.w / np.sqrt(trainSetR2)
        self.input.b = -trainSetMean / np.sqrt(trainSetR2)
        epochs = X.shape[1] // epochSize
        display('epochs Amount: ' + str(epochs))
        # lastW = list()
        # currentLayer = self.input.nextLayer
        # while currentLayer:
        #     lastW.append(currentLayer.w.copy())
        #     currentLayer = currentLayer.nextLayer
        for tr_i in range(trainingIterations):
            perm_indices = np.random.permutation(X.shape[1])
            x = X[:, perm_indices]
            y = Y[:, perm_indices]
            for e in range(epochs):
                self.input.forw_propagate(x[:, epochSize * e: epochSize * (e + 1)], keepActiveProb)
                self.head.back_propagate(y[:, epochSize * e: epochSize * (e + 1)], trainingStep, l2_reg_lambda)
            if x_dev is not None and tr_i % debugStep == 0:
                self.predict(x_dev)
                self.trainingData.loc[len(self.trainingData)] = (
                    'ErrorDev', tr_i // debugStep, self.head.error(self.head.a, y_dev))
                # self.trainingData.loc[len(self.trainingData)] = (
                #     'PrecisionDev', tr_i // debugStep, np.mean((y_dev == (self.head.a > 0.5)).astype(int)))
            if tr_i % debugStep == 0:
                self.predict(x)
                self.trainingData.loc[len(self.trainingData)] = (
                    'Error', tr_i // debugStep, self.head.error(self.head.a, y))
                # self.trainingData.loc[len(self.trainingData)] = (
                #     'Precision', tr_i // debugStep, np.mean((y == (self.head.a > 0.5)).astype(int)))
                # wChange = 0
                # i = 0
                # currentLayer = self.input.nextLayer
                # while currentLayer:
                #     wChange += np.sum(np.abs(currentLayer.w - lastW[i]))
                #     lastW[i] = currentLayer.w.copy()
                #     i += 1
                #     currentLayer = currentLayer.nextLayer
                # self.trainingData.loc[len(self.trainingData)] = ('w', tr_i // debugStep, wChange)


In [4]:
df = pd.read_csv('Datasets/coffee-prices-historical-data.csv')
df = df.fillna(df.mean())

x_train, x_dev = np.zeros((57, int(len(df) * 0.8))), np.zeros((57, 2000))
y_train, y_dev = np.zeros((1, int(len(df) * 0.8))), np.zeros((1, 2000)),
case = 0
for i in range(15, int(len(df) * 0.8) + 15):
    x_train[0, case] = df.iloc[i - 1][' value']
    for j in range(2, 16):
        x_train[j - 1, case] = (df.iloc[i - (j - 1)][' value'] / df.iloc[i - j][' value']) - 1

    for j in range(1, 9):
        if i - (7 * (j + 1)) >= 0:
            x_train[14 + j, case] = df.iloc[i - (7 * j): i - (7 * (j - 1))][' value'].mean() / \
                                    df.iloc[i - (7 * (j + 1)): i - (7 * j)][' value'].mean() - 1
        else:
            x_train[14 + j, case] = 0

    for j in range(1, 25):
        if i - (28 * (j + 1)) >= 0:
            x_train[22 + j, case] = df.iloc[i - (28 * j): i - (28 * (j - 1))][' value'].mean() / \
                                    df.iloc[i - (28 * (j + 1)): i - (28 * j)][' value'].mean() - 1
        else:
            x_train[22 + j, case] = 0

    for j in range(1, 11):
        if i - (336 * (j + 1)) >= 0:
            x_train[46 + j, case] = df.iloc[i - (336 * j): i - (336 * (j - 1))][' value'].mean() / \
                                    df.iloc[i - (336 * (j + 1)): i - (336 * j)][' value'].mean() - 1
        else:
            x_train[46 + j, case] = 0

    y_train[0, case] = df.iloc[i][' value']
    case += 1

case = 0
for i in range(int(len(df) * 0.8) + 15, int(len(df) * 0.8) + 2015):
    x_dev[0, case] = df.iloc[i - 1][' value']
    for j in range(2, 16):
        x_dev[j - 1, case] = (df.iloc[i - (j - 1)][' value'] / df.iloc[i - j][' value']) - 1

    for j in range(1, 9):
        x_dev[14 + j, case] = df.iloc[i - (7 * j): i - (7 * (j - 1))][' value'].mean() / \
                              df.iloc[i - (7 * (j + 1)): i - (7 * j)][' value'].mean() - 1

    for j in range(1, 25):
        x_dev[22 + j, case] = df.iloc[i - (28 * j): i - (28 * (j - 1))][' value'].mean() / \
                              df.iloc[i - (28 * (j + 1)): i - (28 * j)][' value'].mean() - 1

    for j in range(1, 11):
        x_dev[46 + j, case] = df.iloc[i - (336 * j): i - (336 * (j - 1))][' value'].mean() / \
                              df.iloc[i - (336 * (j + 1)): i - (336 * j)][' value'].mean() - 1

    y_dev[0, case] = df.iloc[i][' value']
    case += 1

display("Generated")


  df = df.fillna(df.mean())


'Generated'

In [20]:
np.random.seed(69)
model_predictive = Model(57, [1000, 1000, 1000], activationFunc="tanh", headActivation="relu", costFunc="r2")
model_predictive.train(x_train, y_train, 0.0001, 101, x_dev=x_dev, y_dev=y_dev, epochSize=256, debugStep=1)

'epochs Amount: 39'

In [None]:
np.random.seed(69)
model_BN = Model_BatchNorm(57, [1000, 1000, 1000], activationFunc="tanh", headActivation="relu", costFunc="r2")
model_BN.train(x_train, y_train, 0.001, 101, x_dev=x_dev, y_dev=y_dev, epochSize=256, debugStep=1)

'epochs Amount: 39'

In [21]:
px.line(model_predictive.trainingData, x='Iteration', y='Value', color='Name')

In [61]:
px.line(model_BN.trainingData, x='Iteration', y='Value', color='Name')

In [22]:
predict = px.line(x=df.loc[range(15, x_train.shape[1] + 15)]['date'],
                  y=model_predictive.predict(x_train).flatten()).update_traces(line=dict(color='white'))
forecast = px.line(x=df.loc[range(x_train.shape[1] + 15, x_train.shape[1] + 15 + x_dev.shape[1])]['date'],
                   y=model_predictive.predict(x_dev).flatten()).update_traces(
    line=dict(color='red'))
data = px.line(x=df['date'], y=df[' value']).update_traces(line=dict(color='green'))
combined_fig = predict.add_traces(data.data)
combined_fig = combined_fig.add_traces(forecast.data)

# Display the combined plot
combined_fig.show()