#Experimentation using RNN
Notebook with the experimentation for the Time Series Forecasting Using Artificial Neural Networks paper to presented in IGPL 2022



In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import sys
path = '/content/drive/My Drive/Research Papers/IGPL 2022/'
directory = path + 'src/'
sys.path.append(directory)

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns
import time

In [4]:
import numpy as np
import pandas as pd
from ann import training, model_LSTM_1, model_GRU_1, model_LSTM_2, model_RNN_1
from ann_mlp import model_MLP_1
from utils import preprocessing, plotting, metrics

In [5]:
np.random.seed(0)
data_original = pd.read_csv(path+'data/IBEX35.csv')
data_original.describe

<bound method NDFrame.describe of             Date    Close     Open      Max      Min  Volume
0     02-12-2022   8382.6   8390.0   8414.1   8342.3  146.28
1     01-12-2022   8407.9   8426.2   8469.0   8381.8  209.72
2     30-11-2022   8363.2   8350.4   8364.6   8304.4  325.06
3     29-11-2022   8322.1   8348.5   8354.5   8282.8  145.89
4     28-11-2022   8323.2   8380.7   8396.4   8316.7  125.89
...          ...      ...      ...      ...      ...     ...
3304  08-01-2010  12163.0  12218.0  12232.2  12079.9  213.11
3305  07-01-2010  12166.3  12163.0  12199.7  12079.1  192.31
3306  06-01-2010  12222.5  12216.4  12230.7  12147.6  123.74
3307  05-01-2010  12204.4  12141.8  12240.5  12139.8  238.43
3308  04-01-2010  12145.1  11986.5  12145.1  11986.1  184.13

[3309 rows x 6 columns]>

In [6]:
data_original_inv = data_original.iloc[::-1]

In [7]:
preprocessing.normality_test(data_original_inv['Close'])

The null hypothesis can be rejected
With a value of 2.4812615646047063e-18
It's not normally distributed


In [8]:
data = data_original_inv.drop(columns="Date")
data_norma = preprocessing.norma(data, True)

In [9]:
num_epochs = 1000 #1000
learning_rate = 0.001 

In [10]:
models = ["LSTM 1", "GRU 1", "LSTM 2", "RNN 1", "MLP 1"]

In [11]:
expr_m = {"LSTM 1":[[1,1],[2,18],[2,32],[4,64],[4,128],[8,128]], "GRU 1":[[1,1],[2,18],[2,32],[4,64],[4,128],[8,128]], "LSTM 2":[[1,1],[2,18],[2,32],[4,64],[4,128],[8,128]],
          "RNN 1":[[1,1],[2,18],[2,32],[4,64],[4,128],[8,128]], "MLP 1":[[1,1],[2,18],[2,32],[4,64],[4,128],[8,128]]} 

In [None]:
results = []
n_class = 0
for t in range(3):
    n_class = n_class+1
    for i in range(6):
        input_size = 8
        hidden_n = 2
        dropout_prob = 0.2
        silent_mode = True
        split_at = 2482
        
        X, y = preprocessing.get_steps(data_norma['Close'], data_norma['Volume'], n_class)
        X_train, X_test, y_train, y_test = preprocessing.splitting(X,y, split_at)

        X_train_tensors, X_test_tensors, y_train_tensors, y_test_tensors = training.convert_tensor(X_train,X_test,y_train,y_test)
        X_train_tensors_final = training.reshape_3d(X_train_tensors, X_train_tensors.shape[0], 1, X_train_tensors.shape[1])
        X_test_tensors_final = training.reshape_3d(X_test_tensors, X_test_tensors.shape[0], 1, X_test_tensors.shape[1]) 

        lstm1 = model_LSTM_1(input_size, n_class,expr_m["LSTM 1"][i][0], expr_m["LSTM 1"][i][1], dropout_prob)
        gru1 = model_GRU_1(input_size, n_class, expr_m["GRU 1"][i][0], expr_m["GRU 1"][i][1], dropout_prob)
        lstm2 = model_LSTM_2(input_size, n_class, expr_m["LSTM 2"][i][0], expr_m["LSTM 2"][i][1], dropout_prob)
        rnn1 = model_RNN_1(input_size, n_class, expr_m["RNN 1"][i][0], expr_m["RNN 1"][i][1], dropout_prob)
        mlp1 = model_MLP_1(input_size, n_class, expr_m["MLP 1"][i][0], expr_m["MLP 1"][i][1], "sigmoid")

        prediction_1, running_time1 = training.training_iteration(num_epochs, learning_rate, lstm1, X_train_tensors_final, X_test_tensors_final, y_train_tensors, silent_mode)
        spc_1 = str(expr_m["LSTM 1"][i][0])+" hidden nodes + "+str(expr_m["LSTM 1"][i][1])+" layers"
        results.append([models[0], str(i+1), n_class-1, spc_1,metrics.mse(y_test, prediction_1),running_time1, prediction_1.tolist()])

        prediction_2, running_time2 = training.training_iteration(num_epochs, learning_rate, gru1, X_train_tensors_final, X_test_tensors_final, y_train_tensors, silent_mode)
        spc_2 = str(expr_m["GRU 1"][i][0])+" hidden nodes + "+str(expr_m["GRU 1"][i][1])+" layers"
        results.append([models[1], str(i+1), n_class-1, spc_2,metrics.mse(y_test, prediction_2),running_time2, prediction_2.tolist()])

        prediction_3, running_time3 = training.training_iteration(num_epochs, learning_rate, lstm2, X_train_tensors_final, X_test_tensors_final, y_train_tensors, silent_mode)
        spc_3 = str(expr_m["LSTM 2"][i][0])+" hidden nodes + "+str(expr_m["LSTM 2"][i][1])+"  layers + 128"
        results.append([models[2], str(i+1), n_class-1, spc_3,metrics.mse(y_test, prediction_3),running_time3, prediction_3.tolist()])

        prediction_4, running_time4 = training.training_iteration(num_epochs, learning_rate, rnn1, X_train_tensors_final, X_test_tensors_final, y_train_tensors, silent_mode)
        spc_4 = str(expr_m["RNN 1"][i][0])+" hidden nodes + "+str(expr_m["RNN 1"][i][1])+" layers"
        results.append([models[3], str(i+1), n_class-1, spc_4,metrics.mse(y_test, prediction_4),running_time4, prediction_4.tolist()])

        prediction_5, running_time5 = mlp1.training(num_epochs, learning_rate, X_train, y_train, X_test)
        spc_5 = str(expr_m["MLP 1"][i][0])+" - "+str(expr_m["MLP 1"][i][1])+' layers'
        results.append([models[4], str(i+1), n_class-1, spc_5,metrics.mse(y_test, prediction_5),running_time5, prediction_5.tolist()])

In [None]:
results_df = pd.DataFrame(results, columns=['Model','Configuration','t','Specifications','MSE','Running Time', 'Predictions'])
results_df.head()

In [None]:
time_string = time.strftime("%d_%m_%Y-%H_%M_%S", time.localtime())
results_df.to_csv(path+"/results/final_results_"+time_string+'.csv')