In [15]:
from alpha_vantage.timeseries import TimeSeries

from statsmodels.tsa.stattools import acf
from statsmodels.tsa.stattools import pacf
from scipy.stats import shapiro, jarque_bera
from statsmodels.tsa.stattools import adfuller

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

from torchinfo import summary
from torch.nn import TransformerEncoder, TransformerEncoderLayer

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import confusion_matrix, classification_report

from tqdm import tqdm
import pickle

from plotly.subplots import make_subplots
import plotly.graph_objs as go

import math
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pmdarima as pm
import statsmodels.graphics.tsaplots as sgt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA

In [16]:
# Size of one forecasting window, in rows: the ARIMA forecasting cell requests
# this many steps per predict() call, advances through the trading data in
# steps of this size, and feeds the model this many actual values per update.
slide_windows = 5

In [17]:
# Load the saved training series and wrap it in a single-column DataFrame
# named 'Close' (the column the ARIMA search is fitted on).
train = np.load('./Train/train_close.npy')
train_df = pd.DataFrame(train, columns=['Close'])

In [18]:
# Start from the LSTM results file and give its generic 'Predicted' column a
# model-specific name, so further prediction columns can sit next to it.
data_for_trading = pd.read_csv('LSTM_for_trading.csv').rename(
    columns={'Predicted': 'Predicted_LSTM'}
)

# Bring the Transformer output in as an extra column. The assignment aligns on
# the row index, so both files are presumably expected to cover the same rows
# in the same order -- TODO confirm.
data_for_trading_trans = pd.read_csv('Transformer_for_stock.csv')
data_for_trading["Predicted_Transformer"] = data_for_trading_trans["tomorrow_price"]

In [19]:
# Fit the ARIMA baseline: let pmdarima search over non-seasonal candidate
# orders with one level of differencing and keep the selected model in
# `arima_model` (the trace printed below lists each candidate with its AIC).
arima_model = pm.auto_arima(
    train_df['Close'],
    # --- model structure ---
    d=1,                      # differencing order is fixed rather than tested for
    seasonal=False,
    # --- search strategy ---
    stepwise=False,           # not the stepwise shortcut; candidates are enumerated
    max_p=6,
    max_order=None,
    # --- diagnostics / error handling ---
    trace=True,
    suppress_warnings=True,
    error_action="ignore",
)

 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=4560.958, Time=0.01 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=4559.525, Time=0.03 sec
 ARIMA(0,1,2)(0,0,0)[0] intercept   : AIC=4556.568, Time=0.04 sec
 ARIMA(0,1,3)(0,0,0)[0] intercept   : AIC=4557.032, Time=0.06 sec
 ARIMA(0,1,4)(0,0,0)[0] intercept   : AIC=4558.831, Time=0.09 sec
 ARIMA(0,1,5)(0,0,0)[0] intercept   : AIC=4549.483, Time=0.10 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=4560.086, Time=0.02 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=4559.512, Time=0.06 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=inf, Time=0.10 sec
 ARIMA(1,1,3)(0,0,0)[0] intercept   : AIC=4555.402, Time=0.14 sec
 ARIMA(1,1,4)(0,0,0)[0] intercept   : AIC=4557.315, Time=0.16 sec
 ARIMA(1,1,5)(0,0,0)[0] intercept   : AIC=4549.524, Time=0.16 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=4556.411, Time=0.03 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=4555.653, Time=0.08 sec
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=4557.334, Time=0.22 sec
 ARIMA(2,1,3)(0

In [20]:
# Walk-forward ARIMA forecast over the trading data.
#
# Each iteration asks the model for the next `slide_windows` values and then
# feeds it the matching block of actual 'Real' values, so later forecasts are
# conditioned on everything observed so far.
#
# The forecasts are written to rows 1.. of "Predicted_ARIMA" (row 0 stays NaN),
# i.e. forecast k lands on row k + 1.
# NOTE(review): this is one row below the value it forecasts, whereas
# "Predicted_Null" is stored on the same row as its target -- confirm this
# offset is what downstream consumers of the CSV expect.
#
# NOTE: `arima_model.update(...)` changes the fitted model in place, so
# re-running this cell without re-fitting the model first continues from the
# already-updated state and produces different forecasts.

# Rows 1..N-1 receive a forecast, so exactly N-1 values are needed. Deriving the
# loop bound from that count (instead of N - slide_windows) keeps the final
# assignment valid for any number of rows, not only when N - 1 is a multiple of
# slide_windows.
n_rows_to_fill = max(len(data_for_trading) - 1, 0)

rolling_forecasts = []
for actual_value_idx in range(0, n_rows_to_fill, slide_windows):
    window_forecast = arima_model.predict(n_periods=slide_windows)
    # predict() may hand back an ndarray or a Series depending on the pmdarima
    # version; np.asarray normalises both to a flat float array.
    rolling_forecasts.extend(np.asarray(window_forecast, dtype=float).tolist())
    # The slice may be shorter than slide_windows on the final window.
    arima_model.update(data_for_trading['Real'][actual_value_idx:actual_value_idx + slide_windows])

# The last window can overshoot the end of the data; drop the surplus steps.
predictions = np.array(rolling_forecasts[:n_rows_to_fill], dtype=float)

data_for_trading["Predicted_ARIMA"] = np.nan
# Label-based slice: relies on the default 0..N-1 integer index from read_csv.
data_for_trading.loc[1:, "Predicted_ARIMA"] = predictions

In [21]:
# Error metrics for the ARIMA forecasts: MAE, RMSE and MSE.
# (`msa_arima` holds the mean absolute error; the name is kept as-is.)
#
# shift(-1) moves each stored forecast up one row before it is compared with
# 'Real'. The shift leaves a NaN in the last row, which mean() skips.
arima_forecast_aligned = data_for_trading["Predicted_ARIMA"].shift(-1)
arima_error = data_for_trading["Real"] - arima_forecast_aligned
arima_squared_error = arima_error ** 2

msa_arima = arima_error.abs().mean()
print("Mean Absolute Error for ARIMA model: ", msa_arima)

rmse_arima = np.sqrt(arima_squared_error.mean())
print("Root Mean Squared Error for ARIMA model: ", rmse_arima)

mse_arima = arima_squared_error.mean()
print("Mean Squared Error for ARIMA model: ", mse_arima)

Mean Absolute Error for ARIMA model:  17.91054213130565
Root Mean Squared Error for ARIMA model:  26.56927848796555
Mean Squared Error for ARIMA model:  705.9265593710691


In [22]:
# Null (persistence) baseline: the prediction for each row is simply the
# previous row's 'Real' value. Row 0 has no predecessor, so its prediction is
# NaN and is skipped by mean().
# (`msa_null` holds the mean absolute error; the name is kept as-is.)
data_for_trading["Predicted_Null"] = data_for_trading["Real"].shift(1)

null_error = data_for_trading["Real"] - data_for_trading["Predicted_Null"]
null_squared_error = null_error ** 2

msa_null = null_error.abs().mean()
print("Mean Absolute Error for Null model: ", msa_null)

rmse_null = np.sqrt(null_squared_error.mean())
print("Root Mean Squared Error for Null model: ", rmse_null)

mse_null = null_squared_error.mean()
print("Mean Squared Error for Null model: ", mse_null)

Mean Absolute Error for Null model:  9.932520644000002
Root Mean Squared Error for Null model:  15.254295942158924
Mean Squared Error for Null model:  232.6935446909662


In [23]:
# Persist the combined frame -- the loaded columns plus the prediction columns
# added in the cells above -- to CSV, without writing the row index.
data_for_trading.to_csv('Algorithmic_for_stock.csv', index=False)