In [1]:
import itertools
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.arima_model import ARIMA

np.random.seed(1)
# Directory holding one "<ticker>.csv" file per symbol. The original local path
# stays the default so existing runs are unchanged; set the TICKER_DIR
# environment variable to run the notebook from another machine or checkout.
TICKER_DIR = os.environ.get(
    "TICKER_DIR",
    "C:\\Users\\anton\\Documents\\antoniouaa\\msc_thesis\\data\\tickers\\ticker_data",
)

ticker = "AMZN"

dataset = pd.read_csv(os.path.join(TICKER_DIR, f"{ticker}.csv"))
# Drop every row that has at least one missing value.
dataset = dataset.dropna(how="any")
# Keep only the last 1000 values of the "close" column as a plain array.
close = dataset["close"].tail(1000).values

# The scaler is fitted on the whole 1000-value window, reshaped to one column.
# NOTE(review): metrics computed later on `scaled` are therefore in scaled
# units, not in the units of the "close" column.
sc = MinMaxScaler()
scaled = sc.fit_transform(np.reshape(close, (-1, 1)))

In [2]:
def evaluate_arima(X, order):
    """Score one ARIMA order with walk-forward validation.

    The first 80% of `X` seeds the history. For each remaining observation a
    new ARIMA model is fitted to the history so far, the next value is
    forecast, and only then is the true observation appended to the history.

    Args:
        X: sliceable sequence of observations (presumably in time order).
        order: ARIMA order, passed straight through to `ARIMA(..., order=order)`.

    Returns:
        `(mse, mae, model)`: the mean squared error and mean absolute error of
        the forecasts against the held-out observations, plus the ARIMA object
        built in the final iteration (the object `fit` was called on, not the
        result that `fit` returned).
    """
    split_at = int(len(X) * 0.8)
    train, test = X[:split_at], X[split_at:]
    history = list(train)
    predictions = []

    # NOTE(review): `fit(disp=...)` and indexing `forecast()[0]` follow the API
    # of the `statsmodels.tsa.arima_model` import at the top of the notebook;
    # confirm the installed statsmodels version still provides that module.
    for step in range(len(test)):
        model = ARIMA(history, order=order)
        fitted = model.fit(disp=-1)
        predictions.append(fitted.forecast()[0])
        # Reveal the true value only after it has been forecast.
        history.append(test[step])

    return (
        mean_squared_error(test, predictions),
        mean_absolute_error(test, predictions),
        model,
    )

In [3]:
import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning, HessianInversionWarning
# Suppress statsmodels' convergence and Hessian-inversion warnings for the
# rest of the session.
warnings.simplefilter("ignore", category=ConvergenceWarning)
warnings.simplefilter("ignore", category=HessianInversionWarning)

# The single (p, d, q) order scored in this cell.
order = (2, 2, 0)

mse, mae, model = evaluate_arima(scaled, order=order)
print(f"MSE: {mse}\nMAE: {mae}")

MSE: 0.0002858896879250826
MAE: 0.012965114718674519


In [4]:
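# Disabled: when uncommented, appends one "ticker,mae,mse" row (MAE before MSE,
# no header line) for the current run to scores_arima.csv.
# with open("scores_arima.csv", "a") as scores_file:
#     scores_file.write(f"{ticker},{mae},{mse}\n")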

In [5]:
def evaluate_models(X, p_values, d_values, q_values):
    """Grid-search ARIMA orders and collect the MSE of each one.

    Every (p, d, q) combination of the three inputs is scored with
    `evaluate_arima`. One line is printed per combination: either its MSE and
    MAE, or a note that it was skipped together with the error that caused it.

    Args:
        X: series forwarded unchanged to `evaluate_arima`.
        p_values: iterable of candidate p values (outermost loop).
        d_values: iterable of candidate d values.
        q_values: iterable of candidate q values (innermost loop).

    Returns:
        List of `(mse, order)` tuples, one per order that was scored without
        raising, in iteration order. The list is empty if every order failed.
    """
    scores = []
    # product() reads each iterable once up front, so one-shot iterators are
    # fully covered; iteration order matches the nested p/d/q loops.
    for order in itertools.product(p_values, d_values, q_values):
        try:
            mse, mae, _ = evaluate_arima(X, order)
        except Exception as exc:
            # Best-effort search: an order that cannot be evaluated is reported
            # and skipped. Catching Exception (not a bare except) lets
            # KeyboardInterrupt/SystemExit stop a long-running search.
            print(f"ARIMA ORDER: {order}\tSKIPPED: {type(exc).__name__}: {exc}")
            continue
        scores.append((mse, order))
        print(f"ARIMA ORDER: {order}\tMSE: {mse}\tMAE: {mae}")
    return scores

In [6]:
scores = evaluate_models(scaled, [0, 1, 2], range(2), range(2))
# evaluate_models skips every order whose evaluation raised, so the result can
# be empty; fail with an explicit message instead of a bare IndexError.
if not scores:
    raise RuntimeError("evaluate_models returned no scores: every ARIMA order failed")
# Entries are (mse, order) tuples, which compare by MSE first and fall back to
# the order tuple on ties, so min() selects the same entry as sorted(scores)[0].
best_score = min(scores)
print(f"BEST SCORE: {best_score[0]}\tORDER: {best_score[1]}")

ARIMA ORDER: (0, 0, 0)	MSE: 0.1574234570770829	MAE: 0.3916602208372353
ARIMA ORDER: (0, 0, 1)	MSE: 0.041847315229036494	MAE: 0.19795050266631378
ARIMA ORDER: (0, 1, 0)	MSE: 0.00021723940803458497	MAE: 0.011140579242843618
ARIMA ORDER: (0, 1, 1)	MSE: 0.00021779801742155178	MAE: 0.011131383653015121
ARIMA ORDER: (1, 0, 0)	MSE: 0.000217319656074252	MAE: 0.01118495657183967
ARIMA ORDER: (1, 0, 1)	MSE: 0.0002178071343206536	MAE: 0.01118151058356682
ARIMA ORDER: (1, 1, 0)	MSE: 0.00021773913225797724	MAE: 0.011132024411860281
ARIMA ORDER: (1, 1, 1)	MSE: 0.00021733893027178235	MAE: 0.011099856812265106
ARIMA ORDER: (2, 0, 0)	MSE: 0.0002177619995626575	MAE: 0.011181690346788153
ARIMA ORDER: (2, 0, 1)	MSE: 0.00022017451839316068	MAE: 0.011224561262178604
ARIMA ORDER: (2, 1, 0)	MSE: 0.00021795551563839308	MAE: 0.01111831960292713
ARIMA ORDER: (2, 1, 1)	MSE: 0.08257043986333334	MAE: 0.03146388044899173
BEST SCORE: 0.00021723940803458497	ORDER: (0, 1, 0)
