In [None]:
import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt 
from statsmodels.tsa.arima_model import ARIMA 
from sklearn.metrics import mean_squared_error

# Directory the per-ticker CSV files are read from.
TICKER_DIR = "C:\\Users\\anton\\Documents\\antoniouaa\\msc_thesis\\data\\tickers\\ticker_data\\_Rolling"

# Load a single ticker file and drop every row that has a missing value
# in any column, in one chained expression.
dataset = pd.read_csv(os.path.join(TICKER_DIR, "MA_AIZP.csv")).dropna(how="any")
dataset

In [None]:
import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning, HessianInversionWarning
# The grid search refits ARIMA many times; silence the optimiser warnings
# so the per-order score lines stay readable.
warnings.simplefilter("ignore", ConvergenceWarning)
warnings.simplefilter("ignore", HessianInversionWarning)

X = dataset["close"].values
scores = evaluate_models(X, [0, 1, 2, 4, 6], range(3), range(3))
# evaluate_models returns (mse, order) tuples, so the smallest tuple is the
# fit with the lowest MSE.
best_score = min(scores)
# Unpack by name: the tuple is (mse, order), so index 0 is the score and
# index 1 is the order (the previous print had the two labels swapped).
best_mse, best_order = best_score
print(f"BEST SCORE: {best_mse}\tORDER: {best_order}")

In [None]:
# One-step-ahead forecast using order (4, 1, 2), the order the original
# note describes as optimal.
X = dataset["close"].values
model = ARIMA(X, order=(4, 1, 2))
model_ = model.fit(disp=False)
# Element 0 of the forecast result is what gets reported as the forecast.
yhat = model_.forecast()[0]
print(
    f"Current Value: {X[-1]}\n"
    f"Forecast: {yhat}"
)

In [None]:
# Five-step forecast from the model fitted in the previous cell
forecasting = model_.forecast(steps=5)
forecast_vals = forecasting[0]
# Build an array as long as history + forecast that is NaN everywhere except
# the trailing forecast slots, so the forecast is drawn after the observed
# series on the same x-axis. Starting from NaN (instead of zero-filling and
# masking zeros afterwards) keeps a forecast that is exactly 0.0 visible, and
# np.nan also works on NumPy 2.x where the np.NaN alias no longer exists.
preds = np.full(len(X) + len(forecast_vals), np.nan)
preds[-len(forecast_vals):] = forecast_vals
# plot them on top of each other
plt.plot(X)
plt.plot(preds)

In [None]:
# Repeat the order grid search on a second ticker.
test_set = pd.read_csv(os.path.join(TICKER_DIR, "MA_BAP.csv"))
test_set = test_set.dropna(how="any")
test_X = test_set["close"].values
scores = evaluate_models(test_X, [0, 1, 2, 4, 6], range(3), range(3))
# evaluate_models returns (mse, order) tuples, so the smallest tuple is the
# fit with the lowest MSE.
best_score = min(scores)
# Unpack by name: index 0 is the MSE and index 1 is the order (the previous
# print had the two labels swapped).
best_mse, best_order = best_score
print(f"BEST SCORE: {best_mse}\tORDER: {best_order}")

In [1]:
def evaluate_arima(X, order, train_fraction=0.6):
    """Score one ARIMA order with walk-forward validation.

    The series is split in order: the leading ``train_fraction`` of the
    observations seeds the history and the remainder is held out. For each
    held-out observation the model is refitted on the history so far, its
    forecast is recorded, and the true value is then appended to the history.

    Parameters
    ----------
    X : sequence or numpy array
        Observations, assumed to be in time order.
    order : tuple
        Order passed through unchanged to ``ARIMA(..., order=order)``.
    train_fraction : float, optional
        Share of ``X`` used as the initial history; must lie strictly between
        0 and 1. Defaults to 0.6, the split that used to be hard-coded.

    Returns
    -------
    The value returned by ``mean_squared_error`` for the held-out
    observations against the collected forecasts.

    Raises
    ------
    ValueError
        If ``train_fraction`` is outside (0, 1), or if the split leaves the
        initial history or the held-out part empty.
    """
    if not 0 < train_fraction < 1:
        raise ValueError(
            f"train_fraction must be strictly between 0 and 1, got {train_fraction!r}"
        )

    size = int(len(X) * train_fraction)
    # Fail early with a clear message instead of letting the model or the
    # metric choke on an empty history / empty hold-out.
    if size == 0 or size >= len(X):
        raise ValueError(
            f"cannot split {len(X)} observations with train_fraction={train_fraction!r}: "
            "both the training and the held-out part must be non-empty"
        )

    train, test = X[:size], X[size:]
    history = list(train)
    predictions = []
    # Walk forward: refit on everything seen so far, forecast, then reveal
    # the true observation before moving on.
    for observed in test:
        # disp=-1 is forwarded to fit() as in the original code (intended to
        # keep the optimiser quiet).
        model_fit = ARIMA(history, order=order).fit(disp=-1)
        # Element 0 of the forecast result is taken as the prediction.
        predictions.append(model_fit.forecast()[0])
        history.append(observed)

    return mean_squared_error(test, predictions)

In [2]:
def evaluate_models(X, p_values, d_values, q_values):
    """Grid-search ARIMA orders and collect the score of every order that evaluates.

    Every combination ``(p, d, q)`` drawn from the three value collections is
    passed to ``evaluate_arima``. Orders whose evaluation raises are reported
    and skipped so one bad order does not abort the whole grid.

    Parameters
    ----------
    X : sequence or numpy array
        Series handed unchanged to ``evaluate_arima``.
    p_values, d_values, q_values : iterable
        Candidate values for the three components of the order. ``p`` varies
        slowest and ``q`` fastest.

    Returns
    -------
    list of tuple
        ``(mse, order)`` pairs in evaluation order, one per order that was
        evaluated successfully. Empty if every order failed.
    """
    from itertools import product

    scores = []
    for order in product(p_values, d_values, q_values):
        try:
            mse = evaluate_arima(X, order)
        except Exception as exc:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit still stop a long-running grid
            # search, and report the failure instead of dropping it silently.
            print(f"ARIMA ORDER: {order}\tSKIPPED: {exc!r}")
            continue
        scores.append((mse, order))
        print(f"ARIMA ORDER: {order}\tMSE: {mse}")
    return scores

In [3]:
# Combine the close prices of all tickers and scale them to [0, 1],
# then run a grid search to find the best ARIMA hyperparameters

import os
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

# Directory the per-ticker CSV files are read from.
TICKER_DIR = "C:\\Users\\anton\\Documents\\antoniouaa\\msc_thesis\\data\\tickers\\ticker_data\\_Rolling"
tickers = []
cols = ["close"]
# os.listdir returns entries in arbitrary order; sort them so the
# concatenated series (and anything fitted on it) is reproducible.
for tick in sorted(os.listdir(TICKER_DIR)):
    # Skip anything that is not a CSV file (sub-folders, OS metadata files).
    if not tick.lower().endswith(".csv"):
        continue
    path = os.path.join(TICKER_DIR, tick)
    # Select the single column explicitly to get a Series. This replaces
    # read_csv(squeeze=True), which was deprecated in pandas 1.4 and removed
    # in pandas 2.0.
    tick_df = pd.read_csv(path, header=0, usecols=cols)[cols[0]]
    tickers.append(tick_df)

# Stack every ticker's close prices into one long series, then scale the
# values as a single column with MinMaxScaler.
df = pd.concat(tickers)
X = df.values.reshape(-1, 1)
scaler = MinMaxScaler()
scaled_df = scaler.fit_transform(X)
scaled_df

array([[0.41735308],
       [0.42277325],
       [0.42311468],
       ...,
       [0.06845632],
       [0.06773078],
       [0.06794418]])

In [None]:
from statsmodels.tsa.arima_model import ARIMA 
from sklearn.metrics import mean_squared_error
from statsmodels.tools.sm_exceptions import ConvergenceWarning, HessianInversionWarning
import warnings

# The grid search refits ARIMA many times; silence the optimiser warnings
# so the per-order score lines stay readable.
warnings.simplefilter("ignore", ConvergenceWarning)
warnings.simplefilter("ignore", HessianInversionWarning)

# Candidate values for p, d and q, in that order. They are unpacked straight
# into the call so the grid is defined in exactly one place.
possible_orders = ((0, 1, 2, 4, 6), (0, 1, 2), (0, 1, 2))
scores = evaluate_models(scaled_df, *possible_orders)