In [None]:
# Import libraries
import warnings
import itertools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from modules.load_data import load_data

# Defaults
# The style sheet must be applied first: "ggplot" carries its own rcParams
# (font.size among them), so overrides set before it would be silently reset.
plt.style.use("ggplot")
plt.rcParams["figure.figsize"] = (20.0, 10.0)
plt.rcParams.update({"font.size": 12})

In [None]:
# Load the data
# Resample the price table onto a daily calendar and forward-fill the rows
# that the resampling introduces. `Resampler.ffill()` is the supported
# spelling of the deprecated `Resampler.fillna(method="ffill")`.
data = load_data()["prices"].resample("D").ffill()
data.index = pd.to_datetime(data.index, format="%Y-%m-%d")
data.index.name = None

# Keeping just the selected stocks
sel_stocks = ["DPZ", "WST", "ODFL", "MKTX", "TYL", "AAPL", "CPRT", "MSCI", "EXR", "KR"]

# One boolean column selection instead of dropping columns one at a time
# (each drop copied the whole frame). The surviving columns keep their
# original order, and tickers missing from the data are simply absent.
data = data.loc[:, data.columns.isin(sel_stocks)]

# Plot the data (log scale so differently priced stocks remain comparable)
data.plot()
plt.ylabel("Stock Prices")
plt.xlabel("Date")
plt.yscale("log")
plt.show()

data

In [None]:
# Candidate orders for the search grid: p spans 0..3, while d and q span 0..1
p = range(0, 4)
q = d = range(0, 2)

# Every (p, d, q) triplet from the ranges above
pdq = list(itertools.product(p, d, q))

# Seasonal counterparts: the same triplets extended with a period of 12
seasonal_pdq = [(*triplet, 12) for triplet in pdq]

print("Examples of parameter combinations for Seasonal ARIMA...")
# (position in pdq, position in seasonal_pdq) for each example line
for order_pos, seasonal_pos in ((1, 1), (1, 2), (2, 3), (2, 4)):
    print("SARIMAX: {} x {}".format(pdq[order_pos], seasonal_pdq[seasonal_pos]))

In [None]:
# Restrict the price history to the modelling window (both end dates inclusive)
sel_data = data.loc["2017-12-29":"2019-06-28"]
sel_stocks = ["DPZ", "WST", "ODFL", "MKTX", "TYL", "AAPL", "CPRT", "MSCI", "EXR", "KR"]

# Parallel lists: position i in each list refers to the same stock
train_dict = {"stocks": [], "train_data": [], "results": []}

for stock in sel_stocks:
    # Log-price differences; the first row has no predecessor, so it is removed
    log_prices = np.log(sel_data[stock])
    train_data = log_prices.diff().drop(sel_data.index[0])
    train_dict["stocks"].append(stock)
    # The series stored for fitting leaves out the final observation
    train_dict["train_data"].append(train_data.iloc[:-1])

train_dict

In [None]:
# SARIMAX specification applied to every stock: [order, seasonal_order]
params = [(1, 0, 0), (0, 0, 0, 12)]

# Start from an empty list so that re-running this cell cannot leave
# duplicated fits that no longer line up with train_dict["stocks"].
train_dict["results"] = []

# `stock_index` is bound by the loop itself. Previously it was never defined
# in this cell, so a fresh kernel raised NameError and a stale kernel fitted
# every model on whichever series the leftover index pointed at.
for stock_index, stock in enumerate(train_dict["stocks"]):
    # Disabled AIC grid search over pdq x seasonal_pdq, kept for reference:
    #
    #     warnings.filterwarnings("ignore") # specify to ignore warning messages

    #     AIC = []
    #     SARIMAX_model = []
    #     for param in pdq:
    #         for param_seasonal in seasonal_pdq:
    #             try:
    #                 mod = sm.tsa.statespace.SARIMAX(train_dict['train_data'][stock_index],
    #                                                 order=param,
    #                                                 seasonal_order=param_seasonal,
    #                                                 enforce_stationarity=False,
    #                                                 enforce_invertibility=False)

    #                 results = mod.fit()

    #                 print('SARIMAX{}x{} - AIC:{}'.format(param, param_seasonal, results.aic), end='\r')
    #                 AIC.append(results.aic)
    #                 SARIMAX_model.append([param, param_seasonal])
    #             except:
    #                 continue

    #     print('The smallest AIC is {} for model SARIMAX{}x{}'.format(min(AIC), SARIMAX_model[AIC.index(min(AIC))][0],SARIMAX_model[AIC.index(min(AIC))][1]))

    # Fit the fixed specification on this stock's training series
    mod = sm.tsa.statespace.SARIMAX(
        train_dict["train_data"][stock_index],
        order=params[0],
        seasonal_order=params[1],
        enforce_stationarity=False,
        enforce_invertibility=False,
    )

    result = mod.fit()

    # Appended in the same order as train_dict["stocks"]
    train_dict["results"].append(result)

In [None]:
# Residual diagnostics for two of the fitted models, looked up by ticker
diag_positions = {}
for ticker in ("DPZ", "KR"):
    diag_positions[ticker] = train_dict["stocks"].index(ticker)
    fitted = train_dict["results"][diag_positions[ticker]]
    fitted.plot_diagnostics(figsize=(20, 14))
    plt.show()

# Keep the original cell-level names available
dpz_index = diag_positions["DPZ"]
kr_index = diag_positions["KR"]

In [None]:
train_dict["pred_results"] = []

# Window boundaries come from sel_data's index. Every training series was
# built from sel_data with only its first row removed, so these are the same
# dates that the loop-leaked `train_data` variable used to supply, without
# depending on leftover state from another cell.
pred_start = sel_data.index[-80]
forecast_end = sel_data.index[-1] + pd.Timedelta(days=30)

for stock_index, stock in enumerate(train_dict["stocks"]):
    result = train_dict["results"][stock_index]

    # Predictions from pred_start onward, without and with dynamic=True
    pred0 = result.get_prediction(start=pred_start, dynamic=False)
    pred1 = result.get_prediction(start=pred_start, dynamic=True)

    # Forecast up to 30 days past the end of the selected window
    pred_result = result.get_forecast(forecast_end)
    train_dict["pred_results"].append(pred_result)

    # Forecast mean (converted with expm1 for display) and its confidence band
    pred_mean = pred_result.predicted_mean
    print(np.expm1(pred_mean))
    pred_ci = pred_result.conf_int()

    # Observed log-price differences over the selected window as the baseline
    ax = (
        np.log(data[stock])
        .diff()
        .loc[sel_data.index[1] : sel_data.index[-1]]
        .plot(figsize=(20, 16))
    )
    pred0.predicted_mean.plot(
        ax=ax, label="1-step-ahead Forecast (get_predictions, dynamic=False)"
    )
    pred1.predicted_mean.plot(
        ax=ax, label="Dynamic Forecast (get_predictions, dynamic=True)"
    )
    pred_mean.plot(ax=ax, label="Dynamic Forecast (get_forecast)")
    ax.fill_between(
        pred_ci.index, pred_ci.iloc[:, 0], pred_ci.iloc[:, 1], color="k", alpha=0.1
    )
    # The plotted quantity is the log return, not the price level
    plt.ylabel("Daily Log Return")
    plt.xlabel("Date")
    plt.legend()
    plt.show()

In [None]:
train_dict["returns"] = []
train_dict["real_returns"] = []
train_dict["weights"] = []

# Zero-based position of the forecast step used as the predicted return.
# NOTE(review): why the third step is the one of interest is not visible
# here - confirm against the intended forecast horizon.
FORECAST_STEP = 2

for stock_index, stock in enumerate(train_dict["stocks"]):
    forecast_mean = train_dict["pred_results"][stock_index].predicted_mean

    # Explicit positional access: integer keys on a date-indexed Series rely
    # on a deprecated fallback, so use .iloc throughout.
    final_log_return = np.float64(forecast_mean.iloc[FORECAST_STEP])
    final_return = np.expm1(final_log_return)

    # Simple return over the last two rows of the full price table.
    # NOTE(review): this is the end of `data`, not the forecast date - confirm
    # that this is the intended comparison.
    prices = data[stock]
    real_return = prices.diff().iloc[-1] / prices.iloc[-2]

    train_dict["returns"].append(final_return)
    train_dict["real_returns"].append(real_return)

weights_df = pd.DataFrame(
    {
        "Stock": train_dict["stocks"],
        "Predicted Returns": train_dict["returns"],
        "Real Returns": train_dict["real_returns"],
    }
)
weights_df