In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from pmdarima import auto_arima
# Load the processed returns table from its relative path into the DataFrame `A`.
returns_path = "../data/processed/A_returns.csv"
A = pd.read_csv(returns_path)

1. Check for Stationarity

In [40]:
# Turn the 'Date' column into a DatetimeIndex.
# The guard makes the cell safe to re-run: once 'Date' has become the index it
# is no longer a column, and an unguarded A['Date'] lookup raises KeyError on a
# second execution. Building a new frame (instead of inplace=True) keeps the
# step free of hidden in-place mutation.
if 'Date' in A.columns:
    A = A.assign(Date=pd.to_datetime(A['Date'])).set_index('Date')

In [41]:
# Augmented Dickey-Fuller test on the non-missing values of column 'A'.
returns = A['A'].dropna()
result = adfuller(returns)

# The first two entries of the result are the test statistic and its p-value.
adf_statistic, p_value = result[0], result[1]
print(f"ADF Statistic: {adf_statistic}")
print(f"p-value: {p_value}")

ADF Statistic: -19.115000184909754
p-value: 0.0


The p-value (0.0) is below 0.05, so we reject the ADF null hypothesis of a unit root, which means the time series is stationary.

In [48]:
# Positional hold-out split without shuffling: the first 80% of observations
# form the training set and the remaining observations form the test set.
train_size = int(len(returns) * 0.8)
train = returns.iloc[:train_size]
test = returns.iloc[train_size:]

In [70]:
# Let auto_arima pick a model using the training data only, then show the
# (p, d, q) order it selected.
stepwise_fit = auto_arima(train)
selected_order = stepwise_fit.order
print(selected_order)

(2, 0, 0)


In [65]:
# Starting state for walk-forward forecasting: `history` begins as a copy of
# the training observations and `predictions` begins empty.
# NOTE(review): the walk-forward cell that follows re-creates both lists, so
# this cell looks redundant - confirm and remove.
history, predictions = list(train), []

In [81]:
import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Walk-forward (expanding-window) evaluation: at every test step the ARIMA is
# refitted on everything observed so far, asked for a one-step-ahead forecast,
# and then the true value for that step is appended to the history.
arima_order = stepwise_fit.order  # loop-invariant, so read it once
history = list(train)
predictions = []
convergence_warnings = 0

for t in range(len(test)):
    # record=True collects the warnings in `w` instead of printing them.
    with warnings.catch_warnings(record=True) as w:
        # "always" so repeated ConvergenceWarnings are recorded every time,
        # not only the first time they are raised from a given location.
        warnings.simplefilter("always", category=ConvergenceWarning)

        try:
            model = ARIMA(history, order=arima_order)
            model_fit = model.fit(method_kwargs={'maxiter': 1000})
            yhat = model_fit.forecast()[0]
        except Exception as e:
            # Best effort: report the failure and leave a gap (NaN) for this
            # step so one bad fit does not abort the whole evaluation.
            print(f"Error at step {t}: {e}")
            yhat = np.nan

        # Tally the convergence warnings emitted while fitting this step.
        convergence_warnings += sum(
            issubclass(warn.category, ConvergenceWarning) for warn in w
        )

    predictions.append(yhat)
    history.append(test.iloc[t])  # add true value to history

# Convert predictions to a Series with the same index as test.
# dtype=float keeps the Series numeric even when `predictions` is empty.
pred_series = pd.Series(predictions, index=test.index, dtype=float)

# Evaluation.
# The sklearn metrics raise ValueError on NaN input, so the steps that failed
# above (stored as NaN) are excluded here and reported instead of crashing.
has_forecast = pred_series.notna()
missing_forecasts = int((~has_forecast).sum())
if missing_forecasts:
    print(f"Steps without a forecast (excluded from metrics): {missing_forecasts} of {len(test)}")
if convergence_warnings:
    print(f"Convergence warnings: {convergence_warnings}")
if not has_forecast.any():
    raise RuntimeError("No forecasts were produced, so MAE/RMSE cannot be computed")

mae = mean_absolute_error(test[has_forecast], pred_series[has_forecast])
rmse = np.sqrt(mean_squared_error(test[has_forecast], pred_series[has_forecast]))

print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")

MAE: 0.0121
RMSE: 0.0166


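MAE: 0.0121 - on average the one-step forecast misses the actual return by 0.0121 (about 1.21 percentage points). This is an absolute error in return units, not a percentage error rate.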

RMSE: 0.0166

In [89]:
# Mean absolute return over the full series (presumably a scale reference for
# reading the MAE).
np.abs(returns.to_numpy()).mean()

0.012008138859187025

In [88]:
# Display the return series; pandas abbreviates a long Series, so only the
# first and last rows plus its name, length and dtype are shown.
returns

Date
2015-01-05   -0.018738
2015-01-06   -0.015578
2015-01-07    0.013272
2015-01-08    0.029975
2015-01-09   -0.007337
                ...   
2024-12-24    0.011144
2024-12-26   -0.001984
2024-12-27   -0.002135
2024-12-30   -0.008263
2024-12-31    0.001252
Name: A, Length: 2515, dtype: float64

In [None]:
import matplotlib.pyplot as plt

# Overlay the one-step-ahead forecasts on the actual test-period values.
# The explicit fig/ax interface is used instead of pyplot's implicit state,
# and both axes are labelled so the figure can be read on its own.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(test, label='Actual')
ax.plot(pred_series, label='Forecast', color='red')
ax.set_title('Walk-Forward Forecasting with ARIMA')
ax.set_xlabel('Date')
ax.set_ylabel('Return')
ax.legend()
plt.show()
