Setup 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
import joblib
import os 
import sklearn



# Load the processed, model-ready dataset and keep only AAPL's closing price
# as a single-column DataFrame.
df = pd.read_parquet("../data/processed/model_ready.parquet")
is_aapl = df['ticker'] == 'AAPL'
aapl = df.loc[is_aapl, ['close']]

# Quick visual sanity check of the selected series.
aapl.plot(title='AAPL Closing Price', figsize=(12, 5))
plt.show()

Check Stationarity

In [None]:
# Augmented Dickey-Fuller unit-root test on the closing prices.
result = adfuller(aapl['close'])
p_value = result[1]  # second element of the returned tuple is the p-value
print('ADF p-value:', p_value)  # above 0.05 -> difference the data

# Re-plot the raw series next to the test result for reference.
aapl.plot(title='AAPL Stock Price', figsize=(12, 5))
plt.show()

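If the p-value is greater than 0.05, the ADF test cannot reject the null hypothesis of a unit root, so the series is treated as non-stationary → difference it: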

In [None]:
# First-order differencing: each row becomes the change from the previous row.
# The first row has no predecessor, so diff() leaves it as NaN and it is dropped.
price_changes = aapl.diff()
aapl_diff = price_changes.dropna()
aapl_diff.plot(title='Differenced AAPL Price')

Train ARIMA Model

Starting with simple parameters (order=(1,1,0), reduced from the initial (5,1,0)):

In [None]:
# Fit a simpler ARIMA(1,1,0) on the full AAPL closing-price series: one
# autoregressive lag, first-order differencing, and no moving-average terms.
# The smaller order is used to get rid of insignificant terms.
simple_model = ARIMA(aapl['close'], order=(1,1,0)).fit()
print(simple_model.summary())

# Create the output directory before saving: joblib.dump does not create
# missing parent directories, so on a fresh checkout the dump would fail.
os.makedirs("../src/models", exist_ok=True)
joblib.dump(simple_model, "../src/models/arima_aapl.pkl")
print("Simplified ARIMA(1,1,0) model saved!")

 Validate Predictions

In [None]:
# Cell-local imports, gathered at the top so the cell's dependencies are
# visible at a glance.
import joblib
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Positional hold-out split: the first 80% of rows train the model and the
# remaining 20% are kept for evaluation. The boundary is computed once so the
# two slices cannot drift apart.
split_at = int(0.8 * len(aapl))
train = aapl.iloc[:split_at]
test = aapl.iloc[split_at:]

# Fit the simpler ARIMA(1,1,0) (changed from (5,1,0)) on the training rows
# only, then forecast one value per held-out row.
model = ARIMA(train['close'], order=(1,1,0)).fit()
forecast = model.forecast(steps=len(test))

# Plot predictions vs actual
plt.figure(figsize=(12, 5))
plt.plot(train.index, train['close'], label='Train', linewidth=1)
plt.plot(test.index, test['close'], label='Actual', linewidth=1)
plt.plot(test.index, forecast, label='ARIMA(1,1,0) Predicted', color='red', linestyle='--')
plt.legend()
plt.title('AAPL Price: Actual vs Simplified ARIMA Predictions')
plt.grid(True)
plt.show()

# Ensure forecast is a plain NumPy array before computing the error metrics.
forecast = np.array(forecast)

# MAE is the mean absolute error; RMSE is taken as the square root of the MSE.
mae = mean_absolute_error(test['close'], forecast)
rmse = mean_squared_error(test['close'], forecast) ** 0.5

print("\nModel Performance:")
print(f"- MAE (Average Error): ${mae:.2f}")
print(f"- RMSE (Big Errors Penalized): ${rmse:.2f}")

# NOTE(review): `model` was fit on the 80% training split only, yet it is
# written to the same path this notebook uses for the model fit on the full
# series - confirm that overwriting that artifact here is intended.
# The directory is created first because joblib.dump does not create missing
# parent directories.
os.makedirs("../src/models", exist_ok=True)
joblib.dump(model, "../src/models/arima_aapl.pkl")
print("Simplified ARIMA(1,1,0) model saved!")

In [None]:
# Persist the model fit on the full series, creating the target folder first
# if it does not exist yet.
model_dir = "../src/models"
os.makedirs(model_dir, exist_ok=True)
joblib.dump(simple_model, "../src/models/arima_aapl.pkl")
print("Model saved to ../src/models/arima_aapl.pkl")