# Rolling Forecast


# 1. Setup and Data Download

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
import warnings

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore")

# 1. Download Data
# We use 'CL=F' (Crude Oil Futures)
print("Downloading WTI Crude Oil data...")
df = yf.download('CL=F', start='2020-01-01', end='2024-01-01', progress=False)

# A failed download typically comes back as an empty frame instead of an
# exception, so fail loudly here rather than deep inside the model fit.
if df.empty:
    raise RuntimeError("No data returned for 'CL=F'; check the network connection and the ticker symbol.")

# Recent yfinance releases return (field, ticker) MultiIndex columns even for
# a single ticker. Flatten to the field level so that df['Close'] is a plain
# Series of prices rather than a one-column DataFrame.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

df = df[['Close']].dropna()

# 2. Split into Train (Historical Context) and Test (Simulation Period)
# We will simulate trading over the last 100 days
test_days = 100

# The split below needs at least one training row left over.
if len(df) <= test_days:
    raise ValueError(f"Need more than {test_days} rows of price data to split, got {len(df)}.")

train = df.iloc[:-test_days]
test = df.iloc[-test_days:]

print(f"Training Data: {len(train)} days")
print(f"Test Data (Simulation): {len(test)} days")

# 2. The Rolling Forecast Loop

This loop simulates the real world: every day, you get a new closing price, update your history, and forecast the next day.

Note: To save time, we use a fixed order of $(1,1,1)$. If you ran Auto-ARIMA and found a better order (e.g., $(2,1,2)$), replace `order=(1,1,1)` in the code below.

In [None]:
# Walk-forward (rolling) one-step-ahead forecast over the test period.
#
# Reads:    train, test (frames with a 'Close' column)
# Produces: predictions     - ARIMA one-step forecasts, one per test day
#           naive_forecasts - random-walk benchmark (previous day's close)
#
# Pull the prices out as flat float sequences. ravel() makes this work whether
# 'Close' is a Series or a single-column DataFrame (as happens when the
# downloaded frame has MultiIndex columns).
history = train['Close'].to_numpy(dtype=float).ravel().tolist()
actuals = test['Close'].to_numpy(dtype=float).ravel()
predictions = []
naive_forecasts = [] # To benchmark against Random Walk

print("\n--- Starting Rolling Forecast (this may take a minute) ---")

for t, obs in enumerate(actuals):
    obs = float(obs)

    # 1. Define and fit the model on the CURRENT history
    # (In a real production system, you might only re-fit parameters every week,
    # but here we re-fit daily for maximum accuracy)
    model = ARIMA(history, order=(1,1,1))
    model_fit = model.fit()

    # 2. Forecast the next step (t+1)
    # get_forecast gives us the predicted mean and intervals.
    # history is a plain list, so predicted_mean is a NumPy array rather than
    # a pandas Series; np.asarray(...)[0] works for either container.
    output = model_fit.get_forecast(steps=1)
    yhat = float(np.asarray(output.predicted_mean)[0])
    predictions.append(yhat)

    # 3. Create a Naive Forecast (Random Walk) for comparison
    # The "prediction" for tomorrow is simply today's price (the last item in history)
    naive_forecast = history[-1]
    naive_forecasts.append(naive_forecast)

    # 4. Update history with the ACTUAL observed price from the test set
    # (done only after forecasting, so the model never sees the value it predicts)
    history.append(obs)

    # Optional: Print progress every 10 days
    if (t+1) % 10 == 0:
        print(f"Day {t+1}/{len(test)}: Predicted={yhat:.2f}, Actual={obs:.2f}")

print("Forecast loop complete.")

# 3. Visual Evaluation

We will visualize three lines:

1. Actual Price: The ground truth.

2. ARIMA Forecast: Your model.

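3. Naive Forecast: The Random Walk benchmark. Each day's forecast is simply the previous day's closing price, so this line is the actual price series shifted forward by one day.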

In [None]:
# Gather the actual prices and both forecast series in one frame that shares
# the test period's index, so all three lines plot against the same dates.
results_df = test.copy()
results_df['ARIMA_Pred'] = predictions
results_df['Naive_Pred'] = naive_forecasts

# One entry per plotted line: (column to draw, keyword styling for plt.plot)
line_specs = [
    ('Close', {'label': 'Actual Price', 'color': 'black', 'linewidth': 1.5}),
    ('ARIMA_Pred', {'label': 'ARIMA Model', 'color': 'red', 'linestyle': '--', 'alpha': 0.8}),
    ('Naive_Pred', {'label': 'Naive (Random Walk)', 'color': 'green', 'linestyle': ':', 'alpha': 0.6}),
]

plt.figure(figsize=(14,7))
for col, line_kwargs in line_specs:
    plt.plot(results_df.index, results_df[col], **line_kwargs)

# Title, axis label, legend and a faint grid, then render
plt.title(f"WTI Crude Oil: ARIMA vs Random Walk ({test_days} Days)")
plt.ylabel("Price (USD)")
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()