In [None]:
# GARCH(1,1) Model for Benchmark 
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from arch import arch_model
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Working directory that holds AAPL.csv. Set the STOCK_DATA_DIR environment variable
# to point at your own copy instead of editing this cell; the original path stays
# the default so existing setups keep working.
os.chdir(os.environ.get("STOCK_DATA_DIR", "C:/Programming/Stock prediction"))

# load & preprocess data function
def load_stock_data(filename="AAPL.csv"):
    """Read a price CSV and attach daily log returns plus rolling volatilities.

    The file must provide ``date`` and ``close`` columns. Three columns are added:
    ``log_return`` (log of close over previous close) and ``volatility_5d`` /
    ``volatility_21d`` (rolling standard deviation of the log returns over 5 and
    21 rows). Every row containing a NaN in any column is dropped and the index
    is renumbered from zero before the frame is returned.
    """
    prices = pd.read_csv(filename)
    prices['date'] = pd.to_datetime(prices['date'])

    # Compute the return series once; both volatility columns derive from it.
    log_ret = np.log(prices['close'] / prices['close'].shift(1))
    prices = prices.assign(
        log_return=log_ret,
        volatility_5d=log_ret.rolling(window=5).std(),
        volatility_21d=log_ret.rolling(window=21).std(),
    )

    # The first row has no return and the 21-row window needs 21 returns,
    # so the leading rows carry NaN and are removed here.
    return prices.dropna().reset_index(drop=True)

def plot_returns_and_volatility(df):
    """Show two figures built from ``df``: log returns, then 5-day vs 21-day volatility.

    Reads the ``date``, ``log_return``, ``volatility_5d`` and ``volatility_21d``
    columns. Each figure is displayed with ``plt.show()``; nothing is returned.
    """
    dates = df['date']

    # Figure 1: the daily log-return series.
    fig_ret, ax_ret = plt.subplots(figsize=(12, 6))
    ax_ret.plot(dates, df['log_return'], label="Daily Log Return", color='skyblue', alpha=0.7)
    ax_ret.set_title("Apple Daily Log Returns")
    ax_ret.set_xlabel("Date")
    ax_ret.set_ylabel("Log Return")
    ax_ret.grid(True)
    ax_ret.legend()
    fig_ret.tight_layout()
    plt.show()

    # Figure 2: both rolling-volatility series on shared axes.
    fig_vol, ax_vol = plt.subplots(figsize=(12, 6))
    ax_vol.plot(dates, df['volatility_5d'], label="5-Day Volatility", color='blue', alpha=0.8)
    ax_vol.plot(dates, df['volatility_21d'], label="21-Day Volatility", color='orange', alpha=0.8)
    ax_vol.set_title("Realized Volatility (5-day vs 21-day)")
    ax_vol.set_xlabel("Date")
    ax_vol.set_ylabel("Volatility")
    ax_vol.legend()
    ax_vol.grid(True)
    fig_vol.tight_layout()
    plt.show()

# Load the AAPL price file with its derived return/volatility columns and plot them.
# `df` is read again by the forecasting cell that follows.
df = load_stock_data("AAPL.csv")
plot_returns_and_volatility(df)



In [None]:
# Model fit and Forecast
# Rolling-window GARCH(1,1) benchmark: refit on a fixed-length window, forecast the
# next `forecast_horizon` daily volatilities, and compare their mean with the
# standard deviation of the returns actually observed over those days.

# prepare data
g_window = int(len(df) * 0.6)            # length of every training window (60% of the rows)
forecast_horizon = 5                     # number of days forecast after each window
start_train = int(len(df) * 0.2)         # the first training window starts 20% into the data
out_of_sample_size = int(len(df) * 0.2)  # number of rows covered by the forecast windows

garch_vol_forecast = []
actual_vol_test = []
dates_g = []


def _garch11_vol_path(omega, alpha, beta, last_resid, last_variance, horizon):
    """Return the 1..horizon step-ahead conditional volatilities of a GARCH(1,1).

    Step 1 uses the last observed shock:
        sigma2[T+1] = omega + alpha * last_resid**2 + beta * last_variance
    Later shocks are unknown; their expected square equals the conditional
    variance of that day, which gives
        sigma2[T+h] = omega + (alpha + beta) * sigma2[T+h-1]   for h >= 2.
    Returns a list of `horizon` volatilities (empty when horizon <= 0).
    """
    vols = []
    variance = last_variance
    shock_sq = last_resid ** 2
    for _ in range(horizon):
        variance = omega + alpha * shock_sq + beta * variance
        vols.append(np.sqrt(variance))
        # From the next step on, replace the unknown squared shock by its expectation.
        shock_sq = variance
    return vols


# sliding window forecast
for t_start in range(start_train, start_train + out_of_sample_size + 1 - forecast_horizon):
    t_end = t_start + g_window
    # Returns are scaled to percent for the fit; the scaling is undone after the loop.
    garch_train = df['log_return'].iloc[t_start:t_end] * 100
    garch_test = df['log_return'].iloc[t_end:t_end + forecast_horizon] * 100

    garch_model = arch_model(garch_train, vol='GARCH', p=1, q=1)
    garch_result = garch_model.fit(disp='off')
    omega = garch_result.params['omega']
    alpha = garch_result.params['alpha[1]']
    beta = garch_result.params['beta[1]']

    last_variance = garch_result.conditional_volatility.iloc[-1] ** 2
    # The variance equation is driven by the model residual (return minus the fitted
    # mean), not by the raw return, so take the shock from the fit result.
    last_resid = garch_result.resid.iloc[-1]
    garch_vols = _garch11_vol_path(omega, alpha, beta, last_resid, last_variance, forecast_horizon)

    garch_vol_forecast.append(np.mean(garch_vols))
    # NOTE(review): np.std defaults to ddof=0, while the rolling .std() columns built in
    # load_stock_data use pandas' default ddof=1 - confirm which definition is intended.
    actual_vol_test.append(np.std(garch_test))
    # Each forecast is dated by the first day of its forecast window.
    dates_g.append(df['date'].iloc[t_end])

# Undo the percent scaling so the results are in log-return units again.
garch_vol_forecast_log = np.array(garch_vol_forecast) / 100
actual_vol_test_log = np.array(actual_vol_test) / 100

plt.figure()
plt.plot(dates_g, actual_vol_test_log, label='Actual Volatility', color='black')
plt.plot(dates_g, garch_vol_forecast_log, label='GARCH Forecast', color='blue')
plt.title('GARCH(1,1) Forecast vs Actual Volatility (Test Set)')
plt.xlabel('Date')
plt.ylabel('Volatility')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

# Error metrics of the forecast against the realized volatility.
mae = mean_absolute_error(actual_vol_test_log, garch_vol_forecast_log)
mse = mean_squared_error(actual_vol_test_log, garch_vol_forecast_log)
rmse = np.sqrt(mse)
print(f"GARCH Test - MAE: {mae:.6f} | MSE: {mse:.6f} | RMSE: {rmse:.6f}")

In [None]:
# Save the test-set error metrics (mae, mse, rmse from the forecasting cell)
# as a single line of text, overwriting any previous file.
file_path = "garch_metrics_test.txt"
metrics_text = f"GARCH Test - MAE: {mae:.6f} | MSE: {mse:.6f} | RMSE: {rmse:.6f}"
with open(file_path, mode="w") as metrics_file:
    metrics_file.write(metrics_text)