In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt


In [2]:
# === Load Model Predictions ===
# Load GARCH & Heston predictions
df_garch_heston = pd.read_csv("volatility_predictions.csv", index_col=0, parse_dates=True)


In [3]:
# Load LSTM predictions
df_lstm_pred = pd.read_csv("lstm_volatility_predictions.csv", index_col=0, parse_dates=True)


In [4]:
# Merge all data
df_compare = df_garch_heston.copy()
df_compare["LSTM_Volatility"] = df_lstm_pred["Predicted Volatility (LSTM)"]


In [5]:
# === Compute Model Performance Metrics ===
def calculate_rmse(actual, predicted, model_name):
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    print(f"{model_name} RMSE: {rmse:.6f}")
    return rmse

In [6]:
# Keep only data from 2022-10-27 onward
df_garch_heston = df_garch_heston[df_garch_heston.index >= "2022-10-27"]

# Verify that both datasets have the same start date now
print("Updated GARCH/Heston Index Range:", df_garch_heston.index.min(), "to", df_garch_heston.index.max())
print("LSTM Predictions Index Range:", df_lstm_pred.index.min(), "to", df_lstm_pred.index.max())


Updated GARCH/Heston Index Range: 2022-10-27 00:00:00 to 2024-10-31 00:00:00
LSTM Predictions Index Range: 2022-11-14 00:00:00 to 2024-10-31 00:00:00


In [13]:
df_compare["LSTM_Volatility"].fillna(method="bfill", inplace=True)
df_compare["Actual Volatility"].fillna(method="bfill", inplace=True)


In [14]:
# Merge datasets correctly
df_compare = df_garch_heston.join(df_lstm_pred, how="left")

# Rename column for consistency
df_compare.rename(columns={"Predicted Volatility (LSTM)": "LSTM_Volatility"}, inplace=True)

# Check if NaNs are gone
print(df_compare.isna().sum())


Close                  0
Net                    0
%Chg                   0
Open                   0
Low                    0
High                   0
Volume                 0
Turnover - USD         0
Log_Returns            0
Rolling_Volatility     0
Scaled_Log_Returns     0
GARCH_Volatility       0
Heston_Volatility      0
LSTM_Volatility       12
Actual Volatility     12
dtype: int64


In [15]:
print(df_compare.isna().sum())  # Check for NaNs in all columns


Close                  0
Net                    0
%Chg                   0
Open                   0
Low                    0
High                   0
Volume                 0
Turnover - USD         0
Log_Returns            0
Rolling_Volatility     0
Scaled_Log_Returns     0
GARCH_Volatility       0
Heston_Volatility      0
LSTM_Volatility       12
Actual Volatility     12
dtype: int64


In [16]:
# Calculate RMSE for each model
rmse_garch = calculate_rmse(df_compare["Rolling_Volatility"], df_compare["GARCH_Volatility"], "GARCH")
rmse_heston = calculate_rmse(df_compare["Rolling_Volatility"], df_compare["Heston_Volatility"], "Heston")
rmse_lstm = calculate_rmse(df_compare["Rolling_Volatility"], df_compare["LSTM_Volatility"], "LSTM")


GARCH RMSE: 0.041376
Heston RMSE: 2.160322


ValueError: Input contains NaN.

In [None]:
plt.figure(figsize=(12, 6))
plt.plot(df_compare.index, df_compare["Rolling_Volatility"], label="Actual Volatility", color="black")
plt.plot(df_compare.index, df_compare["GARCH_Volatility"], label="GARCH Forecast", linestyle="dashed", color="blue")
plt.plot(df_compare.index, df_compare["Heston_Volatility"], label="Heston Forecast", linestyle="dotted", color="green")
plt.plot(df_compare.index, df_compare["LSTM_Volatility"], label="LSTM Forecast", linestyle="dashdot", color="red")
plt.title("Volatility Forecasting: GARCH vs. Heston vs. LSTM")
plt.legend()
plt.show()

In [None]:
df_compare.to_csv("final_volatility_comparison.csv")
print("Final volatility comparison results saved!")
