In [None]:
from AlgorithmImports import *
from QuantConnect.DataSource import *
from QuantConnect.Research import QuantBook
from datetime import datetime
from datetime import timedelta
import pandas as pd 
from sklearn.svm import SVR
from scipy.stats import uniform as sp_rand
from sklearn.model_selection import RandomizedSearchCV
import matplotlib.pyplot as plt
import numpy as np 
from statsmodels.graphics.tsaplots import plot_acf
from arch import arch_model
import pandas as pd 
import scipy.optimize as spop
from sklearn.metrics import mean_squared_error
from dateutil.relativedelta import relativedelta
from scipy.stats import uniform as sp_rand
from dateutil.relativedelta import relativedelta
import warnings
warnings.filterwarnings('ignore')


In [None]:
# Research-environment entry point; every data request below goes through it.
qb = QuantBook()

# Subscribe to the 'SPX' index and keep the Symbol used to request its history.
spx = qb.AddIndex('SPX').Symbol

# History window: 1 Jan 2005 up to the moment this cell runs, so the data
# (and everything derived from it) changes from one run to the next.
start_time = datetime(2005, 1, 1)
end_time = datetime.now()

# The same window requested by single symbol and by a one-element list.
# NOTE(review): neither frame is referenced in the other cells visible here —
# confirm they are still needed before keeping two extra history requests.
single_history_df = qb.History(spx, start_time, end_time) 
subset_history_df = qb.History([spx], start_time, end_time)

# History for every key in qb.Securities; SPX is the only security added in this cell.
# The next cell reads the 'time', 'close', 'high', 'low' and 'open' fields from this frame.
all_history_df = qb.History(qb.Securities.Keys, start_time, end_time)



In [None]:

# Collapse the raw SPX history into one closing price per calendar month.
# Written as a single non-mutating chain so the frame is rebound exactly once
# instead of being modified in place step by step.
all_history_df = (
    all_history_df
    # Expose the index levels as ordinary columns (presumably 'symbol' and 'time').
    .reset_index()
    # Truncate every timestamp to its calendar date (kept as datetime64 so the
    # index built below can be resampled).
    .assign(time=lambda frame: pd.to_datetime(pd.to_datetime(frame['time']).dt.date))
    # Keep only the last quoted row of each date.
    .drop_duplicates(subset='time', keep='last')
    # Discard everything except the closing price. 'symbol' is dropped here as
    # well: the earlier standalone drop(['symbol']) call threw its result away,
    # so the column silently survived.
    .drop(columns=['high', 'low', 'open', 'symbol'])
    .set_index('time')
    # 'M' is the month-end frequency alias (spelled 'ME' in newer pandas);
    # .last() keeps the final available close of each month.
    .resample('M').last()
)

# Force the closes to numeric; anything unparseable becomes NaN rather than raising.
# `new` is a Series of month-end closes (not a DataFrame) and is the input that
# the later cells and both prediction functions consume.
new = pd.to_numeric(all_history_df['close'], errors='coerce')
all_history_df['close'] = new

new

In [None]:
# Month-over-month percentage returns of the SPX close; the first row is
# skipped because pct_change has no previous month to compare against.
ret = new.pct_change().iloc[1:] * 100

# Realized volatility: standard deviation of returns over a rolling 5-month
# window, stored as a one-column frame with a plain positional index.
realized_vol = ret.rolling(5).std().to_frame().reset_index(drop=True)

# Squared returns, the second model input; the 'time' column produced by
# reset_index is removed so only the values remain.
returns_svm = (ret ** 2).reset_index().drop(columns='time')

ret

In [None]:
# Feature matrix: realized volatility next to squared returns. ignore_index=True
# relabels the two columns 0 and 1; the row index is renumbered from 0.
X = pd.concat([realized_vol, returns_svm], axis=1, ignore_index=True).reset_index(drop=True)

# Target values: realized volatility without the NaN rows left by the rolling
# window, renumbered from 0.
realized_vol = realized_vol.dropna().reset_index(drop=True)

# Support vector regressor with a linear kernel; tuned and fitted in the next cell.
svr_lin = SVR(kernel='linear')

# Size of the hold-out set: the last n monthly rows are used for testing.
n = 30

# Feature rows that have no missing value in either column.
X_clean = X.dropna()


In [None]:
# Search space for the randomized hyper-parameter search: every value is drawn
# from uniform(0, 1). 'gamma' only affects the rbf/poly/sigmoid kernels, so it
# has no effect on the linear kernel used here; it is kept so the search space
# itself is unchanged.
para_grid = {'gamma': sp_rand(),
             'C': sp_rand(),
             'epsilon': sp_rand()}

# Seed the search so the sampled hyper-parameters, the fitted model and the
# RMSE below are identical on every Restart & Run All.
clf = RandomizedSearchCV(svr_lin, para_grid, random_state=42)

# Training pairs: feature row j is matched with realized volatility at j + 1,
# i.e. the model learns next month's volatility. The stop position is written
# out explicitly because the former `-(n - 1)` stop becomes `-0` (an empty
# slice) when n == 1.
train_stop = len(realized_vol) - n + 1
clf.fit(X_clean.iloc[:-n].values,
        realized_vol.iloc[1:train_stop].values.reshape(-1,))

# Predict on the held-out rows, passing a plain array exactly as in fit(), and
# label the predictions with the dates of the last n returns.
predict_svr_lin = pd.DataFrame(clf.predict(X_clean.iloc[-n:].values),
                               index=ret.iloc[-n:].index)

# RMSE on the hold-out set. Both sides are divided by 100 to move from percent
# to decimal units, following "Machine Learning for Financial Risk Management
# with Python" by Abdullah Karasan.
# NOTE(review): the model is trained on a one-month-ahead target, but the
# predictions are scored against realized_vol of the same rows — confirm
# whether the comparison should be shifted by one month.
rmse_svr = np.sqrt(mean_squared_error(realized_vol.iloc[-n:] / 100,
                                      predict_svr_lin / 100))

rmse_svr

In [None]:
## Month-by-month SVR-GARCH volatility forecast from the current date up to a future date
def predict_volatility_to_date(end_date_str, new, current_date=None, random_state=None, window=12):
    """Forecast monthly realized volatility from the current month up to an end date.

    A linear-kernel SVR is tuned with a randomized search on the realized
    volatility derived from `new`, then applied recursively: each forecast is
    pushed into a rolling window whose mean is the input of the next forecast.

    Parameters
    ----------
    end_date_str : str or datetime
        Target date, either a '%Y-%m-%d' string or an object exposing
        `.year` and `.month`.
    new : pandas.Series
        Month-end closing prices (the `new` series built earlier in this notebook).
    current_date : datetime, optional
        Start of the forecast. Defaults to `datetime.now()`.
    random_state : int, optional
        Seed forwarded to `RandomizedSearchCV` for reproducible results.
    window : int, optional
        Number of most recent volatility values averaged into each model
        input. Defaults to 12.

    Returns
    -------
    pandas.DataFrame
        Columns 'Date' and 'PredictedVol' (percent units), one row per month
        starting at `current_date`. The month of `end_date_str` itself is not
        included. The frame is empty when the end date is not in a later
        calendar month than `current_date`.

    Raises
    ------
    ValueError
        If `window` is smaller than 1, or (raised by scikit-learn) if there is
        not enough history to fit the model.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    # Accept either a date string or an already-parsed date object.
    if isinstance(end_date_str, str):
        end_date = datetime.strptime(end_date_str, '%Y-%m-%d')
    else:
        end_date = end_date_str

    if current_date is None:
        current_date = datetime.now()

    # Whole calendar months between the two dates (the day of month is ignored).
    months_to_predict = ((end_date.year - current_date.year) * 12
                         + (end_date.month - current_date.month))

    columns = ['Date', 'PredictedVol']
    if months_to_predict <= 0:
        # Nothing to forecast: skip the expensive model search entirely.
        return pd.DataFrame(columns=columns)

    # Percentage returns and their rolling 5-month standard deviation, with the
    # NaN rows left by the rolling window removed.
    ret = 100 * new.pct_change().iloc[1:]
    realized_vol = ret.rolling(5).std().dropna().reset_index(drop=True)
    history = realized_vol.to_numpy().reshape(-1)

    # One-step-ahead training pairs: volatility at t-1 -> volatility at t.
    # Fitting the series against itself (feature == target) would only teach
    # the model an identity map; the lag matches the training cell above.
    features = history[:-1].reshape(-1, 1)
    targets = history[1:]

    para_grid = {'gamma': sp_rand(), 'C': sp_rand(), 'epsilon': sp_rand()}
    clf = RandomizedSearchCV(SVR(kernel='linear'), para_grid, random_state=random_state)
    clf.fit(features, targets)

    # Rolling window seeded with the most recent observed volatilities.
    rolling_window = history[-window:].tolist()

    # Rows are collected in a list and turned into a frame once:
    # DataFrame.append / Series.append no longer exist in pandas >= 2.0.
    records = []
    for i in range(months_to_predict):
        # Model input: mean of the current window (example feature; adjust as needed).
        feature_value = float(np.mean(rolling_window))
        predicted_vol = float(clf.predict([[feature_value]])[0])

        records.append({'Date': current_date + relativedelta(months=i),
                        'PredictedVol': predicted_vol})

        # Slide the window: add the forecast, drop the oldest value when full.
        rolling_window.append(predicted_vol)
        if len(rolling_window) > window:
            rolling_window.pop(0)

    return pd.DataFrame(records, columns=columns)

In [None]:
def predict_cumulative_volatility(end_date_str, new, current_date=None, random_state=None, window=12):
    """Accumulate forecast monthly volatility from the current date to an end date.

    A linear-kernel SVR is tuned with a randomized search on the realized
    volatility derived from `new` and applied recursively, one calendar month
    at a time. The first and last months are pro-rated by the number of days
    that fall inside the [current_date, end_date] interval.

    Parameters
    ----------
    end_date_str : str or datetime
        Target date, either a '%Y-%m-%d' string or an object exposing
        `.year`, `.month` and `.day`.
    new : pandas.Series
        Month-end closing prices (the `new` series built earlier in this notebook).
    current_date : datetime, optional
        Start of the forecast. Defaults to `datetime.now()`.
    random_state : int, optional
        Seed forwarded to `RandomizedSearchCV` for reproducible results.
    window : int, optional
        Number of most recent volatility values averaged into each model
        input. Defaults to 12.

    Returns
    -------
    float
        Sum of the day-pro-rated monthly forecasts, in percent units;
        0.0 when the end date is not after the current date.

    Raises
    ------
    ValueError
        If `window` is smaller than 1, or (raised by scikit-learn) if there is
        not enough history to fit the model.

    Notes
    -----
    NOTE(review): forecasts are summed and pro-rated linearly in volatility.
    If volatility is meant to aggregate through variance (square-root-of-time
    scaling), this aggregation needs revisiting — confirm the intended
    definition of "cumulative volatility".
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    # Accept either a date string or an already-parsed date object.
    if isinstance(end_date_str, str):
        end_date = datetime.strptime(end_date_str, '%Y-%m-%d')
    else:
        end_date = end_date_str

    if current_date is None:
        current_date = datetime.now()

    # Nothing to accumulate unless the end date is strictly after today.
    end_key = (end_date.year, end_date.month, end_date.day)
    start_key = (current_date.year, current_date.month, current_date.day)
    if end_key <= start_key:
        return 0.0

    # Number of calendar months touched by the interval, counting both the
    # current month and the end month. Adding the final month only when
    # end.day > current.day applied the end-day proration one month too early.
    months_to_predict = ((end_date.year - current_date.year) * 12
                         + (end_date.month - current_date.month) + 1)

    # Percentage returns and their rolling 5-month standard deviation, with the
    # NaN rows left by the rolling window removed.
    ret = 100 * new.pct_change().iloc[1:]
    realized_vol = ret.rolling(5).std().dropna().reset_index(drop=True)
    history = realized_vol.to_numpy().reshape(-1)

    # One-step-ahead training pairs: volatility at t-1 -> volatility at t.
    # Fitting the series against itself (feature == target) would only teach
    # the model an identity map.
    features = history[:-1].reshape(-1, 1)
    targets = history[1:]

    para_grid = {'gamma': sp_rand(), 'C': sp_rand(), 'epsilon': sp_rand()}
    clf = RandomizedSearchCV(SVR(kernel='linear'), para_grid, random_state=random_state)
    clf.fit(features, targets)

    # Rolling window seeded with the last `window` observed volatilities
    # (the seed length now matches the cap applied inside the loop).
    rolling_window = history[-window:].tolist()

    cumulative_volatility = 0.0
    for i in range(months_to_predict):
        # Model input: mean of the current window (example feature; adjust as needed).
        feature_value = float(np.mean(rolling_window))
        predicted_vol = float(clf.predict([[feature_value]])[0])

        # Slide the window: add the forecast, drop the oldest value when full.
        # (Series.append no longer exists in pandas >= 2.0, hence the plain list.)
        rolling_window.append(predicted_vol)
        if len(rolling_window) > window:
            rolling_window.pop(0)

        # Length of the i-th calendar month: relativedelta(day=31) clamps to
        # the last valid day of that month.
        month_anchor = current_date + relativedelta(months=i)
        days_in_month = (month_anchor + relativedelta(day=31)).day

        # Days of this month that lie inside the forecast interval (inclusive).
        start_day = current_date.day if i == 0 else 1
        end_day = end_date.day if i == months_to_predict - 1 else days_in_month
        days_to_prorate = end_day - start_day + 1

        cumulative_volatility += predicted_vol / days_in_month * days_to_prorate

    return cumulative_volatility


In [None]:

# Example: cumulative SVR-GARCH volatility forecast for the next three months.
# The end date is derived from today's date because a fixed date such as
# '2024-3-3' is already in the past, which makes the forecast trivially 0.
example_end_date = (datetime.now() + relativedelta(months=3)).strftime('%Y-%m-%d')
cumil_vol = predict_cumulative_volatility(example_end_date, new)

# Divide by 100 to move from percent to decimal units, following "Machine
# Learning for Financial Risk Management with Python" by Abdullah Karasan.
cumil_vol / 100