# Idea
Predict the realised volatility of returns one trading day ahead.

# Articles:
["Forecasting the value-at-risk of Chinese stock market using the HARQ model and extreme value theory"](https://github.com/dcherechukin/HARQ_model_and_EVT/blob/e7ea7fc6f01d859611fccf6a746ee020fc25ad11/Forecasting%20the%20value-at-risk%20of%20Chinese%20stock%20market%20using%20the%20HARQ%20model%20and%20extreme%20value%20theory.pdf)

# Theory and formulas
[Notes]()

# Class 

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import datetime

import warnings
warnings.filterwarnings("ignore")

In [93]:
class HAR_realised_volatility:
    
    def __init__(self, data):
        '''
        Store the raw data; nothing is computed until .fit() is called.
        Input: data - raw pd.DataFrame (the methods below read the columns
               '<DATE>', '<TIME>' and '<CLOSE>' from it)
        '''
        self.data = data

    def data_transformation(data):
        '''
        Create date from columns '<DATE>' and '<TIME>'
        Input: raw data in pd.DataFrame format with columns '<DATE>' and '<TIME>' in format '%Y%m%d' and '%H%M%S' 
        and ends with the full trding day information
        Output: data in DataFrame with columns 'Date' in datetime.datetime format and 'Close'
        '''

        # required functions
        def date_preprocessing(x):
            return datetime.datetime.strptime(str(x), '%Y%m%d%H%M%S')

        ####
        data['Date'] = (
            data['<DATE>'].astype(str) +
            data['<TIME>'].astype(str)).map(lambda x: date_preprocessing(x))
        data = data.drop(
            columns=[x for x in data.columns if x not in ['<CLOSE>', 'Date']])
        data = data.rename(columns={'<CLOSE>': 'Close'})
        return data

    def create_data_for_models(data):
        '''
            Create 2 DataFrames: returns and realised_volatility, which we need as data for models.
            Input: data with 'Date' and 'Clase' columns
            Output: returns - pd.DataFrame with columns: 'date' - date, 
                                                         'returns' - returns from close prices]
                    realised_volatility - pd.DataFrame with columns: 'date' - date, 
                                                                     'rv_daily' - realised volatility for this date, 
                                                                     'BPV' - for HAR_j and SHAR models,
                                                                     'RQ' -  for HARQ and HARQF models]
        '''
        returns = pd.DataFrame({
            'returns':
            data['Close'].pct_change().dropna(),
            'date':
            data['Date'][1:],
            'only_date':
            data['Date'].map(lambda x: x.date())[1:]
        })

        # required functions
        def rv_function(array):
            '''
            Calculate realised volatility per 1 day
            '''
            return np.sum(array**2) / len(array)

        def BPV(array):
            '''
            Calculate BPV from the formula: $BPV_t = \sqrt(\frac{2}{\pi})^{-2} \sum_{i = 1}^{M - 1} {|r_{t,i}| \times |r_{t,i+1}|}
            '''
            return (np.pi / 2) * np.sum(np.abs(
                array * array.shift(1))) / len(array)

        def RQ(array):
            '''
            Calculate BPV from the formula: $RQ_t = \frac{1}{3M} \sum_{i = 1}^{M} {r_{t, i}^4}$
            '''
            return np.sum(array**4) / (3 * len(array))

        ####

        realised_volatility = returns.groupby('only_date', as_index=False).agg(
            {'returns': {rv_function, BPV, RQ}})
        realised_volatility.columns = [
            ' '.join(col).strip() for col in realised_volatility.columns.values
        ]
        realised_volatility.rename(columns={
            'only_date': 'date',
            'returns rv_function': 'rv_daily',
            'returns BPV': 'BPV',
            'returns RQ': 'RQ'
        },
                                   inplace=True)
        return returns, realised_volatility

    def HAR(self, mode='fit'):
        '''
        Create pd.DataFrame for HAR model
        Input: returns - pd.DataFrame with columns: 'date' - date, 
                                                    'returns' - returns from close prices
               realised_volatility - pd.DataFrame with columns: 'date' - date, 
                                                                'rv_daily' - realised volatility for this date, 
                                                                'BPV' - for HAR_j and SHAR models,
                                                                'RQ' -  for HARQ and HARQF models
        Output: pd.DataFrame with columns: 'date' - date,
                                           'day' - realised volatility value for previous date,
                                           'week' - average realised volatility value for previous 5 dates,
                                           'month' - average realised volatility value for previous 22 dates,
                                           'rv' - realised volatility value for this date
        '''

        # required functions
        def previous(array):
            return [array[i - 1] for i in range(23, len(array))]

        def week_sum(array):
            return [sum(array[i - 6:i - 1]) / 5 for i in range(23, len(array))]

        def month_sum(array):
            return [
                sum(array[i - 23:i - 1]) / 22 for i in range(23, len(array))
            ]

        ####
        if mode == 'fit':
            HAR_data = pd.DataFrame()
            HAR_data['date'] = self.df_rv.date[23:]
            HAR_data['day'] = previous(self.df_rv.rv_daily.values)
            HAR_data['week'] = week_sum(self.df_rv.rv_daily.values)
            HAR_data['month'] = month_sum(self.df_rv.rv_daily.values)
            HAR_data['rv'] = self.df_rv.rv_daily[23:]
            return HAR_data
        else:
            data_for_prediction = pd.DataFrame({
                'date':
                self.df_rv_test.date.values[-2],
                'day':
                self.df_rv_test.rv_daily.values[-2],
                'week':
                week_sum(self.df_rv_test.rv_daily.values),
                'month':
                month_sum(self.df_rv_test.rv_daily.values)
            })
            return data_for_prediction

    def HAR_j(self, mode='fit'):
        '''
        Create pd.DataFrame for HAR_j model
        Input: returns - pd.DataFrame with columns: 'date' - date, 
                                                    'returns' - returns from close prices
               realised_volatility - pd.DataFrame with columns: 'date' - date, 
                                                                'rv_daily' - realised volatility for this date, 
                                                                'BPV' - for HAR_j and SHAR models,
                                                                'RQ' -  for HARQ and HARQF models
        Output: pd.DataFrame with columns: 'date' - date,
                                           'day' - realised volatility value for previous date,
                                           'week' - average realised volatility value for previous 5 dates,
                                           'month' - average realised volatility value for previous 22 dates,
                                           'jump' - jump value for model,
                                           'rv' - realised volatility value for this date
        '''

        # required functions
        def previous(array):
            return [array[i - 1] for i in range(23, len(array))]

        def week_sum(array):
            return [sum(array[i - 6:i - 1]) / 5 for i in range(23, len(array))]

        def month_sum(array):
            return [
                sum(array[i - 23:i - 1]) / 22 for i in range(23, len(array))
            ]

        ####
        if mode == 'fit':
            HAR_data = pd.DataFrame()
            HAR_data['date'] = self.df_rv.date[23:]
            HAR_data['day'] = previous(self.df_rv.rv_daily.values)
            HAR_data['week'] = week_sum(self.df_rv.rv_daily.values)
            HAR_data['month'] = month_sum(self.df_rv.rv_daily.values)
            HAR_data['jump'] = [
                max(0, x)
                for x in np.array(previous(self.df_rv.rv_daily.values)) -
                np.array(previous(self.df_rv.BPV.values))
            ]
            HAR_data['rv'] = self.df_rv.rv_daily[23:]
            return HAR_data
        else:
            data_for_prediction = pd.DataFrame({
                'date':
                self.df_rv_test.date.values[-2],
                'day':
                self.df_rv_test.rv_daily.values[-2],
                'week':
                week_sum(self.df_rv_test.rv_daily.values),
                'month':
                month_sum(self.df_rv_test.rv_daily.values),
                'jump': [
                    max(0, x)
                    for x in np.array(previous(self.df_rv.rv_daily.values)) -
                    np.array(previous(self.df_rv.BPV.values))
                ][-1]
            })
            return data_for_prediction

    def CHAR(self, mode='fit'):
        '''
        Create pd.DataFrame for CHAR model
        Input: returns - pd.DataFrame with columns: 'date' - date, 
                                                    'returns' - returns from close prices
               realised_volatility - pd.DataFrame with columns: 'date' - date, 
                                                                'rv_daily' - realised volatility for this date, 
                                                                'BPV' - for HAR_j and SHAR models,
                                                                'RQ' -  for HARQ and HARQF models
        Output: pd.DataFrame with columns: 'date' - date,
                                           'day' - BPV value for previous date,
                                           'week' - average BPV value for previous 5 dates,
                                           'month' - average BPV value for previous 22 dates,
                                           'rv' - realised volatility value for this date
        '''

        # required functions
        def previous(array):
            return [array[i - 1] for i in range(23, len(array))]

        def week_sum(array):
            return [sum(array[i - 6:i - 1]) / 5 for i in range(23, len(array))]

        def month_sum(array):
            return [
                sum(array[i - 23:i - 1]) / 22 for i in range(23, len(array))
            ]

        ####
        if mode == 'fit':
            HAR_data = pd.DataFrame()
            HAR_data['date'] = self.df_rv.date[23:]
            HAR_data['day'] = previous(self.df_rv.BPV.values)
            HAR_data['week'] = week_sum(self.df_rv.BPV.values)
            HAR_data['month'] = month_sum(self.df_rv.BPV.values)
            HAR_data['rv'] = self.df_rv.rv_daily[23:]

            return HAR_data
        else:
            data_for_prediction = pd.DataFrame({
                'date':
                self.df_rv_test.date.values[-2],
                'day':
                self.df_rv_test.BPV.values[-2],
                'week':
                week_sum(self.df_rv_test.rv_daily.values),
                'month':
                month_sum(self.df_rv_test.rv_daily.values)
            })
            return data_for_prediction

    def SHAR(self, mode='fit'):
        '''
        Create pd.DataFrame for SHAR model
        Input: returns - pd.DataFrame with columns: 'date' - date, 
                                                    'returns' - returns from close prices
               realised_volatility - pd.DataFrame with columns: 'date' - date, 
                                                                'rv_daily' - realised volatility for this date, 
                                                                'BPV' - for HAR_j and SHAR models,
                                                                'RQ' -  for HARQ and HARQF models
        Output: pd.DataFrame with columns: 'date' - date,
                                           'day+' - realised volitility value for positive returns for previous date,
                                           'day-' - realised volitility value for negative returns for previous date,
                                           'week' - average realised volatility value for previous 5 dates,
                                           'month' - average realised volatility value for previous 22 dates,
                                           'rv' - realised volatility value for this date
        '''

        # required functions
        def previous(array):
            return [array[i - 1] for i in range(23, len(array))]

        def week_sum(array):
            return [sum(array[i - 6:i - 1]) / 5 for i in range(23, len(array))]

        def month_sum(array):
            return [
                sum(array[i - 23:i - 1]) / 22 for i in range(23, len(array))
            ]

        def day_positive(array):
            return np.sum([x**2 for x in array if x > 0])

        def day_negative(array):
            return np.sum([x**2 for x in array if x < 0])

        ####
        if mode == 'fit':
            HAR_data = pd.DataFrame()

            HAR_data['date'] = self.df_rv.date[23:]
            HAR_data['day+'] = self.df_r.groupby('only_date').agg({
                'returns':
                day_positive
            }).returns.shift(1).values[23:]
            HAR_data['day-'] = self.df_r.groupby('only_date').agg({
                'returns':
                day_negative
            }).returns.shift(1).values[23:]
            HAR_data['week'] = week_sum(self.df_rv.rv_daily.values)
            HAR_data['month'] = month_sum(self.df_rv.rv_daily.values)
            HAR_data['rv'] = self.df_rv.rv_daily[23:]
            return HAR_data
        else:
            data_for_prediction = pd.DataFrame({
                'date':
                self.df_rv_test.date.values[-2],
                'day+':
                self.df_r_test.groupby('only_date').agg({
                    'returns': day_positive
                }).returns.shift(1).values[-1],
                'day-':
                self.df_r_test.groupby('only_date').agg({
                    'returns': day_negative
                }).returns.shift(1).values[-1],
                'week':
                week_sum(self.df_rv_test.rv_daily.values),
                'month':
                month_sum(self.df_rv_test.rv_daily.values)
            })
            return data_for_prediction

    def HARQ(self, mode='fit'):
        '''
        Create pd.DataFrame for SHAR model
        Input: returns - pd.DataFrame with columns: 'date' - date, 
                                                    'returns' - returns from close prices
               realised_volatility - pd.DataFrame with columns: 'date' - date, 
                                                                'rv_daily' - realised volatility for this date, 
                                                                'BPV' - for HAR_j and SHAR models,
                                                                'RQ' -  for HARQ and HARQF models
        Output: pd.DataFrame with columns: 'date' - date,
                                           'day' - realised volitility value for previous date,
                                           'day_RQ^1/2' - square root of RQ multiply realised volitility value for previous date,
                                           'week' - average realised volatility value for previous 5 dates,
                                           'month' - average realised volatility value for previous 22 dates,
                                           'rv' - realised volatility value for this date
        '''

        # required functions
        def previous(array):
            return [array[i - 1] for i in range(23, len(array))]

        def week_sum(array):
            return [sum(array[i - 6:i - 1]) / 5 for i in range(23, len(array))]

        def month_sum(array):
            return [
                sum(array[i - 23:i - 1]) / 22 for i in range(23, len(array))
            ]

        ####
        if mode == 'fit':
            HAR_data = pd.DataFrame()
            HAR_data['date'] = self.df_rv.date[23:]
            HAR_data['day'] = previous(self.df_rv.rv_daily.values)
            HAR_data['day_RQ^1/2'] = np.array(
                previous(self.df_rv.rv_daily.values)) * np.array(
                    previous(self.df_rv.RQ.values))**(1 / 2)
            HAR_data['week'] = week_sum(self.df_rv.rv_daily.values)
            HAR_data['month'] = month_sum(self.df_rv.rv_daily.values)
            HAR_data['rv'] = self.df_rv.rv_daily[23:]
            HAR_data.reset_index(drop=True)
            return HAR_data
        else:
            data_for_prediction = pd.DataFrame({
                'date':
                self.df_rv_test.date.values[-2],
                'day':
                self.df_rv_test.rv_daily.values[-2],
                'day_RQ^1/2':
                np.array(previous(self.df_rv_test.rv_daily.values))[-1] *
                np.array(previous(self.df_rv_test.RQ.values))[-1]**(1 / 2),
                'week':
                week_sum(self.df_rv_test.rv_daily.values),
                'month':
                month_sum(self.df_rv_test.rv_daily.values)
            })
            return data_for_prediction

    def HARQF(self, mode='fit'):
        '''
        Create pd.DataFrame for SHAR model
        Input: returns - pd.DataFrame with columns: 'date' - date, 
                                                    'returns' - returns from close prices
               realised_volatility - pd.DataFrame with columns: 'date' - date, 
                                                                'rv_daily' - realised volatility for this date, 
                                                                'BPV' - for HAR_j and SHAR models,
                                                                'RQ' -  for HARQ and HARQF models
        Output: pd.DataFrame with columns: 'date' - date,
                                           'day' - realised volitility value for previous date,
                                           'day_RQ^1/2' - square root of RQ multiply realised volitility value for previous date,
                                           'week' - average realised volatility value for previous 5 dates,
                                           'week_RQ^1/2' - square root of average week RQ multiply average realised volatility value for previous 5 dates,
                                           'month' - average realised volatility value for previous 22 dates,
                                           'month_RQ^1/2' - square root of average month RQ multiply average realised volatility value for previous 22 dates,
                                           'rv' - realised volatility value for this date
        '''

        # required functions
        def previous(array):
            return [array[i - 1] for i in range(23, len(array))]

        def week_sum(array):
            return [sum(array[i - 6:i - 1]) / 5 for i in range(23, len(array))]

        def month_sum(array):
            return [
                sum(array[i - 23:i - 1]) / 22 for i in range(23, len(array))
            ]

        ####
        if mode == 'fit':
            HAR_data = pd.DataFrame()
            HAR_data['date'] = self.df_rv.date[23:]
            HAR_data['day'] = previous(self.df_rv.rv_daily.values)
            HAR_data['day_RQ^1/2'] = np.array(
                previous(self.df_rv.rv_daily.values)) * np.array(
                    previous(self.df_rv.RQ.values))**(1 / 2)
            HAR_data['week'] = week_sum(self.df_rv.rv_daily.values)
            HAR_data['week_RQ^1/2'] = np.array(
                week_sum(self.df_rv.rv_daily.values)) * np.array(
                    week_sum(self.df_rv.RQ.values))**(1 / 2)
            HAR_data['month'] = month_sum(self.df_rv.rv_daily.values)
            HAR_data['month_RQ^1/2'] = np.array(
                month_sum(self.df_rv.rv_daily.values)) * np.array(
                    month_sum(self.df_rv.RQ.values))**(1 / 2)
            HAR_data['rv'] = self.df_rv.rv_daily[23:]
            return HAR_data
        else:
            data_for_prediction = pd.DataFrame({
                'date':
                self.df_rv_test.date.values[-2],
                'day':
                self.df_rv_test.rv_daily.values[-2],
                'day_RQ^1/2': (self.df_rv_test.rv_daily.values[-2] *
                               self.df_rv_test.RQ.values[-2])**(1 / 2),
                'week':
                week_sum(self.df_rv_test.rv_daily.values),
                'week_RQ^1/2':
                np.array(week_sum(self.df_rv_test.rv_daily.values)) *
                np.array(week_sum(self.df_rv_test.RQ.values))**(1 / 2),
                'month':
                month_sum(self.df_rv_test.rv_daily.values),
                'month_RQ^1/2':
                np.array(month_sum(self.df_rv_test.rv_daily.values)) *
                np.array(month_sum(self.df_rv_test.RQ.values))**(1 / 2),
            })
            return data_for_prediction

    def OLS_model(self, df):
        '''
        Fit an OLS regression of 'rv' on every other column except 'date'.
        Input: DataFrame with the feature columns, the target column 'rv' and (optionally) 'date'
        Output: fitted statsmodels OLS results; the feature names are also stored in self.features
        '''
        non_features = ('rv', 'date')
        regressors = []
        for column in df.columns:
            if column in non_features:
                continue
            regressors.append(column)
        self.features = regressors
        fitted = sm.OLS(df[['rv']], df[regressors]).fit()
        return fitted

    def models_compairing(self):
        '''
        Compare different models efficiency by adj R^2 value
        Input: need self.df_r and self.df_rv (created in .fit)
        Output: fitted OLS model with the highest adj R^2 value on data.
                Also stores the winner's builder in self.function, its table in self.model_df
                and its feature names in self.features, and shows the comparison table.
        Raises: ValueError if no model produces a finite adj R^2
        '''
        candidates = [
            HAR_realised_volatility.HAR, HAR_realised_volatility.HAR_j,
            HAR_realised_volatility.CHAR, HAR_realised_volatility.SHAR,
            HAR_realised_volatility.HARQ, HAR_realised_volatility.HARQF
        ]
        labels = []
        r_squares = []
        best_model = None
        best_function = None
        # Start below any attainable value: adj R^2 can be negative, and a
        # starting value of 0 would leave no model selected in that case.
        r_square_best = float('-inf')
        for function in candidates:
            df = function(self)
            features = [x for x in df.columns if x not in ('rv', 'date')]
            model = sm.OLS(df[['rv']], df[features]).fit()
            score = model.rsquared_adj
            labels.append(function.__name__)
            r_squares.append(round(score, 4))
            # Strict '>' keeps the first model on ties; NaN never wins.
            if score > r_square_best:
                r_square_best = score
                best_model = model
                best_function = function
                self.model_df = df
                self.features = features
        if best_model is None:
            raise ValueError('no model produced a finite adj R^2')

        comparsion_of_models = pd.DataFrame({'adj R^2': r_squares},
                                            index=labels)
        try:
            show = display  # provided by IPython / Jupyter only
        except NameError:
            show = print
        show(comparsion_of_models.sort_values(by=['adj R^2']))
        self.function = best_function
        return best_model

    def fit(self, model_type='best'):
        '''
        Fit specified model type. 
        Input: model type - HAR, HAR_j, CHAR, SHAR, HARQ, HARQF. If not specified - choose model with the best adj R^2
        Output: print adj R^2 and return model
        '''
        self.transformed_data = HAR_realised_volatility.data_transformation(
            data)
        self.df_r, self.df_rv = HAR_realised_volatility.create_data_for_models(
            self.transformed_data)
        if model_type == 'best':
            self.model = HAR_realised_volatility.models_compairing(self)
            model_df = self.model_df
        elif model_type == 'HAR':
            model_df = HAR_realised_volatility.HAR(self)
            self.function = HAR_realised_volatility.HAR
        elif model_type == 'HAR_j':
            model_df = HAR_realised_volatility.HAR_j(self)
            self.function = HAR_realised_volatility.HAR_j
        elif model_type == 'SHAR':
            model_df = HAR_realised_volatility.SHAR(self)
            self.function = HAR_realised_volatility.SHAR
        elif model_type == 'CHAR':
            model_df = HAR_realised_volatility.CHAR(self)
            self.function = HAR_realised_volatility.CHAR
        elif model_type == 'HARQ':
            model_df = HAR_realised_volatility.HARQ(self)
            self.function = HAR_realised_volatility.HARQ
        elif model_type == 'HARQF':
            model_df = HAR_realised_volatility.HARQF(self)
            self.function = HAR_realised_volatility.HARQF
        self.model = HAR_realised_volatility.OLS_model(self, model_df)
        print('Adj R^2 = ', round(self.model.rsquared_adj, 4))

    def predict_1_day(self):
        '''
        Predict realised volatility for returns for 1 day after last day in data. 
        Input: need .fit before using first time
        Output: value of realised volatility 
        '''
        self.df_rv_test = self.df_rv[-23:].append(pd.Series(),
                                                  ignore_index=True)
        self.df_r_test = self.df_r.loc[
            self.df_r.only_date >= min(self.df_rv[-23:].date.values)].append(
                pd.Series(), ignore_index=True)
        self.df_rv_test = self.df_rv_test.reset_index()
        self.df_r_test = self.df_r_test.reset_index()
        data_for_prediction = self.function(self, 'test')
        result = self.model.predict(
            data_for_prediction[self.features]).values[0]
        return result

    def update(self, new_data):
        '''
        Updating input data with new raw data
        Input: new raw data which starts where old data ends from the new trading day and ends with the full trading day information
        '''
        self.data = self.data.append([new_data], ignore_index=True)

# Example

In [90]:
# Load the CSI300 intraday bars (CSV files downloaded from finam.ru)
csv_files = [
    'INDEX.CSI300_120101_141231.csv',
    'INDEX.CSI300_150101_171231.csv',
    'INDEX.CSI300_180101_201231.csv',
    'INDEX.CSI300_210101_221128.csv',
]
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
data = pd.concat(
    [pd.read_csv(path, delimiter=",") for path in csv_files],
    ignore_index=True)
# The data must end with a complete trading day, so the (possibly partial)
# last day in the download is dropped.
data = data.loc[data['<DATE>'] < 20221128]
display(data)

del csv_files

Unnamed: 0,<TICKER>,<PER>,<DATE>,<TIME>,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<VOL>
0,INDEX.CSI300,5,20120104,54500,2361.4990,2361.4990,2361.4990,2361.4990,245403
1,INDEX.CSI300,5,20120104,55000,2363.8790,2364.2060,2355.8910,2355.8910,2013863
2,INDEX.CSI300,5,20120104,55500,2355.7500,2358.2700,2355.3040,2358.0250,1148331
3,INDEX.CSI300,5,20120104,60000,2358.4710,2365.8240,2358.2800,2363.9270,1411218
4,INDEX.CSI300,5,20120104,60500,2364.4720,2365.9150,2361.3150,2361.3150,1345723
...,...,...,...,...,...,...,...,...,...
155765,INDEX.CSI300,5,20221125,105000,3775.7764,3775.7764,3775.7764,3775.7764,53
155766,INDEX.CSI300,5,20221125,105500,3775.7764,3775.7764,3775.7764,3775.7764,55
155767,INDEX.CSI300,5,20221125,110000,3775.7764,3775.7764,3775.7764,3775.7764,53
155768,INDEX.CSI300,5,20221125,110500,3775.7764,3775.7764,3775.7764,3775.7764,55


In [94]:
# Wrap the raw bars in the model class (nothing is computed until .fit is called)
CSI300 = HAR_realised_volatility(data)

In [102]:
# Fit the 'HAR' model, then forecast realised volatility for the day after the last day in the data
CSI300.fit('HAR')
print('Realised volatility of returns for the next day: ', round(CSI300.predict_1_day(), 8))

Adj R^2 =  0.449
Realised volatility of returns for the next day:  1.37e-06


In [103]:
# Fit the 'HAR_j' model, then forecast realised volatility for the day after the last day in the data
CSI300.fit('HAR_j')
print('Realised volatility of returns for the next day: ', round(CSI300.predict_1_day(), 8))

Adj R^2 =  0.4685
Realised volatility of returns for the next day:  1.28e-06


In [104]:
# Fit the 'SHAR' model, then forecast realised volatility for the day after the last day in the data
CSI300.fit('SHAR')
print('Realised volatility of returns for the next day: ', round(CSI300.predict_1_day(), 8))

Adj R^2 =  0.4544
Realised volatility of returns for the next day:  1.38e-06


In [105]:
# Fit the 'CHAR' model, then forecast realised volatility for the day after the last day in the data
CSI300.fit('CHAR')
print('Realised volatility of returns for the next day: ', round(CSI300.predict_1_day(), 8))

Adj R^2 =  0.4846
Realised volatility of returns for the next day:  1.78e-06


In [106]:
# Fit the 'HARQ' model, then forecast realised volatility for the day after the last day in the data
CSI300.fit('HARQ')
print('Realised volatility of returns for the next day: ', round(CSI300.predict_1_day(), 8))

Adj R^2 =  0.4817
Realised volatility of returns for the next day:  1.28e-06


In [107]:
# Fit the 'HARQF' model, then forecast realised volatility for the day after the last day in the data
CSI300.fit('HARQF')
print('Realised volatility of returns for the next day: ', round(CSI300.predict_1_day(), 8))

Adj R^2 =  0.4955
Realised volatility of returns for the next day:  7.2e-07


In [108]:
# Fit all six models, keep the one with the highest adj. R^2, then forecast the next day with it
CSI300.fit()
print('Realised volatility of returns for the next day: ', round(CSI300.predict_1_day(), 8))

Unnamed: 0,adj R^2
HAR,0.449
SHAR,0.4544
HAR_j,0.4685
HARQ,0.4817
CHAR,0.4846
HARQF,0.4955


Adj R^2 =  0.4955
Realised volatility of returns for the next day:  7.2e-07
