In [381]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.tsa as tsa
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.metrics import mean_squared_error

FX_PRL = pd.read_csv('FX_PRL.csv',usecols=range(1,10), index_col='Time')
FX_PRL.index = pd.to_datetime(FX_PRL.index)
FX_PRL['log_mid_price'] = 0.5*(np.log(FX_PRL.BID_PRICE) + np.log(FX_PRL.ASK_PRICE))
FX_PRL['log_2sRt'] = FX_PRL.log_mid_price - FX_PRL.log_mid_price.shift(2)

In [338]:
def calc_z(data,sample_interval='2s'):
    n = data.resample('10min',closed='right').count().BID_SIZE.values
    K = data.resample(sample_interval,closed='right').last().resample('10min',closed='left').count()['BID_SIZE'].values
    return (n-K+1)/K, K, n

def var_All(data):
    def assist_func(input_data):
        input_data_forward1 = input_data.shift(-1)
        res = np.sum(np.power((input_data_forward1 - input_data).dropna().values, 2))
        return res
    res = data.log_2sRt.resample('10min',closed='right').apply(assist_func).values
    return res

def var_T(data,K,sample_interval='2s'):
    def assist_func(input_data):
        input_data_forward1 = input_data.shift(-1)
        res = np.sum(np.power((input_data_forward1 - input_data).dropna().values, 2))
        return res
    res = np.divide(data.log_2sRt.resample('2s',closed='right').last().resample('10min',closed='left').apply(assist_func).values,K)
    return res

def TSRV(XXKT, XXAllT, z, n):
    return np.divide(XXKT - np.divide(z,n)*XXAllT, 1 - np.divide(z,n) )

def return_TSRV(symbol):
    data = FX_PRL[FX_PRL.Symbol==symbol]
    z, K, n = calc_z(data)
    XXKT = var_T(data,K,sample_interval='2s')
    XXAllT = var_All(data)
    return TSRV(XXKT, XXAllT, z, n)

def calc_RV30min(data):
    return np.array(list(map(lambda i: data[i-3:i].sum(), list(range(3,len(data)+1)))))/3

def calc_RV1hr(data):
    return np.array(list(map(lambda i: data[i-6:i].sum(), list(range(6,len(data)+1)))))/6

def construct_dataset_forRegression(RV10min, RV30min, RV1hr):
    return sm.add_constant(np.matrix([RV10min[5:-1], RV30min[3:-1], RV1hr[:-1]]).T)

In [334]:
RV10min_EURJPY = np.sqrt(return_TSRV('EBS_BOOK::EUR/JPY'))
RV10min_EURUSD = np.sqrt(return_TSRV('EBS_BOOK::EUR/USD'))
RV10min_USDJPY = np.sqrt(return_TSRV('EBS_BOOK::USD/JPY'))

RV30min_EURJPY = calc_RV30min(RV10min_EURJPY)
RV30min_EURUSD = calc_RV30min(RV10min_EURUSD)
RV30min_USDJPY = calc_RV30min(RV10min_USDJPY)

RV1hr_EURJPY = calc_RV1hr(RV10min_EURJPY)
RV1hr_EURUSD = calc_RV1hr(RV10min_EURUSD)
RV1hr_USDJPY = calc_RV1hr(RV10min_USDJPY)

## OLS

In [357]:
# OLS function
def OLS(RV10min, RV30min, RV1hr):
    data_regression = construct_dataset_forRegression(RV10min, RV30min, RV1hr)
    RV10min_tplus10 = RV10min[6:]
    X_train, X_test, y_train, y_test = train_test_split(data_regression, RV10min_tplus10, test_size=0.3, shuffle=False)
    model = sm.OLS(y_train,X_train)
    result = model.fit()
    print(result.summary())
    y_predict = result.predict(X_test)
    print('---------------------')
    print('out of sample RMSE = ', mean_squared_error(y_test, y_predict))

In [358]:
OLS(RV10min_EURJPY, RV30min_EURJPY, RV1hr_EURJPY)

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.521
Model:                            OLS   Adj. R-squared:                  0.445
Method:                 Least Squares   F-statistic:                     6.884
Date:                Wed, 21 Feb 2018   Prob (F-statistic):            0.00251
Time:                        19:13:58   Log-Likelihood:                 226.16
No. Observations:                  23   AIC:                            -444.3
Df Residuals:                      19   BIC:                            -439.8
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       4.503e-06   8.38e-06      0.537      0.5

In [359]:
OLS(RV10min_EURUSD, RV30min_EURUSD, RV1hr_EURUSD)

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.216
Model:                            OLS   Adj. R-squared:                  0.092
Method:                 Least Squares   F-statistic:                     1.747
Date:                Wed, 21 Feb 2018   Prob (F-statistic):              0.191
Time:                        19:14:01   Log-Likelihood:                 224.92
No. Observations:                  23   AIC:                            -441.8
Df Residuals:                      19   BIC:                            -437.3
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       9.297e-06   7.26e-06      1.281      0.2

In [360]:
OLS(RV10min_USDJPY, RV30min_USDJPY, RV1hr_USDJPY)

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.523
Model:                            OLS   Adj. R-squared:                  0.447
Method:                 Least Squares   F-statistic:                     6.933
Date:                Wed, 21 Feb 2018   Prob (F-statistic):            0.00242
Time:                        19:14:04   Log-Likelihood:                 238.63
No. Observations:                  23   AIC:                            -469.3
Df Residuals:                      19   BIC:                            -464.7
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       7.222e-06   4.99e-06      1.447      0.1

## AR(6)

In [422]:
AR_model = tsa.ar_model.AR(RV10min_EURJPY)
result = AR_model.fit(maxlag=6,method='mle')
print(result.params)
print('out of sample RMSE = ', mean_squared_error(RV10min_EURJPY, result.fittedvalues))

[  1.15958313e-05   5.33760415e-01   6.09975157e-02   1.31080382e-01
   1.75127256e-01  -4.02884787e-02  -1.33164402e-01]
out of sample RMSE =  1.72556804706e-10


In [423]:
AR_model = tsa.ar_model.AR(RV10min_EURUSD)
result = AR_model.fit(maxlag=6,method='mle')
print(result.params)
print('out of sample RMSE = ', mean_squared_error(RV10min_EURJPY, result.fittedvalues))

[ 0.00488232 -0.01415113 -0.02717678 -0.01994303 -0.02039321 -0.02530495
 -0.02538412]
out of sample RMSE =  1.98860425711e-05




In [424]:
AR_model = tsa.ar_model.AR(RV10min_USDJPY)
result = AR_model.fit(maxlag=6,method='mle')
print(result.params)
print('out of sample RMSE = ', mean_squared_error(RV10min_EURJPY, result.fittedvalues))

[ 0.00422965 -0.02571068 -0.02629834 -0.0409161  -0.00549284 -0.01391141
 -0.01975841]
out of sample RMSE =  1.53311599877e-05


