# Estimating VaR in EURUSD from IV using ML and QR

## Modeling: Quantile Regression

### Data Preparation

In [1]:
import pandas as pd
import numpy as np
import pickle
import statsmodels.api as sm

In [2]:
# Turn off pandas' chained-assignment check for the whole session.
pd.options.mode.chained_assignment = None

In [3]:
# Load the two input frames from their HDF5 files in the working directory.
spot_store, spread_store = 'df_spot.h5', 'df_spread.h5'
df_spot = pd.read_hdf(spot_store)
df_spread = pd.read_hdf(spread_store)

In [4]:
def _add_return_and_target(frame):
    """Add ``RET`` and ``Y`` columns in place and return the trimmed frame.

    ``RET`` is the row-over-row percentage change of ``SPOT``; ``Y`` is
    ``RET`` shifted back by one row, i.e. the return of the following row.
    The first row (no ``RET``) and the last row (no ``Y``) are excluded
    from the returned slice.
    """
    frame['RET'] = frame['SPOT'].pct_change()
    frame['Y'] = frame['RET'].shift(-1)
    return frame.iloc[1:-1]


df_spot = _add_return_and_target(df_spot)
df_spread = _add_return_and_target(df_spread)

In [5]:
def rolling_minmax(df, window):
    """Min-max scale every column except the last over a trailing window.

    Each value ``x`` in a scaled column is mapped to
    ``(x - lo) / (hi - lo)``, where ``lo`` and ``hi`` are the minimum and
    maximum of that column over the ``window`` rows ending at (and
    including) the current row. The last column is passed through unscaled.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. All columns but the last must be numeric.
    window : int
        Number of rows in each trailing window.

    Returns
    -------
    pandas.DataFrame
        Same columns as ``df``, without the first ``window - 1`` rows
        (they have no complete window). If a column is constant over a
        window, ``hi - lo`` is zero and the scaled value is NaN.
    """
    features = df.iloc[:, :-1]
    trailing = features.rolling(window)
    # Vectorised rolling min/max replace a per-window Python callback; the
    # arithmetic applied to each value is unchanged.
    lo = trailing.min()
    hi = trailing.max()
    scaled = (features - lo) / (hi - lo)
    out = pd.concat([scaled, df.iloc[:, -1]], axis=1)
    return out.iloc[window - 1:]

In [6]:
# Trailing window length in rows: 252, the number of trading days in a year.
window_size = 252

df_spot_scaled = rolling_minmax(df_spot, window=window_size)
df_spread_scaled = rolling_minmax(df_spread, window=window_size)

In [7]:
# Split each scaled frame on the calendar year of its index:
# rows up to and including 2017 are in-sample (``_is``),
# rows from 2018 onwards are out-of-sample (``_os``).
spot_years = df_spot_scaled.index.year
spread_years = df_spread_scaled.index.year

df_spot_is = df_spot_scaled.loc[spot_years <= 2017]
df_spot_os = df_spot_scaled.loc[spot_years >= 2018]
df_spread_is = df_spread_scaled.loc[spread_years <= 2017]
df_spread_os = df_spread_scaled.loc[spread_years >= 2018]

### Quantile Regression

In [8]:
# Quantile levels to fit: three in the lower tail (1%, 2.5%, 5%) and the
# three mirrored levels in the upper tail (95%, 97.5%, 99%).
quantiles = [0.01, 0.025, 0.05, 0.95, 0.975, 0.99]

In [9]:
# Fit one quantile regression per level in ``quantiles``: the response is the
# ``Y`` column of df_spot_is and the regressors are all of its columns except
# the last. Results are stored in a dict keyed by quantile level.
# NOTE(review): no constant column is added to the regressors, so the fit has
# no intercept term - confirm this is intended.
quantile_results = {}
# The model depends only on the data, not on the quantile level, so it is
# built once and re-fitted for each level.
model = sm.QuantReg(df_spot_is['Y'], df_spot_is.iloc[:, :-1])
for quantile in quantiles:
    result = model.fit(q=quantile)
    quantile_results[quantile] = result

In [10]:
# Table of coefficient p-values: one row per regressor, one column per fitted
# quantile level. Built in a single constructor call rather than by growing an
# empty frame column by column.
df_pval = pd.DataFrame(
    {quantile: result.pvalues for quantile, result in quantile_results.items()}
)

# Show floats with four decimals in every frame displayed from here on.
pd.options.display.float_format = '{:.4f}'.format
df_pval

Unnamed: 0,0.0100,0.0250,0.0500,0.9500,0.9750,0.9900
SPOT,0.1357,0.1653,0.1688,0.8052,0.1962,0.6498
IV_ATM,0.0,0.0,0.0,0.0,0.0,0.0
BT_10D,0.0051,0.0005,0.3775,0.0016,0.3763,0.4841
BT_25D,0.0202,0.0759,0.1329,0.4379,0.3349,0.0431
RR_10D,0.2544,0.0661,0.1384,0.321,0.4048,0.2501
RR_25D,0.0394,0.0037,0.0051,0.066,0.1793,0.1159
RET,0.488,0.65,0.0259,0.1571,0.0404,0.0923


In [11]:
# Persist the dict of fitted results (keyed by quantile level) to disk.
with open('model_qr.pickle', mode='wb') as sink:
    pickle.dump(quantile_results, sink, protocol=pickle.HIGHEST_PROTOCOL)

## Quantile Regression with New Movement (Interaction) Variables

In [12]:
# Median p-value of each regressor across the quantile levels, smallest first.
df_pval.median(axis='columns').sort_values(ascending=True)

IV_ATM   0.0000
RR_25D   0.0527
BT_25D   0.1044
RET      0.1247
SPOT     0.1825
BT_10D   0.1907
RR_10D   0.2522
dtype: float64

In [13]:
# Smallest p-value of each regressor across the quantile levels, smallest first.
df_pval.min(axis='columns').sort_values(ascending=True)

IV_ATM   0.0000
BT_10D   0.0005
RR_25D   0.0037
BT_25D   0.0202
RET      0.0259
RR_10D   0.0661
SPOT     0.1357
dtype: float64

In [14]:
# Pairwise correlation of the in-sample regressors (all columns but the last).
spot_features_is = df_spot_is.iloc[:, :-1]
spot_features_is.corr()

Unnamed: 0,SPOT,IV_ATM,BT_10D,BT_25D,RR_10D,RR_25D,RET
SPOT,1.0,0.3489,0.2634,0.0798,0.1896,0.1559,0.0865
IV_ATM,0.3489,1.0,0.5275,0.4356,-0.3267,-0.3244,0.0244
BT_10D,0.2634,0.5275,1.0,0.5962,-0.1812,-0.1889,0.0833
BT_25D,0.0798,0.4356,0.5962,1.0,-0.2438,-0.2624,-0.0633
RR_10D,0.1896,-0.3267,-0.1812,-0.2438,1.0,0.9714,-0.0786
RR_25D,0.1559,-0.3244,-0.1889,-0.2624,0.9714,1.0,-0.082
RET,0.0865,0.0244,0.0833,-0.0633,-0.0786,-0.082,1.0


In [15]:
# Add the pairwise products IV_ATM*RR_25D, IV_ATM*BT_10D and RR_25D*BT_10D as
# extra regressors ("movement" variables).
# Work on an explicit copy: df_spot is itself an ``iloc`` slice, so assigning
# columns to a frame derived from it should not depend on pandas'
# chained-assignment check being switched off.
df_spot2 = df_spot.copy()
# Take Y out while the new columns are appended, then put it back so that it
# stays the last column (rolling_minmax leaves the last column unscaled).
target_y = df_spot2.pop('Y')
df_spot2['IV_RR25'] = df_spot2['IV_ATM'] * df_spot2['RR_25D']
df_spot2['IV_BT10'] = df_spot2['IV_ATM'] * df_spot2['BT_10D']
df_spot2['RR25_BT10'] = df_spot2['RR_25D'] * df_spot2['BT_10D']
df_spot2['Y'] = target_y
df_spot2.head()

Unnamed: 0_level_0,SPOT,IV_ATM,BT_10D,BT_25D,RR_10D,RR_25D,RET,IV_RR25,IV_BT10,RR25_BT10,Y
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2010-07-02,0.7971,12.75,0.7063,0.4,0.4,0.25,-0.0077,3.1875,9.0053,0.1766,0.001
2010-07-05,0.7978,11.905,0.7063,0.4,0.4,-0.15,0.001,-1.7857,8.4085,-0.1059,-0.0096
2010-07-06,0.7901,12.75,0.7063,0.4,0.4,0.4,-0.0096,5.1,9.0053,0.2825,0.0048
2010-07-07,0.794,11.705,0.7063,0.4,0.4,0.4,0.0048,4.682,8.2672,0.2825,-0.0052
2010-07-08,0.7898,12.25,0.7063,0.4,0.4,0.4,-0.0052,4.9,8.6522,0.2825,0.0014


In [16]:
# Scale the extended frame with the same rolling window, then split it on the
# calendar year of its index: up to 2017 in-sample, 2018 onwards out-of-sample.
df_spot2_scaled = rolling_minmax(df_spot2, window=window_size)
spot2_years = df_spot2_scaled.index.year
df_spot2_is = df_spot2_scaled.loc[spot2_years <= 2017]
df_spot2_os = df_spot2_scaled.loc[spot2_years >= 2018]

In [17]:
# Fit one quantile regression per level in ``quantiles`` on the extended
# regressor set: the response is the ``Y`` column of df_spot2_is and the
# regressors are all of its columns except the last.
# NOTE(review): no constant column is added to the regressors, so the fit has
# no intercept term - confirm this is intended.
quantile_results2 = {}
# The model depends only on the data, not on the quantile level, so it is
# built once and re-fitted for each level.
model2 = sm.QuantReg(df_spot2_is['Y'], df_spot2_is.iloc[:, :-1])
for quantile in quantiles:
    result = model2.fit(q=quantile)
    quantile_results2[quantile] = result

In [18]:
# Table of coefficient p-values for the extended model: one row per regressor,
# one column per fitted quantile level. Built in a single constructor call
# rather than by growing an empty frame column by column.
df_pval2 = pd.DataFrame(
    {quantile: result.pvalues for quantile, result in quantile_results2.items()}
)

df_pval2

Unnamed: 0,0.0100,0.0250,0.0500,0.9500,0.9750,0.9900
SPOT,0.1802,0.1717,0.0491,0.7915,0.6953,0.0627
IV_ATM,0.0007,0.0,0.0,0.0,0.0,0.0
BT_10D,0.0491,0.0,0.0052,0.0,0.0017,0.0017
BT_25D,0.1114,0.1412,0.4357,0.7979,0.669,0.1936
RR_10D,0.2551,0.0102,0.0774,0.275,0.112,0.373
RR_25D,0.0254,0.0001,0.0023,0.8034,0.6816,0.8697
RET,0.2977,0.6927,0.0913,0.5249,0.3797,0.6993
IV_RR25,0.1107,0.8119,0.64,0.1345,0.0642,0.0687
IV_BT10,0.3095,0.0236,0.0072,0.039,0.0218,0.0039
RR25_BT10,0.0195,0.0526,0.0357,0.143,0.5259,0.8831


In [19]:
# Persist the dict of fitted extended-model results (keyed by quantile level).
with open('model_qr2.pickle', mode='wb') as sink:
    pickle.dump(quantile_results2, sink, protocol=pickle.HIGHEST_PROTOCOL)

In [21]:
# Bundle the scaled in-sample / out-of-sample frames, keyed by their variable
# names, and pickle the bundle to a single file.
data_scale = {
    'df_spot_is': df_spot_is,
    'df_spot_os': df_spot_os,
    'df_spot2_is': df_spot2_is,
    'df_spot2_os': df_spot2_os,
    'df_spread_is': df_spread_is,
    'df_spread_os': df_spread_os,
}

with open('data_scale.pickle', mode='wb') as sink:
    pickle.dump(data_scale, sink, protocol=pickle.HIGHEST_PROTOCOL)