## Compile a fully cleaned master DataFrame with all available base regressors/classifiers

In [1]:
import pickle
import os
import time

import numpy as np
import pandas as pd
from datetime import datetime

### Load & Compile primary intraday data into one dataframe

In [2]:
# File names of the three intraday inputs. Each keeps its leading separator
# because it is appended directly to `filepath` below.
fname_1 = r'\vx1_opt.pkl'
fname_2 = r'\vix_vol_2terms.pkl'
fname_3 = r'\spx_returns.pkl'

# Pickle directory, relative to the current working directory. Kept as a plain
# string because later cells build their paths as `filepath + r'\<name>.pkl'`.
# NOTE(review): the backslash separators make these paths Windows-specific.
filepath = os.getcwd() + r'\Data\Intraday\Pickle'

# Load each frame inside a context manager so the file handle is closed as soon
# as unpickling finishes (the bare open(...) calls left the handles open).
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open(filepath + fname_1, "rb") as pkl_file:
    vx1_opt = pickle.load(pkl_file)
with open(filepath + fname_2, "rb") as pkl_file:
    vix_ivol = pickle.load(pkl_file)
with open(filepath + fname_3, "rb") as pkl_file:
    spx_index = pickle.load(pkl_file)

#### Load SPX Options & VIX Futures data

In [3]:
# Preview the first rows of vx1_opt (loaded from vx1_opt.pkl) to check its
# columns and timestamp index.
vx1_opt.head()

Unnamed: 0,Strike1,Strike2,Option1,Option2,t1,t2,FWD_PRICE,OPT,VX1-OPT_Spread
2015-01-02 09:40:00,2070.0,2070.0,0.13262,0.13698,11.993056,32.993056,17.175,13.940883,3.234117
2015-01-02 09:50:00,2070.0,2070.0,0.132455,0.136215,11.986111,32.986111,17.275,13.831527,3.443473
2015-01-02 10:00:00,2070.0,2065.0,0.134345,0.138615,11.979167,32.979167,17.375,14.099285,3.275715
2015-01-02 10:10:00,2065.0,2060.0,0.139675,0.14398,11.972222,32.972222,17.725,14.637765,3.087235
2015-01-02 10:20:00,2065.0,2060.0,0.14071,0.14493,11.965278,32.965278,17.725,14.728039,2.996961


#### Load VIX Options data

In [4]:
# Preview the last rows of vix_ivol (loaded from vix_vol_2terms.pkl); its
# Weight_Opt column is the one merged into the master frame further down.
vix_ivol.tail()

Unnamed: 0_level_0,Strike1,Strike2,Option1,Option2,t1,trade_dates/expiry,Weight_Opt
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-12-30 15:20:00,16.0,18.0,1.004045,0.919635,11.756944,18.0,0.974769
2016-12-30 15:30:00,15.0,17.0,0.930065,0.866925,11.75,18.0,0.908141
2016-12-30 15:40:00,16.0,17.0,1.01176,0.8723,11.743056,18.0,0.963283
2016-12-30 15:50:00,16.0,18.0,1.025765,0.909335,11.736111,18.0,0.985248
2016-12-30 16:00:00,16.0,18.0,1.012165,0.91931,11.729167,18.0,0.979816


#### Load SPX Cash data

In [5]:
# Preview the first rows of spx_index (loaded from spx_returns.pkl).
# NOTE(review): the first displayed row (09:30) carries 0.0 prices - presumably
# a placeholder. It is dropped later only because the index-on-index merge keeps
# timestamps present in both frames and vx1_opt starts at 09:40; confirm no
# other zero-priced rows survive the merge.
spx_index.head()

Unnamed: 0_level_0,UPRICE_BID,UPRICE_ASK,UPRICE_MID
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-01-02 09:30:00,0.0,0.0,0.0
2015-01-02 09:40:00,2072.1499,2072.1499,2072.1499
2015-01-02 09:50:00,2068.86011,2068.86011,2068.86011
2015-01-02 10:00:00,2068.5,2068.5,2068.5
2015-01-02 10:10:00,2063.32007,2063.32007,2063.32007


#### Add Weighted Imp. vol of ATM front month VIX options to VIX futures prices & SPX options approximation to create MASTER dataframe

In [6]:
# Attach the weighted VIX implied vol (Weight_Opt) to the VX1 / SPX-options frame.
# The column is selected by name rather than by position: the same column sits
# at a different position in the EOD frame, so a positional index silently picks
# the wrong column if the layout shifts, whereas a name lookup raises KeyError.
# Index-on-index merge with the default how='inner' keeps only timestamps that
# exist in both frames.
master_mid = vx1_opt.merge(vix_ivol[['Weight_Opt']], left_index=True, right_index=True)

In [7]:
# Check that Weight_Opt was appended as the last column of the merged frame.
master_mid.head()

Unnamed: 0,Strike1,Strike2,Option1,Option2,t1,t2,FWD_PRICE,OPT,VX1-OPT_Spread,Weight_Opt
2015-01-02 09:40:00,2070.0,2070.0,0.13262,0.13698,11.993056,32.993056,17.175,13.940883,3.234117,1.144474
2015-01-02 09:50:00,2070.0,2070.0,0.132455,0.136215,11.986111,32.986111,17.275,13.831527,3.443473,1.140503
2015-01-02 10:00:00,2070.0,2065.0,0.134345,0.138615,11.979167,32.979167,17.375,14.099285,3.275715,1.14277
2015-01-02 10:10:00,2065.0,2060.0,0.139675,0.14398,11.972222,32.972222,17.725,14.637765,3.087235,1.183701
2015-01-02 10:20:00,2065.0,2060.0,0.14071,0.14493,11.965278,32.965278,17.725,14.728039,2.996961,1.183615


#### Rename specific columns

In [8]:
# Swap the raw source labels for descriptive names. OPT and VX1-OPT_Spread are
# not in the mapping and therefore keep their labels.
master_mid.rename(
    {
        'Strike1': 'SPX_k1',
        'Strike2': 'SPX_k2',
        'Option1': 'SPX_ImpVol1',
        'Option2': 'SPX_ImpVol2',
        't1': 'DaysTo_VX1_Expiry',
        't2': 'DaysTo_VX2_Expiry',
        'FWD_PRICE': 'VX1_Future',
        'Weight_Opt': 'VIX_ImpVol',
    },
    axis='columns',
    inplace=True,
)

In [9]:
# Confirm the renamed column labels.
master_mid.head()

Unnamed: 0,SPX_k1,SPX_k2,SPX_ImpVol1,SPX_ImpVol2,DaysTo_VX1_Expiry,DaysTo_VX2_Expiry,VX1_Future,OPT,VX1-OPT_Spread,VIX_ImpVol
2015-01-02 09:40:00,2070.0,2070.0,0.13262,0.13698,11.993056,32.993056,17.175,13.940883,3.234117,1.144474
2015-01-02 09:50:00,2070.0,2070.0,0.132455,0.136215,11.986111,32.986111,17.275,13.831527,3.443473,1.140503
2015-01-02 10:00:00,2070.0,2065.0,0.134345,0.138615,11.979167,32.979167,17.375,14.099285,3.275715,1.14277
2015-01-02 10:10:00,2065.0,2060.0,0.139675,0.14398,11.972222,32.972222,17.725,14.637765,3.087235,1.183701
2015-01-02 10:20:00,2065.0,2060.0,0.14071,0.14493,11.965278,32.965278,17.725,14.728039,2.996961,1.183615


#### Add intraday SPX Index prices to *master_mid*

In [10]:
# Attach the SPX cash mid price. UPRICE_MID is selected by name rather than by
# position so a changed column layout raises KeyError instead of silently
# merging the wrong series.
# Index-on-index merge with the default how='inner' keeps only timestamps that
# exist in both frames.
master_mid = master_mid.merge(spx_index[['UPRICE_MID']], left_index=True, right_index=True)

In [11]:
# Check that UPRICE_MID was appended as the last column.
master_mid.head()

Unnamed: 0,SPX_k1,SPX_k2,SPX_ImpVol1,SPX_ImpVol2,DaysTo_VX1_Expiry,DaysTo_VX2_Expiry,VX1_Future,OPT,VX1-OPT_Spread,VIX_ImpVol,UPRICE_MID
2015-01-02 09:40:00,2070.0,2070.0,0.13262,0.13698,11.993056,32.993056,17.175,13.940883,3.234117,1.144474,2072.1499
2015-01-02 09:50:00,2070.0,2070.0,0.132455,0.136215,11.986111,32.986111,17.275,13.831527,3.443473,1.140503,2068.86011
2015-01-02 10:00:00,2070.0,2065.0,0.134345,0.138615,11.979167,32.979167,17.375,14.099285,3.275715,1.14277,2068.5
2015-01-02 10:10:00,2065.0,2060.0,0.139675,0.14398,11.972222,32.972222,17.725,14.637765,3.087235,1.183701,2063.32007
2015-01-02 10:20:00,2065.0,2060.0,0.14071,0.14493,11.965278,32.965278,17.725,14.728039,2.996961,1.183615,2063.04004


#### Rename last column

In [12]:
# Give the merged SPX mid-price column its descriptive label.
master_mid.rename({'UPRICE_MID': 'SPX_Index(fwd)'}, axis='columns', inplace=True)

In [13]:
# Final look at the intraday master frame before it is saved.
master_mid.head()

Unnamed: 0,SPX_k1,SPX_k2,SPX_ImpVol1,SPX_ImpVol2,DaysTo_VX1_Expiry,DaysTo_VX2_Expiry,VX1_Future,OPT,VX1-OPT_Spread,VIX_ImpVol,SPX_Index(fwd)
2015-01-02 09:40:00,2070.0,2070.0,0.13262,0.13698,11.993056,32.993056,17.175,13.940883,3.234117,1.144474,2072.1499
2015-01-02 09:50:00,2070.0,2070.0,0.132455,0.136215,11.986111,32.986111,17.275,13.831527,3.443473,1.140503,2068.86011
2015-01-02 10:00:00,2070.0,2065.0,0.134345,0.138615,11.979167,32.979167,17.375,14.099285,3.275715,1.14277,2068.5
2015-01-02 10:10:00,2065.0,2060.0,0.139675,0.14398,11.972222,32.972222,17.725,14.637765,3.087235,1.183701,2063.32007
2015-01-02 10:20:00,2065.0,2060.0,0.14071,0.14493,11.965278,32.965278,17.725,14.728039,2.996961,1.183615,2063.04004


#### Export/Load primary *master_mid* dataframe to/from a pickle file

In [14]:
# Persist the intraday master frame next to the input pickles.
master_mid_pkl_path = filepath + r'\master_mid.pkl'
master_mid.to_pickle(master_mid_pkl_path)

In [196]:
# Reload the intraday master frame from the pickle written by the export cell,
# so work can resume from the saved file.
# NOTE(review): this cell's execution count (196) is out of sequence with its
# neighbours, so it was presumably run by hand in a later session; on a
# top-to-bottom run it simply re-reads the file that was just written.
master_mid = pd.read_pickle(filepath + r'\master_mid.pkl')

### Load & compile end-of-day (EOD) data into one dataframe

In [15]:
# File names of the three end-of-day (EOD) inputs. Each keeps its leading
# separator because it is appended directly to `filepath` below.
fname_4 = r'\vx1_opt_eod.pkl'
fname_5 = r'\vix_eod_vol_2terms.pkl'
fname_6 = r'\spx_returns_eod.pkl'

# Same pickle directory as the intraday section; re-assigned here so this
# section can be run on its own. Kept as a plain string because later cells
# build their paths as `filepath + r'\<name>.pkl'`.
filepath = os.getcwd() + r'\Data\Intraday\Pickle'

# Load each frame inside a context manager so the file handle is closed as soon
# as unpickling finishes (the bare open(...) calls left the handles open).
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open(filepath + fname_4, "rb") as pkl_file:
    vx1_opt_eod = pickle.load(pkl_file)
with open(filepath + fname_5, "rb") as pkl_file:
    vix_ivol_eod = pickle.load(pkl_file)
with open(filepath + fname_6, "rb") as pkl_file:
    spx_index_eod = pickle.load(pkl_file)

#### Load SPX Options & VIX Futures data

In [16]:
# Preview the first rows of vx1_opt_eod (loaded from vx1_opt_eod.pkl); the rows
# shown are the 16:10 and 16:15 snapshots of each trading day.
vx1_opt_eod.head()

Unnamed: 0_level_0,SPX_Strike1,SPX_Strike2,SPX_Option1,SPX_Option2,t1,t2,FWD_PRICE,OPT,VX1-OPT_Spread
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-01-02 16:10:00,2060.0,2055.0,0.132675,0.14268,11.722222,32.722222,17.725,14.797089,2.927911
2015-01-02 16:15:00,2060.0,2055.0,0.1351,0.14226,11.71875,32.71875,17.825,14.610308,3.214692
2015-01-05 16:10:00,2020.0,2020.0,0.164065,0.161785,10.722222,31.722222,18.925,16.060839,2.864161
2015-01-05 16:15:00,2020.0,2020.0,0.166455,0.16144,10.71875,31.71875,19.125,15.881923,3.243077
2015-01-06 16:10:00,2005.0,2000.0,0.17523,0.1723,9.722222,30.722222,19.825,17.092651,2.732349


#### Load VIX Options data

In [17]:
# Preview the first rows of vix_ivol_eod (loaded from vix_eod_vol_2terms.pkl).
# Unlike the intraday vix_ivol frame it also carries a t2 column, which shifts
# Weight_Opt one position to the right.
vix_ivol_eod.head()

Unnamed: 0_level_0,VIX_Strike1,VIX_Strike2,VIX_Option1,VIX_Option2,t1,t2,trade_dates/expiry,Weight_Opt
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-01-02 16:10:00,19.0,20.0,1.349465,1.094165,11.722222,21.722222,22.0,1.230196
2015-01-02 16:15:00,19.0,20.0,1.34344,1.085335,11.71875,21.71875,22.0,1.22282
2015-01-05 16:10:00,20.0,21.0,1.54275,1.1925,10.722222,21.722222,22.0,1.363203
2015-01-05 16:15:00,20.0,21.0,1.55063,1.192595,10.71875,21.71875,22.0,1.367035
2015-01-06 16:10:00,21.0,21.0,1.538645,1.15383,9.722222,21.722222,22.0,1.323887


#### Load SPX Cash data

In [18]:
# Preview the first rows of spx_index_eod (loaded from spx_returns_eod.pkl).
spx_index_eod.head()

Unnamed: 0_level_0,UPRICE_BID,UPRICE_ASK,UPRICE_MID
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-01-02 16:10:00,2058.19995,2058.19995,2058.19995
2015-01-02 16:15:00,2058.19995,2058.19995,2058.19995
2015-01-05 16:10:00,2020.57996,2020.57996,2020.57996
2015-01-05 16:15:00,2020.57996,2020.57996,2020.57996
2015-01-06 16:10:00,2002.60999,2002.60999,2002.60999


#### Add Weighted Imp. vol of ATM front month VIX options to VIX futures prices & SPX options approximation to create MASTER EOD dataframe

In [19]:
# Attach the weighted VIX implied vol (Weight_Opt) to the EOD VX1 / SPX-options
# frame. The column is selected by name rather than by position: it sits at a
# different position here than in the intraday frame, so a name lookup is the
# only selection that is safe for both.
# Index-on-index merge with the default how='inner' keeps only timestamps that
# exist in both frames.
master_eod = vx1_opt_eod.merge(vix_ivol_eod[['Weight_Opt']], left_index=True, right_index=True)

In [20]:
# Check that Weight_Opt was appended as the last column of the merged EOD frame.
master_eod.head()

Unnamed: 0_level_0,SPX_Strike1,SPX_Strike2,SPX_Option1,SPX_Option2,t1,t2,FWD_PRICE,OPT,VX1-OPT_Spread,Weight_Opt
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-02 16:10:00,2060.0,2055.0,0.132675,0.14268,11.722222,32.722222,17.725,14.797089,2.927911,1.230196
2015-01-02 16:15:00,2060.0,2055.0,0.1351,0.14226,11.71875,32.71875,17.825,14.610308,3.214692,1.22282
2015-01-05 16:10:00,2020.0,2020.0,0.164065,0.161785,10.722222,31.722222,18.925,16.060839,2.864161,1.363203
2015-01-05 16:15:00,2020.0,2020.0,0.166455,0.16144,10.71875,31.71875,19.125,15.881923,3.243077,1.367035
2015-01-06 16:10:00,2005.0,2000.0,0.17523,0.1723,9.722222,30.722222,19.825,17.092651,2.732349,1.323887


#### Rename specific columns

In [21]:
# Swap the raw EOD labels for the same descriptive names used in master_mid, so
# the two frames can later be stacked column-for-column. OPT and VX1-OPT_Spread
# are not in the mapping and therefore keep their labels.
master_eod.rename(
    {
        'SPX_Strike1': 'SPX_k1',
        'SPX_Strike2': 'SPX_k2',
        'SPX_Option1': 'SPX_ImpVol1',
        'SPX_Option2': 'SPX_ImpVol2',
        't1': 'DaysTo_VX1_Expiry',
        't2': 'DaysTo_VX2_Expiry',
        'FWD_PRICE': 'VX1_Future',
        'Weight_Opt': 'VIX_ImpVol',
    },
    axis='columns',
    inplace=True,
)

In [22]:
# Confirm the renamed column labels.
master_eod.head()

Unnamed: 0_level_0,SPX_k1,SPX_k2,SPX_ImpVol1,SPX_ImpVol2,DaysTo_VX1_Expiry,DaysTo_VX2_Expiry,VX1_Future,OPT,VX1-OPT_Spread,VIX_ImpVol
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-02 16:10:00,2060.0,2055.0,0.132675,0.14268,11.722222,32.722222,17.725,14.797089,2.927911,1.230196
2015-01-02 16:15:00,2060.0,2055.0,0.1351,0.14226,11.71875,32.71875,17.825,14.610308,3.214692,1.22282
2015-01-05 16:10:00,2020.0,2020.0,0.164065,0.161785,10.722222,31.722222,18.925,16.060839,2.864161,1.363203
2015-01-05 16:15:00,2020.0,2020.0,0.166455,0.16144,10.71875,31.71875,19.125,15.881923,3.243077,1.367035
2015-01-06 16:10:00,2005.0,2000.0,0.17523,0.1723,9.722222,30.722222,19.825,17.092651,2.732349,1.323887


#### Add EOD SPX Index prices to *master_eod*

In [23]:
# Attach the EOD SPX cash mid price. UPRICE_MID is selected by name rather than
# by position so a changed column layout raises KeyError instead of silently
# merging the wrong series.
# Index-on-index merge with the default how='inner' keeps only timestamps that
# exist in both frames.
master_eod = master_eod.merge(spx_index_eod[['UPRICE_MID']], left_index=True, right_index=True)

In [24]:
# Check that UPRICE_MID was appended as the last column.
master_eod.head()

Unnamed: 0_level_0,SPX_k1,SPX_k2,SPX_ImpVol1,SPX_ImpVol2,DaysTo_VX1_Expiry,DaysTo_VX2_Expiry,VX1_Future,OPT,VX1-OPT_Spread,VIX_ImpVol,UPRICE_MID
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2015-01-02 16:10:00,2060.0,2055.0,0.132675,0.14268,11.722222,32.722222,17.725,14.797089,2.927911,1.230196,2058.19995
2015-01-02 16:15:00,2060.0,2055.0,0.1351,0.14226,11.71875,32.71875,17.825,14.610308,3.214692,1.22282,2058.19995
2015-01-05 16:10:00,2020.0,2020.0,0.164065,0.161785,10.722222,31.722222,18.925,16.060839,2.864161,1.363203,2020.57996
2015-01-05 16:15:00,2020.0,2020.0,0.166455,0.16144,10.71875,31.71875,19.125,15.881923,3.243077,1.367035,2020.57996
2015-01-06 16:10:00,2005.0,2000.0,0.17523,0.1723,9.722222,30.722222,19.825,17.092651,2.732349,1.323887,2002.60999


#### Rename last column

In [25]:
# Give the merged SPX mid-price column the same label it has in master_mid.
master_eod.rename({'UPRICE_MID': 'SPX_Index(fwd)'}, axis='columns', inplace=True)

In [26]:
# Final look at the EOD master frame before it is saved.
master_eod.head()

Unnamed: 0_level_0,SPX_k1,SPX_k2,SPX_ImpVol1,SPX_ImpVol2,DaysTo_VX1_Expiry,DaysTo_VX2_Expiry,VX1_Future,OPT,VX1-OPT_Spread,VIX_ImpVol,SPX_Index(fwd)
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2015-01-02 16:10:00,2060.0,2055.0,0.132675,0.14268,11.722222,32.722222,17.725,14.797089,2.927911,1.230196,2058.19995
2015-01-02 16:15:00,2060.0,2055.0,0.1351,0.14226,11.71875,32.71875,17.825,14.610308,3.214692,1.22282,2058.19995
2015-01-05 16:10:00,2020.0,2020.0,0.164065,0.161785,10.722222,31.722222,18.925,16.060839,2.864161,1.363203,2020.57996
2015-01-05 16:15:00,2020.0,2020.0,0.166455,0.16144,10.71875,31.71875,19.125,15.881923,3.243077,1.367035,2020.57996
2015-01-06 16:10:00,2005.0,2000.0,0.17523,0.1723,9.722222,30.722222,19.825,17.092651,2.732349,1.323887,2002.60999


#### Export *master_eod* dataframe to a pickle file

In [27]:
# Persist the EOD master frame next to the input pickles.
master_eod_pkl_path = filepath + r'\master_eod.pkl'
master_eod.to_pickle(master_eod_pkl_path)

### Combine primary and EOD master dataframes

In [28]:
# Stack the intraday and EOD frames row-wise; both carry the same column labels
# after the rename steps.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0. pd.concat is
# the supported replacement and, with its defaults, keeps each input's own
# timestamp index (no re-indexing, no column sorting).
master = pd.concat([master_mid, master_eod])

#### Sort by timestamp (index)

In [29]:
# Order the stacked rows chronologically so each day's EOD rows follow that
# day's intraday rows.
master.sort_index(axis=0, ascending=True, inplace=True)

In [30]:
# Show the last rows to confirm that the 16:10 / 16:15 EOD rows now sort after
# the final intraday (16:00) row of the same day.
master.tail()

Unnamed: 0,SPX_k1,SPX_k2,SPX_ImpVol1,SPX_ImpVol2,DaysTo_VX1_Expiry,DaysTo_VX2_Expiry,VX1_Future,OPT,VX1-OPT_Spread,VIX_ImpVol,SPX_Index(fwd)
2016-12-30 15:40:00,2240.0,2235.0,0.112185,0.12328,11.743056,33.743056,15.125,12.881164,2.243836,0.963283,2238.12988
2016-12-30 15:50:00,2235.0,2235.0,0.11475,0.12305,11.736111,33.736111,15.275,12.725649,2.549351,0.985248,2235.6001
2016-12-30 16:00:00,2240.0,2240.0,0.112725,0.120915,11.729167,33.729167,15.125,12.506244,2.618756,0.979816,2239.61011
2016-12-30 16:10:00,2240.0,2240.0,0.111685,0.121555,11.722222,33.722222,15.125,12.649992,2.475008,0.961698,2238.83008
2016-12-30 16:15:00,2240.0,2240.0,0.11145,0.12102,11.71875,33.71875,15.125,12.582087,2.542913,0.961768,2238.83008


In [31]:
# Total number of rows (intraday + EOD) in the combined frame.
len(master)

20250

In [32]:
# Decimal places per column, keyed by column name instead of integer position
# so the rounding stays correct if columns are ever added or reordered.
# The implied vols, days-to-expiry, OPT, spread and VIX implied vol get 4
# decimals; the SPX index level gets 2. The strikes (SPX_k1, SPX_k2) and
# VX1_Future are deliberately left unrounded, as before.
master_round_decimals = {
    'SPX_ImpVol1': 4,
    'SPX_ImpVol2': 4,
    'DaysTo_VX1_Expiry': 4,
    'DaysTo_VX2_Expiry': 4,
    'OPT': 4,
    'VX1-OPT_Spread': 4,
    'VIX_ImpVol': 4,
    'SPX_Index(fwd)': 2,
}
# DataFrame.round with a dict rounds only the listed columns.
master = master.round(master_round_decimals)

In [33]:
# Confirm the rounding: 4 decimals on the vol / expiry / spread columns and 2 on
# the SPX index level.
master.tail()

Unnamed: 0,SPX_k1,SPX_k2,SPX_ImpVol1,SPX_ImpVol2,DaysTo_VX1_Expiry,DaysTo_VX2_Expiry,VX1_Future,OPT,VX1-OPT_Spread,VIX_ImpVol,SPX_Index(fwd)
2016-12-30 15:40:00,2240.0,2235.0,0.1122,0.1233,11.7431,33.7431,15.125,12.8812,2.2438,0.9633,2238.13
2016-12-30 15:50:00,2235.0,2235.0,0.1148,0.123,11.7361,33.7361,15.275,12.7256,2.5494,0.9852,2235.6
2016-12-30 16:00:00,2240.0,2240.0,0.1127,0.1209,11.7292,33.7292,15.125,12.5062,2.6188,0.9798,2239.61
2016-12-30 16:10:00,2240.0,2240.0,0.1117,0.1216,11.7222,33.7222,15.125,12.65,2.475,0.9617,2238.83
2016-12-30 16:15:00,2240.0,2240.0,0.1114,0.121,11.7188,33.7188,15.125,12.5821,2.5429,0.9618,2238.83


In [34]:
# Count missing values per column; every column should report 0 before export.
master.isna().sum()

SPX_k1               0
SPX_k2               0
SPX_ImpVol1          0
SPX_ImpVol2          0
DaysTo_VX1_Expiry    0
DaysTo_VX2_Expiry    0
VX1_Future           0
OPT                  0
VX1-OPT_Spread       0
VIX_ImpVol           0
SPX_Index(fwd)       0
dtype: int64

#### Export *master* dataframe to a pickle file

In [35]:
# Persist the combined, rounded master frame next to the input pickles.
master_pkl_path = filepath + r'\master.pkl'
master.to_pickle(master_pkl_path)

#### Export *master* dataframe to a .csv file

In [36]:
# Also write the master frame as CSV, one directory above the pickle folder.
master_csv_path = os.getcwd() + r'\Data\Intraday\VIX_Master_DataFrame.csv'
master.to_csv(master_csv_path)