In [1]:
import os
import numpy as np
import pandas as pd
import pickle
from datetime import datetime

### Load VIX/SPX Expiration Dates

In [2]:
expiration = pd.read_csv(os.getcwd() + (r'\Data' + '\expiration_dates.csv'), parse_dates=['vx1', 'spx'],\
                         date_parser = lambda x: datetime.strptime(x,'%d-%b-%y'))

expiration.head()

Unnamed: 0,vx1,spx,Unnamed: 2
0,2005-10-19,2005-10-21,
1,2005-11-16,2005-11-18,28.0
2,2005-12-21,2005-12-16,28.0
3,2006-01-18,2006-01-20,35.0
4,2006-02-15,2006-02-17,28.0


### Set filepath for End of Day (EOD) data -> 4:10 & 4:15 timestamps

In [3]:
fname_1 = 'optsnaps.csv'
fname_2 = 'vixsnaps3.csv'

filepath_1 = os.path.join('Data\Intraday', fname_1)
filepath_2 = os.path.join('Data\Intraday', fname_2)

### Set filepath for writing/retrieval of all necessary pickle files

In [4]:
pickle_filepath = r'Data\Intraday\Pickle'

### Load/Parse SPX/VIX Options, SPX Cash Data

In [5]:
eod = pd.read_csv(filepath_1, parse_dates=['TIMESTAMP'], date_parser = lambda x: datetime.strptime(x,'%m/%d/%Y %H:%M'),\
                    usecols = ['SYMBOL', 'TIMESTAMP', 'MID_VOL', 'DELTA', 'UPRICE_BID', 'UPRICE_ASK'])

# Screen/Remove deep OTM & ITM options from dataframe
eod = eod[(np.abs(eod['DELTA']) > 0.15) & (np.abs(eod['DELTA']) < 0.85)]

In [7]:
eod.head()

Unnamed: 0,SYMBOL,TIMESTAMP,MID_VOL,DELTA,UPRICE_BID,UPRICE_ASK
70,SPX 150220C01925000,2015-01-02 16:10:00,0.18312,0.84084,2058.19995,2058.19995
71,SPX 150220C01925000,2015-01-02 16:15:00,0.18006,0.84466,2058.19995,2058.19995
72,SPX 150220C01930000,2015-01-02 16:10:00,0.18203,0.83263,2058.19995,2058.19995
73,SPX 150220C01930000,2015-01-02 16:15:00,0.17852,0.83703,2058.19995,2058.19995
74,SPX 150220C01935000,2015-01-02 16:10:00,0.18023,0.82497,2058.19995,2058.19995


### Split $eod$ dataframe by specific option type (VIX or SPX)

In [8]:
spx_intra_eod = eod[eod['SYMBOL'].str.contains('SPX')].copy()
vix_intra_eod = eod[eod['SYMBOL'].str.contains('VIX')].copy()

#### SPX Options: Define function to split on space following 'SPX/SPXW" and take 2nd part of OPRA code: YY-MM-DD, Call/Put, Strike Price 

In [95]:
def parse_symbol(x):
    x = x.split()[1]
    date = datetime.strptime(x[:6],'%y%m%d')
    if date.weekday() == 5:
        date -= pd.Timedelta(days=1)
    optiontype = x[6]
    strike = float(x[8:12])
    return pd.Series([date, optiontype,strike], index=['exdate', 'cp_flag', 'strike_price'])    

#### Parse expiration date, Call/Put flag & strike price from SPX OPRA symbol

In [96]:
spx_intra_eod[['exdate', 'cp_flag', 'strike_price']] = spx_intra_eod['SYMBOL'].apply(parse_symbol)
spx_intra_eod.head()

Unnamed: 0,SYMBOL,TIMESTAMP,MID_VOL,DELTA,FWD_PRICE,exdate,cp_flag,strike_price
70,SPX 150220C01925000,2015-01-02 16:10:00,0.18312,0.84084,2053.82544,2015-02-20,C,1925.0
71,SPX 150220C01925000,2015-01-02 16:15:00,0.18006,0.84466,2053.82544,2015-02-20,C,1925.0
72,SPX 150220C01930000,2015-01-02 16:10:00,0.18203,0.83263,2053.82544,2015-02-20,C,1930.0
73,SPX 150220C01930000,2015-01-02 16:15:00,0.17852,0.83703,2053.82544,2015-02-20,C,1930.0
74,SPX 150220C01935000,2015-01-02 16:10:00,0.18023,0.82497,2053.82544,2015-02-20,C,1935.0


In [97]:
spx_intra_eod.tail()

Unnamed: 0,SYMBOL,TIMESTAMP,MID_VOL,DELTA,FWD_PRICE,exdate,cp_flag,strike_price
532290,SPX 170217P02310000,2016-12-30 16:10:00,0.09575,-0.81738,2236.31128,2017-02-17,P,2310.0
532291,SPX 170217P02310000,2016-12-30 16:15:00,0.09784,-0.81203,2236.31104,2017-02-17,P,2310.0
532292,SPX 170217P02315000,2016-12-30 16:10:00,0.09467,-0.8361,2236.31128,2017-02-17,P,2315.0
532293,SPX 170217P02315000,2016-12-30 16:15:00,0.09687,-0.83034,2236.31104,2017-02-17,P,2315.0
532295,SPX 170217P02320000,2016-12-30 16:15:00,0.09593,-0.84773,2236.31104,2017-02-17,P,2320.0


### Select appropriate SPX options for each timestamp

#### Build/Apply function to determine the relevant expirys for VIX (1st) & SPX (1st & 2nd) given the current trade date

In [58]:
# Define column numbers for VIX, SPX expiration dates in 'expiration_dates.csv'
vx_expirys = 0
sp_expirys = 1

def findTerms(date):
    for i in range(expiration.shape[vx_expirys]): # Determine month & year of VIX expiry corresponding to current date
        if expiration.iloc[i,vx_expirys].month == date.month and expiration.iloc[i,vx_expirys].year == date.year:
            break                                 # Effectively reduces search area of posible dates
    
    # Determine EXCEPTION CASES: 4 Times/yr 1st SPX option expiry falls BEFORE VIX expiry
    if expiration.iloc[i,sp_expirys] < expiration.iloc[i,vx_expirys]: 
        if date >= expiration.iloc[i,vx_expirys]:
            vix_expiry = expiration.iloc[i+1,vx_expirys]
            term1 = expiration.iloc[i+1,sp_expirys]
            term2 = expiration.iloc[i+2,sp_expirys]
        else:
            vix_expiry = expiration.iloc[i+0,vx_expirys]
            term1 = expiration.iloc[i+0,sp_expirys] + pd.Timedelta(days=7)
            term2 = expiration.iloc[i+1,sp_expirys]
    elif expiration.iloc[i+1,sp_expirys] < expiration.iloc[i+1,vx_expirys]:
        if date < expiration.iloc[i,vx_expirys]:
            vix_expiry = expiration.iloc[i+0,vx_expirys]
            term1 = expiration.iloc[i+0,sp_expirys]
            term2 = expiration.iloc[i+1,sp_expirys]
        else:
            vix_expiry = expiration.iloc[i+1,vx_expirys]
            term1 = expiration.iloc[i+1,sp_expirys] + pd.Timedelta(days=7)
            term2 = expiration.iloc[i+2,sp_expirys]
    else:
        if date >= expiration.iloc[i,vx_expirys]:
            vix_expiry = expiration.iloc[i+1,vx_expirys]
            term1 = expiration.iloc[i+1,sp_expirys]
            term2 = expiration.iloc[i+2,sp_expirys]
        else:
            vix_expiry = expiration.iloc[i+0,vx_expirys]
            term1 = expiration.iloc[i+0,sp_expirys]
            term2 = expiration.iloc[i+1,sp_expirys]


    return pd.Series([vix_expiry, term1, term2], index=['VIX_Expiry', 'Term_1', 'Term_2'])

In [59]:
spx_intra_eod[['VIX_Expiry', 'SPX_Term1', 'SPX_Term2']] = spx_intra_eod['TIMESTAMP'].apply(findTerms)

In [60]:
spx_intra_eod.head()

Unnamed: 0,SYMBOL,TIMESTAMP,MID_VOL,DELTA,FWD_PRICE,exdate,cp_flag,strike_price,VIX_Expiry,SPX_Term1,SPX_Term2
70,SPX 150220C01925000,2015-01-02 16:10:00,0.18312,0.84084,2053.82544,2015-02-20,C,1925.0,2015-01-21,2015-01-23,2015-02-20
71,SPX 150220C01925000,2015-01-02 16:15:00,0.18006,0.84466,2053.82544,2015-02-20,C,1925.0,2015-01-21,2015-01-23,2015-02-20
72,SPX 150220C01930000,2015-01-02 16:10:00,0.18203,0.83263,2053.82544,2015-02-20,C,1930.0,2015-01-21,2015-01-23,2015-02-20
73,SPX 150220C01930000,2015-01-02 16:15:00,0.17852,0.83703,2053.82544,2015-02-20,C,1930.0,2015-01-21,2015-01-23,2015-02-20
74,SPX 150220C01935000,2015-01-02 16:10:00,0.18023,0.82497,2053.82544,2015-02-20,C,1935.0,2015-01-21,2015-01-23,2015-02-20


#### Check for any Null values (NaNs)

In [61]:
spx_intra_eod.isna().sum()

SYMBOL          0
TIMESTAMP       0
MID_VOL         0
DELTA           0
FWD_PRICE       0
exdate          0
cp_flag         0
strike_price    0
VIX_Expiry      0
SPX_Term1       0
SPX_Term2       0
dtype: int64

#### Save interim version of dataframe to pickle for retrieval after initial cleaning/calculation (speeds future calculation times) 

In [62]:
spx_intra_eod.to_pickle(os.path.join(pickle_filepath, 'spx_intra_eod(mid).pkl'))

In [4]:
#spx_intra_eod = pickle.load(open(pickle_filepath + r'\spx_intraday(mid).pkl', "rb"))

In [7]:
spx_intra_eod = pd.read_pickle(pickle_filepath + r'\spx_intra_eod(mid).pkl')

#### Calculate individual trade dates for intraday dataframe

In [63]:
trade_dates = np.array([pd.Timestamp(x) for x in np.unique(spx_intra_eod['TIMESTAMP'].apply(lambda x: x.strftime('%Y-%m-%d')))])

In [64]:
trade_dates[-5:]

array([Timestamp('2016-12-23 00:00:00'), Timestamp('2016-12-27 00:00:00'),
       Timestamp('2016-12-28 00:00:00'), Timestamp('2016-12-29 00:00:00'),
       Timestamp('2016-12-30 00:00:00')], dtype=object)

#### Insert additional trade dates for additional 1.5 months after end of official dataset (to calculate time series)

In [65]:
extra_dates = pd.date_range('2017-01-01', '2017-02-17', freq=pd.tseries.offsets.BDay()).tolist()
extra_dates

[Timestamp('2017-01-02 00:00:00', freq='B'),
 Timestamp('2017-01-03 00:00:00', freq='B'),
 Timestamp('2017-01-04 00:00:00', freq='B'),
 Timestamp('2017-01-05 00:00:00', freq='B'),
 Timestamp('2017-01-06 00:00:00', freq='B'),
 Timestamp('2017-01-09 00:00:00', freq='B'),
 Timestamp('2017-01-10 00:00:00', freq='B'),
 Timestamp('2017-01-11 00:00:00', freq='B'),
 Timestamp('2017-01-12 00:00:00', freq='B'),
 Timestamp('2017-01-13 00:00:00', freq='B'),
 Timestamp('2017-01-16 00:00:00', freq='B'),
 Timestamp('2017-01-17 00:00:00', freq='B'),
 Timestamp('2017-01-18 00:00:00', freq='B'),
 Timestamp('2017-01-19 00:00:00', freq='B'),
 Timestamp('2017-01-20 00:00:00', freq='B'),
 Timestamp('2017-01-23 00:00:00', freq='B'),
 Timestamp('2017-01-24 00:00:00', freq='B'),
 Timestamp('2017-01-25 00:00:00', freq='B'),
 Timestamp('2017-01-26 00:00:00', freq='B'),
 Timestamp('2017-01-27 00:00:00', freq='B'),
 Timestamp('2017-01-30 00:00:00', freq='B'),
 Timestamp('2017-01-31 00:00:00', freq='B'),
 Timestamp

#### Remove MLK Holiday as a trade date

In [66]:
extra_dates.pop(10)

Timestamp('2017-01-16 00:00:00', freq='B')

#### Combine original trade dates with additional dates

In [67]:
trade_dates = np.append(trade_dates, extra_dates)

In [68]:
trade_dates[-5:]

array([Timestamp('2017-02-13 00:00:00', freq='B'),
       Timestamp('2017-02-14 00:00:00', freq='B'),
       Timestamp('2017-02-15 00:00:00', freq='B'),
       Timestamp('2017-02-16 00:00:00', freq='B'),
       Timestamp('2017-02-17 00:00:00', freq='B')], dtype=object)

#### Build/Apply function to assign appropriate SPX options to each timestamp

In [69]:
def findOptions(df):
    term1 = df[df['exdate'] == df['SPX_Term1'].iloc[0]]
    term2 = df[df['exdate'] == df['SPX_Term2'].iloc[0]]
    #t1 = len(trade_dates[(trade_dates > df['date'].iloc[0]) & ( trade_dates <= df['VIX_Expiry'].iloc[0])]) 
    #t2 = len(trade_dates[(trade_dates > df['date'].iloc[0]) & ( trade_dates <= df['Term_2'].iloc[0] )])
    
    # Calculate current timestamp & future VIX expirys out to the hour & minute 
    # This approach allows for a more continuous view of Time to Expiry (TTE)
    partial_day = (df['TIMESTAMP'].iloc[0].hour + df['TIMESTAMP'].iloc[0].minute/60)/24 - (9.5)/24
    t1 = len(trade_dates[(trade_dates > df['TIMESTAMP'].iloc[0]) & ( trade_dates <= df['VIX_Expiry'].iloc[0])]) - partial_day
    t2 = len(trade_dates[(trade_dates > df['TIMESTAMP'].iloc[0]) & ( trade_dates <= df['SPX_Term2'].iloc[0] )]) - partial_day
    
    if term1.shape[0] == 0 or term2.shape[0] == 0:
        return pd.Series([np.nan, np.nan, np.nan, np.nan, t1, t2],\
                         index=['SPX_Strike1', 'SPX_Strike2','SPX_Option1', 'SPX_Option2', 't1', 't2'])
        
    #print(df['date'].iloc[0])
    strike_list1 = np.unique(term1['strike_price'])
    #print(strike_list1)
    mindelta1 = 5
    for strikeIndx, strike in enumerate(strike_list1): 
        x = np.abs(term1[term1['strike_price'] == strike]['DELTA'].sum())
        if x < mindelta1:
            mindelta1 = x
            minStrike1 = strike
            
    strike_list2 = np.unique(term2['strike_price'])
    mindelta2 = 5
    for strikeIndx, strike in enumerate(strike_list2): 
        x = np.abs(term2[term2['strike_price'] == strike]['DELTA'].sum())
        if x < mindelta2:
            mindelta2 = x
            minStrike2 = strike
    avg1 = term1[term1['strike_price'] == minStrike1]['MID_VOL'].mean()
    avg2 = term2[term2['strike_price'] == minStrike2]['MID_VOL'].mean()
    
    return pd.Series([minStrike1, minStrike2, avg1, avg2, t1, t2],\
                     index=['SPX_Strike1', 'SPX_Strike2','SPX_Option1', 'SPX_Option2', 't1', 't2'])  

In [70]:
spx_intra_eod.head()

Unnamed: 0,SYMBOL,TIMESTAMP,MID_VOL,DELTA,FWD_PRICE,exdate,cp_flag,strike_price,VIX_Expiry,SPX_Term1,SPX_Term2
70,SPX 150220C01925000,2015-01-02 16:10:00,0.18312,0.84084,2053.82544,2015-02-20,C,1925.0,2015-01-21,2015-01-23,2015-02-20
71,SPX 150220C01925000,2015-01-02 16:15:00,0.18006,0.84466,2053.82544,2015-02-20,C,1925.0,2015-01-21,2015-01-23,2015-02-20
72,SPX 150220C01930000,2015-01-02 16:10:00,0.18203,0.83263,2053.82544,2015-02-20,C,1930.0,2015-01-21,2015-01-23,2015-02-20
73,SPX 150220C01930000,2015-01-02 16:15:00,0.17852,0.83703,2053.82544,2015-02-20,C,1930.0,2015-01-21,2015-01-23,2015-02-20
74,SPX 150220C01935000,2015-01-02 16:10:00,0.18023,0.82497,2053.82544,2015-02-20,C,1935.0,2015-01-21,2015-01-23,2015-02-20


#### Look for SPX Options expiration dates needed for a given timestamp

In [28]:
#spx_intra_eod[spx_intra_eod['TIMESTAMP'] == '2015-10-21 16:10:00']

In [71]:
spx_options_eod = spx_intra_eod.groupby('TIMESTAMP').apply(findOptions)

In [72]:
spx_options_eod.head()

Unnamed: 0_level_0,SPX_Strike1,SPX_Strike2,SPX_Option1,SPX_Option2,t1,t2
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-02 16:10:00,2060.0,2055.0,0.132675,0.14268,11.722222,32.722222
2015-01-02 16:15:00,2060.0,2055.0,0.1351,0.14226,11.71875,32.71875
2015-01-05 16:10:00,2020.0,2020.0,0.164065,0.161785,10.722222,31.722222
2015-01-05 16:15:00,2020.0,2020.0,0.166455,0.16144,10.71875,31.71875
2015-01-06 16:10:00,2005.0,2000.0,0.17523,0.1723,9.722222,30.722222


#### Check for any Null values (NaNs)

In [73]:
spx_options_eod.isna().sum()

SPX_Strike1    78
SPX_Strike2    78
SPX_Option1    78
SPX_Option2    78
t1              0
t2              0
dtype: int64

In [24]:
#spx_options_eod[spx_options_eod['SPX_Strike1'].isna()]

In [74]:
spx_options_eod = spx_options_eod[~spx_options_eod['SPX_Strike1'].isna()]

#### Save/Load final SPX Options EOD dataframe to/from pickle file  

In [75]:
spx_options_eod.to_pickle(os.path.join(pickle_filepath, 'spx_options_eod.pkl'))

In [154]:
spx_options_eod = pd.read_pickle(pickle_filepath + r'\spx_options_eod.pkl')

### Load/Parse VIX Futures Data 

#### Recall VIX options dataframe loaded from combined SPX/VIX options .csv file

In [147]:
vix_intra_eod.head()

Unnamed: 0,SYMBOL,TIMESTAMP,MID_VOL,DELTA,FWD_PRICE
766,VIX 150121C00014500,2015-01-02 16:10:00,1.00348,0.83967,17.725
768,VIX 150121C00015000,2015-01-02 16:10:00,1.072,0.7899,17.725
769,VIX 150121C00015000,2015-01-02 16:15:00,1.00126,0.8081,17.825
770,VIX 150121C00016000,2015-01-02 16:10:00,1.11977,0.70182,17.725
771,VIX 150121C00016000,2015-01-02 16:15:00,1.08682,0.71251,17.825


In [148]:
vix_intra_eod.head()

Unnamed: 0,SYMBOL,TIMESTAMP,MID_VOL,DELTA,FWD_PRICE
766,VIX 150121C00014500,2015-01-02 16:10:00,1.00348,0.83967,17.725
768,VIX 150121C00015000,2015-01-02 16:10:00,1.072,0.7899,17.725
769,VIX 150121C00015000,2015-01-02 16:15:00,1.00126,0.8081,17.825
770,VIX 150121C00016000,2015-01-02 16:10:00,1.11977,0.70182,17.725
771,VIX 150121C00016000,2015-01-02 16:15:00,1.08682,0.71251,17.825


In [149]:
vix_intra_eod[['exdate', 'cp_flag', 'strike_price']] = vix_intra_eod['SYMBOL'].apply(parse_symbol)

In [151]:
vix_intra_eod.sort_values(by =['TIMESTAMP', 'exdate', 'strike_price', 'cp_flag'], inplace = True)
vix_intra_eod.head()

Unnamed: 0,SYMBOL,TIMESTAMP,MID_VOL,DELTA,FWD_PRICE,exdate,cp_flag,strike_price
766,VIX 150121C00014500,2015-01-02 16:10:00,1.00348,0.83967,17.725,2015-01-21,C,14.5
842,VIX 150121P00014500,2015-01-02 16:10:00,0.97725,-0.15528,17.725,2015-01-21,P,14.5
768,VIX 150121C00015000,2015-01-02 16:10:00,1.072,0.7899,17.725,2015-01-21,C,15.0
844,VIX 150121P00015000,2015-01-02 16:10:00,1.00589,-0.19941,17.725,2015-01-21,P,15.0
770,VIX 150121C00016000,2015-01-02 16:10:00,1.11977,0.70182,17.725,2015-01-21,C,16.0


In [152]:
# Define column numbers for VIX expiration dates in 'expiration_dates.csv'
vx_expirys = 0

def find_vixOptTerms(date):
    for i in range(expiration.shape[vx_expirys]): # Determine month & year of VIX expiry corresponding to current date
        if expiration.iloc[i,vx_expirys].month == date.month and expiration.iloc[i,vx_expirys].year == date.year:
            break                                 # Effectively reduces search area of posible dates
    
    if date < expiration.iloc[i,vx_expirys]:
        term1 = expiration.iloc[i+0,vx_expirys]
        term2 = expiration.iloc[i+1,vx_expirys]
    else:
        term1 = expiration.iloc[i+1,vx_expirys]
        term2 = expiration.iloc[i+2,vx_expirys]
        
    return pd.Series([term1, term2], index=['VIX_Term1', 'VIX_Term2'])

#### Screen for & apply correct 1st & 2nd VIX Expiry terms to dataframe

In [153]:
vix_intra_eod[['VIX_Term1', 'VIX_Term2']] = vix_intra_eod['TIMESTAMP'].apply(find_vixOptTerms)

In [155]:
vix_intra_eod.head()

Unnamed: 0,SYMBOL,TIMESTAMP,MID_VOL,DELTA,FWD_PRICE,exdate,cp_flag,strike_price,VIX_Term1,VIX_Term2
766,VIX 150121C00014500,2015-01-02 16:10:00,1.00348,0.83967,17.725,2015-01-21,C,14.5,2015-01-21,2015-02-18
842,VIX 150121P00014500,2015-01-02 16:10:00,0.97725,-0.15528,17.725,2015-01-21,P,14.5,2015-01-21,2015-02-18
768,VIX 150121C00015000,2015-01-02 16:10:00,1.072,0.7899,17.725,2015-01-21,C,15.0,2015-01-21,2015-02-18
844,VIX 150121P00015000,2015-01-02 16:10:00,1.00589,-0.19941,17.725,2015-01-21,P,15.0,2015-01-21,2015-02-18
770,VIX 150121C00016000,2015-01-02 16:10:00,1.11977,0.70182,17.725,2015-01-21,C,16.0,2015-01-21,2015-02-18


In [157]:
vx1_eod = vix_intra_eod.groupby('TIMESTAMP').apply(lambda x: x[x['exdate'] == x['VIX_Term1']].iloc[0,:])

In [163]:
vx1_eod.head()

Unnamed: 0_level_0,SYMBOL,TIMESTAMP,MID_VOL,DELTA,FWD_PRICE,exdate,cp_flag,strike_price,VIX_Term1,VIX_Term2
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2015-01-02 16:10:00,VIX 150121C00014500,2015-01-02 16:10:00,1.00348,0.83967,17.725,2015-01-21,C,14.5,2015-01-21,2015-02-18
2015-01-02 16:15:00,VIX 150121P00014500,2015-01-02 16:15:00,0.99344,-0.15241,17.825,2015-01-21,P,14.5,2015-01-21,2015-02-18
2015-01-05 16:10:00,VIX 150121C00016000,2015-01-05 16:10:00,1.2272,0.78348,18.925,2015-01-21,C,16.0,2015-01-21,2015-02-18
2015-01-05 16:15:00,VIX 150121C00016000,2015-01-05 16:15:00,1.22031,0.79625,19.125,2015-01-21,C,16.0,2015-01-21,2015-02-18
2015-01-06 16:10:00,VIX 150121C00017000,2015-01-06 16:10:00,1.22219,0.77236,19.825,2015-01-21,C,17.0,2015-01-21,2015-02-18


In [159]:
len(vx1_eod)

986

In [160]:
vx1_eod.isna().sum()

SYMBOL          0
TIMESTAMP       0
MID_VOL         0
DELTA           0
FWD_PRICE       0
exdate          0
cp_flag         0
strike_price    0
VIX_Term1       0
VIX_Term2       0
dtype: int64

### Add VIX futures prices to SPX options dataframe

In [164]:
vx1_opt_eod = spx_options_eod.merge(vx1_eod.iloc[:,4].to_frame(), left_index=True, right_index=True)

In [165]:
vx1_opt_eod.head()

Unnamed: 0_level_0,SPX_Strike1,SPX_Strike2,SPX_Option1,SPX_Option2,t1,t2,FWD_PRICE
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-01-02 16:10:00,2060.0,2055.0,0.132675,0.14268,11.722222,32.722222,17.725
2015-01-02 16:15:00,2060.0,2055.0,0.1351,0.14226,11.71875,32.71875,17.825
2015-01-05 16:10:00,2020.0,2020.0,0.164065,0.161785,10.722222,31.722222,18.925
2015-01-05 16:15:00,2020.0,2020.0,0.166455,0.16144,10.71875,31.71875,19.125
2015-01-06 16:10:00,2005.0,2000.0,0.17523,0.1723,9.722222,30.722222,19.825


#### Screen for occurrences where 1st implied vol is trading at a sharply higher level than the 2nd implied vol

In [166]:
a = (vx1_opt_eod['SPX_Option2']**2 *vx1_opt_eod['t2']/252 - vx1_opt_eod['SPX_Option1']**2 *vx1_opt_eod['t1']/252)
vx1_opt_eod[a<0]

Unnamed: 0_level_0,SPX_Strike1,SPX_Strike2,SPX_Option1,SPX_Option2,t1,t2,FWD_PRICE
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1


#### Set threshold screen and remove instances (individual timestamps) where it is violated

In [167]:
inside = (vx1_opt_eod['SPX_Option2']**2 *vx1_opt_eod['t2']/252 - vx1_opt_eod['SPX_Option1']**2 *vx1_opt_eod['t1']/252)

# Remove instances where the time-weighted implied variance of Term 1 > time-weighted implied variance of Term 2
# ie, cases where subtracting the first term from the second produces a negative value
vx1_opt_eod = vx1_opt_eod[inside > 0]

### Calculate synthetic VIX futures price from SPX implied vols & add it to dataframe

In [168]:
vx1_opt_eod['OPT'] = 100*(np.sqrt((vx1_opt_eod['SPX_Option2']**2 *vx1_opt_eod['t2']/252 - vx1_opt_eod['SPX_Option1']**2\
                                   *vx1_opt_eod['t1']/252) /(vx1_opt_eod['t2']/252 - vx1_opt_eod['t1']/252)))

In [169]:
vx1_opt_eod['VX1-OPT_Spread'] = vx1_opt_eod['FWD_PRICE'] - vx1_opt_eod['OPT']

In [170]:
vx1_opt_eod.head()

Unnamed: 0_level_0,SPX_Strike1,SPX_Strike2,SPX_Option1,SPX_Option2,t1,t2,FWD_PRICE,OPT,VX1-OPT_Spread
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-01-02 16:10:00,2060.0,2055.0,0.132675,0.14268,11.722222,32.722222,17.725,14.797089,2.927911
2015-01-02 16:15:00,2060.0,2055.0,0.1351,0.14226,11.71875,32.71875,17.825,14.610308,3.214692
2015-01-05 16:10:00,2020.0,2020.0,0.164065,0.161785,10.722222,31.722222,18.925,16.060839,2.864161
2015-01-05 16:15:00,2020.0,2020.0,0.166455,0.16144,10.71875,31.71875,19.125,15.881923,3.243077
2015-01-06 16:10:00,2005.0,2000.0,0.17523,0.1723,9.722222,30.722222,19.825,17.092651,2.732349


#### Save SPX options & VIX futures EOD datframe to pickle file

In [171]:
vx1_opt_eod.to_pickle(os.path.join(pickle_filepath, 'vx1_opt_eod.pkl'))

### Finish Parsing VIX options data

In [172]:
vix_intra_eod.head()

Unnamed: 0,SYMBOL,TIMESTAMP,MID_VOL,DELTA,FWD_PRICE,exdate,cp_flag,strike_price,VIX_Term1,VIX_Term2
766,VIX 150121C00014500,2015-01-02 16:10:00,1.00348,0.83967,17.725,2015-01-21,C,14.5,2015-01-21,2015-02-18
842,VIX 150121P00014500,2015-01-02 16:10:00,0.97725,-0.15528,17.725,2015-01-21,P,14.5,2015-01-21,2015-02-18
768,VIX 150121C00015000,2015-01-02 16:10:00,1.072,0.7899,17.725,2015-01-21,C,15.0,2015-01-21,2015-02-18
844,VIX 150121P00015000,2015-01-02 16:10:00,1.00589,-0.19941,17.725,2015-01-21,P,15.0,2015-01-21,2015-02-18
770,VIX 150121C00016000,2015-01-02 16:10:00,1.11977,0.70182,17.725,2015-01-21,C,16.0,2015-01-21,2015-02-18


#### Calculate individual trade dates from TIMESTAMP

In [111]:
trade_dates = pd.DataFrame(np.unique(vix_intra_eod['TIMESTAMP'].apply(lambda x: x.strftime('%Y-%m-%d'))))

In [112]:
trade_dates = np.array([pd.Timestamp(x) for x in np.unique(trade_dates)])

#### Add additional trade dates to dataset (pre-Jan 2015)

In [113]:
extra_dates = pd.date_range('2014-12-16', '2014-12-31', freq=pd.tseries.offsets.BDay()).tolist()

#### Remove Christmas as a trade date

In [114]:
extra_dates.pop(7)

Timestamp('2014-12-25 00:00:00', freq='B')

In [115]:
extra_dates

[Timestamp('2014-12-16 00:00:00', freq='B'),
 Timestamp('2014-12-17 00:00:00', freq='B'),
 Timestamp('2014-12-18 00:00:00', freq='B'),
 Timestamp('2014-12-19 00:00:00', freq='B'),
 Timestamp('2014-12-22 00:00:00', freq='B'),
 Timestamp('2014-12-23 00:00:00', freq='B'),
 Timestamp('2014-12-24 00:00:00', freq='B'),
 Timestamp('2014-12-26 00:00:00', freq='B'),
 Timestamp('2014-12-29 00:00:00', freq='B'),
 Timestamp('2014-12-30 00:00:00', freq='B'),
 Timestamp('2014-12-31 00:00:00', freq='B')]

#### Add 2nd additional set of trade dates to dataset (post-Dec 2016)

In [116]:
extra_dates_2 = pd.date_range('2017-01-01', '2017-01-20', freq=pd.tseries.offsets.BDay()).tolist()

#### Remove MLK Holiday as a trade date

In [117]:
extra_dates_2.pop(10)

Timestamp('2017-01-16 00:00:00', freq='B')

In [118]:
extra_dates_2

[Timestamp('2017-01-02 00:00:00', freq='B'),
 Timestamp('2017-01-03 00:00:00', freq='B'),
 Timestamp('2017-01-04 00:00:00', freq='B'),
 Timestamp('2017-01-05 00:00:00', freq='B'),
 Timestamp('2017-01-06 00:00:00', freq='B'),
 Timestamp('2017-01-09 00:00:00', freq='B'),
 Timestamp('2017-01-10 00:00:00', freq='B'),
 Timestamp('2017-01-11 00:00:00', freq='B'),
 Timestamp('2017-01-12 00:00:00', freq='B'),
 Timestamp('2017-01-13 00:00:00', freq='B'),
 Timestamp('2017-01-17 00:00:00', freq='B'),
 Timestamp('2017-01-18 00:00:00', freq='B'),
 Timestamp('2017-01-19 00:00:00', freq='B'),
 Timestamp('2017-01-20 00:00:00', freq='B')]

#### Combine and sort additional trade dates

In [119]:
trade_dates = np.append(trade_dates, extra_dates)

In [120]:
trade_dates = np.append(trade_dates, extra_dates_2)

In [121]:
trade_dates.sort()

In [123]:
len(trade_dates)

518

In [127]:
#trade_dates[0:20]
trade_dates[-20:]

array([Timestamp('2016-12-22 00:00:00'), Timestamp('2016-12-23 00:00:00'),
       Timestamp('2016-12-27 00:00:00'), Timestamp('2016-12-28 00:00:00'),
       Timestamp('2016-12-29 00:00:00'), Timestamp('2016-12-30 00:00:00'),
       Timestamp('2017-01-02 00:00:00', freq='B'),
       Timestamp('2017-01-03 00:00:00', freq='B'),
       Timestamp('2017-01-04 00:00:00', freq='B'),
       Timestamp('2017-01-05 00:00:00', freq='B'),
       Timestamp('2017-01-06 00:00:00', freq='B'),
       Timestamp('2017-01-09 00:00:00', freq='B'),
       Timestamp('2017-01-10 00:00:00', freq='B'),
       Timestamp('2017-01-11 00:00:00', freq='B'),
       Timestamp('2017-01-12 00:00:00', freq='B'),
       Timestamp('2017-01-13 00:00:00', freq='B'),
       Timestamp('2017-01-17 00:00:00', freq='B'),
       Timestamp('2017-01-18 00:00:00', freq='B'),
       Timestamp('2017-01-19 00:00:00', freq='B'),
       Timestamp('2017-01-20 00:00:00', freq='B')], dtype=object)

In [128]:
def find_vixOptions(df):
    prev_term = expiration[expiration['vx1'] == df['VIX_Term1'].iloc[0]].index[0] - 1
    term1 = df[df['exdate'] == df['VIX_Term1'].iloc[0]]
    term2 = df[df['exdate'] == df['VIX_Term2'].iloc[0]]
    #t1 = len(trade_dates[(trade_dates > df['TIMESTAMP'].iloc[0]) & ( trade_dates <= df['Term_1'].iloc[0] )])
    ##t2 = len(trade_dates[(trade_dates > df['TIMESTAMP'].iloc[0]) & ( trade_dates <= df['Term_2'].iloc[0] )])
    
    # Calculate number of total trading dates between previous & current VIX expiration dates
    tdates_per_expiry = len(trade_dates[(trade_dates >= expiration.iloc[prev_term]['vx1'])\
                                         & ( trade_dates < df['VIX_Term1'].iloc[0] )])
    
    # Calculate current timestamp & future VIX expirys out to the hour & minute 
    # This approach allows for a more continuous view of Time to Expiry (TTE)
    partial_day = (df['TIMESTAMP'].iloc[0].hour + df['TIMESTAMP'].iloc[0].minute/60)/24 - 9.5/24
    t1 = len(trade_dates[(trade_dates > df['TIMESTAMP'].iloc[0])\
                         & ( trade_dates <= df['VIX_Term1'].iloc[0] )]) - partial_day
    t2 = len(trade_dates[(trade_dates >= expiration.iloc[prev_term]['vx1'])\
                         & (trade_dates < df['VIX_Term1'].iloc[0])]) - partial_day
    
    if term1.shape[0] == 0 or term2.shape[0] == 0:
        return pd.Series([np.nan,np.nan,np.nan,np.nan,t1,t2,tdates_per_expiry], index=['VIX_Strike1','VIX_Strike2',\
                                                                    'VIX_Option1','VIX_Option2','t1','t2','trade_dates/expiry'])
        
    #print(df['date'].iloc[0])
    strike_list1 = np.unique(term1['strike_price'])
    #print(strike_list1)
    mindelta1 = 5
    for strikeIndx, strike in enumerate(strike_list1): 
        x = np.abs(term1[term1['strike_price'] == strike]['DELTA'].sum())
        if x < mindelta1:
            mindelta1 = x
            minStrike1 = strike
            
    strike_list2 = np.unique(term2['strike_price'])
    mindelta2 = 5
    for strikeIndx, strike in enumerate(strike_list2): 
        x = np.abs(term2[term2['strike_price'] == strike]['DELTA'].sum())
        if x < mindelta2:
            mindelta2 = x
            minStrike2 = strike
            
    avg1 = term1[term1['strike_price'] == minStrike1]['MID_VOL'].mean()
    avg2 = term2[term2['strike_price'] == minStrike2]['MID_VOL'].mean()
    
    return pd.Series([minStrike1, minStrike2, avg1, avg2, t1, t2, tdates_per_expiry],\
                     index=['VIX_Strike1', 'VIX_Strike2','VIX_Option1', 'VIX_Option2','t1','t2','trade_dates/expiry']) 

#### Apply 'findOptions' function to vix_opt dataframe to determine correct ATM implied vol of VIX options for each timestamp

In [129]:
vix_eod_vol = vix_intra_eod.groupby('TIMESTAMP').apply(find_vixOptions)

In [131]:
vix_eod_vol.tail()

Unnamed: 0_level_0,VIX_Strike1,VIX_Strike2,VIX_Option1,VIX_Option2,t1,t2,trade_dates/expiry
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-12-28 16:15:00,15.0,17.0,0.9192,0.87278,13.71875,17.71875,18.0
2016-12-29 16:10:00,16.0,17.0,0.98629,0.860325,12.722222,17.722222,18.0
2016-12-29 16:15:00,15.0,17.0,0.904865,0.87054,12.71875,17.71875,18.0
2016-12-30 16:10:00,16.0,17.0,1.01232,0.867175,11.722222,17.722222,18.0
2016-12-30 16:15:00,16.0,17.0,1.012445,0.86722,11.71875,17.71875,18.0


#### Calculate Time-Weighted average of implied vol between 1st & 2nd VIX options contracts

In [132]:
vix_eod_vol['Weight_Opt'] = (vix_eod_vol['VIX_Option1']*(vix_eod_vol['t1']/vix_eod_vol['trade_dates/expiry'])) + \
                        (vix_eod_vol['VIX_Option2']*(1-(vix_eod_vol['t1']/vix_eod_vol['trade_dates/expiry'])))

In [133]:
vix_eod_vol.head()

Unnamed: 0_level_0,VIX_Strike1,VIX_Strike2,VIX_Option1,VIX_Option2,t1,t2,trade_dates/expiry,Weight_Opt
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-01-02 16:10:00,19.0,20.0,1.349465,1.094165,11.722222,21.722222,22.0,1.230196
2015-01-02 16:15:00,19.0,20.0,1.34344,1.085335,11.71875,21.71875,22.0,1.22282
2015-01-05 16:10:00,20.0,21.0,1.54275,1.1925,10.722222,21.722222,22.0,1.363203
2015-01-05 16:15:00,20.0,21.0,1.55063,1.192595,10.71875,21.71875,22.0,1.367035
2015-01-06 16:10:00,21.0,21.0,1.538645,1.15383,9.722222,21.722222,22.0,1.323887


#### Check for any Null values (NaNs)

In [134]:
vix_eod_vol.isna().sum()

VIX_Strike1           1
VIX_Strike2           1
VIX_Option1           1
VIX_Option2           1
t1                    0
t2                    0
trade_dates/expiry    0
Weight_Opt            1
dtype: int64

In [135]:
vix_eod_vol[vix_eod_vol['VIX_Strike1'].isna()]

Unnamed: 0_level_0,VIX_Strike1,VIX_Strike2,VIX_Option1,VIX_Option2,t1,t2,trade_dates/expiry,Weight_Opt
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-02-13 16:10:00,,,,,1.722222,18.722222,19.0,


In [136]:
vix_eod_vol = vix_eod_vol[~vix_eod_vol['VIX_Strike1'].isna()]

#### Save/Retrieve vix_vol dataframe to/from pickle file 

In [137]:
vix_eod_vol.to_pickle(pickle_filepath + r'\vix_eod_vol_2terms.pkl')

### Finish parsing SPX Cash Data

In [9]:
spx_cash_eod = spx_intra_eod[['TIMESTAMP', 'UPRICE_BID', 'UPRICE_ASK']].copy()

In [10]:
spx_cash_eod.head()

Unnamed: 0,TIMESTAMP,UPRICE_BID,UPRICE_ASK
70,2015-01-02 16:10:00,2058.19995,2058.19995
71,2015-01-02 16:15:00,2058.19995,2058.19995
72,2015-01-02 16:10:00,2058.19995,2058.19995
73,2015-01-02 16:15:00,2058.19995,2058.19995
74,2015-01-02 16:10:00,2058.19995,2058.19995


In [11]:
len(spx_cash_eod)

133288

In [12]:
spx_cash_eod = spx_cash_eod.drop_duplicates(subset=['TIMESTAMP'])

In [13]:
len(spx_cash_eod)

986

In [14]:
spx_cash_eod.sort_values(by =['TIMESTAMP'], inplace = True)

In [15]:
spx_cash_eod['UPRICE_MID'] = (spx_cash_eod['UPRICE_BID'] + spx_cash_eod['UPRICE_ASK']) / 2

In [16]:
spx_cash_eod.tail()

Unnamed: 0,TIMESTAMP,UPRICE_BID,UPRICE_ASK,UPRICE_MID
529167,2016-12-28 16:15:00,2249.91992,2249.91992,2249.91992
530364,2016-12-29 16:10:00,2249.26001,2249.26001,2249.26001
530367,2016-12-29 16:15:00,2249.26001,2249.26001,2249.26001
531556,2016-12-30 16:10:00,2238.83008,2238.83008,2238.83008
531557,2016-12-30 16:15:00,2238.83008,2238.83008,2238.83008


In [17]:
spx_cash_eod.set_index('TIMESTAMP', inplace=True)

In [19]:
spx_cash_eod.tail()

Unnamed: 0_level_0,UPRICE_BID,UPRICE_ASK,UPRICE_MID
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-12-28 16:15:00,2249.91992,2249.91992,2249.91992
2016-12-29 16:10:00,2249.26001,2249.26001,2249.26001
2016-12-29 16:15:00,2249.26001,2249.26001,2249.26001
2016-12-30 16:10:00,2238.83008,2238.83008,2238.83008
2016-12-30 16:15:00,2238.83008,2238.83008,2238.83008


#### Save final intraday SPX Price Returns dataset to pickle 

In [18]:
spx_cash_eod.to_pickle(pickle_filepath + r'\spx_returns_eod.pkl')