In [1]:
import pandas as pd
import trading_vix_utils
import numpy as np

In [2]:
# Load the three price files and join them into a single frame keyed on "Date".
# NOTE(review): the column names look like a Yahoo-Finance-style export — confirm.

# Source column -> suffix used after prefixing. "Date" is deliberately absent
# so it keeps its name and can serve as the join key.
_PRICE_COLUMN_SUFFIXES = {
    "Open": "open",
    "High": "high",
    "Low": "low",
    "Close": "close",
    "Adj Close": "adj_close",
    "Volume": "volume",
}


def _load_prefixed_prices(csv_path, prefix):
    """Read a price CSV and prefix its price/volume column names.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.
    prefix : str
        Text prepended (with an underscore) to each renamed column, e.g.
        ``"spy_price"`` turns ``"Adj Close"`` into ``"spy_price_adj_close"``.

    Returns
    -------
    pandas.DataFrame
        The file contents with the columns listed in
        ``_PRICE_COLUMN_SUFFIXES`` renamed. Any other column, including
        ``"Date"``, is left untouched.
    """
    column_names = {
        source: prefix + "_" + suffix
        for source, suffix in _PRICE_COLUMN_SUFFIXES.items()
    }
    return pd.read_csv(csv_path).rename(columns=column_names)


index_data = _load_prefixed_prices("^VIX.csv", "vix_index")
vix_price_data = _load_prefixed_prices("VIXY.csv", "vix_price")
spy_price_data = _load_prefixed_prices("SPY.csv", "spy_price")

# Inner joins keep only the dates that are present in all three files.
total_data = pd.merge(index_data, vix_price_data, on="Date", how="inner")
total_data = pd.merge(total_data, spy_price_data, on="Date", how="inner")

In [3]:
# Build the VIX features, based on the vix6 trading environment.

# Exponentially weighted moving averages of the VIX index adjusted close,
# one per span.
ewm_spans = (10, 20, 30, 50, 100)
vix_spot = total_data['vix_index_adj_close']
moving_averages = [vix_spot.ewm(span=span).mean() for span in ewm_spans]
mv_10, mv_20, mv_30, mv_50, mv_100 = moving_averages

# Ratio of the spot value to each moving average.
spot_ratios = [vix_spot / moving_average for moving_average in moving_averages]
spot_to_mv_10, spot_to_mv_20, spot_to_mv_30, spot_to_mv_50, spot_to_mv_100 = spot_ratios

# Sum of the five ratios, accumulated left to right; it equals 5 when the spot
# value matches every moving average.
vix_measure = spot_ratios[0]
for ratio in spot_ratios[1:]:
    vix_measure = vix_measure + ratio
vix_measure_list = vix_measure.tolist()

# The first row is dropped everywhere ([1:]) so the columns line up with the
# output of day_counter_helper.
index_feature_dataframe = pd.DataFrame({
    'vix_price_adj_close': total_data['vix_price_adj_close'][1:],
    'vix_adj_close': total_data['vix_index_adj_close'][1:],
    'mv_ratio': vix_measure_list[1:],
})

# One "days_since_<threshold>" column per threshold on the summed ratio.
# NOTE(review): presumably the number of days since the measure last reached
# the threshold — confirm against trading_vix_utils.day_counter_helper.
threshold_list = [5, 6, 7]
for threshold in threshold_list:
    counting_days = trading_vix_utils.day_counter_helper(vix_measure_list, threshold)
    index_feature_dataframe['days_since_' + str(threshold)] = counting_days

# Keep only the most recent 1000 rows and renumber them from 0.
# Open question from the original author: there may be a vix regime change in 2018/1??
index_feature_dataframe = index_feature_dataframe.iloc[-1000:].reset_index(drop=True)

In [4]:
# Build the SPY observation: for every row, a window of consecutive SPY
# adjusted closes ending on that row.

spy_data_per_day = 1
spy_max_observation_history = 3
# Number of prices in one observation window.
spy_window_length = spy_data_per_day * spy_max_observation_history
# A window can start at every row that still has a full window after it.
spy_temp_data_max_rows = total_data.shape[0] - spy_window_length + 1
# The width is the full window length, so the array matches the slices written
# into it even when spy_data_per_day is changed from 1.
spy_temp_data = np.zeros((spy_temp_data_max_rows, spy_window_length))

spy_adj_close = total_data['spy_price_adj_close']
for window_start in range(spy_temp_data_max_rows):
    # .iloc makes the positional slicing explicit.
    window_end = window_start + spy_window_length
    spy_temp_data[window_start, :] = spy_adj_close.iloc[window_start:window_end]

# One list of prices per window, in chronological order.
spy_observation_data_list = spy_temp_data.tolist()

# index_feature_dataframe only holds the most recent rows, so keep the same
# number of trailing windows. The count is taken from the frame itself rather
# than hard-coded, so it follows any change to the cut applied when the frame
# was built.
feature_row_count = len(index_feature_dataframe)
available_window_count = len(spy_observation_data_list)
if feature_row_count > available_window_count:
    raise ValueError(
        "index_feature_dataframe has %d rows but only %d SPY windows are available"
        % (feature_row_count, available_window_count)
    )
# Slice from an explicit start offset: a plain [-0:] would return the whole list.
first_kept_window = available_window_count - feature_row_count
index_feature_dataframe['spy_observation'] = spy_observation_data_list[first_kept_window:]

In [5]:
# Preview the last five rows of the assembled feature table.
index_feature_dataframe.tail(5)

Unnamed: 0,vix_price_adj_close,vix_adj_close,mv_ratio,days_since_5,days_since_6,days_since_7,spy_observation
995,25.27,16.66,4.535959,3,29,102,"[414.920013, 420.859985, 423.109985]"
996,24.77,16.32,4.476456,4,30,103,"[420.859985, 423.109985, 422.600006]"
997,24.23,15.97,4.416975,5,31,104,"[423.109985, 422.600006, 425.100006]"
998,23.870001,15.62,4.35983,6,32,105,"[422.600006, 425.100006, 426.609985]"
999,23.91,15.76,4.432283,7,33,106,"[425.100006, 426.609985, 427.470001]"
