In [548]:
import pandas as pd
import numpy as np
import xgboost as xgb

In [549]:
# Load the DIX/GEX history. header=0 discards the file's own header row in
# favour of the explicit column names given here.
df_data = pd.read_csv(
    'dix.csv',
    header=0,
    names=['Date', 'SP500', 'DIX', 'GEX'],
    index_col='Date',
    parse_dates=True,
)

# skiprows=1 skips the first line of the VIX file, so the line after it is
# used as the header. Only the closing value is kept, renamed to 'VIX'.
df_vix = pd.read_csv(
    'vixcurrent.csv', skiprows=1, index_col='Date', parse_dates=True
)['VIX Close'].rename('VIX')

# Left join: every row of the DIX frame is kept; dates absent from the VIX
# series end up with NaN in the 'VIX' column.
df_data = df_data.join(df_vix, how='left')

# Optional: restrict the history to the most recent 500 rows.
#df_data = df_data.tail(500)

def get_signal(x):
    """Classify a return into a three-way signal.

    ``x`` is converted with ``exp(x) - 1`` (i.e. it is treated as a log
    return) and the result is compared against a +/-0.8% band.

    Returns 1 above the band, -1 below it and 0 otherwise (including when
    the comparison is not satisfied on either side, e.g. for NaN).
    """
    threshold = 0.008
    simple_return = np.exp(x) - 1

    if simple_return > threshold:
        return 1
    if simple_return < -threshold:
        return -1
    return 0

def get_flip(row):
    """Report whether the GEX sign changed between the previous and current row.

    ``row`` must support lookup by the keys 'GEX dir' and 'GEX last dir'.

    Returns 1 when the direction went from negative to positive, -1 when it
    went from positive to negative, and 0 in every other case.
    """
    current, previous = row['GEX dir'], row['GEX last dir']

    if current > 0 and previous < 0:
        return 1
    if current < 0 and previous > 0:
        return -1
    return 0

def process_data(df):
    """Derive return, difference and GEX sign-flip columns from the raw frame.

    The input frame is left untouched: all work happens on a copy, so the
    function can be called repeatedly on the same frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with numeric 'SP500', 'DIX', 'GEX' and 'VIX' columns, one row
        per observation in chronological order.

    Returns
    -------
    pandas.DataFrame
        A new frame without the 'SP500' column and without the first input
        row (whose change-based columns are undefined), with these columns
        added:

        * 'SP500 r'       - percent change of 'SP500' versus the previous row
        * '<col> diff'    - first difference of 'DIX', 'GEX' and 'VIX'
        * 'SP500 r next'  - 'SP500 r' of the following row (NaN on the last row)
        * 'GEX dir'       - -1 where 'GEX' < 0, otherwise 1
        * 'GEX last dir'  - 'GEX dir' of the previous row
        * 'GEX flip'      - 1 on a negative-to-positive change of 'GEX dir',
                            -1 on positive-to-negative, 0 otherwise
    """
    out = df.copy()

    out['SP500 r'] = 100 * out['SP500'].pct_change()
    for col in ['DIX', 'GEX', 'VIX']:
        out[col + ' diff'] = out[col].diff()
    out['SP500 r next'] = out['SP500 r'].shift(periods=-1)

    # Anything that is not strictly negative (zero and NaN included) counts
    # as the positive direction.
    out['GEX dir'] = np.where(out['GEX'] < 0, -1, 1).astype('int64')
    out['GEX last dir'] = out['GEX dir'].shift(periods=1)

    # Vectorised sign-flip detection; comparisons against the NaN in the
    # first 'GEX last dir' entry are False, so that row yields 0.
    turned_up = (out['GEX dir'] > 0) & (out['GEX last dir'] < 0)
    turned_down = (out['GEX dir'] < 0) & (out['GEX last dir'] > 0)
    out['GEX flip'] = turned_up.astype('int64') - turned_down.astype('int64')

    out = out.drop('SP500', axis=1)

    # Return an independent frame rather than a slice, so later column
    # assignments on the result do not refer back to a parent frame.
    return out.iloc[1:].copy()

# Build the derived feature/target columns; the result replaces the raw frame.
df_data = process_data(df_data)
# Disabled: relies on a 'signal' column that process_data does not create.
#df_data['signal'].value_counts()
# Preview the first rows of the processed frame.
df_data.head()

Unnamed: 0_level_0,DIX,GEX,VIX,SP500 r,DIX diff,GEX diff,VIX diff,SP500 r next,GEX dir,GEX last dir,GEX flip
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2011-05-03,0.383411,1859731000.0,16.7,-0.33793,0.004569,-37581920.0,0.71,-0.685531,1,1.0,0
2011-05-04,0.392122,1717764000.0,17.08,-0.685531,0.008711,-141966800.0,0.38,-0.906984,1,1.0,0
2011-05-05,0.405457,1361864000.0,18.2,-0.906984,0.013335,-355899600.0,1.12,0.381992,1,1.0,0
2011-05-06,0.418649,1490329000.0,18.4,0.381992,0.013192,128464300.0,0.2,0.454416,1,1.0,0
2011-05-09,0.410321,1677059000.0,17.16,0.454416,-0.008328,186730100.0,-1.24,0.807404,1,1.0,0


In [550]:
# Columns handed to the regressor as inputs.
features = ['DIX', 'GEX', 'GEX flip', 'VIX', 'DIX diff', 'SP500 r']

# Regression target: the 'SP500 r' value of the following row.
target = 'SP500 r next'

# The most recent `prediction_size` rows are held back for prediction;
# everything before them is treated as labelled training data.
prediction_size = 10
labelled_size = len(df_data) - prediction_size

df_labelled = df_data.head(labelled_size)
X_to_predict = df_data.tail(prediction_size)[features]

In [551]:
# Training inputs and target taken from the labelled rows.
X_train, y_train = df_labelled[features], df_labelled[target]

# Gradient-boosted regressor using the squared-error objective.
model = xgb.XGBRegressor(
    n_estimators=300,
    learning_rate=0.05,
    objective='reg:squarederror',
)

# Kept as the cell's last expression so the fitted estimator is echoed.
model.fit(X_train, y_train)



XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.05, max_delta_step=0,
             max_depth=3, min_child_weight=1, missing=None, n_estimators=300,
             n_jobs=1, nthread=None, objective='reg:squarederror',
             random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
             seed=None, silent=None, subsample=1, verbosity=1)

In [552]:
# Disabled: references 'SP500 lr next', which is not a column of df_data
# (the target column is 'SP500 r next').
#df_data['Returns'] = np.exp(df_data['SP500 lr next']) - 1
# Show the held-back rows, including their 'SP500 r next' values where known.
df_data.tail(prediction_size)

Unnamed: 0_level_0,DIX,GEX,VIX,SP500 r,DIX diff,GEX diff,VIX diff,SP500 r next,GEX dir,GEX last dir,GEX flip
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-03-24,0.402208,600220000.0,61.67,9.382766,0.035637,668023500.0,0.08,1.153502,1,-1.0,1
2020-03-25,0.461185,-157646700.0,63.95,1.153502,0.058977,-757866600.0,2.28,6.241416,-1,1.0,-1
2020-03-26,0.480941,1106730000.0,61.0,6.241416,0.019756,1264376000.0,-2.95,-3.368732,1,-1.0,1
2020-03-27,0.482826,-289385500.0,65.54,-3.368732,0.001885,-1396115000.0,4.54,3.351604,-1,1.0,-1
2020-03-30,0.47274,98620250.0,57.08,3.351604,-0.010086,388005700.0,-8.46,-1.601279,1,-1.0,1
2020-03-31,0.497417,-148210800.0,53.54,-1.601279,0.024677,-246831100.0,-3.54,-4.41424,-1,1.0,-1
2020-04-01,0.511401,-772850000.0,57.06,-4.41424,0.013985,-624639200.0,3.52,2.282939,-1,-1.0,0
2020-04-02,0.485221,-507398400.0,50.91,2.282939,-0.02618,265451600.0,-6.15,-1.513712,-1,-1.0,0
2020-04-03,0.506475,-463079700.0,46.8,-1.513712,0.021254,44318740.0,-4.11,7.03313,-1,-1.0,0
2020-04-06,0.446698,1555165000.0,45.24,7.03313,-0.059777,2018244000.0,-1.56,,1,-1.0,1


In [553]:
# Predict 'SP500 r next' for the held-back rows. Selecting `features`
# explicitly keeps this cell re-runnable: once 'Projected Returns' has been
# attached below, X_to_predict carries a column the model was not trained on.
pred = model.predict(X_to_predict[features])

# The target is a simple return expressed in percent (100 * pct_change), not
# a log return, so the fractional growth is pred / 100 rather than
# exp(pred) - 1.
growth = pred / 100

# assign() builds a new frame instead of writing a column into a frame that
# was sliced out of df_data, which avoids pandas' SettingWithCopyWarning.
X_to_predict = X_to_predict.assign(**{'Projected Returns': pred})
X_to_predict

Unnamed: 0_level_0,DIX,GEX,GEX flip,VIX,DIX diff,SP500 r,Projected Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-03-24,0.402208,600220000.0,1,61.67,0.035637,9.382766,-9.650722
2020-03-25,0.461185,-157646700.0,-1,63.95,0.058977,1.153502,-2.408029
2020-03-26,0.480941,1106730000.0,1,61.0,0.019756,6.241416,-3.030511
2020-03-27,0.482826,-289385500.0,-1,65.54,0.001885,-3.368732,-1.462933
2020-03-30,0.47274,98620250.0,1,57.08,-0.010086,3.351604,-0.389815
2020-03-31,0.497417,-148210800.0,-1,53.54,0.024677,-1.601279,-0.875075
2020-04-01,0.511401,-772850000.0,0,57.06,0.013985,-4.41424,0.345173
2020-04-02,0.485221,-507398400.0,0,50.91,-0.02618,2.282939,1.211602
2020-04-03,0.506475,-463079700.0,0,46.8,0.021254,-1.513712,-0.559591
2020-04-06,0.446698,1555165000.0,1,45.24,-0.059777,7.03313,-2.935199
