In [1]:
from typing import Any, Union, List, Tuple
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

In [2]:
feature_columns = list([
    'SPY_1DRET', 'SPY_1MRET', 'SPY_6MRET', 'SPY_1YRET', 'SPY_1DVOL', 'SPY_1WVOL', 'SPY_1MVOL', 
    'IWV_1DRET', 'IWV_1MRET', 'IWV_6MRET', 'IWV_1YRET', 'IWV_1DVOL', 'IWV_1WVOL', 'IWV_1MVOL', 
    'QQQ_1DRET', 'QQQ_1MRET', 'QQQ_6MRET', 'QQQ_1YRET', 'QQQ_1DVOL', 'QQQ_1WVOL', 'QQQ_1MVOL', 
    'IYF_1DRET', 'IYF_1MRET', 'IYF_6MRET', 'IYF_1YRET', 'IYF_1DVOL', 'IYF_1WVOL', 'IYF_1MVOL', 
    'XRT_1DRET', 'XRT_1MRET', 'XRT_6MRET', 'XRT_1YRET', 'XRT_1DVOL', 'XRT_1WVOL', 'XRT_1MVOL', 
    'XLP_1DRET', 'XLP_1MRET', 'XLP_6MRET', 'XLP_1YRET', 'XLP_1DVOL', 'XLP_1WVOL', 'XLP_1MVOL', 
    'XLU_1DRET', 'XLU_1MRET', 'XLU_6MRET', 'XLU_1YRET', 'XLU_1DVOL', 'XLU_1WVOL', 'XLU_1MVOL', 
    'XLV_1DRET', 'XLV_1MRET', 'XLV_6MRET', 'XLV_1YRET', 'XLV_1DVOL', 'XLV_1WVOL', 'XLV_1MVOL', 
    'IYT_1DRET', 'IYT_1MRET', 'IYT_6MRET', 'IYT_1YRET', 'IYT_1DVOL', 'IYT_1WVOL', 'IYT_1MVOL', 
    'GLD_1DRET', 'GLD_1MRET', 'GLD_6MRET', 'GLD_1YRET', 'GLD_1DVOL', 'GLD_1WVOL', 'GLD_1MVOL', 
    'SLV_1DRET', 'SLV_1MRET', 'SLV_6MRET', 'SLV_1YRET', 'SLV_1DVOL', 'SLV_1WVOL', 'SLV_1MVOL', 
    'MXI_1DRET', 'MXI_1MRET', 'MXI_6MRET', 'MXI_1YRET', 'MXI_1DVOL', 'MXI_1WVOL', 'MXI_1MVOL', 
    'IGE_1DRET', 'IGE_1MRET', 'IGE_6MRET', 'IGE_1YRET', 'IGE_1DVOL', 'IGE_1WVOL', 'IGE_1MVOL', 
    'XLE_1DRET', 'XLE_1MRET', 'XLE_6MRET', 'XLE_1YRET', 'XLE_1DVOL', 'XLE_1WVOL', 'XLE_1MVOL', 
    '3M_TBILL', 'CPI', 'VIX', 'INDP', 'USHY_ADJ', 'US_LEADING', '30Y_FRMTG', '15Y_FRMTG', 'CPI_URBAN', 
    'RETAIL', 'PHARMA', 'UNEMP', 'UNEMP_PERM', 'UNEMP_MEN', 'UNEMP_WMN', 'UNEMP_WHT', 'UNEMP_BLK', 
    'UNEMP_HIS', 'INC', 'INC_DISP', 'INC_DISP_PC', 'TAX_HIGH', 'TAX_LOW'
])

""" 
We will use all 25 target columns in the neural network, for now we will use only the SPY 1 month return

target_columns = list([
    'SPY_1MRET', 'IWV_1MRET', 'QQQ_1MRET', 'IYF_1MRET', 'XRT_1MRET', 'XLP_1MRET', 'XLU_1MRET', 
    'XLV_1MRET', 'IYT_1MRET', 'GLD_1MRET', 'SLV_1MRET', 'MXI_1MRET', 'IGE_1MRET', 'XLE_1MRET',
    '3M_TBILL', 'CPI', 'VIX', 'INDP', 'USHY_ADJ', '30Y_FRMTG', '15Y_FRMTG', 'RETAIL', 'PHARMA', 'UNEMP', 'INC'
])
"""
target_columns = list(['SPY_1MRET'])


# construct a dictionary with all market data in divided into sets and features/targets
market_data = dict({
    "X" : pd.read_pickle("data/market_data.zip").loc[:, feature_columns],
    "y" : pd.read_pickle("data/market_data.zip").loc[:, target_columns].shift(-30)  # shift back by 1 month
})
market_data["X_train"] = market_data["X"].loc['2008-01-01':'2014-12-31', :]
market_data["y_train"] = market_data["y"].loc['2008-01-01':'2014-12-31', :]
market_data["X_train_all"] = market_data["X"].loc['2008-01-01':'2016-12-31', :]
market_data["y_train_all"] = market_data["y"].loc['2008-01-01':'2016-12-31', :]
market_data["X_held_out"] = market_data["X"].loc['2015-01-01':'2016-12-31', :]
market_data["y_held_out"] = market_data["y"].loc['2015-01-01':'2016-12-31', :]
market_data["X_test"] = market_data["X"].loc['2017-01-01':'2020-12-31', :]
market_data["y_test"] = market_data["y"].loc['2017-01-01':'2020-12-31', :]


# scale all data based on the training set (excluding the validation set)
X_scaler = StandardScaler()
y_scaler = StandardScaler()
X_scaler.fit(market_data["X_train"])
y_scaler.fit(market_data["y_train"])

market_data["X_train"] = pd.DataFrame(X_scaler.transform(market_data["X_train"]), index=market_data["X_train"].index, columns=feature_columns)
market_data["y_train"] = pd.DataFrame(y_scaler.transform(market_data["y_train"]), index=market_data["y_train"].index, columns=target_columns)
market_data["X_train_all"] = pd.DataFrame(X_scaler.transform(market_data["X_train_all"]), index=market_data["X_train_all"].index, columns=feature_columns)
market_data["y_train_all"] = pd.DataFrame(y_scaler.transform(market_data["y_train_all"]), index=market_data["y_train_all"].index, columns=target_columns)
market_data["X_held_out"] = pd.DataFrame(X_scaler.transform(market_data["X_held_out"]), index=market_data["X_held_out"].index, columns=feature_columns)
market_data["y_held_out"] = pd.DataFrame(y_scaler.transform(market_data["y_held_out"]), index=market_data["y_held_out"].index, columns=target_columns)
market_data["X_test"] = pd.DataFrame(X_scaler.transform(market_data["X_test"]), index=market_data["X_test"].index, columns=feature_columns)
market_data["y_test"] = pd.DataFrame(y_scaler.transform(market_data["y_test"]), index=market_data["y_test"].index, columns=target_columns)

In [3]:
# Create split on train_all with -1 for training data and 0 for validation data (data after '2015-01-01')
split = PredefinedSplit(test_fold=[-1 if i else 0 for i in market_data["X_train_all"].index >= '2015-01-01'])

# Use PredefinedSplit in GridSearchCV
param_grid = {
    "kernel": ["linear", "rbf", "sigmoid", "precomputed"], 
    "C": [0, 0.1, 1, 10]
}
model = GridSearchCV(estimator=SVR(cache_size=1000), cv=split, param_grid=param_grid)
model.fit(market_data["X_train_all"], market_data["y_train_all"])

pred_test = linear_svm.predict(market_data["X_test"])


  return f(**kwargs)
Traceback (most recent call last):
  File "/Users/Aaditya/opt/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/Aaditya/opt/anaconda3/lib/python3.8/site-packages/sklearn/svm/_base.py", line 217, in fit
    fit(X, y, sample_weight, solver_type, kernel, random_seed=seed)
  File "/Users/Aaditya/opt/anaconda3/lib/python3.8/site-packages/sklearn/svm/_base.py", line 268, in _dense_fit
    self._probB, self.fit_status_ = libsvm.fit(
  File "sklearn/svm/_libsvm.pyx", line 191, in sklearn.svm._libsvm.fit
ValueError: C <= 0

  return f(**kwargs)
Traceback (most recent call last):
  File "/Users/Aaditya/opt/anaconda3/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/Aaditya/opt/anaconda3/lib/python3.8/site-packages/sklearn/svm/_base.py", line 217

ValueError: X should be a square kernel matrix