In [None]:
import numpy as np
import pandas as pd
import pandas_market_calendars as mcal
from sklearn.metrics import mean_squared_error, median_absolute_error
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler, QuantileTransformer
from tensorflow.keras.preprocessing import timeseries_dataset_from_array
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D
from tensorflow.keras.metrics import MeanSquaredError, RootMeanSquaredError
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

#### Define the tickers and indicators used

In [None]:
# Exchange-traded fund tickers whose price-derived features and targets are modelled
tickers = [
    "SPY",  # S&P 500 Index Fund
    "IWV",  # Russell 3000 Index Fund
    "QQQ",  # Technology Sector Fund
    "IYF",  # Financials Sector Fund
    "XLP",  # Consumer Staples Sector Fund
    "XLU",  # Utilities Sector Funds
    "XLV",  # Health Care Sector Funds
    "IGE",  # NA Natural Resources ETF
    "XLE",  # Energy Sector Fund
]

# Mapping of short column alias -> FRED series identifier.
# Insertion order matters: the aliases are appended to the feature list in this order.
indicators = {
    # --- rates, prices, volatility and activity ---
    "3M_TBILL": "DTB3",  # 3-Month Treasury Bill: Secondary Market Rate
    "CPI": "MEDCPIM158SFRBCLE",  # Median Consumer Price Index
    "VIX": "VIXCLS",  # CBOE Volatility Index
    "INDP": "INDPRO",  # Industrial Production: Total Index
    "USHY_ADJ": "BAMLH0A0HYM2",  # ICE BofA US High Yield Index Option-Adjusted Spread
    "US_LEADING": "USSLIND",  # Leading Index for the United States
    # --- housing ---
    "30Y_FRMTG": "MORTGAGE30US",  # 30-Year Fixed Rate Mortgage Average in the United States
    "15Y_FRMTG": "MORTGAGE15US",  # 15-Year Fixed Rate Mortgage Average in the United States
    "CPI_URBAN": "CUSR0000SEHA",  # Consumer Price Index for All Urban Consumers: Rent of Primary Residence in U.S. City Average
    # --- sales and producer prices ---
    "RETAIL": "RSAFS",  # Advance Retail Sales: Retail and Food Services, Total
    "PHARMA": "PCU32543254",  # Producer Price Index by Industry: Pharmaceutical and Medicine Manufacturing
    # --- unemployment ---
    "UNEMP": "UNRATE",  # Unemployment Rate
    "UNEMP_PERM": "LNS13026638",  # Unemployment Level - Permanent Job Losers
    "UNEMP_MEN": "LNS14000001",  # Unemployment Rate - Men
    "UNEMP_WMN": "LNS14000002",  # Unemployment Rate - Women
    "UNEMP_WHT": "LNS14000003",  # Unemployment Rate - White
    "UNEMP_BLK": "LNS14000006",  # Unemployment Rate - Black or African American
    "UNEMP_HIS": "LNS14000009",  # Unemployment Rate - Hispanic or Latino
    # --- income and taxes ---
    "INC": "PI",  # Personal Income
    "INC_DISP": "DSPIC96",  # Real Disposable Personal Income
    "INC_DISP_PC": "A229RX0",  # Real Disposable Personal Income: Per Capita
    "TAX_HIGH": "IITTRHB",  # U.S Individual Income Tax: Tax Rates for Regular Tax: Highest Bracket
    "TAX_LOW": "IITTRLB",  # U.S Individual Income Tax: Tax Rates for Regular Tax: Lowest Bracket
}

In [None]:
# (timeframe, calculations) pairs, listed in the column order generated for every ticker:
# daily has only RET/VOL, the intermediate horizons add STD and GBM, and 1Y has no GBM.
horizon_calculations = (
    [('1D', ['RET', 'VOL'])]
    + [(horizon, ['RET', 'STD', 'VOL', 'GBM']) for horizon in ['1W', '1M', '3M', '6M']]
    + [('1Y', ['RET', 'STD', 'VOL'])]
)

# Per-ticker feature column names, ticker-major, e.g. 'SPY_1D_RET', 'SPY_1D_VOL', 'SPY_1W_RET', ...
features = [
    f'{ticker}_{timeframe}_{calculation}'
    for ticker in tickers
    for timeframe, calculations in horizon_calculations
    for calculation in calculations
]

# The indicator aliases follow the ticker-derived columns, in dictionary order.
features.extend(indicators.keys())
len(features)

In [None]:
# One target column per ticker, named '<TICKER>_TARGET', in the same order as `tickers`
targets = list(map('{}_TARGET'.format, tickers))
targets

#### Split the data into training, testing, and validation sets

In [None]:
# Construct a dictionary with all market data, divided into date-range sets and features/targets
dates = mcal.get_calendar('NYSE').schedule(start_date='2004-01-01', end_date='2020-12-31').index

# Load the pickled frame once and take both column subsets from it
# (the same archive was previously unpickled twice, doubling the I/O).
# NOTE(review): unpickling can execute arbitrary code -- only load this file from a trusted source.
market_frame = pd.read_pickle("data/market_data.zip")
market_data = {
    "X": market_frame.loc[:, features],
    "y": market_frame.loc[:, targets],
}

# Training window: 2004-01-01 through 2015-12-31 (label slicing is inclusive at both ends)
market_data["X_train"] = market_data["X"].loc['2004-01-01':'2015-12-31', :]
market_data["y_train"] = market_data["y"].loc['2004-01-01':'2015-12-31', :]

# Test window: 2016-01-01 through 2020-12-31
market_data["X_test"] = market_data["X"].loc['2016-01-01':'2020-12-31', :]
market_data["y_test"] = market_data["y"].loc['2016-01-01':'2020-12-31', :]

# Finally restrict the full frames to the combined 2004-2020 study period
market_data["X"] = market_data["X"].loc['2004-01-01':'2020-12-31', :]
market_data["y"] = market_data["y"].loc['2004-01-01':'2020-12-31', :]

# Predefined hold-out split over the training window (for use with GridSearchCV):
#   -1 -> row is never placed in a validation fold, i.e. it is always used for fitting
#    0 -> row belongs to validation fold 0
# Rows dated before 2013-01-01 are fit rows; rows on/after 2013-01-01 are validation rows,
# so the model is validated on data that comes after the data it was fit on.
# (The previous expression assigned the two labels the other way round, which validated on
# the pre-2013 rows and trained on the later ones.)
split = PredefinedSplit(
    test_fold=np.where(market_data["X_train"].index < '2013-01-01', -1, 0)
)

In [None]:
# Build a 3D input: one (time_steps x feature_columns) look-back window per sample
def create_dataset(X, y, time_steps=21):
    """Slice X into overlapping look-back windows paired with the following row of y.

    Window ``i`` contains rows ``i`` .. ``i + time_steps - 1`` of ``X`` and is paired
    with ``y[i + time_steps]``, the row immediately after the window.

    Parameters
    ----------
    X : 2D array supporting ``X[a:b, :]`` slicing (rows are time steps).
    y : array indexed by row position, aligned with ``X``.
    time_steps : int, number of rows in each window (default 21).

    Returns
    -------
    (windows, labels) : two numpy arrays with ``len(X) - time_steps`` entries each;
        both are empty when ``X`` has no more than ``time_steps`` rows.
    """
    window_starts = range(len(X) - time_steps)
    windows = [np.array(X[start:start + time_steps, :]) for start in window_starts]
    labels = [y[start + time_steps] for start in window_starts]
    return np.array(windows), np.array(labels)
# Window the full-period feature and target frames using the default 21-row look-back
X_3D, y_3D = create_dataset(
    market_data["X"].to_numpy(),
    market_data["y"].to_numpy(),
)
X_3D[2]  # show the third window (rows 2..22 of the feature frame)

In [None]:
# Feature row at position 2 + 21: the first row *after* window 2 (which spans rows 2..22),
# i.e. the row position whose target create_dataset pairs with X_3D[2].
market_data["X"].iloc[2 + 21, :].to_numpy()

#### Define the Keras CNN

In [None]:
def build_CNN():
    cnn = Sequential()
    cnn.add(Conv2D(filters=9, kernel_size=1, input_shape=(128, 128, 3)))
