### IMPORT DATA

In [2]:
# %%
import os
import pandas as pd
import numpy as np

DATA_DIR = "../data"

# Read train/test feature files (keeping Date temporarily to filter)
X_train = pd.read_csv(os.path.join(DATA_DIR, "X_train.csv"))
X_test  = pd.read_csv(os.path.join(DATA_DIR, "X_test.csv"))
y_train = pd.read_csv(os.path.join(DATA_DIR, "y_train.csv"))["0"]
y_test  = pd.read_csv(os.path.join(DATA_DIR, "y_test.csv"))["0"]

# Now drop Date column (model shouldn't see raw dates)
X_train = X_train.drop(columns=["Date"])
X_test  = X_test.drop(columns=["Date"])

print("After shifting and dropping 1990 rows:")
print("Shapes:", X_train.shape, X_test.shape, y_train.shape, y_test.shape)
print("Example columns:", list(X_train.columns)[:10])
print("Unique y_train values:", np.unique(y_train))

After shifting and dropping 1990 rows:
Shapes: (4034, 77) (3018, 77) (4034,) (3018,)
Example columns: ['VIX_Close', 'ADS_Index', 'RECBARS', 'fft', '3mth', '10yr', '30yr', 'Aaa', 'Baa', 'term_spread']
Unique y_train values: [-1.  0.  1.]


In [3]:
# drop cols

# %%
# Select only the 6 predictors used for ANN, each with 3 lags → 18 features total
reduced6 = ["VIX_Close", "MACD", "term_spread", "corp_spread", "majcurr_ret", "DAX"]

# Build the expected lag feature names (lag1, lag2, lag3)
use_cols = [f"{c}_lag{l}" for c in reduced6 for l in (1, 2, 3)]

# Keep only columns that actually exist in our dataset
use_cols = [c for c in use_cols if c in X_train.columns]

# Restrict to this reduced feature set
X_train = X_train[use_cols].copy()
X_test  = X_test[use_cols].copy()

print("After selecting ANN feature set (6 predictors × 3 lags):")
print("X_train shape:", X_train.shape)
print("Example features:", use_cols[:12])

After selecting ANN feature set (6 predictors × 3 lags):
X_train shape: (4034, 18)
Example features: ['VIX_Close_lag1', 'VIX_Close_lag2', 'VIX_Close_lag3', 'MACD_lag1', 'MACD_lag2', 'MACD_lag3', 'term_spread_lag1', 'term_spread_lag2', 'term_spread_lag3', 'corp_spread_lag1', 'corp_spread_lag2', 'corp_spread_lag3']


### RUN ANN

In [6]:
# %% Final ANN (single run, paper’s best hyperparams) — time-safe validation (no leakage)
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, regularizers, optimizers, callbacks, models
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)

# 1️⃣  Select reduced 6 × 3 lags = 18 features
reduced6 = ["VIX_Close", "MACD", "term_spread", "corp_spread", "majcurr_ret", "DAX"]
feat_cols = [f"{c}_lag{l}" for c in reduced6 for l in (1, 2, 3)]
feat_cols = [c for c in feat_cols if c in X_train.columns]

Xtr = X_train[feat_cols].copy()
Xte = X_test[feat_cols].copy()

# Map {-1,0,1} → {0,1,2}
y_map = {-1: 0, 0: 1, 1: 2}
ytr = y_train.map(y_map).astype(int).values
yte = y_test.map(y_map).astype(int).values

# 2️⃣  Scale features (fit on train only)
scaler = StandardScaler()
Xtr_s = scaler.fit_transform(Xtr)
Xte_s = scaler.transform(Xte)

# 3️⃣  Chronological validation split (last 10% of training), no shuffling
n = len(Xtr_s)
val_size = max(1, int(0.10 * n))
X_tr_s, X_va_s = Xtr_s[: n - val_size], Xtr_s[n - val_size :]
y_tr,   y_va   = ytr   [: n - val_size], ytr   [n - val_size :]

# 4️⃣  Build the model with paper’s settings
model = models.Sequential([
    layers.Input(shape=(Xtr_s.shape[1],)),
    layers.Dense(5, activation="tanh", kernel_regularizer=regularizers.l2(0.8)),
    layers.Dense(3, activation="softmax")
])
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

# 5️⃣  Train once on full training data (time-safe val), no shuffle
es = callbacks.EarlyStopping(
    monitor="val_accuracy", patience=20, restore_best_weights=True, verbose=1
)
history = model.fit(
    X_tr_s, y_tr,
    validation_data=(X_va_s, y_va),
    epochs=500,
    batch_size=64,
    shuffle=False,   # critical for time series
    verbose=1,
    callbacks=[es]
)

# 6️⃣  Evaluate
train_acc = model.evaluate(Xtr_s, ytr, verbose=0)[1]
test_preds = model.predict(Xte_s, verbose=0).argmax(axis=1)
test_acc = accuracy_score(yte, test_preds)

print(f"In-sample accuracy:  {train_acc:.4f}")
print(f"Out-of-sample accuracy: {test_acc:.4f}")

Epoch 1/500
 1/57 [..............................] - ETA: 18s - loss: 7.3756 - accuracy: 0.2969

2025-10-16 18:13:15.856695: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2025-10-16 18:13:15.881893: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node AssignAddVariableOp_2.


Epoch 2/500
 1/57 [..............................] - ETA: 0s - loss: 5.5657 - accuracy: 0.2812

2025-10-16 18:13:16.714282: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 34: early stopping


2025-10-16 18:13:45.022222: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


In-sample accuracy:  0.5248
Out-of-sample accuracy: 0.4559
