In [27]:
# Minimal LSTM 3-class classifier for weekly returns (−1 / 0 / +1)
# ---------------------------------------------------------------
# Requirements:
# pip install numpy pandas scikit-learn tensorflow==2.* (or compatible)

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

# -------------------
# 1) Load your data
# -------------------
# Assume you already have a DataFrame `df` indexed by weekly dates like the sample you posted.
# Columns used: 'Last Price','ret_1','p_state0_h1','p_state1_h1','p_state2_h1'
# If your DataFrame variable is named differently, just replace `df` below.

df = pd.read_csv("df.csv")  # <-- your DataFrame here

# Ensure time order
df = df.sort_index()

# -------------------
# 2) Build/verify target
# -------------------
# Target definition:
# next_ret = ret_1 shifted by -1 (next period return)
# if next_ret > +0.5%  ->  +1
# if next_ret < -0.5%  ->  -1
# else                 ->   0

NEUTRAL_BAND = 0.007  # 0.5%
if "target" not in df.columns:
    next_ret = df["ret_1"].shift(-1)
    tgt = np.where(next_ret > NEUTRAL_BAND, 1,
          np.where(next_ret < -NEUTRAL_BAND, -1, 0))
    df["target"] = tgt

# Drop the last row (no label after shift) and any rows with NaNs in features/label
feature_cols = [ "Last Price", "ret_1", "p_state0_h1", "p_state1_h1", "p_state2_h1"]
df = df.dropna(subset=feature_cols + ["target"]).copy()

# Optional: quick class distribution check
print("Class counts (−1, 0, +1):", df["target"].value_counts().sort_index().to_dict())

# -------------------
# 3) Train/Val/Test split (chronological 80/10/10)
# -------------------
n = len(df)
i_train_end = int(0.8 * n)
i_val_end   = int(0.9 * n)

df_train = df.iloc[:i_train_end]
df_val   = df.iloc[i_train_end:i_val_end]
df_test  = df.iloc[i_val_end:]

# -------------------
# 4) Make sequences
# -------------------
def make_sequences(block_df, features, lookback=16):
    """Return X (num_seq, lookback, num_feat), y (num_seq,) aligned for next-step label already in block_df['target']."""
    X_list, y_list = [], []
    X_src = block_df[features].values
    y_src = block_df["target"].values
    for t in range(lookback, len(block_df)):
        X_list.append(X_src[t-lookback:t, :])
        y_list.append(y_src[t])  # label for the last timestep
    X = np.array(X_list, dtype=np.float32)
    y = np.array(y_list, dtype=np.int64)
    return X, y

LOOKBACK = 52  # weeks; change later as you like

# Fit scaler on TRAIN ONLY, then transform splits BEFORE building sequences (no leakage)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(df_train[feature_cols])

df_train_s = df_train.copy()
df_val_s   = df_val.copy()
df_test_s  = df_test.copy()

df_train_s[feature_cols] = scaler.transform(df_train[feature_cols])
df_val_s[feature_cols]   = scaler.transform(df_val[feature_cols])
df_test_s[feature_cols]  = scaler.transform(df_test[feature_cols])

X_tr, y_tr = make_sequences(df_train_s, feature_cols, LOOKBACK)
X_va, y_va = make_sequences(df_val_s,   feature_cols, LOOKBACK)
X_te, y_te = make_sequences(df_test_s,  feature_cols, LOOKBACK)

print("Shapes:", "X_tr", X_tr.shape, "X_va", X_va.shape, "X_te", X_te.shape)

# -------------------
# 5) Map targets {−1,0,1} -> {0,1,2} for SparseCategoricalCrossentropy
# -------------------
to_idx = {-1: 0, 0: 1, 1: 2}
y_tr_i = np.vectorize(to_idx.get)(y_tr)
y_va_i = np.vectorize(to_idx.get)(y_va)
y_te_i = np.vectorize(to_idx.get)(y_te)

# -------------------
# 6) Simple LSTM model
# -------------------
tf.random.set_seed(42)
np.random.seed(42)

model = models.Sequential([
    layers.Input(shape=(LOOKBACK, len(feature_cols))),
    layers.LSTM(32),
    layers.Dense(16, activation="relu"),
    layers.Dense(3, activation="softmax")
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"]
)

es = callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

history = model.fit(
    X_tr, y_tr_i,
    validation_data=(X_va, y_va_i),
    epochs=50,
    batch_size=32,
    callbacks=[es],
    verbose=1
)

# -------------------
# 7) Evaluate
# -------------------
probs = model.predict(X_te)
y_hat_i = probs.argmax(axis=1)

# Map back {0,1,2} -> {−1,0,1}
from_idx = {0: -1, 1: 0, 2: 1}
y_hat = np.vectorize(from_idx.get)(y_hat_i)
y_true = y_te  # already in {−1,0,1}

print("\nTest accuracy:", accuracy_score(y_true, y_hat))
print("\nClassification report:\n", classification_report(y_true, y_hat, digits=3))
print("\nConfusion matrix (rows=true, cols=pred classes in order [-1,0,1]):\n",
      confusion_matrix(y_true, y_hat, labels=[-1,0,1]))


Class counts (−1, 0, +1): {-1: 718, 0: 239, 1: 791}
Shapes: X_tr (1346, 52, 5) X_va (123, 52, 5) X_te (123, 52, 5)
Epoch 1/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step - accuracy: 0.4503 - loss: 1.0407 - val_accuracy: 0.4797 - val_loss: 1.0052
Epoch 2/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.4708 - loss: 0.9816 - val_accuracy: 0.4959 - val_loss: 1.0007
Epoch 3/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.4818 - loss: 0.9799 - val_accuracy: 0.5122 - val_loss: 0.9961
Epoch 4/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.4820 - loss: 0.9789 - val_accuracy: 0.5041 - val_loss: 0.9906
Epoch 5/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.4787 - loss: 0.9776 - val_accuracy: 0.4959 - val_loss: 0.9878
Epoch 6/50
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [28]:
print(df['target'].value_counts())

target
 1    791
-1    718
 0    239
Name: count, dtype: int64


In [29]:
from sklearn.linear_model import LogisticRegression

# Use last row of features only (no sequence)
X_last = df[["p_state0_h1","p_state1_h1","p_state2_h1"]].iloc[:-1].values
y_last = df["target"].iloc[1:].values   # shift to next return

logreg = LogisticRegression(max_iter=1000).fit(X_last, y_last)
print("LogReg synthetic acc:", logreg.score(X_last, y_last))

LogReg synthetic acc: 0.4642243846594161


In [30]:
# LSTM with class weights (and optional oversampling for class 0)
# ---------------------------------------------------------------
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

# ====== 0) CONFIG ======
FEATURES     = [ "Last Price", "p_state0_h1"]  #, "p_state0_h1", "p_state1_h1", "p_state2_h1"]
LOOKBACK     = 52
NEUTRAL_BAND = 0.005
EPOCHS       = 60
BATCH_SIZE   = 32
USE_OVERSAMPLING = False   # set True to try oversampling class 0 sequences

# ====== 1) PREP ======
df = df.sort_index().copy()

if "target" not in df.columns:
    next_ret = df["ret_1"].shift(-1)
    df["target"] = np.where(next_ret >  NEUTRAL_BAND,  1,
                    np.where(next_ret < -NEUTRAL_BAND, -1, 0))

df = df.dropna(subset=FEATURES + ["target"]).copy()

print("Class counts in FULL data:", df["target"].value_counts().sort_index().to_dict())

# Chronological 80/10/10 split
n = len(df)
i_train_end = int(0.9 * n)
i_val_end   = int(0.95 * n)
df_train, df_val, df_test = df.iloc[:i_train_end], df.iloc[i_train_end:i_val_end], df.iloc[i_val_end:]

# Scale on TRAIN only
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(df_train[FEATURES])
for d in (df_train, df_val, df_test):
    d[FEATURES] = scaler.transform(d[FEATURES])

# ====== 2) SEQUENCING ======
def make_sequences(block_df, features, lookback=16):
    X_list, y_list = [], []
    X_src = block_df[features].values
    y_src = block_df["target"].values
    for t in range(lookback, len(block_df)):
        X_list.append(X_src[t-lookback:t, :])
        y_list.append(y_src[t])
    return np.array(X_list, dtype=np.float32), np.array(y_list, dtype=np.int64)

X_tr, y_tr = make_sequences(df_train, FEATURES, LOOKBACK)
X_va, y_va = make_sequences(df_val,   FEATURES, LOOKBACK)
X_te, y_te = make_sequences(df_test,  FEATURES, LOOKBACK)

print("Shapes:", "X_tr", X_tr.shape, "X_va", X_va.shape, "X_te", X_te.shape)

# Map {-1,0,1} -> {0,1,2}
to_idx   = {-1:0, 0:1, 1:2}
from_idx = {0:-1, 1:0, 2:1}
y_tr_i = np.vectorize(to_idx.get)(y_tr)
y_va_i = np.vectorize(to_idx.get)(y_va)
y_te_i = np.vectorize(to_idx.get)(y_te)

# ====== 3) (Optional) Oversampling of class 0 sequences in TRAIN ======
if USE_OVERSAMPLING:
    # gather indices by class
    idx_m1 = np.where(y_tr_i == 0)[0]
    idx_0  = np.where(y_tr_i == 1)[0]
    idx_p1 = np.where(y_tr_i == 2)[0]
    # target count ~ min(max of the other classes, 0.8 * median) to avoid huge duplication
    target_0 = max(len(idx_m1), len(idx_p1))
    if len(idx_0) > 0 and target_0 > len(idx_0):
        add = np.random.choice(idx_0, size=target_0 - len(idx_0), replace=True)
        X_tr = np.concatenate([X_tr, X_tr[add]], axis=0)
        y_tr_i = np.concatenate([y_tr_i, y_tr_i[add]], axis=0)
        # shuffle after oversampling
        perm = np.random.permutation(len(y_tr_i))
        X_tr, y_tr_i = X_tr[perm], y_tr_i[perm]
        print(f"Oversampled class 0 sequences from {len(idx_0)} to {target_0}")

# ====== 4) Class weights (computed on TRAIN after optional oversampling) ======
classes = np.array([0,1,2])
class_weights_vec = compute_class_weight(class_weight="balanced", classes=classes, y=y_tr_i)
class_weights = {int(c): float(w) for c, w in zip(classes, class_weights_vec)}
print("Class weights (for indices {0,1,2} ≡ {-1,0,1}):", class_weights)

# ====== 5) Model ======
tf.random.set_seed(42)
np.random.seed(42)

model = models.Sequential([
    layers.Input(shape=(LOOKBACK, len(FEATURES))),
    layers.LSTM(32, dropout=0.1, recurrent_dropout=0.0),
    layers.Dense(16, activation="relu"),
    layers.Dense(3, activation="softmax")
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"]
)

es = callbacks.EarlyStopping(monitor="val_loss", patience=7, restore_best_weights=True)

history = model.fit(
    X_tr, y_tr_i,
    validation_data=(X_va, y_va_i),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[es],
    verbose=1,
    class_weight=class_weights
)

# ====== 6) Evaluate ======
probs   = model.predict(X_te)
y_hat_i = probs.argmax(axis=1)
y_hat   = np.vectorize(from_idx.get)(y_hat_i)

print("\nTest accuracy:", accuracy_score(y_te, y_hat))
print("Macro F1:", f1_score(y_te, y_hat, labels=[-1,0,1], average="macro"))
print("\nClassification report:\n", classification_report(y_te, y_hat, digits=3))
print("Confusion matrix (rows=true, cols=pred [-1,0,1]):\n",
      confusion_matrix(y_te, y_hat, labels=[-1,0,1]))


Class counts in FULL data: {-1: 718, 0: 239, 1: 791}
Shapes: X_tr (1521, 52, 2) X_va (35, 52, 2) X_te (36, 52, 2)
Class weights (for indices {0,1,2} ≡ {-1,0,1}): {0: 0.8203883495145631, 1: 2.485294117647059, 2: 0.7253218884120172}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  d[FEATURES] = scaler.transform(d[FEATURES])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  d[FEATURES] = scaler.transform(d[FEATURES])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  d[FEATURES] = scaler.transform(d[FEATURES])


Epoch 1/60
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 38ms/step - accuracy: 0.2981 - loss: 1.1245 - val_accuracy: 0.1429 - val_loss: 1.1057
Epoch 2/60
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.3917 - loss: 1.1220 - val_accuracy: 0.1429 - val_loss: 1.1100
Epoch 3/60
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.3844 - loss: 1.1192 - val_accuracy: 0.1143 - val_loss: 1.1257
Epoch 4/60
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.3665 - loss: 1.1200 - val_accuracy: 0.1143 - val_loss: 1.1198
Epoch 5/60
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.3612 - loss: 1.1188 - val_accuracy: 0.1143 - val_loss: 1.1191
Epoch 6/60
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.3829 - loss: 1.1151 - val_accuracy: 0.3143 - val_loss: 1.1089
Epoch 7/60
[1m48/48[0m [32m━━━━

In [31]:
# ==============================================================
# Feature-subset search (require predicting ALL 3 classes on TEST)
# ==============================================================

import itertools
from collections import OrderedDict

REQUIRE_ALL_CLASSES = True
ALL_CLASSES_SET = {-1, 0, 1}

CANDIDATE_FEATURES = ["Last Price", "ret_1", "p_state0_h1", "p_state1_h1", "p_state2_h1"]

# Ensure target exists and data sorted
df = df.sort_index().copy()
if "target" not in df.columns:
    next_ret = df["ret_1"].shift(-1)
    df["target"] = np.where(next_ret >  NEUTRAL_BAND,  1,
                    np.where(next_ret < -NEUTRAL_BAND, -1, 0))

# Keep rows usable for all features (safe superset)
df = df.dropna(subset=CANDIDATE_FEATURES + ["target"]).copy()

# Chronological 80/10/10 split (fixed for all subsets)
n = len(df)
i_train_end = int(0.8 * n)
i_val_end   = int(0.9 * n)
df_train, df_val, df_test = df.iloc[:i_train_end].copy(), df.iloc[i_train_end:i_val_end].copy(), df.iloc[i_val_end:].copy()

def make_sequences(block_df, features, lookback=16):
    X_list, y_list = [], []
    X_src = block_df[features].values
    y_src = block_df["target"].values
    for t in range(lookback, len(block_df)):
        X_list.append(X_src[t-lookback:t, :])
        y_list.append(y_src[t])
    return np.array(X_list, dtype=np.float32), np.array(y_list, dtype=np.int64)

to_idx   = {-1:0, 0:1, 1:2}
from_idx = {0:-1, 1:0, 2:1}

def build_model(input_dim):
    tf.random.set_seed(42)
    np.random.seed(42)
    m = models.Sequential([
        layers.Input(shape=(LOOKBACK, input_dim)),
        layers.LSTM(32, dropout=0.1, recurrent_dropout=0.0),
        layers.Dense(16, activation="relu"),
        layers.Dense(3, activation="softmax")
    ])
    m.compile(
        optimizer=tf.keras.optimizers.Adam(1e-3),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"]
    )
    return m

def fit_eval_for_features(feat_subset):
    # 1) Scale per subset (fit on TRAIN only)
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler.fit(df_train[feat_subset])
    dtr = df_train.copy(); dva = df_val.copy(); dte = df_test.copy()
    dtr[feat_subset] = scaler.transform(dtr[feat_subset])
    dva[feat_subset] = scaler.transform(dva[feat_subset])
    dte[feat_subset] = scaler.transform(dte[feat_subset])

    # 2) Make sequences
    X_tr, y_tr = make_sequences(dtr, feat_subset, LOOKBACK)
    X_va, y_va = make_sequences(dva, feat_subset, LOOKBACK)
    X_te, y_te = make_sequences(dte, feat_subset, LOOKBACK)

    # Map labels {-1,0,1} -> {0,1,2}
    y_tr_i = np.vectorize(to_idx.get)(y_tr)
    y_va_i = np.vectorize(to_idx.get)(y_va)
    y_te_i = np.vectorize(to_idx.get)(y_te)

    # 3) Optional oversampling (class 0 index = 1)
    X_train_fin, y_train_fin = X_tr, y_tr_i
    if USE_OVERSAMPLING:
        idx_m1 = np.where(y_tr_i == 0)[0]
        idx_0  = np.where(y_tr_i == 1)[0]
        idx_p1 = np.where(y_tr_i == 2)[0]
        target_0 = max(len(idx_m1), len(idx_p1))
        if len(idx_0) > 0 and target_0 > len(idx_0):
            add = np.random.choice(idx_0, size=target_0 - len(idx_0), replace=True)
            X_train_fin = np.concatenate([X_tr, X_tr[add]], axis=0)
            y_train_fin = np.concatenate([y_tr_i, y_tr_i[add]], axis=0)
            perm = np.random.permutation(len(y_train_fin))
            X_train_fin, y_train_fin = X_train_fin[perm], y_train_fin[perm]

    # 4) Class weights from TRAIN
    classes = np.array([0,1,2])
    cw_vec = compute_class_weight(class_weight="balanced", classes=classes, y=y_train_fin)
    class_weights = {int(c): float(w) for c, w in zip(classes, cw_vec)}

    # 5) Build, fit
    model = build_model(input_dim=len(feat_subset))
    es = callbacks.EarlyStopping(monitor="val_loss", patience=7, restore_best_weights=True)
    model.fit(
        X_train_fin, y_train_fin,
        validation_data=(X_va, y_va_i),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[es],
        verbose=0,
        class_weight=class_weights
    )

    # 6) Evaluate on TEST
    probs   = model.predict(X_te, verbose=0)
    y_hat_i = probs.argmax(axis=1)
    y_hat   = np.vectorize(from_idx.get)(y_hat_i)

    acc  = accuracy_score(y_te, y_hat)
    f1m  = f1_score(y_te, y_hat, labels=[-1,0,1], average="macro")
    rep  = classification_report(y_te, y_hat, digits=3)
    cm   = confusion_matrix(y_te, y_hat, labels=[-1,0,1])

    pred_classes = set(np.unique(y_hat))
    passes = (pred_classes == ALL_CLASSES_SET) if REQUIRE_ALL_CLASSES else True

    return {
        "features": tuple(feat_subset),
        "n_features": len(feat_subset),
        "test_acc": acc,
        "macro_f1": f1m,
        "report": rep,
        "confusion_matrix": cm,
        "pred_classes": pred_classes,
        "passes_all_classes": passes
    }

# Iterate all non-empty subsets
all_subsets = []
for r in range(1, len(CANDIDATE_FEATURES)+1):
    all_subsets += list(itertools.combinations(CANDIDATE_FEATURES, r))

print(f"Evaluating {len(all_subsets)} feature combinations...")
results, skipped = [], 0
for subset in all_subsets:
    res = fit_eval_for_features(list(subset))
    if REQUIRE_ALL_CLASSES and not res["passes_all_classes"]:
        skipped += 1
        missing = ALL_CLASSES_SET - res["pred_classes"]
        print(f"{subset} -> SKIPPED: missing classes {sorted(list(missing))}  "
              f"(predicted: {sorted(list(res['pred_classes']))})")
        continue
    results.append(res)
    print(f"{subset} -> acc={res['test_acc']:.3f}, f1={res['macro_f1']:.3f} | "
          f"pred_classes={sorted(list(res['pred_classes']))}")

print(f"\nCombinations passing the 'all classes predicted' constraint: {len(results)} "
      f"(skipped {skipped})")

if len(results) == 0:
    print("\nNo combinations satisfied the constraint. "
          "Consider widening NEUTRAL_BAND, enabling USE_OVERSAMPLING, "
          "or increasing EPOCHS/hidden units.")
else:
    results_sorted = sorted(results, key=lambda d: (d["test_acc"], d["macro_f1"]), reverse=True)
    best = results_sorted[0]
    print("\n================ BEST COMBINATION (with all classes predicted) ================")
    print("Features:", best["features"])
    print(f"Test accuracy: {best['test_acc']:.3f}")
    print(f"Macro F1: {best['macro_f1']:.3f}")
    print("Predicted classes:", sorted(list(best["pred_classes"])))
    print("\nClassification report:\n", best["report"])
    print("Confusion matrix (rows=true, cols=pred [-1,0,1]):\n", best["confusion_matrix"])

    # Optional: show top-10 table
    tbl = pd.DataFrame([{
        "features": ", ".join(r["features"]),
        "n_features": r["n_features"],
        "test_acc": r["test_acc"],
        "macro_f1": r["macro_f1"],
        "predicted_classes": "".join(str(c) for c in sorted(list(r["pred_classes"])))
    } for r in results_sorted])
    print("\nTop 10 combinations (constraint-satisfying):")
    print(tbl.head(10).to_string(index=False))


Evaluating 31 feature combinations...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


('Last Price',) -> SKIPPED: missing classes [0]  (predicted: [np.int64(-1), np.int64(1)])


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


('ret_1',) -> SKIPPED: missing classes [0]  (predicted: [np.int64(-1), np.int64(1)])
('p_state0_h1',) -> acc=0.309, f1=0.311 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
('p_state1_h1',) -> acc=0.301, f1=0.270 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


('p_state2_h1',) -> SKIPPED: missing classes [-1, 1]  (predicted: [np.int64(0)])
('Last Price', 'ret_1') -> acc=0.455, f1=0.312 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
('Last Price', 'p_state0_h1') -> acc=0.488, f1=0.378 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
('Last Price', 'p_state1_h1') -> acc=0.472, f1=0.334 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


('Last Price', 'p_state2_h1') -> SKIPPED: missing classes [0]  (predicted: [np.int64(-1), np.int64(1)])
('ret_1', 'p_state0_h1') -> acc=0.276, f1=0.277 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
('ret_1', 'p_state1_h1') -> acc=0.317, f1=0.306 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


('ret_1', 'p_state2_h1') -> SKIPPED: missing classes [-1, 1]  (predicted: [np.int64(0)])
('p_state0_h1', 'p_state1_h1') -> acc=0.260, f1=0.236 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
('p_state0_h1', 'p_state2_h1') -> acc=0.268, f1=0.239 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


('p_state1_h1', 'p_state2_h1') -> SKIPPED: missing classes [-1]  (predicted: [np.int64(0), np.int64(1)])
('Last Price', 'ret_1', 'p_state0_h1') -> acc=0.301, f1=0.306 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
('Last Price', 'ret_1', 'p_state1_h1') -> acc=0.512, f1=0.407 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


('Last Price', 'ret_1', 'p_state2_h1') -> SKIPPED: missing classes [0]  (predicted: [np.int64(-1), np.int64(1)])
('Last Price', 'p_state0_h1', 'p_state1_h1') -> acc=0.285, f1=0.267 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
('Last Price', 'p_state0_h1', 'p_state2_h1') -> acc=0.439, f1=0.402 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
('Last Price', 'p_state1_h1', 'p_state2_h1') -> acc=0.333, f1=0.323 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
('ret_1', 'p_state0_h1', 'p_state1_h1') -> acc=0.293, f1=0.290 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


('ret_1', 'p_state0_h1', 'p_state2_h1') -> SKIPPED: missing classes [-1]  (predicted: [np.int64(0), np.int64(1)])
('ret_1', 'p_state1_h1', 'p_state2_h1') -> acc=0.276, f1=0.242 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
('p_state0_h1', 'p_state1_h1', 'p_state2_h1') -> acc=0.268, f1=0.242 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
('Last Price', 'ret_1', 'p_state0_h1', 'p_state1_h1') -> acc=0.333, f1=0.330 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
('Last Price', 'ret_1', 'p_state0_h1', 'p_state2_h1') -> acc=0.423, f1=0.383 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
('Last Price', 'ret_1', 'p_state1_h1', 'p_state2_h1') -> acc=0.455, f1=0.356 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
('Last Price', 'p_state0_h1', 'p_state1_h1', 'p_state2_h1') -> acc=0.252, f1=0.232 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


('ret_1', 'p_state0_h1', 'p_state1_h1', 'p_state2_h1') -> SKIPPED: missing classes [-1]  (predicted: [np.int64(0), np.int64(1)])
('Last Price', 'ret_1', 'p_state0_h1', 'p_state1_h1', 'p_state2_h1') -> acc=0.276, f1=0.248 | pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]

Combinations passing the 'all classes predicted' constraint: 22 (skipped 9)

Features: ('Last Price', 'ret_1', 'p_state1_h1')
Test accuracy: 0.512
Macro F1: 0.407
Predicted classes: [np.int64(-1), np.int64(0), np.int64(1)]

Classification report:
               precision    recall  f1-score   support

          -1      0.524     0.768     0.623        56
           0      0.333     0.118     0.174        17
           1      0.514     0.360     0.424        50

    accuracy                          0.512       123
   macro avg      0.457     0.415     0.407       123
weighted avg      0.494     0.512     0.480       123

Confusion matrix (rows=true, cols=pred [-1,0,1]):
 [[43  1 12]
 [10  2  5]
 [29  3 18]]

Top 

In [32]:
# ============================
# Synthetic 3-class LSTM test
# ============================
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, f1_score
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

np.random.seed(7)
tf.random.set_seed(7)

# -------- Config knobs --------
N              = 2500          # number of weekly points
LOOKBACK       = 16
NEUTRAL_BAND   = 0.005         # ±0.5% neutral
# Regime return params (mean, std) weekly
MU   = np.array([-0.012, 0.0, 0.012])   # down, neutral, up
SIG  = np.array([ 0.02 , 0.005, 0.02 ])
# Sticky transition matrix (rows=sum=1)
Tmat = np.array([
    [0.92, 0.06, 0.02],
    [0.05, 0.90, 0.05],
    [0.02, 0.06, 0.92]
])
# How informative the prob features are:
PROB_NOISE_STD = 0.10  # 0.0 = perfect one-hot; higher = noisier/less reliable
PRICE_START    = 3.00

# -------- 1) Generate hidden states --------
states = np.zeros(N, dtype=int)
states[0] = np.random.choice([0,1,2])  # 0=down,1=neutral,2=up
for t in range(1, N):
    states[t] = np.random.choice([0,1,2], p=Tmat[states[t-1]])

# -------- 2) Generate returns conditional on state --------
# ret_t ~ Normal(mu[state_t], sigma[state_t])
ret = np.random.normal(MU[states], SIG[states])
# Build price (weekly)
price = np.empty(N)
price[0] = PRICE_START
for t in range(1, N):
    price[t] = price[t-1] * (1.0 + ret[t])

# -------- 3) Create noisy "predicted" state probabilities (features) --------
# Start with one-hot for true state and add Gaussian noise, then renormalize
probs = np.zeros((N, 3))
probs[np.arange(N), states] = 1.0
probs = probs + np.random.normal(0, PROB_NOISE_STD, size=probs.shape)
probs = np.clip(probs, 1e-6, None)
probs = probs / probs.sum(axis=1, keepdims=True)

# -------- 4) Assemble DataFrame & labels --------
df_syn = pd.DataFrame({
    "Last Price": price,
    "ret_1": ret,
    "p_state0_h1": probs[:,0],
    "p_state1_h1": probs[:,1],
    "p_state2_h1": probs[:,2],
}, index=pd.date_range("1990-01-07", periods=N, freq="W-SUN"))

# label uses NEXT period return
next_ret = df_syn["ret_1"].shift(-1)
target = np.where(next_ret >  NEUTRAL_BAND,  1,
         np.where(next_ret < -NEUTRAL_BAND, -1, 0))
df_syn["target"] = target
df_syn = df_syn.dropna().copy()  # drop last row with no next_ret

print("Synthetic class counts:", df_syn["target"].value_counts().sort_index().to_dict())

# -------- 5) Chronological 80/10/10 split --------
n = len(df_syn)
i_train_end = int(0.8 * n)
i_val_end   = int(0.9 * n)
df_tr, df_va, df_te = df_syn.iloc[:i_train_end], df_syn.iloc[i_train_end:i_val_end], df_syn.iloc[i_val_end:]

FEATURES = ["Last Price", "ret_1", "p_state0_h1", "p_state1_h1", "p_state2_h1"]

# Scale on TRAIN only
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(df_tr[FEATURES])
for d in (df_tr, df_va, df_te):
    d[FEATURES] = scaler.transform(d[FEATURES])

# -------- 6) Make sequences --------
def make_sequences(block_df, features, lookback=16):
    X_list, y_list = [], []
    X = block_df[features].values
    y = block_df["target"].values
    for t in range(lookback, len(block_df)):
        X_list.append(X[t-lookback:t, :])
        y_list.append(y[t])
    return np.array(X_list, dtype=np.float32), np.array(y_list, dtype=np.int64)

X_tr, y_tr = make_sequences(df_tr, FEATURES, LOOKBACK)
X_va, y_va = make_sequences(df_va, FEATURES, LOOKBACK)
X_te, y_te = make_sequences(df_te, FEATURES, LOOKBACK)

# Map {−1,0,1} ↦ {0,1,2}
to_idx   = {-1:0, 0:1, 1:2}
from_idx = {0:-1, 1:0, 2:1}
y_tr_i = np.vectorize(to_idx.get)(y_tr)
y_va_i = np.vectorize(to_idx.get)(y_va)
y_te_i = np.vectorize(to_idx.get)(y_te)

# -------- 7) Simple LSTM model --------
model = models.Sequential([
    layers.Input(shape=(LOOKBACK, len(FEATURES))),
    layers.LSTM(32),
    layers.Dense(16, activation="relu"),
    layers.Dense(3, activation="softmax")
])
model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=["accuracy"])
es = callbacks.EarlyStopping(monitor="val_loss", patience=6, restore_best_weights=True)

history = model.fit(
    X_tr, y_tr_i,
    validation_data=(X_va, y_va_i),
    epochs=50,
    batch_size=32,
    callbacks=[es],
    verbose=0
)

# -------- 8) Evaluate --------
probs_hat = model.predict(X_te, verbose=0)
y_hat_i = probs_hat.argmax(axis=1)
y_hat = np.vectorize(from_idx.get)(y_hat_i)

print("\nSynthetic Test accuracy:", accuracy_score(y_te, y_hat))
print("Synthetic Macro F1:", f1_score(y_te, y_hat, labels=[-1,0,1], average="macro"))
print("\nClassification report (synthetic):\n", classification_report(y_te, y_hat, digits=3))
print("Confusion matrix (synthetic, rows=true, cols=pred [-1,0,1]):\n",
      confusion_matrix(y_te, y_hat, labels=[-1,0,1]))


Synthetic class counts: {-1: 775, 0: 972, 1: 753}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  d[FEATURES] = scaler.transform(d[FEATURES])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  d[FEATURES] = scaler.transform(d[FEATURES])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  d[FEATURES] = scaler.transform(d[FEATURES])



Synthetic Test accuracy: 0.5170940170940171
Synthetic Macro F1: 0.5183433041483726

Classification report (synthetic):
               precision    recall  f1-score   support

          -1      0.529     0.561     0.544        66
           0      0.536     0.500     0.517        90
           1      0.487     0.500     0.494        78

    accuracy                          0.517       234
   macro avg      0.517     0.520     0.518       234
weighted avg      0.518     0.517     0.517       234

Confusion matrix (synthetic, rows=true, cols=pred [-1,0,1]):
 [[37 17 12]
 [16 45 29]
 [17 22 39]]


In [33]:
# ======================================================
# 0) Imports & helpers
# ======================================================
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

np.random.seed(7)
tf.random.set_seed(7)

def make_sequences(block_df, feature_cols, lookback=16, label_col="target"):
    X_list, y_list = [], []
    X = block_df[feature_cols].values
    y = block_df[label_col].values
    for t in range(lookback, len(block_df)):
        X_list.append(X[t-lookback:t, :])
        y_list.append(y[t])
    return np.array(X_list, dtype=np.float32), np.array(y_list, dtype=np.int64)

def map_labels(y, to_idx={-1:0, 0:1, 1:2}):
    return np.vectorize(to_idx.get)(y)

def inv_map_labels(y_idx, from_idx={0:-1, 1:0, 2:1}):
    return np.vectorize(from_idx.get)(y_idx)

def build_lstm(input_dim, lookback=16, units=32):
    model = models.Sequential([
        layers.Input(shape=(lookback, input_dim)),
        layers.LSTM(units),
        layers.Dense(16, activation="relu"),
        layers.Dense(3, activation="softmax")
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=["accuracy"])
    return model

def fit_eval(X_tr, y_tr_i, X_va, y_va_i, X_te, y_te, use_class_weight=True, epochs=60, batch_size=32):
    # Optional class weights
    cw = None
    if use_class_weight:
        classes = np.array([0,1,2])
        w = compute_class_weight(class_weight="balanced", classes=classes, y=y_tr_i)
        cw = {int(c): float(wi) for c, wi in zip(classes, w)}
        print("Class weights:", cw)

    model = build_lstm(input_dim=X_tr.shape[2], lookback=X_tr.shape[1], units=32)
    es = callbacks.EarlyStopping(monitor="val_loss", patience=7, restore_best_weights=True)
    model.fit(X_tr, y_tr_i,
              validation_data=(X_va, y_va_i),
              epochs=epochs,
              batch_size=batch_size,
              callbacks=[es],
              verbose=0,
              class_weight=cw)

    probs = model.predict(X_te, verbose=0)
    y_hat_i = probs.argmax(axis=1)
    y_hat   = inv_map_labels(y_hat_i)

    print("Test accuracy:", accuracy_score(y_te, y_hat))
    print("Macro F1:", f1_score(y_te, y_hat, labels=[-1,0,1], average="macro"))
    print("\nClassification report:\n", classification_report(y_te, y_hat, digits=3))
    print("Confusion matrix (rows=true, cols=pred [-1,0,1]):\n",
          confusion_matrix(y_te, y_hat, labels=[-1,0,1]))

# ======================================================
# 1) SYNTHETIC CHECK — PERFECT SIGNAL (noise = 0.0)
# ======================================================
def run_synthetic_perfect(NEUTRAL_BAND=0.005, LOOKBACK=16, N=2500):
    # Hidden states (sticky)
    Tmat = np.array([[0.92,0.06,0.02],
                     [0.05,0.90,0.05],
                     [0.02,0.06,0.92]])
    MU  = np.array([-0.012, 0.0, 0.012])
    SIG = np.array([ 0.02 , 0.005, 0.02 ])

    states = np.zeros(N, dtype=int)
    states[0] = np.random.choice([0,1,2])
    for t in range(1, N):
        states[t] = np.random.choice([0,1,2], p=Tmat[states[t-1]])

    ret = np.random.normal(MU[states], SIG[states])
    price = np.empty(N); price[0] = 3.00
    for t in range(1, N):
        price[t] = price[t-1] * (1.0 + ret[t])

    # PERFECT probabilities (one-hot), i.e., PROB_NOISE_STD = 0.0
    probs = np.zeros((N,3)); probs[np.arange(N), states] = 1.0

    df_syn = pd.DataFrame({
        "Last Price": price,
        "ret_1": ret,
        "p_state0_h1": probs[:,0],
        "p_state1_h1": probs[:,1],
        "p_state2_h1": probs[:,2],
    }, index=pd.date_range("1990-01-07", periods=N, freq="W-SUN"))

    next_ret = df_syn["ret_1"].shift(-1)
    target = np.where(next_ret >  NEUTRAL_BAND,  1,
             np.where(next_ret < -NEUTRAL_BAND, -1, 0))
    df_syn["target"] = target
    df_syn = df_syn.dropna()

    FEATURES = ["Last Price","ret_1","p_state0_h1","p_state1_h1","p_state2_h1"]
    n = len(df_syn); i_tr, i_va = int(0.8*n), int(0.9*n)
    df_tr, df_va, df_te = df_syn.iloc[:i_tr], df_syn.iloc[i_tr:i_va], df_syn.iloc[i_va:]

    scaler = MinMaxScaler(feature_range=(-1,1))
    scaler.fit(df_tr[FEATURES])
    for d in (df_tr, df_va, df_te):
        d[FEATURES] = scaler.transform(d[FEATURES])

    X_tr, y_tr = make_sequences(df_tr, FEATURES, LOOKBACK)
    X_va, y_va = make_sequences(df_va, FEATURES, LOOKBACK)
    X_te, y_te = make_sequences(df_te, FEATURES, LOOKBACK)

    y_tr_i, y_va_i = map_labels(y_tr), map_labels(y_va)
    print("Synthetic (perfect signal) class counts:", df_syn["target"].value_counts().sort_index().to_dict())
    fit_eval(X_tr, y_tr_i, X_va, y_va_i, X_te, y_te, use_class_weight=False, epochs=40)

print("\n=== Running synthetic perfect-signal check (expect very high accuracy) ===")
run_synthetic_perfect(NEUTRAL_BAND=0.005, LOOKBACK=16, N=2500)

# ======================================================
# 2) REAL DATA — widen neutral band to ±1% and add lags/roll stats
# ======================================================
# NOTE: You must provide your DataFrame `df` with at least:
# ['Last Price','ret_1','p_state0_h1','p_state1_h1','p_state2_h1'], indexed by weekly date.
# Example:
# df = your_dataframe.sort_index()

# ---- UNCOMMENT and point df to your data before running ----
# df = df.sort_index().copy()

def run_real_with_features(df,
                           NEUTRAL_BAND=0.01,   # widened neutral band ±1%
                           LOOKBACK=16,
                           use_class_weight=True):
    df = df.sort_index().copy()

    # Rebuild target from NEXT return with wider neutral band
    next_ret = df["ret_1"].shift(-1)
    df["target"] = np.where(next_ret >  NEUTRAL_BAND,  1,
                    np.where(next_ret < -NEUTRAL_BAND, -1, 0))

    # ========== Feature engineering (no leakage) ==========
    # Simple lags of weekly return
    df["ret_lag1"] = df["ret_1"].shift(1)
    df["ret_lag2"] = df["ret_1"].shift(2)
    df["ret_lag3"] = df["ret_1"].shift(3)
    # Rolling mean/vol over last 4 and 8 weeks (uses only past values)
    df["ret_ma_4"]  = df["ret_1"].rolling(4, min_periods=4).mean()
    df["ret_vol_4"] = df["ret_1"].rolling(4, min_periods=4).std()
    df["ret_ma_8"]  = df["ret_1"].rolling(8, min_periods=8).mean()
    df["ret_vol_8"] = df["ret_1"].rolling(8, min_periods=8).std()

    FEATURES = [
        "Last Price","ret_1", "p_state0_h1","p_state1_h1","p_state2_h1",
        "ret_lag1","ret_lag2","ret_lag3",
        "ret_ma_4","ret_vol_4","ret_ma_8","ret_vol_8"
    ]

    df = df.dropna(subset=FEATURES + ["target"]).copy()

    print("\nReal data class counts with ±1% neutral band:", df["target"].value_counts().sort_index().to_dict())

    # Chronological 80/10/10
    n = len(df); i_tr, i_va = int(0.8*n), int(0.9*n)
    df_tr, df_va, df_te = df.iloc[:i_tr], df.iloc[i_tr:i_va], df.iloc[i_va:]

    # Scale on train only
    scaler = MinMaxScaler(feature_range=(-1,1))
    scaler.fit(df_tr[FEATURES])
    for d in (df_tr, df_va, df_te):
        d[FEATURES] = scaler.transform(d[FEATURES])

    # Sequences
    X_tr, y_tr = make_sequences(df_tr, FEATURES, LOOKBACK)
    X_va, y_va = make_sequences(df_va, FEATURES, LOOKBACK)
    X_te, y_te = make_sequences(df_te, FEATURES, LOOKBACK)

    # Labels to indices
    y_tr_i, y_va_i = map_labels(y_tr), map_labels(y_va)

    # Train & evaluate
    fit_eval(X_tr, y_tr_i, X_va, y_va_i, X_te, y_te,
             use_class_weight=use_class_weight, epochs=60, batch_size=32)

# ---- UNCOMMENT to run on your real DataFrame `df` ----
print("\n=== Running real-data model with wider neutral band + lag/roll features ===")
run_real_with_features(df, NEUTRAL_BAND=0.01, LOOKBACK=52, use_class_weight=True)



=== Running synthetic perfect-signal check (expect very high accuracy) ===
Synthetic (perfect signal) class counts: {-1: 775, 0: 972, 1: 753}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  d[FEATURES] = scaler.transform(d[FEATURES])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  d[FEATURES] = scaler.transform(d[FEATURES])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  d[FEATURES] = scaler.transform(d[FEATURES])


Test accuracy: 0.5170940170940171
Macro F1: 0.5184062159895805

Classification report:
               precision    recall  f1-score   support

          -1      0.529     0.561     0.544        66
           0      0.529     0.500     0.514        90
           1      0.494     0.500     0.497        78

    accuracy                          0.517       234
   macro avg      0.517     0.520     0.518       234
weighted avg      0.517     0.517     0.517       234

Confusion matrix (rows=true, cols=pred [-1,0,1]):
 [[37 18 11]
 [16 45 29]
 [17 22 39]]

=== Running real-data model with wider neutral band + lag/roll features ===

Real data class counts with ±1% neutral band: {-1: 619, 0: 447, 1: 675}
Class weights: {0: 0.9305555555555556, 1: 1.3576494427558257, 2: 0.8411801632140615}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  d[FEATURES] = scaler.transform(d[FEATURES])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  d[FEATURES] = scaler.transform(d[FEATURES])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  d[FEATURES] = scaler.transform(d[FEATURES])


Test accuracy: 0.34959349593495936
Macro F1: 0.34084967320261433

Classification report:
               precision    recall  f1-score   support

          -1      0.421     0.178     0.250        45
           0      0.292     0.514     0.373        37
           1      0.410     0.390     0.400        41

    accuracy                          0.350       123
   macro avg      0.375     0.361     0.341       123
weighted avg      0.379     0.350     0.337       123

Confusion matrix (rows=true, cols=pred [-1,0,1]):
 [[ 8 25 12]
 [ 7 19 11]
 [ 4 21 16]]


In [34]:
from sklearn.linear_model import LogisticRegression

# Use last row of features only (no sequence)
X_last = df_syn[["p_state0_h1","p_state1_h1","p_state2_h1"]].iloc[:-1].values
y_last = df_syn["target"].iloc[1:].values   # shift to next return

logreg = LogisticRegression(max_iter=1000).fit(X_last, y_last)
print("LogReg synthetic acc:", logreg.score(X_last, y_last))

LogReg synthetic acc: 0.5654261704681873


In [35]:
df

Unnamed: 0,date,Last Price,ret_1,p_state0_h1,p_state1_h1,p_state2_h1,target
1,1992-01-12,2.43,0.016736,0.949237,0.050318,0.000445,1
2,1992-01-19,2.48,0.020576,0.922941,0.075764,0.001295,1
3,1992-01-26,2.55,0.028226,0.781818,0.212314,0.005868,1
4,1992-02-02,2.57,0.007843,0.400583,0.581123,0.018294,0
5,1992-02-09,2.57,0.000000,0.222312,0.753668,0.024020,-1
...,...,...,...,...,...,...,...
1746,2025-06-22,4.06,-0.044706,0.842409,0.154882,0.002709,-1
1747,2025-06-29,4.00,-0.014778,0.938669,0.060752,0.000579,1
1748,2025-07-06,4.10,0.025000,0.955329,0.044461,0.000210,-1
1749,2025-07-13,3.87,-0.056098,0.933028,0.066269,0.000704,1


In [37]:
# =========================
# Expanding CV + Final Test
# =========================

# ---- Config for CV ----
CV_FIRST_TRAIN = 520          # first train span
CV_VAL_SPAN    = 150          # each validation span
CV_STEP        = 150          # slide by 150 each fold
TRAINVAL_END   = pd.Timestamp("2022-12-31")  # CV/trainval cutoff
TEST_START     = pd.Timestamp("2023-01-01")  # test period start

# Require that the model predicts ALL classes in each validation fold (optional)
REQUIRE_ALL_CLASSES_VAL = False   # set True if you want this constraint on VAL
ALL_CLASSES_SET = {-1, 0, 1}

# Small hyperparameter grid for model selection via CV
HPARAM_GRID = [
    {"units": 32, "dense": 16, "dropout": 0.10, "lr": 1e-3},
    {"units": 64, "dense": 16, "dropout": 0.10, "lr": 1e-3},
    {"units": 64, "dense": 32, "dropout": 0.20, "lr": 1e-3},
]

# -----------------------
# 0) Prep & Target label
# -----------------------
# Pick ONE of the two options below.

# --- Option 1: you already have a 'date' column ---
if "date" in df.columns:
    df = df.sort_values("date").copy()
    df["date"] = pd.to_datetime(df["date"])
    df = df.set_index("date")
else:
    # --- Option 2: no date column, you know the start date and weekly freq ---
    # Adjust the start date to your dataset’s true first week
    start = pd.Timestamp("1970-01-04")  # e.g., first Sunday of 1970; change if needed
    df = df.sort_index().copy()
    df.index = pd.date_range(start=start, periods=len(df), freq="W-SUN")

print("Index dtype:", df.index.dtype)
print("Range:", df.index.min(), "→", df.index.max())
if not np.issubdtype(df.index.dtype, np.datetime64):
    df.index = pd.to_datetime(df.index)

# Your original features (adjust if you wish)
FEATURES = ["Last Price","ret_1","p_state0_h1","p_state1_h1","p_state2_h1"]

if "target" not in df.columns:
    next_ret = df["ret_1"].shift(-1)
    df["target"] = np.where(next_ret >  NEUTRAL_BAND,  1,
                    np.where(next_ret < -NEUTRAL_BAND, -1, 0))

# Keep rows usable (for stability across folds we drop NaNs upfront)
df = df.dropna(subset=FEATURES + ["target"]).copy()

# Split into trainval (<=2022-12-31) and test (>=2023-01-01)
mask_trainval = df.index <= TRAINVAL_END
mask_test     = df.index >= TEST_START

df_trainval = df.loc[mask_trainval].copy()
df_test     = df.loc[mask_test].copy()

print(f"Train/Val span: {df_trainval.index.min().date()} .. {df_trainval.index.max().date()} | n={len(df_trainval)}")
print(f"Test span:      {df_test.index.min().date() if not df_test.empty else 'N/A'} .. {df_test.index.max().date() if not df_test.empty else 'N/A'} | n={len(df_test)}")

# -----------------------
# 1) Helpers
# -----------------------
to_idx   = {-1:0, 0:1, 1:2}
from_idx = {0:-1, 1:0, 2:1}

def make_sequences(block_df, features, lookback=16):
    X_list, y_list = [], []
    X_src = block_df[features].values
    y_src = block_df["target"].values
    for t in range(lookback, len(block_df)):
        X_list.append(X_src[t-lookback:t, :])
        y_list.append(y_src[t])
    return np.array(X_list, dtype=np.float32), np.array(y_list, dtype=np.int64)

def build_model(input_dim, units=32, dense=16, dropout=0.1, lr=1e-3):
    tf.random.set_seed(42); np.random.seed(42)
    model = models.Sequential([
        layers.Input(shape=(LOOKBACK, input_dim)),
        layers.LSTM(units, dropout=dropout, recurrent_dropout=0.0),
        layers.Dense(dense, activation="relu"),
        layers.Dense(3, activation="softmax")
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"]
    )
    return model

def fit_one_split(d_tr, d_va, features, hparams):
    # Scale on TRAIN only
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler.fit(d_tr[features])
    dtr = d_tr.copy(); dva = d_va.copy()
    dtr[features] = scaler.transform(dtr[features])
    dva[features] = scaler.transform(dva[features])

    # Sequences
    X_tr, y_tr = make_sequences(dtr, features, LOOKBACK)
    X_va, y_va = make_sequences(dva, features, LOOKBACK)

    # Label mapping
    y_tr_i = np.vectorize(to_idx.get)(y_tr)
    y_va_i = np.vectorize(to_idx.get)(y_va)

    # Optional oversampling of neutral (index=1)
    X_train_fin, y_train_fin = X_tr, y_tr_i
    if USE_OVERSAMPLING:
        idx_m1 = np.where(y_tr_i == 0)[0]
        idx_0  = np.where(y_tr_i == 1)[0]
        idx_p1 = np.where(y_tr_i == 2)[0]
        target_0 = max(len(idx_m1), len(idx_p1))
        if len(idx_0) > 0 and target_0 > len(idx_0):
            add = np.random.choice(idx_0, size=target_0 - len(idx_0), replace=True)
            X_train_fin = np.concatenate([X_tr, X_tr[add]], axis=0)
            y_train_fin = np.concatenate([y_tr_i, y_tr_i[add]], axis=0)
            perm = np.random.permutation(len(y_train_fin))
            X_train_fin, y_train_fin = X_train_fin[perm], y_train_fin[perm]

    # Class weights
    classes = np.array([0,1,2])
    cw_vec = compute_class_weight(class_weight="balanced", classes=classes, y=y_train_fin)
    class_weights = {int(c): float(w) for c, w in zip(classes, cw_vec)}

    # Build & fit
    model = build_model(len(features), **hparams)
    es = callbacks.EarlyStopping(monitor="val_loss", patience=7, restore_best_weights=True)
    model.fit(
        X_train_fin, y_train_fin,
        validation_data=(X_va, y_va_i),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=[es],
        verbose=0,
        class_weight=class_weights
    )

    # Validate
    probs = model.predict(X_va, verbose=0)
    yhat_i = probs.argmax(axis=1)
    yhat   = np.vectorize(from_idx.get)(yhat_i)

    acc = accuracy_score(y_va, yhat)
    f1m = f1_score(y_va, yhat, labels=[-1,0,1], average="macro")
    rep = classification_report(y_va, yhat, digits=3)
    cm  = confusion_matrix(y_va, yhat, labels=[-1,0,1])

    return {
        "model": model,
        "scaler": scaler,
        "val_acc": acc,
        "val_f1m": f1m,
        "val_report": rep,
        "val_cm": cm,
        "pred_classes": set(np.unique(yhat))
    }

def expanding_cv(df_tv, features, hparams):
    """Return per-fold metrics and mean scores for these hparams."""
    n = len(df_tv)
    idx = np.arange(n)
    # fold k: train [0:train_end), val [train_end:val_end)
    train_end = CV_FIRST_TRAIN
    folds = []
    while True:
        val_end = train_end + CV_VAL_SPAN
        if val_end > n:
            break
        d_tr = df_tv.iloc[:train_end]
        d_va = df_tv.iloc[train_end:val_end]
        res  = fit_one_split(d_tr, d_va, features, hparams)

        if REQUIRE_ALL_CLASSES_VAL and res["pred_classes"] != ALL_CLASSES_SET:
            folds.append({**res, "skip_reason": f"missing classes {sorted(list(ALL_CLASSES_SET - res['pred_classes']))}"})
        else:
            folds.append(res)

        # expand train, slide val
        train_end = val_end if CV_STEP is None else (train_end + CV_STEP)

        # Stop if next val_end would exceed
        if train_end + CV_VAL_SPAN > n:
            break

    # Aggregate (only non-skipped folds)
    valid_folds = [f for f in folds if "skip_reason" not in f]
    if len(valid_folds) == 0:
        return {"folds": folds, "mean_val_f1m": -np.inf, "mean_val_acc": -np.inf}

    mean_f1 = float(np.mean([f["val_f1m"] for f in valid_folds]))
    mean_acc = float(np.mean([f["val_acc"] for f in valid_folds]))
    return {"folds": folds, "mean_val_f1m": mean_f1, "mean_val_acc": mean_acc}

# -----------------------
# 2) Run CV over grid
# -----------------------
cv_results = []
for hp in HPARAM_GRID:
    cv = expanding_cv(df_trainval, FEATURES, hp)
    cv_results.append({"hparams": hp, **cv})
    print(f"HP {hp} -> mean Val F1={cv['mean_val_f1m']:.3f}, mean Val Acc={cv['mean_val_acc']:.3f} | folds={len(cv['folds'])}")

# Pick best by mean F1 (then Acc)
cv_sorted = sorted(cv_results, key=lambda d: (d["mean_val_f1m"], d["mean_val_acc"]), reverse=True)
best_cv = cv_sorted[0]
print("\n=== BEST HPARAMS FROM CV ===")
print(best_cv["hparams"])
print(f"Mean Val F1={best_cv['mean_val_f1m']:.3f}, Mean Val Acc={best_cv['mean_val_acc']:.3f}")

# Optional: show per-fold summary for best
for i, f in enumerate(best_cv["folds"], 1):
    if "skip_reason" in f:
        print(f"Fold {i}: SKIPPED ({f['skip_reason']})")
    else:
        print(f"Fold {i}: Val F1={f['val_f1m']:.3f}, Val Acc={f['val_acc']:.3f}, pred_classes={sorted(list(f['pred_classes']))}")

# -----------------------
# 3) Retrain on ALL train+val (<=2022-12-31) with best hparams
# -----------------------
best_hp = best_cv["hparams"]
# Scale on full trainval
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(df_trainval[FEATURES])
dtrv = df_trainval.copy()
dte  = df_test.copy()
dtrv[FEATURES] = scaler.transform(dtrv[FEATURES])
if not dte.empty:
    dte[FEATURES] = scaler.transform(dte[FEATURES])

# Sequences
X_trv, y_trv = make_sequences(dtrv, FEATURES, LOOKBACK)
X_te,  y_te  = (np.empty((0, LOOKBACK, len(FEATURES)), dtype=np.float32), np.array([], dtype=np.int64))
if not df_test.empty:
    X_te, y_te = make_sequences(dte, FEATURES, LOOKBACK)

# Label mapping
y_trv_i = np.vectorize(to_idx.get)(y_trv)
y_te_i  = np.vectorize(to_idx.get)(y_te) if len(y_te) else y_te

# Optional oversampling on trainval
X_train_fin, y_train_fin = X_trv, y_trv_i
if USE_OVERSAMPLING:
    idx_m1 = np.where(y_trv_i == 0)[0]
    idx_0  = np.where(y_trv_i == 1)[0]
    idx_p1 = np.where(y_trv_i == 2)[0]
    target_0 = max(len(idx_m1), len(idx_p1))
    if len(idx_0) > 0 and target_0 > len(idx_0):
        add = np.random.choice(idx_0, size=target_0 - len(idx_0), replace=True)
        X_train_fin = np.concatenate([X_trv, X_trv[add]], axis=0)
        y_train_fin = np.concatenate([y_trv_i, y_trv_i[add]], axis=0)
        perm = np.random.permutation(len(y_train_fin))
        X_train_fin, y_train_fin = X_train_fin[perm], y_train_fin[perm]

# Class weights on full trainval
classes = np.array([0,1,2])
cw_vec = compute_class_weight(class_weight="balanced", classes=classes, y=y_train_fin)
class_weights = {int(c): float(w) for c, w in zip(classes, cw_vec)}

# Train final model
final_model = build_model(len(FEATURES), **best_hp)
es = callbacks.EarlyStopping(monitor="val_loss", patience=7, restore_best_weights=True)

# Hold out a tiny slice of trainval tail for monitoring to keep ES meaningful
val_tail = max(LOOKBACK + 50, 300)  # heuristic
split = max(len(X_train_fin) - val_tail, LOOKBACK + 10)
X_tr_final, y_tr_final = X_train_fin[:split], y_train_fin[:split]
X_va_final, y_va_final = X_train_fin[split:], y_train_fin[split:]

final_model.fit(
    X_tr_final, y_tr_final,
    validation_data=(X_va_final, y_va_final) if len(X_va_final) else None,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[es],
    verbose=0,
    class_weight=class_weights
)

# -----------------------
# 4) Test evaluation (2023-01-01 .. end)
# -----------------------
if len(y_te):
    probs = final_model.predict(X_te, verbose=0)
    yhat_i = probs.argmax(axis=1)
    yhat   = np.vectorize(from_idx.get)(yhat_i)

    test_acc = accuracy_score(y_te, yhat)
    test_f1m = f1_score(y_te, yhat, labels=[-1,0,1], average="macro")
    rep      = classification_report(y_te, yhat, digits=3)
    cm       = confusion_matrix(y_te, yhat, labels=[-1,0,1])
    predc    = sorted(list(np.unique(yhat)))

    print("\n=========== FINAL TEST RESULTS (2023–) ===========")
    print(f"Acc={test_acc:.3f}, Macro-F1={test_f1m:.3f}, Predicted classes={predc}")
    print("\nClassification report:\n", rep)
    print("Confusion matrix (rows=true, cols=pred [-1,0,1]):\n", cm)
else:
    print("\nNo test samples available after sequencing. Check TEST_START or data length.")


Index dtype: datetime64[ns]
Range: 1992-01-12 00:00:00 → 2025-07-20 00:00:00
Train/Val span: 1992-01-12 .. 2022-12-25 | n=1614
Test span:      2023-01-01 .. 2025-07-20 | n=134


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


HP {'units': 32, 'dense': 16, 'dropout': 0.1, 'lr': 0.001} -> mean Val F1=0.334, mean Val Acc=0.412 | folds=7


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


HP {'units': 64, 'dense': 16, 'dropout': 0.1, 'lr': 0.001} -> mean Val F1=0.303, mean Val Acc=0.407 | folds=7
HP {'units': 64, 'dense': 32, 'dropout': 0.2, 'lr': 0.001} -> mean Val F1=0.330, mean Val Acc=0.391 | folds=7

=== BEST HPARAMS FROM CV ===
{'units': 32, 'dense': 16, 'dropout': 0.1, 'lr': 0.001}
Mean Val F1=0.334, Mean Val Acc=0.412
Fold 1: Val F1=0.293, Val Acc=0.313, pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
Fold 2: Val F1=0.405, Val Acc=0.455, pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
Fold 3: Val F1=0.411, Val Acc=0.493, pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
Fold 4: Val F1=0.281, Val Acc=0.448, pred_classes=[np.int64(-1), np.int64(1)]
Fold 5: Val F1=0.250, Val Acc=0.291, pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
Fold 6: Val F1=0.366, Val Acc=0.433, pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]
Fold 7: Val F1=0.329, Val Acc=0.448, pred_classes=[np.int64(-1), np.int64(0), np.int64(1)]

Acc=0.314, Macro-F1=0.283, Pred