In [47]:
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from sklearn.metrics import (
    f1_score, roc_auc_score, average_precision_score, precision_recall_curve,
    accuracy_score, classification_report
)


In [48]:

# ------------------- helpers -------------------
def purged_splits(n_samples, n_splits=5, embargo=50):
    fold_sizes = np.full(n_splits, n_samples // n_splits, dtype=int)
    fold_sizes[: n_samples % n_splits] += 1
    starts = np.cumsum(fold_sizes) - fold_sizes
    for i in range(n_splits):
        test_start = starts[i]
        test_end   = test_start + fold_sizes[i]
        train_end  = max(0, test_start - embargo)
        train_idx  = np.arange(0, train_end)
        test_idx   = np.arange(test_start, test_end)
        if len(train_idx) == 0:
            continue
        yield train_idx, test_idx

def coerce_numeric_df(X):
    X = X.copy()
    for c in X.columns:
        if not np.issubdtype(X[c].dtype, np.number):
            X[c] = pd.to_numeric(X[c], errors="coerce")
    return X.replace([np.inf, -np.inf], np.nan).fillna(0.0)



In [None]:
df = pd.read_csv('clean_dataset.csv')
df.columns

# ------------------- 1) build 3 targets (no leakage) -------------------
df = df.copy()

H = 20  # lookahead horizon
coin_specs = {
    "btc": "bitcoin_return",
    "eth": "ethereum_return",
    "ltc": "litecoin_return",
}

for c, ret_col in coin_specs.items():
    fut = df[ret_col].shift(-H).rolling(H).sum()
    df[f"y_{c}"] = (fut > 0).astype(int)



In [50]:
# ------------------- 2) make lagged features -------------------
# Use all columns except the labels as features, then lag by +1 so features are strictly past
label_cols = [f"y_{c}" for c in coin_specs.keys()]
X_lagged = df.drop(columns=label_cols, errors="ignore").shift(1)

# Build final modeling frame and drop NaNs from rolling/shift
data = pd.concat([X_lagged, df[label_cols]], axis=1).dropna().reset_index(drop=True)
X = coerce_numeric_df(data.drop(columns=label_cols))

y_dict = {c: data[f"y_{c}"].astype(int).values for c in coin_specs.keys()}



In [51]:
# ------------------- 3) train 3 models with walk-forward + embargo -------------------
results = {}
models  = {}
thresholds = {}

for coin in ["btc", "eth", "ltc"]:
    y = y_dict[coin]
    n = len(X)

    oof_proba = np.zeros(n, dtype=float)
    oof_pred  = np.zeros(n, dtype=int)

    for tr_idx, te_idx in purged_splits(n, n_splits=5, embargo=50):
        # validation is the tail of the train slice
        val_portion = max(1, int(0.2 * len(tr_idx)))
        if len(tr_idx) <= val_portion:
            continue
        val_idx = tr_idx[-val_portion:]
        tr_core = tr_idx[:-val_portion]

        X_tr, y_tr = X.iloc[tr_core], y[tr_core]
        X_val, y_val = X.iloc[val_idx], y[val_idx]
        X_te,  y_te  = X.iloc[te_idx],  y[te_idx]

        pos = (y_tr == 1).sum(); neg = (y_tr == 0).sum()
        spw = max(1.0, neg / max(1, pos))

        clf = XGBClassifier(
            tree_method="hist",
            max_depth=6,
            learning_rate=0.05,
            n_estimators=2000,   # early stop governs
            subsample=0.9,
            colsample_bytree=0.9,
            min_child_weight=3,
            gamma=1.0,
            reg_lambda=2.0,
            scale_pos_weight=spw,
            eval_metric="auc",
            random_state=42
        )

        clf.fit(
            X_tr, y_tr,
            eval_set=[(X_tr, y_tr), (X_val, y_val)],
            verbose=False
            # early_stopping_rounds=100
        )

        # choose threshold on the validation tail (skip if single-class val)
        proba_val = clf.predict_proba(X_val)[:, 1]
        best_t = 0.5
        if y_val.min() != y_val.max():
            prec, rec, th = precision_recall_curve(y_val, proba_val)
            best_f1 = -1.0
            for t in th:
                f1 = f1_score(y_val, (proba_val >= t).astype(int))
                if f1 > best_f1:
                    best_f1, best_t = f1, t

        proba_te = clf.predict_proba(X_te)[:, 1]
        oof_proba[te_idx] = proba_te
        oof_pred[te_idx]  = (proba_te >= best_t).astype(int)

    # store per-coin metrics
    acc = accuracy_score(y, oof_pred)
    auc = roc_auc_score(y, oof_proba)
    ap  = average_precision_score(y, oof_proba)
    rep = classification_report(y, oof_pred, digits=3)

    results[coin] = {"accuracy": acc, "roc_auc": auc, "avg_precision": ap, "report": rep}
    thresholds[coin] = "per-fold (selected on validation)"  # note: threshold varies by fold



  score: str = model.eval_set(evals, epoch, self.metric, self._output_margin)
  score: str = model.eval_set(evals, epoch, self.metric, self._output_margin)


In [52]:
# ------------------- 4) show results -------------------
for coin in ["btc", "eth", "ltc"]:
    print(f"\n=== {coin.upper()} ===")
    print(f"OOF Accuracy: {results[coin]['accuracy']:.3f}")
    print(f"OOF ROC-AUC : {results[coin]['roc_auc']:.3f}")
    print(f"OOF AP      : {results[coin]['avg_precision']:.3f}")
    print(results[coin]["report"])



=== BTC ===
OOF Accuracy: 0.611
OOF ROC-AUC : 0.622
OOF AP      : 0.511
              precision    recall  f1-score   support

           0      0.771     0.423     0.546       383
           1      0.541     0.844     0.659       308

    accuracy                          0.611       691
   macro avg      0.656     0.634     0.603       691
weighted avg      0.669     0.611     0.597       691


=== ETH ===
OOF Accuracy: 0.621
OOF ROC-AUC : 0.675
OOF AP      : 0.557
              precision    recall  f1-score   support

           0      0.812     0.438     0.569       395
           1      0.536     0.865     0.661       296

    accuracy                          0.621       691
   macro avg      0.674     0.651     0.615       691
weighted avg      0.694     0.621     0.609       691


=== LTC ===
OOF Accuracy: 0.580
OOF ROC-AUC : 0.557
OOF AP      : 0.571
              precision    recall  f1-score   support

           0      0.615     0.324     0.425       330
           1      