In [1]:
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from sklearn.metrics import (
    f1_score, roc_auc_score, average_precision_score, precision_recall_curve,
    accuracy_score, classification_report
)


In [2]:

# ------------------- helpers -------------------
def purged_splits(n_samples, n_splits=5, embargo=50):
    fold_sizes = np.full(n_splits, n_samples // n_splits, dtype=int)
    fold_sizes[: n_samples % n_splits] += 1
    starts = np.cumsum(fold_sizes) - fold_sizes
    for i in range(n_splits):
        test_start = starts[i]
        test_end   = test_start + fold_sizes[i]
        train_end  = max(0, test_start - embargo)
        train_idx  = np.arange(0, train_end)
        test_idx   = np.arange(test_start, test_end)
        if len(train_idx) == 0:
            continue
        yield train_idx, test_idx

def coerce_numeric_df(X):
    X = X.copy()
    for c in X.columns:
        if not np.issubdtype(X[c].dtype, np.number):
            X[c] = pd.to_numeric(X[c], errors="coerce")
    return X.replace([np.inf, -np.inf], np.nan).fillna(0.0)



In [4]:
df = pd.read_csv('data/clean_dataset.csv')
df.columns

# ------------------- 1) build 3 targets (no leakage) -------------------
df = df.copy()

H = 20  # lookahead horizon
coin_specs = {
    "btc": "bitcoin_return",
    "eth": "ethereum_return",
    "ltc": "litecoin_return",
}

for c, ret_col in coin_specs.items():
    fut = df[ret_col].shift(-H).rolling(H).sum()
    df[f"y_{c}"] = (fut > 0).astype(int)



In [5]:
# ------------------- 2) make lagged features -------------------
# Use all columns except the labels as features, then lag by +1 so features are strictly past
label_cols = [f"y_{c}" for c in coin_specs.keys()]
X_lagged = df.drop(columns=label_cols, errors="ignore").shift(1)

# Build final modeling frame and drop NaNs from rolling/shift
data = pd.concat([X_lagged, df[label_cols]], axis=1).dropna().reset_index(drop=True)
X = coerce_numeric_df(data.drop(columns=label_cols))

y_dict = {c: data[f"y_{c}"].astype(int).values for c in coin_specs.keys()}



In [17]:
# ------------------- 3) train 3 models with walk-forward + embargo -------------------
results = {}
models  = {}
thresholds = {}

for coin in ["btc", "eth", "ltc"]:
    y = y_dict[coin]
    n = len(X)

    oof_proba = np.zeros(n, dtype=float)
    oof_pred  = np.zeros(n, dtype=int)

    for tr_idx, te_idx in purged_splits(n, n_splits=5, embargo=50):
        # validation is the tail of the train slice
        val_portion = max(1, int(0.2 * len(tr_idx)))
        if len(tr_idx) <= val_portion:
            continue
        val_idx = tr_idx[-val_portion:]
        tr_core = tr_idx[:-val_portion]

        X_tr, y_tr = X.iloc[tr_core], y[tr_core]
        X_val, y_val = X.iloc[val_idx], y[val_idx]
        X_te,  y_te  = X.iloc[te_idx],  y[te_idx]

        pos = (y_tr == 1).sum(); neg = (y_tr == 0).sum()
        spw = max(1.0, neg / max(1, pos))

        clf = XGBClassifier(
            tree_method="hist",
            max_depth=6,
            learning_rate=0.05,
            n_estimators=2000,   # early stop governs
            subsample=0.9,
            colsample_bytree=0.9,
            min_child_weight=3,
            gamma=1.0,
            reg_lambda=2.0,
            scale_pos_weight=spw,
            eval_metric="auc",
            random_state=42
        )

        clf.fit(
            X_tr, y_tr,
            eval_set=[(X_tr, y_tr), (X_val, y_val)],
            verbose=False
            # early_stopping_rounds=100
        )

        # choose threshold on the validation tail (skip if single-class val)
        proba_val = clf.predict_proba(X_val)[:, 1]
        best_t = 0.5
        if y_val.min() != y_val.max():
            prec, rec, th = precision_recall_curve(y_val, proba_val)
            best_f1 = -1.0
            for t in th:
                f1 = f1_score(y_val, (proba_val >= t).astype(int))
                if f1 > best_f1:
                    best_f1, best_t = f1, t

        proba_te = clf.predict_proba(X_te)[:, 1]
        oof_proba[te_idx] = proba_te
        oof_pred[te_idx]  = (proba_te >= best_t).astype(int)
    # choose a single global threshold from OOF predictions
    # make sure oof_proba has been filled for all test folds
    try:
        prec, rec, th = precision_recall_curve(y, oof_proba)
        # default if degenerate
        best_t = 0.5
        best_f1 = -1.0
        for t in th:
            f1 = f1_score(y, (oof_proba >= t).astype(int))
            if f1 > best_f1:
                best_f1, best_t = f1, t
    except Exception:
        best_t = 0.5

    # fit one final model per coin on ALL data
    # you can keep the same hyperparams; optionally do early stopping on a tail split
    val_portion = max(1, int(0.2 * len(X)))
    val_idx = np.arange(len(X) - val_portion, len(X))
    tr_idx = np.arange(0, len(X) - val_portion)

    X_tr, y_tr = X.iloc[tr_idx], y[tr_idx]
    X_val, y_val = X.iloc[val_idx], y[val_idx]

    pos = (y_tr == 1).sum(); neg = (y_tr == 0).sum()
    spw = max(1.0, neg / max(1, pos))

    final_clf = XGBClassifier(
        tree_method="hist",
        max_depth=6,
        learning_rate=0.05,
        n_estimators=2000,     # use early stopping
        subsample=0.9,
        colsample_bytree=0.9,
        min_child_weight=3,
        gamma=1.0,
        reg_lambda=2.0,
        scale_pos_weight=spw,
        eval_metric="auc",
        random_state=42
    )

    final_clf.fit(
        X_tr, y_tr,
        eval_set=[(X_tr, y_tr), (X_val, y_val)],
        verbose=False
        # early_stopping_rounds=100
    )

    models[coin] = final_clf
    thresholds[coin] = float(best_t)

    # store per-coin metrics (you already had this)
    acc = accuracy_score(y, oof_pred)
    auc = roc_auc_score(y, oof_proba)
    ap  = average_precision_score(y, oof_proba)
    rep = classification_report(y, oof_pred, digits=3)

    results[coin] = {"accuracy": acc, "roc_auc": auc, "avg_precision": ap, "report": rep}

print(results)
    

  score: str = model.eval_set(evals, epoch, self.metric, self._output_margin)
  score: str = model.eval_set(evals, epoch, self.metric, self._output_margin)


{'btc': {'accuracy': 0.6107091172214182, 'roc_auc': 0.6223296938048897, 'avg_precision': 0.5114024987231941, 'report': '              precision    recall  f1-score   support\n\n           0      0.771     0.423     0.546       383\n           1      0.541     0.844     0.659       308\n\n    accuracy                          0.611       691\n   macro avg      0.656     0.634     0.603       691\nweighted avg      0.669     0.611     0.597       691\n'}, 'eth': {'accuracy': 0.6208393632416788, 'roc_auc': 0.6751154635648307, 'avg_precision': 0.5566833968826717, 'report': '              precision    recall  f1-score   support\n\n           0      0.812     0.438     0.569       395\n           1      0.536     0.865     0.661       296\n\n    accuracy                          0.621       691\n   macro avg      0.674     0.651     0.615       691\nweighted avg      0.694     0.621     0.609       691\n'}, 'ltc': {'accuracy': 0.5803183791606368, 'roc_auc': 0.5568874338957441, 'avg_precision

In [18]:
import joblib, os
os.makedirs("models", exist_ok=True)

joblib.dump(models["btc"], "models/btc_xgb.joblib")
joblib.dump(models["eth"], "models/eth_xgb.joblib")
joblib.dump(models["ltc"], "models/ltc_xgb.joblib")

joblib.dump(list(X.columns), "models/feature_columns.joblib")
joblib.dump(thresholds, "models/thresholds.joblib")
joblib.dump(results, "models/training_results.joblib")


['models/training_results.joblib']