In [None]:
# ============ IMPORTS ============
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature
import quantstats as qs
import numpy as np
from src.calculate_ml_metrics import calculate_ml_metrics
from src.calculate_trade_metrics import calculate_trade_metrics

# ============ LOAD LABELLED DATA ============
# The first CSV column becomes the index and is parsed as dates.
labelled_df = pd.read_csv("data_cache/labelled_df.csv", index_col=0, parse_dates=True)

# ============ SPLIT DATA ============
# Ordered hold-out split: the first 80% of rows train, the remaining rows test (no shuffling).
# NOTE(review): assumes the CSV rows are already in chronological order — confirm.
# NOTE(review): if Label_7day is computed from future prices (the name suggests a 7-day
# horizon), the last training rows may be labelled with test-period data — consider
# purging rows around the boundary; confirm against the labelling code.
split_idx = int(len(labelled_df) * 0.8)

# Tag rows by POSITION rather than by index label: a label-based assignment would also
# mark earlier rows as "Test" whenever an index value (date) is repeated.
labelled_df["Set"] = ["Train"] * split_idx + ["Test"] * (len(labelled_df) - split_idx)

# Define X, y
target_col = "Label_7day"
# Everything listed here is excluded from the feature matrix; all other columns are features.
drop_cols = [target_col, "Close","Barrier_Hit_Day","Near_Peak","Actual_Return_7day","Set"]  # keep only features

feature_cols = [col for col in labelled_df.columns if col not in drop_cols]

train_mask = labelled_df["Set"] == "Train"
test_mask = labelled_df["Set"] == "Test"

X_train = labelled_df.loc[train_mask, feature_cols]
y_train = labelled_df.loc[train_mask, target_col]
X_test = labelled_df.loc[test_mask, feature_cols]
y_test = labelled_df.loc[test_mask, target_col]

In [None]:
# =========== MLFLOW SETUP ============
mlflow.set_experiment("triple_barrier_classification")
# Autolog keeps recording params and metrics, but model logging is switched off here:
# the training loop already logs each model explicitly (with a signature) via
# mlflow.sklearn.log_model, so leaving it on would store every model twice per run.
mlflow.sklearn.autolog(log_models=False)

# =========== INITIALISE EVALUATION LIST ============
# One dict per model is appended by the training loop; converted to a DataFrame afterwards.
trainingEval = []

# =========== DEFINE MODELS ============
# Logistic regression and the RBF SVM are sensitive to feature scale, so both are wrapped
# in a pipeline that standardises features first. The scaler is fitted inside the pipeline,
# i.e. on the training data only. The random forest is scale-invariant and stays bare.
models = {
    "log_reg": Pipeline([
        ("scaler", StandardScaler()),
        ("clf", LogisticRegression(max_iter=1000, random_state=42, class_weight="balanced")),
    ]),
    "random_forest": RandomForestClassifier(n_estimators=200, max_depth=8, random_state=42, class_weight="balanced"),
    "svm_rbf": Pipeline([
        ("scaler", StandardScaler()),
        ("clf", SVC(kernel="rbf", probability=True, random_state=42, class_weight="balanced")),
    ]),
}


# Get actual returns for BOTH train and test sets.
# Selected with the same "Set" mask that produced X_train / X_test, so the series stay
# row-aligned with the feature matrices even if an index label is repeated.
actual_returns_train = labelled_df.loc[labelled_df["Set"] == "Train", "Actual_Return_7day"]
actual_returns_test = labelled_df.loc[labelled_df["Set"] == "Test", "Actual_Return_7day"]

# Initialise prediction dataframes (one pred_/proba_ column pair is added per model)
predictions_df_train = pd.DataFrame(index=X_train.index)
predictions_df_test = pd.DataFrame(index=X_test.index)


# ============ TRAIN MODELS ============
def _fmt_metric(value, spec=".3f"):
    """Render one metric value for the comparison table.

    Args:
        value: Numeric metric value, or None when the metric is undefined.
        spec: Format specification applied to numeric values (e.g. ".3f", ".3%").

    Returns:
        The formatted string, or "n/a" when ``value`` is None. The MLflow logging
        step below already skips None metrics; the table must tolerate them too
        instead of raising TypeError inside an f-string.
    """
    if value is None:
        return "n/a"
    return format(value, spec)


for name, model in models.items():
    # One MLflow run per model, named after its key in `models`.
    with mlflow.start_run(run_name=name):
        # Train model
        model.fit(X_train, y_train)

        # ============ GENERATE & STORE PREDICTIONS ============
        # Train predictions
        train_pred = model.predict(X_train)
        train_proba = model.predict_proba(X_train)
        predictions_df_train[f'pred_{name}'] = train_pred
        predictions_df_train[f'proba_{name}'] = list(train_proba)  # Store as list of arrays

        # Test predictions
        test_pred = model.predict(X_test)
        test_proba = model.predict_proba(X_test)
        predictions_df_test[f'pred_{name}'] = test_pred
        predictions_df_test[f'proba_{name}'] = list(test_proba)

        # Log the trained model with signature.
        # The signature defines the expected input and output formats for the model.
        signature = infer_signature(X_train, train_pred)

        model_info = mlflow.sklearn.log_model(
            model,
            artifact_path="model",
            signature=signature
        )

        # ============ EVALUATE MODELS USING STORED PREDICTIONS ============
        # ML metrics; keys are prefixed with the split name passed as the last argument
        # (see the keys read when building the comparison table below).
        train_ml_metrics = calculate_ml_metrics(
            y_train,
            predictions_df_train[f'pred_{name}'],
            predictions_df_train[f'proba_{name}'],
            "train"
        )
        test_ml_metrics = calculate_ml_metrics(
            y_test,
            predictions_df_test[f'pred_{name}'],
            predictions_df_test[f'proba_{name}'],
            "test"
        )

        # Trading metrics, computed from predictions and the realised 7-day returns
        train_trade_metrics = calculate_trade_metrics(
            y_train,
            predictions_df_train[f'pred_{name}'],
            actual_returns_train,
            "train"
        )
        test_trade_metrics = calculate_trade_metrics(
            y_test,
            predictions_df_test[f'pred_{name}'],
            actual_returns_test,
            "test"
        )

        # ============ LOG ALL METRICS TO MLFLOW ============
        all_metrics = {
            **train_ml_metrics,
            **test_ml_metrics,
            **train_trade_metrics,
            **test_trade_metrics
        }

        # Undefined metrics (None) are skipped rather than logged.
        for metric_name, metric_value in all_metrics.items():
            if metric_value is not None:
                mlflow.log_metric(metric_name, metric_value)

        # ============ MLFLOW EVALUATE (for plots only) ============
        # Create evaluation dataset (X_test + targets)
        eval_data = X_test.copy()
        eval_data[target_col] = y_test

        # Evaluate the logged model with MLflow's built-in classifier evaluator
        eval_result = mlflow.models.evaluate(
            model=model_info.model_uri,
            data=eval_data,
            targets=target_col,
            model_type="classifier",
            evaluator_config={
                "explainability_algorithm": None,  # Disable SHAP
                "log_model_explainability": False  # Don't log explainability artifacts
            }
        )

        # ============ FULL COMPARISON TABLE ============
        # Required metrics are still read with [] so a missing key fails loudly.
        # ROC-AUC is optional: when absent or None it is shown as "n/a" rather than
        # 0.000, which would read as a real (worst-possible) score.
        trainingEval.append({
            "Model": name,
            # ML Metrics - Train
            "Train Acc": _fmt_metric(train_ml_metrics['train_accuracy']),
            "Train Prec": _fmt_metric(train_ml_metrics['train_precision']),
            "Train Rec": _fmt_metric(train_ml_metrics['train_recall']),
            "Train F1": _fmt_metric(train_ml_metrics['train_f1_score']),
            "Train ROC-AUC": _fmt_metric(train_ml_metrics.get('train_roc_auc')),
            # ML Metrics - Test
            "Test Acc": _fmt_metric(test_ml_metrics['test_accuracy']),
            "Test Prec": _fmt_metric(test_ml_metrics['test_precision']),
            "Test Rec": _fmt_metric(test_ml_metrics['test_recall']),
            "Test F1": _fmt_metric(test_ml_metrics['test_f1_score']),
            "Test ROC-AUC": _fmt_metric(test_ml_metrics.get('test_roc_auc')),
            # Trading Metrics - Train
            "Train WR": _fmt_metric(train_trade_metrics['train_win_rate'], ".3%"),
            "Train PF": _fmt_metric(train_trade_metrics['train_profit_factor']),
            "Train Avg Win%": _fmt_metric(train_trade_metrics['train_avg_win_pct']),
            "Train Avg Loss%": _fmt_metric(train_trade_metrics['train_avg_loss_pct']),
            "Train Max DD%": _fmt_metric(train_trade_metrics['train_max_drawdown'], ".3%"),
            "Train Sharpe": _fmt_metric(train_trade_metrics['train_sharpe_ratio']),
            "Train EV": _fmt_metric(train_trade_metrics['train_expected_value']),
            "Train Avg Loss Wrong%": _fmt_metric(train_trade_metrics['train_avg_loss_when_wrong_pct']),
            # Trading Metrics - Test
            "Test WR": _fmt_metric(test_trade_metrics['test_win_rate'], ".3%"),
            "Test PF": _fmt_metric(test_trade_metrics['test_profit_factor']),
            "Test Avg Win%": _fmt_metric(test_trade_metrics['test_avg_win_pct']),
            "Test Avg Loss%": _fmt_metric(test_trade_metrics['test_avg_loss_pct']),
            "Test Max DD%": _fmt_metric(test_trade_metrics['test_max_drawdown'], ".3%"),
            "Test Sharpe": _fmt_metric(test_trade_metrics['test_sharpe_ratio']),
            "Test EV": _fmt_metric(test_trade_metrics['test_expected_value']),
            "Test Avg Loss Wrong%": _fmt_metric(test_trade_metrics['test_avg_loss_when_wrong_pct']),
        })
        
# ============ DISPLAY RESULTS ============
# Collapse the per-model row dicts into one table. Note that `trainingEval` is rebound
# here from a list to a DataFrame.
trainingEval = pd.DataFrame(data=trainingEval)
markdown_table = trainingEval.to_markdown(index=False)
print(markdown_table)

# ============ SAVE PREDICTIONS ============
# Persisting the prediction frames is currently disabled; uncomment to write them out.
# predictions_df_train.to_csv("data_cache/predictions_train.csv")
# predictions_df_test.to_csv("data_cache/predictions_test.csv")