In [None]:
# ============ IMPORTS ============
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import numpy as np
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature
import quantstats as qs
from src.calculate_ml_metrics import calculate_ml_metrics
from src.calculate_trade_metrics import calculate_trade_metrics

# =========== CONFIGURATION ============
target_col = "Label_7day"
# ======================================

# ============ LOAD LABELLED DATA ============
# The cache is a pickled mapping with the labelled frame and the X/y
# train/test splits. Unpickling can execute arbitrary code, so only load
# cache files that were produced by this project.
labelled_data_cache = pd.read_pickle('data_cache/labelled_data.pkl')
predictions_df = labelled_data_cache['labelled_df']
X_train, X_test, y_train, y_test = (
    labelled_data_cache[split_key]
    for split_key in ('X_train', 'X_test', 'y_train', 'y_test')
)

In [None]:
# =========== MLFLOW SETUP ============
EXPERIMENT_NAME = "triple_barrier_classification"
RUN_NAME_COL = "tags.mlflow.runName"

# get_experiment_by_name returns None (it does not raise) when the experiment
# is missing, so fail here with an actionable message instead of an
# AttributeError on the next line.
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    raise RuntimeError(
        f"MLflow experiment '{EXPERIMENT_NAME}' was not found. "
        "Check the tracking URI and run the training notebook first."
    )
experiment_id = experiment.experiment_id

# =========== LIST PREVIOUS RUNS ============
runs = mlflow.search_runs(experiment_ids=[experiment_id])
runs = runs.sort_values("start_time", ascending=False)  # newest first
runs = runs[runs["status"] == "FINISHED"]  # Filter for finished runs
if runs.empty or RUN_NAME_COL not in runs.columns:
    raise RuntimeError(
        f"No FINISHED runs with a run name were found in experiment '{EXPERIMENT_NAME}'."
    )

# ========== LOAD LATEST MODELS ==========
model_names = ['log_reg', 'random_forest', 'svm_rbf']
# Rows are sorted newest-first, so first() per run name yields the latest run.
# NOTE(review): GroupBy.first skips NA values column by column, so columns
# other than run_id may be taken from an older run of the same name.
latest_run_df = runs.groupby(RUN_NAME_COL).first()

# Surface missing models now rather than as a KeyError when they are loaded.
missing_models = [name for name in model_names if name not in latest_run_df.index]
if missing_models:
    raise RuntimeError(
        f"No FINISHED run found for model(s) {missing_models} in experiment '{EXPERIMENT_NAME}'."
    )

In [None]:
# =========== PREDICTIONS ============
# Derive the feature columns from the cached training matrix so this cell runs
# on a fresh kernel without relying on a variable left over from another session.
# NOTE(review): assumes X_train is the DataFrame the logged models were
# fitted on (same columns, same order) — confirm against the training notebook.
feature_cols = list(X_train.columns)

# The feature slice is the same for every model; build it once.
features_full = predictions_df[feature_cols]

for model_name in model_names:
    run_id = latest_run_df.loc[model_name, 'run_id']
    model = mlflow.sklearn.load_model(f"runs:/{run_id}/model")
    predictions_df[f'pred_{model_name}'] = model.predict(features_full)  # Predict on full dataset
    # Store per-row probability arrays as lists so they can be stacked later
    predictions_df[f'proba_{model_name}'] = list(model.predict_proba(features_full))  # Get probabilities

# predictions_df.to_csv('data_cache/predictions_df.csv', index=True)

In [None]:
# =========== EVALUATION ============
# Row selectors for each split; they are the same for every model.
train_rows = predictions_df["Set"] == "Train"
test_rows = predictions_df["Set"] == "Test"

ml_metrics = []
for model_name in model_names:
    pred_col = f'pred_{model_name}'
    proba_col = f'proba_{model_name}'

    train_pred = predictions_df.loc[train_rows, pred_col]
    test_pred = predictions_df.loc[test_rows, pred_col]
    # Each cell of the proba column holds one row's probability array;
    # stack them back into a single array per split.
    train_proba = np.vstack(predictions_df.loc[train_rows, proba_col])
    test_proba = np.vstack(predictions_df.loc[test_rows, proba_col])

    train_ml_metrics_dict = calculate_ml_metrics(y_train, train_pred, train_proba, "train")
    test_ml_metrics_dict = calculate_ml_metrics(y_test, test_pred, test_proba, "test")

    # One record per model: the model name first, then train and test metrics.
    ml_metrics_dict = {"Model": model_name}
    ml_metrics_dict.update(train_ml_metrics_dict)
    ml_metrics_dict.update(test_ml_metrics_dict)
    ml_metrics.append(ml_metrics_dict)

# ============ STORE RESULTS ============
# Turn the per-model records into a table and persist it.
ml_metrics = pd.DataFrame(ml_metrics)
ml_metrics.to_csv('data_cache/ml_metrics.csv', index=False)

In [None]:
# ============ TRADING METRICS ============
def _traded_signal_breakdown(pred, y_true, actual_returns, prefix):
    """Collect diagnostics for the rows of one split where the model traded.

    Parameters
    ----------
    pred : pandas.Series
        Model predictions for the split; rows with prediction 0 are treated
        as "no trade" and excluded.
    y_true : pandas.Series or array-like
        True labels for the split. Assumed to be aligned with ``pred`` —
        TODO confirm against how the cache was built.
    actual_returns : pandas.Series
        Realised returns for the split. They are divided by 100 below, so
        presumably stored in percent — confirm.
    prefix : str
        Split name ("train" or "test") prepended to every key of the result.

    Returns
    -------
    dict
        Keys ``{prefix}_traded_mask``, ``{prefix}_traded_returns``,
        ``{prefix}_traded_pred``, ``{prefix}_traded_true``,
        ``{prefix}_returns`` (same object as ``{prefix}_traded_returns``),
        ``{prefix}_wrong_predictions``, ``{prefix}_wrong_returns`` and
        ``{prefix}_avg_loss_when_wrong``.
    """
    traded_mask = (pred != 0)
    traded_returns = actual_returns[traded_mask] / 100
    traded_pred = pred[traded_mask]
    traded_true = y_true[traded_mask]
    # A traded prediction is wrong when it differs from the sign of the label.
    wrong_predictions = (traded_pred != np.sign(traded_true))
    wrong_returns = traded_returns[wrong_predictions]
    # Mean magnitude of the negative returns among wrong trades; NaN when
    # there are none.
    avg_loss_when_wrong = abs(wrong_returns[wrong_returns < 0].mean())
    return {
        f'{prefix}_traded_mask': traded_mask,
        f'{prefix}_traded_returns': traded_returns,
        f'{prefix}_traded_pred': traded_pred,
        f'{prefix}_traded_true': traded_true,
        f'{prefix}_returns': traded_returns,
        f'{prefix}_wrong_predictions': wrong_predictions,
        f'{prefix}_wrong_returns': wrong_returns,
        f'{prefix}_avg_loss_when_wrong': avg_loss_when_wrong,
    }


trade_metrics = []
model_trade_metrics_dict = {}
# Per-model diagnostic frames keyed by model name; prefer this over the
# dynamically named globals created below.
model_trade_metrics_frames = {}

# Get actual returns for train and test set
actual_returns_train = predictions_df.loc[X_train.index, "Actual_Return_7day"]
actual_returns_test = predictions_df.loc[X_test.index, "Actual_Return_7day"]

# Row selectors for each split; they are the same for every model.
train_rows = predictions_df["Set"] == "Train"
test_rows = predictions_df["Set"] == "Test"

for model_name in model_names:
    train_pred = predictions_df.loc[train_rows, f'pred_{model_name}']
    test_pred = predictions_df.loc[test_rows, f'pred_{model_name}']

    train_trade_metrics_dict = calculate_trade_metrics(y_train, train_pred, actual_returns_train, "train")
    test_trade_metrics_dict = calculate_trade_metrics(y_test, test_pred, actual_returns_test, "test")
    trade_metrics_dict = {"Model": model_name, **train_trade_metrics_dict, **test_trade_metrics_dict}
    trade_metrics.append(trade_metrics_dict)

    # Filter to only traded signals (where the model predicted a non-zero
    # class) and keep the intermediates for further analysis or visualization.
    model_trade_metrics_dict[model_name] = {
        **_traded_signal_breakdown(train_pred, y_train, actual_returns_train, "train"),
        **_traded_signal_breakdown(test_pred, y_test, actual_returns_test, "test"),
    }

    # Dataframe of model trade metrics
    model_trade_metrics_frame = pd.DataFrame(model_trade_metrics_dict[model_name])
    model_trade_metrics_frames[model_name] = model_trade_metrics_frame
    # Kept so existing references to `model_trade_metrics_<model_name>` still resolve.
    globals()[f'model_trade_metrics_{model_name}'] = model_trade_metrics_frame

# # ============ SHOW & STORE RESULTS ============
# trade_metrics = pd.DataFrame(trade_metrics)
# print(trade_metrics.to_markdown(index=False))
# trade_metrics.to_csv('data_cache/trade_metrics.csv', index=False)

# HTML Report with QuantStats (disabled)
# NOTE(review): `returns_decimal` and `traded_mask` are not defined in the
# visible cells; they must be defined before re-enabling this.
# returns_series = pd.Series(returns_decimal.values, index=X_test.index[traded_mask])
# qs.reports.html(returns_series, output='data_cache/model_performance_tearsheet.html', 
#                 title=f'{model_name} Trading Performance')