In [None]:
# ============ IMPORTS ============
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import numpy as np
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature
import quantstats as qs

# =========== CONFIGURATION ============
# Name of the label column the classifiers are evaluated against.
target_col = "Label_7day"
# ======================================

# ============ LOAD LABELLED DATA ============
# The first CSV column becomes the index and is parsed as dates.
predictions_df = pd.read_csv("data_cache/labelled_df.csv", index_col=0, parse_dates=True)

# ============ SPLIT DATA ============
# Row-order split: the first 80% of rows are "Train", the remaining 20% "Test".
split_idx = int(len(predictions_df) * 0.8)
predictions_df["Set"] = "Train"
# Assign by position rather than by index label: with label-based .loc, any
# earlier row sharing an index label with a test row would also be flagged
# "Test" and leak out of the training set.
predictions_df.iloc[split_idx:, predictions_df.columns.get_loc("Set")] = "Test"

# Define X, y
# Everything that is not the target, a price/outcome column or the split flag is a feature.
drop_cols = [target_col, "Close","Barrier_Hit_Day","Near_Peak","Actual_Return_7day","Set"]  # keep only features
feature_cols = [col for col in predictions_df.columns if col not in drop_cols]

# Build each boolean mask once and reuse it for both X and y.
is_train = predictions_df["Set"] == "Train"
is_test = predictions_df["Set"] == "Test"

X_train = predictions_df.loc[is_train, feature_cols]
y_train = predictions_df.loc[is_train, target_col]
X_test = predictions_df.loc[is_test, feature_cols]
y_test = predictions_df.loc[is_test, target_col]

In [None]:
# =========== MLFLOW SETUP ============
experiment_name = "triple_barrier_classification"
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    # get_experiment_by_name returns None (it does not raise) for an unknown
    # name; fail here with a clear message instead of an AttributeError below.
    raise ValueError(
        f"MLflow experiment {experiment_name!r} not found - check the tracking URI "
        "and that the training notebook has been run."
    )
experiment_id = experiment.experiment_id

# =========== LIST PREVIOUS RUNS ============
# Newest runs first, restricted to runs that completed successfully.
runs = mlflow.search_runs(experiment_ids=[experiment_id])
runs = runs.sort_values("start_time", ascending=False)
runs = runs[runs["status"] == "FINISHED"]  # Filter for finished runs

# ========== LOAD LATEST MODELS ==========
model_names = ['log_reg', 'random_forest', 'svm_rbf']
run_name_col = 'tags.mlflow.runName'
if runs.empty or run_name_col not in runs.columns:
    raise ValueError(f"No FINISHED runs with a run name found in experiment {experiment_name!r}.")

# Keep the single most recent row per run name. drop_duplicates keeps whole
# rows, whereas groupby(...).first() takes the first non-null value per column
# and can therefore stitch together values from different runs.
latest_run_df = (
    runs.dropna(subset=[run_name_col])
        .drop_duplicates(subset=run_name_col, keep="first")
        .set_index(run_name_col)
        .sort_index()
)

# Surface missing models here rather than as a KeyError when they are loaded.
missing_models = [m for m in model_names if m not in latest_run_df.index]
if missing_models:
    raise ValueError(f"No FINISHED run found for model(s): {missing_models}")

In [None]:
# =========== PREDICTIONS ============
# Score every row (train and test alike) with the latest logged run of each model.
# The feature columns never change inside the loop, so select them once.
feature_frame = predictions_df[feature_cols]

for model_name in model_names:
    run_id = latest_run_df.loc[model_name, 'run_id']
    model = mlflow.sklearn.load_model(f"runs:/{run_id}/model")

    # Hard class predictions for the full dataset
    predictions_df[f'pred_{model_name}'] = model.predict(feature_frame)

    # One probability vector per row, kept as a list of arrays so the rows can
    # be stacked back into a 2-D array when evaluating
    class_probabilities = model.predict_proba(feature_frame)
    predictions_df[f'proba_{model_name}'] = [row for row in class_probabilities]

# predictions_df.to_csv('data_cache/predictions_df.csv', index=True)

In [None]:
# =========== EVALUATION ============
results = []
# for model_name in model_names:
model_name = "random_forest"  # Only the random forest is scored for now

# Select the held-out rows once and reuse the mask for labels and probabilities
test_rows = predictions_df["Set"] == "Test"
y_pred = predictions_df.loc[test_rows, f'pred_{model_name}']

# Rebuild the (n_samples, n_classes) matrix from the per-row probability arrays
proba_rows = predictions_df.loc[test_rows, f'proba_{model_name}'].tolist()
y_proba = np.vstack(proba_rows)

# Classification metrics on the test set (weighted averages across classes)
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average="weighted")
auc = roc_auc_score(y_test, y_proba, average="weighted", multi_class="ovo")

metrics_row = {
    "Model": model_name,
    "Accuracy": acc,
    "F1 Score": f1,
    "ROC AUC": auc
}
results.append(metrics_row)

In [None]:
    # ============ TRADING METRICS ============

# Get actual returns for train and test set
actual_returns_train = predictions_df.loc[X_train.index, "Actual_Return_7day"]
actual_returns_test = predictions_df.loc[X_test.index, "Actual_Return_7day"]

# Filter to only traded signals (where model predicted -1 or 1)
# NOTE(review): returns are used as-is for both +1 and -1 signals. If -1 means
# "go short", the realised trade return would be -Actual_Return_7day - confirm
# whether Actual_Return_7day is already direction-adjusted.
traded_mask = (y_pred != 0)
traded_returns = actual_returns_test[traded_mask]
traded_pred = y_pred[traded_mask]
traded_true = y_test[traded_mask]

# Convert to decimal returns (QuantStats expects decimals, not %)
returns_decimal = traded_returns / 100

# Calculate trading metrics
print("\n" + "="*60)
print("TRADING PERFORMANCE METRICS (QuantStats)")
print("="*60)
print(f"Win Rate:              {qs.stats.win_rate(returns_decimal):.1%}")
print(f"Profit Factor:         {qs.stats.profit_factor(returns_decimal):.2f}")
print(f"Avg Win:               {qs.stats.avg_win(returns_decimal):.2%}")
print(f"Avg Loss:              {qs.stats.avg_loss(returns_decimal):.2%}")
print(f"Max Drawdown:          {qs.stats.max_drawdown(returns_decimal):.2%}")
print(f"Sharpe Ratio:          {qs.stats.sharpe(returns_decimal):.2f}")

# Custom metric: Avg Loss When Wrong (supervisor requirement)
# Mean magnitude of the losing returns among trades whose predicted direction
# did not match the sign of the true label.
wrong_predictions_mask = (traded_pred != np.sign(traded_true))
# Use the decimal returns: the value is printed with the % format spec, which
# multiplies by 100 itself, so percent-unit inputs would be inflated 100x.
wrong_returns = returns_decimal[wrong_predictions_mask]
avg_loss_when_wrong = abs(wrong_returns[wrong_returns < 0].mean())

print(f"\n{'='*60}")
print(f"AVG LOSS WHEN WRONG:   {avg_loss_when_wrong:.2%}")
print(f"{'='*60}")

# Calculate Expected Value manually (QuantStats doesn't have direct EV)
# EV per trade = P(win) * average win - P(loss) * average loss magnitude
win_rate = qs.stats.win_rate(returns_decimal)
avg_win = qs.stats.avg_win(returns_decimal)
avg_loss = abs(qs.stats.avg_loss(returns_decimal))
expected_value = (win_rate * avg_win) - ((1 - win_rate) * avg_loss)
print(f"EXPECTED VALUE/TRADE:  {expected_value:.2%}")

# Optional: Generate HTML report for dissertation appendix
returns_series = pd.Series(returns_decimal.values, index=X_test.index[traded_mask])
# Make sure the output folder exists before the report is written to it.
os.makedirs("results", exist_ok=True)
# Title uses model_name (set in the evaluation cell); the previous `name` was
# never defined and raised NameError.
qs.reports.html(returns_series, output='results/model_performance_tearsheet.html',
                title=f'{model_name} Trading Performance')
print("\nGenerated HTML report: results/model_performance_tearsheet.html")