“A supervised machine learning model (XGBoost) was trained to classify trade signals as profitable or non-profitable using engineered market features. Trades were filtered based on predicted confidence to improve strategy performance.”

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

import joblib


In [2]:
df = pd.read_csv("../data/nifty_with_regime_5min.csv")
trades = pd.read_csv("../data/trades_log.csv")

df['datetime'] = pd.to_datetime(df['datetime'])
trades['entry_time'] = pd.to_datetime(trades['entry_time'])
trades['exit_time'] = pd.to_datetime(trades['exit_time'])

print("Data loaded successfully ✅")


Data loaded successfully ✅


In [3]:
ml_data = pd.merge(
    trades,
    df,
    left_on='entry_time',
    right_on='datetime',
    how='left'
)


In [4]:
# 1 = profitable trade, 0 = loss trade
ml_data['target'] = (ml_data['pnl'] > 0).astype(int)

ml_data[['pnl','target']].head()


Unnamed: 0,pnl,target
0,131.7,1
1,-73.95,0
2,-49.2,0
3,-14.45,0
4,-3.7,0


In [5]:
features = [
    'ema_5','ema_15',
    'avg_iv','iv_spread',
    'pcr_oi','futures_basis',
    'regime','spot_return'
]

X = ml_data[features]
y = ml_data['target']


In [6]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [7]:
tscv = TimeSeriesSplit(n_splits=5)


In [8]:
model = XGBClassifier(
    n_estimators=200,
    max_depth=4,
    learning_rate=0.05,
    random_state=42
)

model.fit(X_scaled, y)

print("XGBoost model trained ✅")


XGBoost model trained ✅


In [9]:
preds = model.predict(X_scaled)
print(classification_report(y, preds))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        49
           1       1.00      1.00      1.00        13

    accuracy                           1.00        62
   macro avg       1.00      1.00      1.00        62
weighted avg       1.00      1.00      1.00        62



In [10]:
import joblib
import os
os.makedirs("../models", exist_ok=True)
joblib.dump(model, "../models/xgboost_model.pkl")
print("Model saved successfully ✅")


Model saved successfully ✅


In [11]:
ml_data['ml_confidence'] = model.predict_proba(X_scaled)[:,1]

ml_data[['ml_confidence','target']].head()


Unnamed: 0,ml_confidence,target
0,0.71254,1
1,0.004136,0
2,0.10811,0
3,0.011022,0
4,0.022742,0


In [12]:
ml_filtered_trades = ml_data[ml_data['ml_confidence'] > 0.5]

ml_filtered_trades.to_csv(
    "../data/trades_with_ml_prediction.csv",
    index=False
)

print("ML filtered trades saved ✅")


ML filtered trades saved ✅
