# Gpu

In [None]:
!nvidia-smi

In [None]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')

# Set_Up

In [None]:
strategy   = 'Kalman'
process    = 'Train'
symbol     = 'XAUUSD'
direction  = 'Short'
time_frame = 'M5'

root_data = f'/content/drive/MyDrive/Course Folder/Forex/XAUUSD/'
print(root_data)

rolling_window = 100

# Import_Libraries

In [None]:
!pip install ta-lib
import talib as ta
print(ta.__version__)

In [None]:
# Import libraries for data manipulation
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from datetime import timedelta

# For machine learning models
import xgboost
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split # Import train_test_split

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

from tensorflow.keras.models import load_model

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import GridSearchCV

# Import matplotlib as an alias plt and set the style
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use("seaborn-v0_8-whitegrid")

# Import sys to append the path for custom function file
import sys
sys.path.append("..")

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

import joblib

In [None]:
def confusion_probability_summary(y_true, y_pred, positive_class_probabilities, positive_class=1):
    """
    Calculate summary statistics of predicted probabilities for each outcome in a confusion matrix.
    """
    data = pd.DataFrame({
        'actual': np.asarray(y_true),
        'predicted': np.asarray(y_pred),
        'prob_positive': np.asarray(positive_class_probabilities),
    })

    outcomes = {
        'True Positive': (data['actual'] == positive_class) & (data['predicted'] == positive_class),
        'False Positive': (data['actual'] != positive_class) & (data['predicted'] == positive_class),
        'True Negative': (data['actual'] != positive_class) & (data['predicted'] != positive_class),
        'False Negative': (data['actual'] == positive_class) & (data['predicted'] != positive_class),
    }

    summary_rows = []
    for outcome, mask in outcomes.items():
        probabilities = data.loc[mask, 'prob_positive']
        summary_rows.append({
            'Outcome': outcome,
            'Count': int(probabilities.count()),
            'Mean Probability': probabilities.mean() if not probabilities.empty else np.nan,
            'Std Probability': probabilities.std(ddof=0) if probabilities.count() > 1 else np.nan,
        })

    summary = pd.DataFrame(summary_rows).set_index('Outcome')
    return summary


In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Support Functions

In [None]:
def results(data, pnl_column='PnL'):
    # Calculate the metrics
    time_difference = data.index.max() - data.index.min()
    days = time_difference.days
    total_trades = data[data['Open_Trade'].notna() & (data['Open_Trade'] != 0)].shape[0]
    profit_trades = data[data[pnl_column] > 0].shape[0]
    loss_trades = data[data[pnl_column] < 0].shape[0]
    profits = data[data[pnl_column] > 0][pnl_column].sum()
    losses = data[data[pnl_column] < 0][pnl_column].sum()

    # Create a dictionary with the results
    results_dict = {
        'days': days,
        'total_trades': total_trades,
        '': '',
        'income': profits,
        'losses': losses,
        'profits': profits + losses,
        ' ':' ',
        'profit_trades': profit_trades,
        'loss_trades': loss_trades,
        '  ':'  ',
        '% Win_Trades': profit_trades / (profit_trades + loss_trades) * 100 if (profit_trades + loss_trades) > 0 else 0,
        '% Loss_Trades': loss_trades / (profit_trades + loss_trades)*100 if (profit_trades + loss_trades) > 0 else 0
    }

    # Create a DataFrame from the dictionary and transpose it
    perf_metrics = pd.DataFrame([results_dict]).T

    # Rename the column
    perf_metrics.rename(columns={0: 'Results'}, inplace=True)

    # Format the DataFrame for display
    perf_metrics.loc[['days', 'total_trades', 'profit_trades', 'loss_trades','% Win_Trades','% Loss_Trades'], 'Results'] = perf_metrics.loc[['days', 'total_trades', 'profit_trades', 'loss_trades','% Win_Trades','% Loss_Trades'], 'Results'].apply(lambda x: f"{x:,.0f}" if pd.notna(x) else '')
    perf_metrics.loc[['income', 'losses', 'profits'], 'Results'] = perf_metrics.loc[['income', 'losses', 'profits'], 'Results'].apply(lambda x: f"${x:,.2f}" if pd.notna(x) else '')

    return perf_metrics

In [None]:
def create_features(train_data, index):
    short_periods = [3, 5, 7, 10, 15, 17]
    long_periods = [20, 22, 66, 126, 252]
    periods = short_periods + long_periods

    features = pd.DataFrame(index=index)

    # Indicators that do not depend on the lookback period
    features['OBV'] = ta.OBV(train_data['Close'], train_data['Volume'])
    features['AD'] = ta.AD(train_data['High'], train_data['Low'],
                           train_data['Close'], train_data['Volume'])

    # Pre-compute moving averages to avoid repeated calculations
    sma = {p: ta.SMA(train_data['Close'], timeperiod=p) for p in periods}
    ema = {p: ta.EMA(train_data['Close'], timeperiod=p) for p in periods}

    for period in periods:
        features[f'RSI_{period}'] = ta.RSI(train_data['Close'], timeperiod=period)
        features[f'MFI_{period}'] = ta.MFI(train_data['High'], train_data['Low'],
                                           train_data['Close'], train_data['Volume'],
                                           timeperiod=period)
        features[f'ADX_{period}'] = ta.ADX(train_data['High'], train_data['Low'],
                                           train_data['Close'], timeperiod=period)
        features[f'ROCP_{period}'] = ta.ROCP(train_data['Close'], timeperiod=period)

    for s in short_periods:
        for l in long_periods:
            features[f'SMA_Crossover_{s}_{l}'] = sma[s] - sma[l]
            features[f'EMA_Crossover_{s}_{l}'] = ema[s] - ema[l]

    features.dropna(inplace=True)
    return features


In [None]:
def strategy_returns_dynamic_different_thresholds(prices, threshold):
    rolling = prices['Rolling Returns'].to_numpy()
    yearly_std = prices['Yearly Stdev'].to_numpy()
    pct_change = prices['Close'].pct_change().to_numpy()
    signals = np.zeros(len(prices), dtype=np.int8)

    curr_pos = 0
    hold_days = 0
    for i in range(len(prices)):
        if curr_pos == 0 or hold_days == 20:
            thresh_val = threshold * yearly_std[i]
            if rolling[i] >= thresh_val:
                curr_pos = 1
                hold_days = 0
            elif rolling[i] < -thresh_val:
                curr_pos = -1
                hold_days = 0
            else:
                curr_pos = 0
                hold_days = 0
        else:
            hold_days += 1
        signals[i] = curr_pos

    prices[f'Signal_{threshold}'] = signals
    strategy_returns = pct_change * np.roll(signals, 1)
    prices[f'Strategy Returns_{threshold}'] = strategy_returns
    return np.cumprod(strategy_returns + 1)


def strategy_returns_different_thresholds(prices, threshold):
    rolling = prices['Rolling Returns'].to_numpy()
    pct_change = prices['Close'].pct_change().to_numpy()
    signals = np.zeros(len(prices), dtype=np.int8)

    curr_pos = 0
    hold_days = 0
    for i in range(len(prices)):
        if curr_pos == 0 or hold_days == 20:
            if rolling[i] >= threshold:
                curr_pos = 1
                hold_days = 0
            elif rolling[i] < threshold:
                curr_pos = -1
                hold_days = 0
        else:
            hold_days += 1
        signals[i] = curr_pos

    prices[f'Signal_{threshold}'] = signals
    strategy_returns = pct_change * np.roll(signals, 1)
    prices[f'Strategy Returns_{threshold}'] = strategy_returns
    return np.cumprod(strategy_returns + 1)


# Data

In [None]:
data_type = 'Scale'

In [43]:
### Open OHLC dataframe

ohlc = pd.read_csv(root_data + 'Data/'+symbol+'_M5.csv', index_col=0)
ohlc.index = pd.to_datetime(ohlc.index)
time_difference = ohlc.index.max() - ohlc.index.min()
number_of_days = time_difference.days

print(f"The train_data DataFrame covers a period of {number_of_days} days.")

The train_data DataFrame covers a period of 2396 days.


In [None]:
### Features

features_5m = pd.read_csv(root_data + 'Results/'+symbol+'_'+direction+'_M5M10_'+data_type+'_Features.csv', index_col=0)
features_5m['Date'] = features_5m.index
features_5m['Date'] = pd.to_datetime(features_5m['Date'])
features_5m.set_index("Date", inplace=True)

print(list(features_5m.columns),'\n')
print('Shape = ',features_5m.shape)

features_5m.tail(3)

In [44]:
### Labels

lab = pd.read_csv(root_data + 'Results/'+symbol+'_'+strategy+'_'+time_frame+'_Strategy_Gen_Labels.csv', index_col=0)
lab['Date'] = pd.to_datetime(lab['Date'])
lab.set_index('Date', inplace=True)

columns_to_drop = ['st_row_PnL_Low','Close_Trade']
lab = lab.drop(columns=columns_to_drop)

print(list(lab.columns),'\n')
print('Shape : ',lab.shape,'\n')

nan_counts = lab.isnull().sum()
total_missing_counts = nan_counts

print("Missing values (NaN and Inf) in lab sorted by highest to lowest:")
print(total_missing_counts.sort_values(ascending=False))
print("Total missing value count in lab:", total_missing_counts.sum())


['Open', 'High', 'Low', 'Close', 'Volume', 'Spread', 'ATR', 'kal_1', 'kal_2', 'kal_3', 'kal_4', 'Open_Trade', 'Entry_Date', 'Type', 'Trade_Number', 'st_Exit_Date', 'trade type', 'st_Duration', 'st_row_PnL_close', 'st_row_PnL_high', 'st_row_PnL_low', 'st_Max', 'st_Min', 'st_PnL', 'st_atr_PnL', 'st_atr_max_PnL', 'atr_mult_close', 'atr_mult_high', 'atr_mult_low', 'atr_dyn', 'atr_PnL', 'atr_Exit_Date', 'atr_Duration', 'atr_PnL_dollar', 'atr_H_dyn', 'atr_H_PnL', 'atr_H_Exit_Date', 'atr_H_Duration', 'atr_H_PnL_dollar'] 

Shape :  (20000, 39) 

Missing values (NaN and Inf) in lab sorted by highest to lowest:
atr_PnL_dollar      18148
atr_Duration        18148
atr_Exit_Date       18148
atr_PnL             18148
st_Duration         17193
st_Max              17193
st_Min              17193
atr_H_Duration      17193
atr_H_Exit_Date     17193
st_PnL              17193
atr_H_PnL           17193
atr_H_PnL_dollar    17193
st_Exit_Date        17193
Open_Trade          17192
Entry_Date          17192
T

In [45]:
### Merge

feat_obj = features_5m.copy()

print(list(feat_obj.columns),'\n')
print('Shape = ',feat_obj.shape,'\n')
print('Label_Counts = ', feat_obj['label'].value_counts(),'\n')

feat_obj.dropna(inplace=True)

nan_counts = feat_obj.isnull().sum()

print("Missing values (NaN and Inf) in feat_obj sorted by highest to lowest AFTER DROPPING COLUMNS:")

nan_counts_after = feat_obj.isnull().sum()
inf_counts_after = np.isinf(feat_obj.select_dtypes(include=np.number)).sum()
total_missing_counts_after = nan_counts_after + inf_counts_after
print(total_missing_counts_after.sort_values(ascending=False))




['label', 'Open_Trade', '10min_RSI_3_diff', '10min_RSI_3', '10min_Close_Kal_300', 'Close_Kal_300', '10min_slope_div_300_3_diff', 'slope_angle_300_6', 'MFI_7_diff', '10min_slope_lin_reg_signal_300_6 - slope_lin_reg_signal_900_3', 'slope_div_300_6 - slope_div_300_9', '10min_slope_signal_600_6', 'slope_lin_reg_signal_300_3', 'slope_lin_reg_600_9_diff', 'slope_angle_600_9_diff', 'slope_signal_300_6_diff', '10min_slope_signal_900_9_diff', 'Kal_600_minus_Kal_900', '10min_slope_lin_reg_signal_300_9 - slope_lin_reg_signal_900_9', '10min_slope_angle_600_3_diff', '10min_slope_lin_reg_signal_600_6', '10min_MFI_7_diff', '10min_RSI_7', '10min_slope_lin_reg_signal_900_3_diff', '10min_slope_lin_reg_signal_300_9', '10min_slope_lin_reg_signal_300_6 - slope_lin_reg_signal_900_9', 'MFI_3_diff', '10min_slope_lin_reg_signal_600_6 - slope_lin_reg_signal_900_3', '10min_slope_angle_300_3 - slope_angle_900_6', 'slope_signal_600_6 - slope_signal_900_6', '10min_slope_angle_900_9_diff', '10min_slope_angle_300_9_d

In [46]:
# Split the data into 70% train and 30% test based on index
train_size = int(0.7 * len(feat_obj))
train = feat_obj.iloc[:train_size]
test  = feat_obj.iloc[train_size:]

print("Shape of train_data:", train.shape)
print("Shape of test_data:", test.shape)

Shape of train_data: (1965, 236)
Shape of test_data: (843, 236)


In [47]:
### Define Train dataframe

print('Train_Min_Date', train.index.min())
print('Train_Max_Date', train.index.max(),'\n')

print('Test_Min_Date', test.index.min())
print('Test_Max_Date', test.index.max(),'\n')

print('Train_Columns : ',list(train.columns), '\n')
print('Test_Columns : ',list(test.columns))

Train_Min_Date 2025-04-15 04:25:00
Train_Max_Date 2025-06-25 18:25:00 

Test_Min_Date 2025-06-25 18:40:00
Test_Max_Date 2025-07-25 23:45:00 

Train_Columns :  ['label', 'Open_Trade', '10min_RSI_3_diff', '10min_RSI_3', '10min_Close_Kal_300', 'Close_Kal_300', '10min_slope_div_300_3_diff', 'slope_angle_300_6', 'MFI_7_diff', '10min_slope_lin_reg_signal_300_6 - slope_lin_reg_signal_900_3', 'slope_div_300_6 - slope_div_300_9', '10min_slope_signal_600_6', 'slope_lin_reg_signal_300_3', 'slope_lin_reg_600_9_diff', 'slope_angle_600_9_diff', 'slope_signal_300_6_diff', '10min_slope_signal_900_9_diff', 'Kal_600_minus_Kal_900', '10min_slope_lin_reg_signal_300_9 - slope_lin_reg_signal_900_9', '10min_slope_angle_600_3_diff', '10min_slope_lin_reg_signal_600_6', '10min_MFI_7_diff', '10min_RSI_7', '10min_slope_lin_reg_signal_900_3_diff', '10min_slope_lin_reg_signal_300_9', '10min_slope_lin_reg_signal_300_6 - slope_lin_reg_signal_900_9', 'MFI_3_diff', '10min_slope_lin_reg_signal_600_6 - slope_lin_reg_sign

# Results


In [48]:
results(train, pnl_column='st_PnL')

KeyError: 'st_PnL'

In [None]:
results(test, pnl_column= 'st_PnL')


# ML


## Train

In [49]:
### When using train_test_split it applies the scaler to X_train only
start_feature = train.columns.get_loc('10min_RSI_3_diff')
train_features = [col for col in train.columns[start_feature:] if col != 'label']

X = train.loc[:, train_features]
y = train['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

print("Shape of X_train:", X_train.shape)
print("Shape of X_test:",  X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:",  y_test.shape)

Shape of X_train: (1375, 234)
Shape of X_test: (590, 234)
Shape of y_train: (1375,)
Shape of y_test: (590,)


In [50]:
print(y_train.value_counts())
print(y_test.value_counts())

label
1    719
0    656
Name: count, dtype: int64
label
1    343
0    247
Name: count, dtype: int64


In [51]:
### Rebalance Data frame due to the low number of class 1
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Now train your model on the resampled data
print(X_train_resampled.value_counts(),'\n')
print(y_train_resampled.value_counts())

10min_RSI_3_diff  10min_RSI_3  10min_Close_Kal_300  Close_Kal_300  10min_slope_div_300_3_diff  slope_angle_300_6  MFI_7_diff  10min_slope_lin_reg_signal_300_6 - slope_lin_reg_signal_900_3  slope_div_300_6 - slope_div_300_9  10min_slope_signal_600_6  slope_lin_reg_signal_300_3  slope_lin_reg_600_9_diff  slope_angle_600_9_diff  slope_signal_300_6_diff  10min_slope_signal_900_9_diff  Kal_600_minus_Kal_900  10min_slope_lin_reg_signal_300_9 - slope_lin_reg_signal_900_9  10min_slope_angle_600_3_diff  10min_slope_lin_reg_signal_600_6  10min_MFI_7_diff  10min_RSI_7  10min_slope_lin_reg_signal_900_3_diff  10min_slope_lin_reg_signal_300_9  10min_slope_lin_reg_signal_300_6 - slope_lin_reg_signal_900_9  MFI_3_diff  10min_slope_lin_reg_signal_600_6 - slope_lin_reg_signal_900_3  10min_slope_angle_300_3 - slope_angle_900_6  slope_signal_600_6 - slope_signal_900_6  10min_slope_angle_900_9_diff  10min_slope_angle_300_9_diff  slope_signal_600_3 - slope_signal_900_3  slope_signal_900_6  slope_signal_600_

In [52]:
# --------------- Classifier 1: XGBoost ------------------------------------------
xgb = XGBClassifier(n_estimators=15, max_depth=3,
                    random_state=42, eval_metric='logloss')

# --------------- Classifier 2: Logistic Regression Classifier------------------------------------------
lr = LogisticRegression(random_state=42)

# --------------- Classifier 3: AdaBoost Classifier ------------------------------------------
ada = AdaBoostClassifier(n_estimators=15, random_state=42)

# --------------- Classifier 4: SVM------------------------------------------
svc = svm.SVC(kernel='rbf', probability=True, random_state=42)

# Define a list to store the different models
estimator = []
estimator.append(('LR', lr))
estimator.append(('XGB', xgb))
estimator.append(('ada', ada))
estimator.append(('SVC', svc))

In [53]:
# --------------- Classifier 1: XGBoost ------------------------------------------
xgb = XGBClassifier(n_estimators=15, max_depth=3,
                    random_state=42, eval_metric='logloss')

# --------------- Classifier 2: Logistic Regression Classifier------------------------------------------
lr = LogisticRegression(random_state=42)

# --------------- Classifier 3: AdaBoost Classifier ------------------------------------------
ada = AdaBoostClassifier(n_estimators=15, random_state=42)

# --------------- Classifier 4: SVM------------------------------------------
svc = svm.SVC(kernel='rbf', probability=True, random_state=42)

# Define a list to store the different models
estimator = []
estimator.append(('LR', lr))
estimator.append(('XGB', xgb))
estimator.append(('ada', ada))
estimator.append(('SVC', svc))

In [54]:
### Implement voting classifier with hard voting

ml_model = VotingClassifier(estimators=estimator, voting='soft')
ml_model.fit(X_train_resampled, y_train_resampled)

In [55]:
y_true = y_test
y_pred = ml_model.predict(X_test)
y_proba_default = ml_model.predict_proba(X_test)[:, 1]

conf_matrix = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

class_report = classification_report(y_true, y_pred)
print("\nClassification Report:")
print(class_report)

probability_summary = confusion_probability_summary(y_true, y_pred, y_proba_default)
print("\nProbability Summary by Confusion Outcome:")
print(probability_summary)


Confusion Matrix:
[[132 115]
 [138 205]]

Classification Report:
              precision    recall  f1-score   support

           0       0.49      0.53      0.51       247
           1       0.64      0.60      0.62       343

    accuracy                           0.57       590
   macro avg       0.56      0.57      0.56       590
weighted avg       0.58      0.57      0.57       590


Probability Summary by Confusion Outcome:
                Count  Mean Probability  Std Probability
Outcome                                                 
True Positive     205          0.577241         0.056312
False Positive    115          0.561796         0.047623
True Negative     132          0.445514         0.035898
False Negative    138          0.446376         0.041590


In [56]:
# Evaluate ML model using a 0.7 probability threshold for class 1
y_proba = ml_model.predict_proba(X_test)[:, 1]
y_pred_threshold = (y_proba >= 0.7).astype(int)

conf_matrix_threshold = confusion_matrix(y_true, y_pred_threshold)
print("Confusion Matrix (Threshold 0.7):")
print(conf_matrix_threshold)

class_report_threshold = classification_report(y_true, y_pred_threshold)
print("\nClassification Report (Threshold 0.7):")
print(class_report_threshold)

probability_summary_threshold = confusion_probability_summary(y_true, y_pred_threshold, y_proba)
print("\nProbability Summary by Confusion Outcome (Threshold 0.7):")
print(probability_summary_threshold)


Confusion Matrix (Threshold 0.7):
[[245   2]
 [335   8]]

Classification Report (Threshold 0.7):
              precision    recall  f1-score   support

           0       0.42      0.99      0.59       247
           1       0.80      0.02      0.05       343

    accuracy                           0.43       590
   macro avg       0.61      0.51      0.32       590
weighted avg       0.64      0.43      0.27       590


Probability Summary by Confusion Outcome (Threshold 0.7):
                Count  Mean Probability  Std Probability
Outcome                                                 
True Positive       8          0.726667         0.018372
False Positive      2          0.734434         0.000478
True Negative     245          0.497737         0.068535
False Negative    335          0.519764         0.076570


In [57]:
### Save ML Model
model_path = root_data + 'Models/'+symbol+'_'+direction+'_ml_model.joblib'
joblib.dump(ml_model, model_path)
print(f"Model saved successfully at: {model_path}")

Model saved successfully at: /content/drive/MyDrive/Course Folder/Forex/XAUUSD/Models/XAUUSD_Short_ml_model.joblib


## Meta

In [58]:
### Import ML Model
model_path = root_data+'Models/'+symbol+'_'+direction+'_ml_model.joblib'
ml_model = joblib.load(model_path)
print(f"Model loaded successfully from: {model_path}")

Model loaded successfully from: /content/drive/MyDrive/Course Folder/Forex/XAUUSD/Models/XAUUSD_Short_ml_model.joblib


In [59]:
#train.head(5)

In [60]:
### Import Data
start_feature = train.columns.get_loc('10min_RSI_3_diff')
train_features = [col for col in train.columns[start_feature:] if col != 'label']

# Select the feature columns from the 'train' DataFrame
X_train_features = train[train_features]

train['label_ml'] = ml_model.predict(X_train_features)

prediction_probabilities = ml_model.predict_proba(X_train_features)
train['prob_0'] = prediction_probabilities[:, 0]
train['prob_1'] = prediction_probabilities[:, 1]

# Additional outputs of the initial model to feed into the meta model.

#meta_manual_features = ['label_ml', 'prob_1', 'prob_0']
meta_manual_features = ['prob_1']
meta_features = train_features + meta_manual_features
X_meta_features = train[meta_features]

#train.head()

In [61]:
train.to_csv(root_data + 'Results/'+symbol+'Meta_Prob_M5+M10_train_l.csv')

In [62]:
# Adjust the manual meta features in 'meta_manual_features' if different inputs are required.
meta = meta_features.copy()
X_meta_features = train[meta]

X = train[meta]
y = train['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: (1375, 235)
Shape of X_test: (590, 235)
Shape of y_train: (1375,)
Shape of y_test: (590,)


In [63]:
print(y_train.value_counts())
print(y_test.value_counts())

label
1    719
0    656
Name: count, dtype: int64
label
1    343
0    247
Name: count, dtype: int64


In [64]:
### Rebalance Data frame due to the low number of class 1

smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [65]:
# --------------- Classifier 1: XGBoost ------------------------------------------
xgb = XGBClassifier(n_estimators=15, max_depth=3,
                    random_state=42, eval_metric='logloss')

# --------------- Classifier 2: Logistic Regression Classifier------------------------------------------
lr = LogisticRegression(random_state=42)

# --------------- Classifier 3: AdaBoost Classifier ------------------------------------------
ada = AdaBoostClassifier(n_estimators=15, random_state=42)

# --------------- Classifier 4: SVM------------------------------------------
svc = svm.SVC(kernel='rbf', probability=True, random_state=42)

# Define a list to store the different models
estimator = []
estimator.append(('LR', lr))
estimator.append(('XGB', xgb))
estimator.append(('ada', ada))
estimator.append(('SVC', svc))

In [66]:
### Implement voting classifier with hard voting

meta_ml_model = VotingClassifier(estimators=estimator, voting='soft')
meta_ml_model.fit(X_train_resampled, y_train_resampled)

In [67]:
y_true = y_test
y_pred = meta_ml_model.predict(X_test)
meta_y_proba_default = meta_ml_model.predict_proba(X_test)[:, 1]

conf_matrix = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

class_report = classification_report(y_true, y_pred)
print("\nClassification Report:")
print(class_report)

probability_summary = confusion_probability_summary(y_true, y_pred, meta_y_proba_default)
print("\nProbability Summary by Confusion Outcome:")
print(probability_summary)


Confusion Matrix:
[[138 109]
 [138 205]]

Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.56      0.53       247
           1       0.65      0.60      0.62       343

    accuracy                           0.58       590
   macro avg       0.58      0.58      0.58       590
weighted avg       0.59      0.58      0.58       590


Probability Summary by Confusion Outcome:
                Count  Mean Probability  Std Probability
Outcome                                                 
True Positive     205          0.669182         0.092511
False Positive    109          0.652047         0.080804
True Negative     138          0.360181         0.084130
False Negative    138          0.359298         0.088929


In [68]:
# Evaluate Meta ML model using a 0.7 probability threshold for class 1
meta_y_proba = meta_ml_model.predict_proba(X_test)[:, 1]
meta_y_pred_threshold = (meta_y_proba >= 0.7).astype(int)

meta_conf_matrix_threshold = confusion_matrix(y_true, meta_y_pred_threshold)
print("Confusion Matrix (Threshold 0.7):")
print(meta_conf_matrix_threshold)

meta_class_report_threshold = classification_report(y_true, meta_y_pred_threshold)
print("\nClassification Report (Threshold 0.7):")
print(meta_class_report_threshold)

meta_probability_summary_threshold = confusion_probability_summary(y_true, meta_y_pred_threshold, meta_y_proba)
print("\nProbability Summary by Confusion Outcome (Threshold 0.7):")
print(meta_probability_summary_threshold)


Confusion Matrix (Threshold 0.7):
[[216  31]
 [263  80]]

Classification Report (Threshold 0.7):
              precision    recall  f1-score   support

           0       0.45      0.87      0.60       247
           1       0.72      0.23      0.35       343

    accuracy                           0.50       590
   macro avg       0.59      0.55      0.47       590
weighted avg       0.61      0.50      0.45       590


Probability Summary by Confusion Outcome (Threshold 0.7):
                Count  Mean Probability  Std Probability
Outcome                                                 
True Positive      80          0.767738         0.037558
False Positive     31          0.752038         0.036170
True Negative     216          0.451226         0.142441
False Negative    263          0.476602         0.144004


In [69]:
train['meta_results'] = meta_ml_model.predict(X_meta_features)

In [70]:
train.to_csv(root_data + 'Results/'+symbol+'train_l_Signals & Meta_Signals.csv')

In [71]:
### Save ML Model
model_path = root_data + 'Models/'+symbol+'_'+direction+'_Meta_ml_model.joblib'
joblib.dump(meta_ml_model, model_path)
print(f"Model saved successfully at: {model_path}")

Model saved successfully at: /content/drive/MyDrive/Course Folder/Forex/XAUUSD/Models/XAUUSD_Short_Meta_ml_model.joblib


## PnL Train

In [72]:
train['ml_results'] = np.where(train['label_ml']==1, train['st_PnL'],0)
results(train, pnl_column='ml_results')

KeyError: 'st_PnL'

In [73]:
train['meta_ml_results'] = np.where(train['meta_results']==1, train['st_PnL'],0)
results(train, pnl_column='meta_ml_results')

KeyError: 'st_PnL'


# Test


## Results_ML

In [74]:
### Import ML Model

model_path = root_data+'Models/'+symbol+'_'+direction+'_ml_model.joblib'
ml_model = joblib.load(model_path)
print(f"Model loaded successfully from: {model_path}")

Model loaded successfully from: /content/drive/MyDrive/Course Folder/Forex/XAUUSD/Models/XAUUSD_Short_ml_model.joblib


In [75]:
### Import Meta Model

model_path = root_data+'Models/'+symbol+'_'+direction+'_Meta_ml_model.joblib'
meta_ml_model = joblib.load(model_path)
print(f"Model loaded successfully from: {model_path}")

Model loaded successfully from: /content/drive/MyDrive/Course Folder/Forex/XAUUSD/Models/XAUUSD_Short_Meta_ml_model.joblib


In [78]:
# Select the feature columns from the 'test' DataFrame
X_test_features = test[train_features]

test['label_ml'] = ml_model.predict(X_test_features)

prediction_probabilities_test = ml_model.predict_proba(X_test_features)
test['prob_0'] = prediction_probabilities_test[:, 0]
test['prob_1'] = prediction_probabilities_test[:, 1]

# Calculate and display Confusion Matrix
conf_matrix = confusion_matrix(test['label'], test['label_ml'])
print("Confusion Matrix:")
print(conf_matrix)

# Calculate and display Classification Report
class_report = classification_report(test['label'], test['label_ml'])
print("\nClassification Report:")
print(class_report)

probability_summary_test = confusion_probability_summary(test['label'], test['label_ml'], test['prob_1'])
print("\nProbability Summary by Confusion Outcome:")
print(probability_summary_test)

Confusion Matrix:
[[188 197]
 [183 275]]

Classification Report:
              precision    recall  f1-score   support

           0       0.51      0.49      0.50       385
           1       0.58      0.60      0.59       458

    accuracy                           0.55       843
   macro avg       0.54      0.54      0.54       843
weighted avg       0.55      0.55      0.55       843


Probability Summary by Confusion Outcome:
                Count  Mean Probability  Std Probability
Outcome                                                 
True Positive     275          0.574630         0.054289
False Positive    197          0.558941         0.041950
True Negative     188          0.444203         0.040618
False Negative    183          0.443418         0.038438


In [80]:
# Calculate and display Confusion Matrix
conf_matrix = confusion_matrix(test['label'], test['label_ml'])
print("Confusion Matrix:")
print(conf_matrix)

# Calculate and display Classification Report
class_report = classification_report(test['label'], test['label_ml'])
print("\nClassification Report:")
print(class_report)

probability_summary_test = confusion_probability_summary(test['label'], test['label_ml'], test['prob_1'])
print("\nProbability Summary by Confusion Outcome:")
print(probability_summary_test)


Confusion Matrix:
[[188 197]
 [183 275]]

Classification Report:
              precision    recall  f1-score   support

           0       0.51      0.49      0.50       385
           1       0.58      0.60      0.59       458

    accuracy                           0.55       843
   macro avg       0.54      0.54      0.54       843
weighted avg       0.55      0.55      0.55       843


Probability Summary by Confusion Outcome:
                Count  Mean Probability  Std Probability
Outcome                                                 
True Positive     275          0.574630         0.054289
False Positive    197          0.558941         0.041950
True Negative     188          0.444203         0.040618
False Negative    183          0.443418         0.038438


In [81]:
# Evaluate ML model on the test set using a 0.7 probability threshold for class 1
test['label_ml_prob_70'] = np.where(test['prob_1'] >= 0.7, 1, 0)

conf_matrix_prob_70 = confusion_matrix(test['label'], test['label_ml_prob_70'])
print("Confusion Matrix (Threshold 0.7):")
print(conf_matrix_prob_70)

class_report_prob_70 = classification_report(test['label'], test['label_ml_prob_70'])
print("\nClassification Report (Threshold 0.7):")
print(class_report_prob_70)

probability_summary_prob_70 = confusion_probability_summary(test['label'], test['label_ml_prob_70'], test['prob_1'])
print("\nProbability Summary by Confusion Outcome (Threshold 0.7):")
print(probability_summary_prob_70)


Confusion Matrix (Threshold 0.7):
[[385   0]
 [452   6]]

Classification Report (Threshold 0.7):
              precision    recall  f1-score   support

           0       0.46      1.00      0.63       385
           1       1.00      0.01      0.03       458

    accuracy                           0.46       843
   macro avg       0.73      0.51      0.33       843
weighted avg       0.75      0.46      0.30       843


Probability Summary by Confusion Outcome (Threshold 0.7):
                Count  Mean Probability  Std Probability
Outcome                                                 
True Positive       6          0.732589         0.025616
False Positive      0               NaN              NaN
True Negative     385          0.502913         0.070679
False Negative    452          0.519410         0.077282


In [83]:
# Select the feature columns for the meta model from the 'test' DataFrame
X_meta_features_test = test[meta]

# Predict using the meta model on the test set
test['meta_label'] = meta_ml_model.predict(X_meta_features_test)
meta_prediction_probabilities_test = meta_ml_model.predict_proba(X_meta_features_test)
test['meta_prob_0'] = meta_prediction_probabilities_test[:, 0]
test['meta_prob_1'] = meta_prediction_probabilities_test[:, 1]


# Calculate and display Confusion Matrix
conf_matrix = confusion_matrix(test['label'], test['meta_label'])
print("Confusion Matrix:")
print(conf_matrix)

# Calculate and display Classification Report
class_report = classification_report(test['label'], test['meta_label'])
print("\nClassification Report:")
print(class_report)

meta_probability_summary_test = confusion_probability_summary(test['label'], test['meta_label'], test['meta_prob_1'])
print("\nProbability Summary by Confusion Outcome:")
print(meta_probability_summary_test)

Confusion Matrix:
[[199 186]
 [189 269]]

Classification Report:
              precision    recall  f1-score   support

           0       0.51      0.52      0.51       385
           1       0.59      0.59      0.59       458

    accuracy                           0.56       843
   macro avg       0.55      0.55      0.55       843
weighted avg       0.56      0.56      0.56       843


Probability Summary by Confusion Outcome:
                Count  Mean Probability  Std Probability
Outcome                                                 
True Positive     269          0.670808         0.092929
False Positive    186          0.644196         0.082007
True Negative     199          0.358360         0.090912
False Negative    189          0.352198         0.086966


In [84]:
# Calculate and display Confusion Matrix
conf_matrix = confusion_matrix(test['label'], test['meta_label'])
print("Confusion Matrix:")
print(conf_matrix)

# Calculate and display Classification Report
class_report = classification_report(test['label'], test['meta_label'])
print("\nClassification Report:")
print(class_report)

meta_probability_summary_test = confusion_probability_summary(test['label'], test['meta_label'], test['meta_prob_1'])
print("\nProbability Summary by Confusion Outcome:")
print(meta_probability_summary_test)


Confusion Matrix:
[[199 186]
 [189 269]]

Classification Report:
              precision    recall  f1-score   support

           0       0.51      0.52      0.51       385
           1       0.59      0.59      0.59       458

    accuracy                           0.56       843
   macro avg       0.55      0.55      0.55       843
weighted avg       0.56      0.56      0.56       843


Probability Summary by Confusion Outcome:
                Count  Mean Probability  Std Probability
Outcome                                                 
True Positive     269          0.670808         0.092929
False Positive    186          0.644196         0.082007
True Negative     199          0.358360         0.090912
False Negative    189          0.352198         0.086966


In [85]:
# Evaluate Meta ML model on the test set using a 0.7 probability threshold for class 1
test['meta_label_prob_70'] = np.where(test['meta_prob_1'] >= 0.7, 1, 0)

meta_conf_matrix_prob_70 = confusion_matrix(test['label'], test['meta_label_prob_70'])
print("Confusion Matrix (Threshold 0.7):")
print(meta_conf_matrix_prob_70)

meta_class_report_prob_70 = classification_report(test['label'], test['meta_label_prob_70'])
print("\nClassification Report (Threshold 0.7):")
print(meta_class_report_prob_70)

meta_probability_summary_prob_70 = confusion_probability_summary(test['label'], test['meta_label_prob_70'], test['meta_prob_1'])
print("\nProbability Summary by Confusion Outcome (Threshold 0.7):")
print(meta_probability_summary_prob_70)


Confusion Matrix (Threshold 0.7):
[[331  54]
 [352 106]]

Classification Report (Threshold 0.7):
              precision    recall  f1-score   support

           0       0.48      0.86      0.62       385
           1       0.66      0.23      0.34       458

    accuracy                           0.52       843
   macro avg       0.57      0.55      0.48       843
weighted avg       0.58      0.52      0.47       843


Probability Summary by Confusion Outcome (Threshold 0.7):
                Count  Mean Probability  Std Probability
Outcome                                                 
True Positive     106          0.768361         0.037207
False Positive     54          0.748963         0.033761
True Negative     331          0.455257         0.142104
False Negative    352          0.470359         0.147321


## Results_PnL

In [86]:
# Calculate and display Confusion Matrix
conf_matrix = confusion_matrix(test['label'], test['label_ml'])
print("Confusion Matrix:")
print(conf_matrix)

# Calculate and display Classification Report
class_report = classification_report(test['label'], test['label_ml'])
print("\nClassification Report:")
print(class_report)

probability_summary_test = confusion_probability_summary(test['label'], test['label_ml'], test['prob_1'])
print("\nProbability Summary by Confusion Outcome:")
print(probability_summary_test)


Confusion Matrix:
[[188 197]
 [183 275]]

Classification Report:
              precision    recall  f1-score   support

           0       0.51      0.49      0.50       385
           1       0.58      0.60      0.59       458

    accuracy                           0.55       843
   macro avg       0.54      0.54      0.54       843
weighted avg       0.55      0.55      0.55       843


Probability Summary by Confusion Outcome:
                Count  Mean Probability  Std Probability
Outcome                                                 
True Positive     275          0.574630         0.054289
False Positive    197          0.558941         0.041950
True Negative     188          0.444203         0.040618
False Negative    183          0.443418         0.038438


In [87]:
# Calculate and display Confusion Matrix
conf_matrix = confusion_matrix(test['label'], test['meta_label'])
print("Confusion Matrix:")
print(conf_matrix)

# Calculate and display Classification Report
class_report = classification_report(test['label'], test['meta_label'])
print("\nClassification Report:")
print(class_report)

meta_probability_summary_test = confusion_probability_summary(test['label'], test['meta_label'], test['meta_prob_1'])
print("\nProbability Summary by Confusion Outcome:")
print(meta_probability_summary_test)


Confusion Matrix:
[[199 186]
 [189 269]]

Classification Report:
              precision    recall  f1-score   support

           0       0.51      0.52      0.51       385
           1       0.59      0.59      0.59       458

    accuracy                           0.56       843
   macro avg       0.55      0.55      0.55       843
weighted avg       0.56      0.56      0.56       843


Probability Summary by Confusion Outcome:
                Count  Mean Probability  Std Probability
Outcome                                                 
True Positive     269          0.670808         0.092929
False Positive    186          0.644196         0.082007
True Negative     199          0.358360         0.090912
False Negative    189          0.352198         0.086966


# Pendientes
