In [None]:
import warnings

import pandas as pd
import xgboost as xgb
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit, train_test_split

# Suppress every warning of category UserWarning for the rest of the session.
warnings.filterwarnings(action='ignore', category=UserWarning)

print("Loading labeled EURUSD data...")
data_path = '../../data/eurusd_macro_h4_labeled.csv'
# The 'time' column becomes the (date-parsed) index; any row that still
# contains a missing value after loading is discarded.
df = pd.read_csv(data_path, index_col='time', parse_dates=True).dropna()
print("Data loaded successfully.")

In [None]:
# Every column except the raw price/volume columns and the label is a feature.
# Column order is kept exactly as it appears in the frame.
excluded_cols = {'open', 'high', 'low', 'close', 'volume', 'target'}
features = [name for name in df.columns if name not in excluded_cols]

X = df[features]
y = df['target']

# Hold out the last 30% of rows as the test set; shuffle=False keeps the
# original row order, so the test rows all come after the training rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, shuffle=False
)

# Re-code the label -1 as 2 in both partitions; other label values are
# left untouched.
label_remap = {-1: 2}
y_train_mapped = y_train.replace(label_remap)
y_test_mapped = y_test.replace(label_remap)
print("Data prepared for tuning.")

# Hyperparameter search space: 7 parameters with 2 candidate values each,
# i.e. 2**7 = 128 combinations to evaluate.
param_grid = dict(
    n_estimators=[200, 500],
    learning_rate=[0.01, 0.05],
    max_depth=[3, 5],
    gamma=[1, 5],
    reg_lambda=[5, 10],
    subsample=[0.7, 0.9],
    colsample_bytree=[0.7, 0.9],
)

In [None]:
# Multi-class XGBoost classifier. 'multi:softprob' makes the booster output
# per-class probabilities; 'mlogloss' is the matching evaluation metric.
model = xgb.XGBClassifier(
    objective='multi:softprob',
    num_class=3,
    use_label_encoder=False,
    eval_metric='mlogloss'
)

# Cross-validation scheme.
# The training rows are an ordered time series (the hold-out split is made
# with shuffle=False for that reason). Passing a plain integer `cv` would give
# unshuffled stratified K-fold, in which some folds are trained on later bars
# and validated on earlier ones -- look-ahead leakage that inflates the scores
# used to pick hyperparameters. TimeSeriesSplit always trains on an earlier
# window and validates on the window immediately after it.
# NOTE(review): assumes X_train is sorted chronologically and that every
# expanding training window contains all three classes -- confirm on the data.
cv_splitter = TimeSeriesSplit(n_splits=3)

# Exhaustive search over `param_grid`, scored by weighted F1.
# n_jobs=1 evaluates the candidates one after another; set n_jobs=-1 to
# evaluate candidates in parallel across all CPU cores instead.
# verbose=3 prints the score and timing of every individual fit.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='f1_weighted',
    n_jobs=1,
    cv=cv_splitter,
    verbose=3
)

print("Starting Hyperparameter Search for EURUSD... This will take a long time.")
grid_search.fit(X_train, y_train_mapped)
print("Search complete.")

In [None]:
import pandas as pd
from sklearn.metrics import classification_report

print("\n--- Best Parameters Found for EURUSD ---")
print(grid_search.best_params_)

print("\n--- Performance of the Best Model on the Test Set ---")
# Estimator refit by the search with the best-scoring parameter combination.
best_model = grid_search.best_estimator_
y_pred_mapped = best_model.predict(X_test)

# If predict() hands back a 2-D array, reduce it to the index of the largest
# value in each row; a 1-D result is used as-is.
y_pred = y_pred_mapped.argmax(axis=1) if y_pred_mapped.ndim > 1 else y_pred_mapped

# Undo the training-time re-coding (2 -> -1) so the predictions use the same
# label values as y_test.
y_pred = pd.Series(y_pred).replace({2: -1}).to_numpy()

report = classification_report(y_test, y_pred, zero_division=0)
print(report)