In [None]:
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import xgboost as xgb
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# Location of the labeled EURUSD dataset, relative to this notebook.
data_path = '../../data/eurusd_h1_labeled.csv'

# Read the CSV with the 'time' column as a parsed datetime index and discard
# every row that contains at least one missing value, in a single chained step.
df = (
    pd.read_csv(data_path, index_col='time', parse_dates=True)
    .dropna()
)

print("EURUSD labeled data loaded successfully.")
print(f"Data shape: {df.shape}")

In [None]:
# Every column except the raw price/volume fields and the label is a model input.
# Column order of `df` is preserved.
non_feature_columns = ['open', 'high', 'low', 'close', 'volume', 'target']
features = list(df.columns[~df.columns.isin(non_feature_columns)])
X = df[features]
y = df['target']

# shuffle=False keeps the rows in their original order: the first 80% are used
# for training and the last 20% for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

print("Data split into training and testing sets.")

# Weight for label 0 (HOLD) = (#label 1 + #label -1) / #label 0, computed on the
# training labels only. A missing label counts as 0 in the numerator; a missing
# label 0 falls back to a denominator of 1 so the division cannot fail.
class_counts = y_train.value_counts()
n_positive = class_counts.get(1, 0)
n_negative = class_counts.get(-1, 0)
n_hold = class_counts.get(0, 1)
weight_for_hold = (n_positive + n_negative) / n_hold
print(f"Calculated weight for HOLD class: {weight_for_hold:.2f}")


def _weight_for_label(label):
    """Return the HOLD weight for label 0 and a weight of 1 for any other label."""
    return weight_for_hold if label == 0 else 1


sample_weights = y_train.apply(_weight_for_label)

# Hyperparameters collected in one place and unpacked into the classifier.
# NOTE(review): `use_label_encoder` is reported as deprecated by recent XGBoost
# releases — confirm against the installed version before removing it.
model_params = {
    'objective': 'multi:softprob',
    'num_class': 3,
    'use_label_encoder': False,
    'eval_metric': 'mlogloss',
    'colsample_bytree': 0.9,
    'gamma': 5,
    'learning_rate': 0.05,
    'max_depth': 5,
    'n_estimators': 500,
    'reg_lambda': 5,
    'subsample': 0.7,
}
model = xgb.XGBClassifier(**model_params)

print("\nModel initialized with the best parameters found.")

# Re-encode label -1 as 2 so the three classes become 0, 1 and 2; labels 0 and 1
# are left as they are.
label_encoding = {-1: 2}
y_train_mapped = y_train.replace(label_encoding)
y_test_mapped = y_test.replace(label_encoding)

print("\nTraining the final EURUSD model with sample weights...")
model.fit(
    X_train,
    y_train_mapped,
    sample_weight=sample_weights.values,
)
print("Model training complete.")

In [None]:
# Score the held-out rows with the trained classifier.
y_pred_output = model.predict(X_test)

# A 1-D result is taken as the encoded class labels themselves; anything with
# more dimensions is treated as per-class scores and reduced with argmax.
if y_pred_output.ndim <= 1:
    print("Model outputted final predictions directly.")
    y_pred_mapped = y_pred_output
else:
    print("Model outputted probabilities. Finding max...")
    y_pred_mapped = y_pred_output.argmax(axis=1)

# Translate encoded class 2 back to label -1 so predictions are comparable
# with the labels in y_test.
y_pred = (
    pd.Series(y_pred_mapped)
    .replace({2: -1})
    .values
)

print("\n--- Final EURUSD Model Performance ---")
print(classification_report(y_test, y_pred, zero_division=0))

# One label list fixes the row/column order of the matrix and its tick labels.
class_labels = [-1, 0, 1]

print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [None]:
# Persist the trained classifier to disk with joblib.
model_path = '../../models/eurusd_h1.pkl'

# joblib.dump does not create missing directories. Make sure the target folder
# exists first so a finished training run is not lost to a FileNotFoundError.
# Nothing happens when the folder is already there.
Path(model_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model, model_path)

print(f"\nSUCCESS: Final EURUSD model saved successfully to: '{model_path}'")