In [14]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
from src.managers.cross_validation import TimeSeriesCrossValidator
import wandb
print(wandb.__version__)

# Load the dataset
df = pd.read_csv('src/managers/data/processed_data.csv', index_col="Date", parse_dates=True)

0.18.1


## Applying XGBoost to Fixed Income Signals Prediction

#### Overview

**Task**: Predict the direction in 1, 3, and 6 month intervals for key asset classes across the Fixed Income space using XGBoost. These asset classes are:
- *US Treasuries* (1, 2, 5, 7, 10, 20, and 30 year durations)
- *US High Yield Bonds*
- *US Investment Grade Bonds*

**Method**: Utilize rolling windows 

In [11]:
# Create a dict of the hyperparameters and possible values to search over.
# TO DO: Search over once we figure out wandb.
# hyperparameters = {
#     "n_estimators": [100, 200, 300],
#     "max_depth": [3, 4, 5],
#     "learning_rate": [0.1, 0.01, 0.001],
#     "subsample": [0.5, 0.7, 1],
#     "colsample_bytree": [0.5, 0.7, 1],
#     "gamma": [0, 0.5, 1, 1.5, 2, 5]
# }
config = {
    "objective": "binary:logistic",
    "n_estimator": 100,
    "max_depth": 5,
    "learning_rate": 0.1,
    "subsample": 0.7,
    "colsample_bytree": 0.7,
    "gamma": 0
}

In [15]:
wandb.login()

WandbCoreNotAvailableError: Looks like wandb-core is not compiled for your system (Darwin-23.5.0-x86_64-i386-64bit): Please contact support at support@wandb.com to request `wandb-core` support for your system.

In [16]:
# Perform hyperparameter tuning using the TimeSeriesCrossValidator w/ the following params.
TRAIN_SIZE = 12
STEP_FORWARD_SIZE = 1
TEST_SIZE = 12

# Initialize the TimeSeriesCrossValidator and create the rolling window.
cv = TimeSeriesCrossValidator(df)
rolling_window = cv.perform_cross_validation(method='rolling', initial_train_size=10, step_size=10, test_size=5)

# Use the XGB model and train/test using the rolling splits.
xgb_clf = XGBClassifier(objective='binary:logistic', eval_metric='logloss')
target_col = "BAMLHYH0A0HYM2TRIV_1_mo_return_direction"
# Find the column index of the target column
target_col_index = df.columns.get_loc(target_col)

# Loop through the rolling window and train - test the model. Record the results to wandb.
for i, (train, test) in enumerate(rolling_window):
    # Initialize the wandb run.
    wandb.init(project="principal-capstone-v0", config=config)
    # Add params to WANDB
    wandb.config.TRAIN_SIZE = TRAIN_SIZE
    wandb.config.STEP_FORWARD_SIZE = STEP_FORWARD_SIZE
    wandb.config.TEST_SIZE = TEST_SIZE
    # Fit on the training data.
    xgb_clf.fit(train.iloc[:, :target_col_index], train.iloc[:, target_col_index])
    # Predict on the test data.
    preds = xgb_clf.predict(test.iloc[:, :target_col_index])
    # # Log the results to wandb.
    accuracy = accuracy_score(test.iloc[:, target_col_index], preds)
    precision, recall, f1score, support = precision_recall_fscore_support(test.iloc[:, target_col_index], preds, average='weighted')
    wandb.log({
        "train-daterange": f"{train.index[0]} - {train.index[-1]}",
        "test-daterange": f"{test.index[0]} - {test.index[-1]}",
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1score": f1score,
        "support": support
    })

2024-09-20 17:21:38,897 - INFO - Created 31 splits using rolling window method


WandbCoreNotAvailableError: Looks like wandb-core is not compiled for your system (Darwin-23.5.0-x86_64-i386-64bit): Please contact support at support@wandb.com to request `wandb-core` support for your system.

In [47]:
# Check python version
!python --version

Python 3.11.4
