### Importing Librarires

In [44]:
# Run in the system if never ran before
#pip install xgboost shap
#pip install pykalman

# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas_datareader.data as web
from datetime import datetime
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
import shap
from sklearn.inspection import permutation_importance
from pykalman import KalmanFilter

### Importing Variables

In [45]:
clean_bonds_long = pd.read_csv("clean_bond_returns_long.csv", index_col=0, parse_dates=True)
tbill = pd.read_csv("clean_tbill.csv", index_col=0, parse_dates=True)
processed_bonds_ret = pd.read_csv("processed_bond_returns.csv", index_col=0, parse_dates=True)
processed_index = pd.read_csv("processed_index_returns.csv", index_col=0, parse_dates=True)
processed_bonds_mcap = pd.read_csv("processed_bonds_mcap.csv", index_col=0, parse_dates=True)

## Model Development
**Processed Variable Overview**

| Variable Name           | Description                                                                 |
|-------------------------|-----------------------------------------------------------------------------|
| `clean_bonds_long`      | Long-format DataFrame of bond data — each row is a (date, CUSIP) pair.      |
| `tbill`                 | Series or DataFrame containing 1-month Treasury bill rates (risk-free rate).|
| `processed_bonds_ret`   | Pivoted DataFrame of bond **excess returns** — rows = dates, cols = CUSIPs. |
| `processed_bonds_mcap`  | Pivoted DataFrame of bond **market values** — same shape as above.          |
| `processed_index`       | DataFrame of the **corporate bond index** — includes price, return, mcap.   |


In [46]:
processed_bonds_ret

Unnamed: 0_level_0,000336AE7,00037BAA0,00037BAB8,00037BAC6,00037BAD4,00037BAE2,00037BAF9,00077QAA8,00077QAB6,00077QAC4,...,98978VAK9,98978VAL7,98978VAM5,98978VAN3,98978VAP8,98978VAQ6,98978VAS2,98978VAT0,989822AA9,U36964AK7
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-08-31,0.005084,,,,,,,0.008741,0.030161,,...,,,,,,,,,-0.013686,
2002-09-30,-0.056349,,,,,,,0.007913,0.033869,,...,,,,,,,,,0.062894,
2002-10-31,0.050924,,,,,,,0.004529,-0.043209,-0.003631,...,,,,,,,,,-0.131872,
2002-11-30,0.079503,,,,,,,-0.023597,0.026622,,...,,,,,,,,,0.151989,
2002-12-31,,,,,,,,0.028813,-0.029565,,...,,,,,,,,,0.047762,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-07-31,,,,,,,,,,,...,0.006461,0.013297,0.029045,0.014169,0.029238,,0.020666,0.028684,0.006188,
2021-08-31,,,,,,,,,,,...,-0.001388,-0.002873,-0.009335,-0.007120,-0.007815,,-0.005161,-0.007925,-0.000338,
2021-09-30,,,,,,,-0.009161,,,,...,-0.004144,-0.007522,-0.037348,-0.008975,-0.028702,,-0.013836,-0.029913,-0.004600,
2021-10-31,,,,,,,,,,,...,-0.007151,-0.007088,0.031418,-0.003406,0.021799,,-0.005331,0.032354,0.001179,


In [47]:
processed_index

Unnamed: 0_level_0,Idx_Price,Idx_Returns,Idx_Exc,Idx_MCap,Idx_Count
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2002-08-31,1257.87,0.026523,0.025106,1666173.29,3580
2002-09-30,1281.86,0.019072,0.017739,1701668.57,3586
2002-10-31,1264.65,-0.013426,-0.014659,1673274.91,3574
2002-11-30,1284.43,0.015641,0.014599,1685374.02,3535
2002-12-31,1323.64,0.030527,0.029527,1736694.47,3530
...,...,...,...,...,...
2021-07-31,3563.68,0.013688,0.013646,6962832.37,6838
2021-08-31,3552.90,-0.003025,-0.003050,6957292.32,6902
2021-09-30,3515.50,-0.010527,-0.010585,6876915.12,6938
2021-10-31,3524.22,0.002480,0.002430,6929726.61,6977


### Dynamic Bond Universe
- Writing a function to select the dynamic universe made up of investable bonds, for specific train and test windows.

In [48]:
def select_bond_universe (train_features: pd.DataFrame,
                         test_features: pd.DataFrame,
                         coverage_percent: float=0.8):
    '''
    A function that returns a DataFrame including bonds that are investable based on availability.
    Availability, in this context, means that a bond must have return values at least once in the
    test set, and should atleast have coverage_percent returns in training set.
    '''
    # Creating copies
    train_df = train_features.copy()
    test_df = test_features.copy()
    
    # Creating necessary variables
    n_rows = train_df.shape[0]
    
    # Dropping bonds which do not have any returns in the dataframes
    train_drop_list = train_df.columns[train_df.count() == 0].to_list()
    test_drop_list = test_df.columns[test_df.count() == 0].to_list()
    train_df = train_df.drop(columns=train_drop_list)
    test_df = test_df.drop(columns=test_drop_list)

    # Getting list of train bonds which have minimum coverage
    coverage_ratio = train_features.count() / n_rows
    train_cols = coverage_ratio[coverage_ratio >= coverage_percent].index
    
    # Using sets to get bonds common to both train and test datasets
    train_cols = set(train_cols)
    test_cols = set(test_df.columns)

    # Creating investable list using set intersection
    investable_list = list(train_cols & test_cols)
        
    # Getting the output dataframes with the selected bonds
    # Filling nan values in the train dataset with 0.0
    output_train = train_df[investable_list].copy()
    output_train.fillna(0.0, inplace=True)
    output_test = test_df[investable_list].copy()
    
    # Returning the outputs
    return {
        "investable_train" : output_train,
        'investable_test' : output_test
    }

### Gradient Boosting Framework

In [49]:
features_dict = select_bond_universe(
    processed_bonds_ret.iloc[0:24],
    processed_bonds_ret.iloc[[24]]
)
index_train = processed_index.iloc[0:24]
index_test = processed_index.iloc[[24]]
invest_train = features_dict['investable_train']
invest_test = features_dict['investable_test']
invest_train

Unnamed: 0_level_0,786514BA6,79549BGP6,263534BJ7,694032AV5,52517PQM6,743315AJ2,665772BN8,264399DK9,852060AM4,06050XCN1,...,94975CAK3,949740CA0,949746CH2,907818CP1,949746CK5,013104AJ3,2338E8DY2,046003DC0,345397GX5,939333AB6
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-08-31,0.038826,0.019291,0.02356,0.071615,0.009794,0.044147,0.197825,-0.008296,0.105365,0.087254,...,0.01431,0.013848,0.018277,0.030575,0.029901,0.022985,0.045868,0.045612,-0.011667,0.020023
2002-09-30,0.027613,0.015867,0.035639,0.002553,0.026928,0.069829,0.0,0.061939,-0.155374,-0.017685,...,0.021705,0.020281,0.024284,0.037436,0.010739,0.027313,0.008885,-0.002017,-0.011828,0.004567
2002-10-31,-0.025131,-0.000631,-0.019135,-0.028978,-0.0023,-0.016907,0.0,-0.070748,0.183388,-0.006825,...,0.000828,-0.013713,0.00344,-0.01838,0.001537,-0.004589,-0.050429,0.01134,-0.090618,-0.007298
2002-11-30,-0.015154,-0.011914,-0.004599,0.015656,0.002023,0.016206,0.053615,-0.019006,0.130567,0.027357,...,-0.024039,0.008238,-0.01772,0.0,-0.006631,-0.00627,0.024037,0.003088,0.13218,-0.003067
2002-12-31,0.031032,0.021447,0.033484,0.072503,0.0,-0.016537,-0.045502,0.201403,0.028056,0.004214,...,0.031586,-0.002246,0.023828,0.0,0.023813,0.023386,0.03406,0.014041,0.02104,0.029212
2003-01-31,0.029268,0.022402,-0.001318,-0.047875,0.0,0.087222,0.042753,-0.048619,-0.03678,0.008013,...,0.002807,0.007087,-0.000822,-0.012098,-0.004679,-0.003563,-0.018547,0.002441,-0.006734,0.007686
2003-02-28,0.018686,0.017619,0.016149,0.027985,0.014294,0.021881,0.052272,-0.005444,0.051228,0.03106,...,0.010966,0.019077,0.016226,0.01976,0.020862,0.022209,-0.010596,0.056713,0.001918,0.01319
2003-03-31,-0.00902,0.007093,-0.001885,0.006459,-0.000835,-0.011158,0.03689,0.005305,0.061261,0.01425,...,0.004645,0.007304,-0.00017,-0.004865,-0.003723,-0.004671,0.018277,-0.004536,-0.028449,0.002685
2003-04-30,-0.003824,-0.006754,0.006761,0.026664,0.00565,0.02477,-0.049309,0.040937,0.021955,-0.019002,...,0.001899,0.008583,0.003686,0.007086,-0.000893,0.001159,0.037622,0.001411,0.068483,-0.016709
2003-05-31,0.05305,0.03103,0.035656,-0.034291,0.015461,0.033292,0.085729,0.090718,0.02973,0.021424,...,0.025405,0.013698,0.019423,0.039683,0.024063,0.049713,0.014957,0.01074,0.023774,0.035529


In [50]:
def xgboost_model (bond_returns: pd.DataFrame,
                   index_returns: pd.DataFrame,
                   param_grid: dict = None,
                   cv: int = 5,
                   random_state: int = 42,
                  shap_threshold_quantile: float = 0.0):
    '''
    A function that runs an XGBoost model for a given slice of training data, tunes hyperparameters
    using cross-validation, and uses SHAP (SHapley Additive exPlanations) for feature importance.
    Portfolio weights are computed using the SHAP values.
    '''
    # Creating necessary variables
    features_df = bond_returns.reindex(index_returns.index)
    target_df = index_returns.squeeze()
    
    # Setting the parameter grid
    if param_grid is None:
        param_grid = {
            'n_estimators' : [50, 100],
            'max_depth' : [3, 4],
            'learning_rate' : [0.05, 0.1],
            'subsample' : [0.8, 1.0]
        }

    # Fitting XGBoost with CV
    base_xgb = XGBRegressor(objective = 'reg:squarederror',
                           random_state = random_state)
    # Conducitng grid search
    grid_search = GridSearchCV(estimator = base_xgb,
                              param_grid = param_grid,
                              scoring = 'neg_mean_squared_error',
                              cv=cv,
                              n_jobs=-1)
    
    # Fitting the model with best hyperparams
    grid_search.fit(features_df, target_df)
    tuned_xgb = grid_search.best_estimator_
    
    # SHAP Analysis
    explainer = shap.TreeExplainer(tuned_xgb, features_df)
    shap_values = explainer(features_df)
    shap_importance = np.abs(shap_values.values).mean(axis=0)
    shap_series = pd.Series(shap_importance, index=features_df.columns)

    # Select bonds above threshold
    threshold = shap_series.quantile(shap_threshold_quantile)
    selected_bonds = shap_series[shap_series > threshold].index.tolist()

    # Normalize weights
    weights = shap_series[selected_bonds].copy()
    #weights = shap_series
    weights /= weights.sum()
    
    # Returning output variables
    return {
        'xgb_model': tuned_xgb,
        'portfolio_weights': weights,
        'selected_bonds': selected_bonds,
        'shap_values': shap_series,
        'best_params': grid_search.best_params_
    }

In [51]:
xgb_dict = xgboost_model(
    bond_returns = invest_train,
    index_returns = index_train['Idx_Exc']
)

In [52]:
xgb_weights = xgb_dict['portfolio_weights'].to_frame().T
bonds_list = xgb_weights.columns.to_list()
xgb_weights

Unnamed: 0,786514BA6,79549BGP6,263534BJ7,52517PQM6,852060AM4,171232AF8,78387GAH6,247126AD7,302570AJ5,16161ABX5,...,44181EFA1,233835AS6,929771AC7,92344UAA3,20029PAL3,004408AB6,743263AJ4,06406JCL7,22237LLQ3,494550AH9
0,0.076216,0.096391,0.346567,0.009341,0.000834,0.054631,0.00357,0.000887,0.002019,0.006743,...,0.001739,0.005547,0.004905,0.026818,0.12659,0.001049,0.010428,0.001299,0.001294,0.0105


In [53]:
weights_df = pd.DataFrame(
    np.tile(xgb_weights.values,(len(invest_train), 1)),
    index = invest_train.index,
    columns = xgb_weights.columns
)
weights_df

Unnamed: 0_level_0,786514BA6,79549BGP6,263534BJ7,52517PQM6,852060AM4,171232AF8,78387GAH6,247126AD7,302570AJ5,16161ABX5,...,44181EFA1,233835AS6,929771AC7,92344UAA3,20029PAL3,004408AB6,743263AJ4,06406JCL7,22237LLQ3,494550AH9
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-08-31,0.076216,0.096391,0.346567,0.009341,0.000834,0.054631,0.00357,0.000887,0.002019,0.006743,...,0.001739,0.005547,0.004905,0.026818,0.12659,0.001049,0.010428,0.001299,0.001294,0.0105
2002-09-30,0.076216,0.096391,0.346567,0.009341,0.000834,0.054631,0.00357,0.000887,0.002019,0.006743,...,0.001739,0.005547,0.004905,0.026818,0.12659,0.001049,0.010428,0.001299,0.001294,0.0105
2002-10-31,0.076216,0.096391,0.346567,0.009341,0.000834,0.054631,0.00357,0.000887,0.002019,0.006743,...,0.001739,0.005547,0.004905,0.026818,0.12659,0.001049,0.010428,0.001299,0.001294,0.0105
2002-11-30,0.076216,0.096391,0.346567,0.009341,0.000834,0.054631,0.00357,0.000887,0.002019,0.006743,...,0.001739,0.005547,0.004905,0.026818,0.12659,0.001049,0.010428,0.001299,0.001294,0.0105
2002-12-31,0.076216,0.096391,0.346567,0.009341,0.000834,0.054631,0.00357,0.000887,0.002019,0.006743,...,0.001739,0.005547,0.004905,0.026818,0.12659,0.001049,0.010428,0.001299,0.001294,0.0105
2003-01-31,0.076216,0.096391,0.346567,0.009341,0.000834,0.054631,0.00357,0.000887,0.002019,0.006743,...,0.001739,0.005547,0.004905,0.026818,0.12659,0.001049,0.010428,0.001299,0.001294,0.0105
2003-02-28,0.076216,0.096391,0.346567,0.009341,0.000834,0.054631,0.00357,0.000887,0.002019,0.006743,...,0.001739,0.005547,0.004905,0.026818,0.12659,0.001049,0.010428,0.001299,0.001294,0.0105
2003-03-31,0.076216,0.096391,0.346567,0.009341,0.000834,0.054631,0.00357,0.000887,0.002019,0.006743,...,0.001739,0.005547,0.004905,0.026818,0.12659,0.001049,0.010428,0.001299,0.001294,0.0105
2003-04-30,0.076216,0.096391,0.346567,0.009341,0.000834,0.054631,0.00357,0.000887,0.002019,0.006743,...,0.001739,0.005547,0.004905,0.026818,0.12659,0.001049,0.010428,0.001299,0.001294,0.0105
2003-05-31,0.076216,0.096391,0.346567,0.009341,0.000834,0.054631,0.00357,0.000887,0.002019,0.006743,...,0.001739,0.005547,0.004905,0.026818,0.12659,0.001049,0.010428,0.001299,0.001294,0.0105


In [54]:
xgb_train = invest_train.loc[:,bonds_list]
xgb_train

Unnamed: 0_level_0,786514BA6,79549BGP6,263534BJ7,52517PQM6,852060AM4,171232AF8,78387GAH6,247126AD7,302570AJ5,16161ABX5,...,44181EFA1,233835AS6,929771AC7,92344UAA3,20029PAL3,004408AB6,743263AJ4,06406JCL7,22237LLQ3,494550AH9
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-08-31,0.038826,0.019291,0.02356,0.009794,0.105365,0.037245,0.018961,0.000605,0.074399,0.016734,...,0.0,0.020088,0.023946,0.036661,0.058089,0.017348,0.072751,0.017934,0.007845,0.012612
2002-09-30,0.027613,0.015867,0.035639,0.026928,-0.155374,0.017384,0.027007,0.022176,0.015406,0.015978,...,0.0,0.006661,0.048973,0.057084,0.029968,0.023205,0.011925,0.023018,0.0252,0.028617
2002-10-31,-0.025131,-0.000631,-0.019135,-0.0023,0.183388,-0.034509,0.018246,-0.02664,-0.042084,0.02274,...,-0.173522,-0.000849,-0.039986,0.005359,0.037167,-0.025966,-0.059062,-0.022893,-0.004939,-0.062597
2002-11-30,-0.015154,-0.011914,-0.004599,0.002023,0.130567,0.000124,-0.015848,0.005193,0.021579,-0.004685,...,0.220737,0.00113,0.023148,0.04364,0.031839,0.021347,0.062089,0.035986,0.003193,0.075019
2002-12-31,0.031032,0.021447,0.033484,0.0,0.028056,0.054853,0.039855,0.026894,0.005883,0.020323,...,0.065118,0.025188,-0.00119,0.027577,0.057105,0.027923,0.070517,-0.011004,0.025398,0.022972
2003-01-31,0.029268,0.022402,-0.001318,0.0,-0.03678,-0.009527,-0.001253,-0.002873,0.042254,0.008275,...,-0.008124,-0.007576,-0.010756,-0.011098,-0.001602,-0.00636,-0.034405,-0.008679,-0.001711,0.004346
2003-02-28,0.018686,0.017619,0.016149,0.014294,0.051228,0.02435,0.015364,0.027819,0.035243,0.028061,...,0.012903,0.016328,0.072663,0.0255,0.04501,0.031817,0.020236,0.054008,0.013745,0.02081
2003-03-31,-0.00902,0.007093,-0.001885,-0.000835,0.061261,-0.003464,0.006852,-0.005838,0.001322,-0.015066,...,-0.027043,-0.005092,-0.007208,0.001362,0.008975,-0.031113,0.007814,-0.02018,0.000891,0.003501
2003-04-30,-0.003824,-0.006754,0.006761,0.00565,0.021955,0.024279,0.005408,0.011494,0.000221,0.049757,...,0.070582,0.016343,0.005619,0.009161,0.027612,0.024396,0.02119,0.018588,0.011737,0.025081
2003-05-31,0.05305,0.03103,0.035656,0.015461,0.02973,0.053398,0.039156,0.007475,0.021799,0.009277,...,0.005738,0.018283,0.026221,0.026532,0.039328,0.028788,0.011555,0.009903,0.017779,0.031016


### Tracking Error Evaluation

In [55]:
# Writing a function that calculates the in-sample and OOS tracking error
def te_eval (train_features: pd.DataFrame,
            test_features: pd.DataFrame,
            train_target: pd.DataFrame,
            test_target: pd.DataFrame,
            model_dict): 
    '''
    A function that calculates the in-sample and out-of-sample tracking error of the weights
    computed by the respective model.
    All the inputs (except model_dict) are in DataFrame format, with dates as rows and bond cusip
    as columns respectively.
    '''
    # Creating necessary variables
    weights = model_dict['portfolio_weights'].to_frame().T
    # weights now has structure of columns being cusip, and the values being weights
    # which has been assigned to each bond (cusip) which is used to create portfolio
    bonds_list = weights.columns.to_list()
    sliced_train_features = train_features[bonds_list].copy()
    sliced_test_features = test_features[bonds_list].copy()
    
    # Calculating OOS TE for the portfolio
    oos_weighted_returns = (sliced_test_features.values @ weights.values.T)[0][0] # Using matrix multiplication
    # As we only have one value in OOS (1 month), the average and sqrt are not required
    oos_te = np.sqrt(((test_target.values[0][0] - oos_weighted_returns)**2).mean()) # Both are now in np.array format
    
    # Calculating in-sample TE for the portfolio
    # Creating a new dataframe that consists of portfolio weights for each day
    weights_df = pd.DataFrame(
        np.tile(weights.values,(len(sliced_train_features), 1)),
        index = sliced_train_features.index,
        columns = weights.columns
    )

    # Calculating weighted returns
    ins_weighted_returns = (weights_df * sliced_train_features).sum(axis=1).to_frame()
    ins_weighted_returns.columns = train_target.columns.to_list()
    
    # Calculating tracking error
    ins_te = np.sqrt(((train_target - ins_weighted_returns)**2).mean().values[0])
    
    # Returning calculated TE values
    return ins_te, oos_te

### Rolling Window Pipeline

In [56]:
# Writing a function to perform rolling-window evaluation for given models
def rolling_framework (bond_returns : pd.DataFrame,
                       index_returns : pd.DataFrame,
                       min_cover: float=0.8,
                       rolling_window : float = 24):
    '''
    The main rolling window function, which performs rolling-window evaluation for all the
    models which are passed as a list to the function. Also constructs the portfolios at 
    each step, and computes OOS tracking-error estimates.
    Inputs:
    bond_returns: DataFrame having rows as dates, and columns as bond returns.
    index_returns: DataFrame having rows as dates, Index values as columns.
    '''
    # Making sure bond and index rows match
    target_df = index_returns['Idx_Exc'].to_frame()
    features_df = bond_returns.reindex(target_df.index)

    # Creating necessary variables
    # XGB Output variables
    output_weights_xgb = pd.DataFrame(0.0, index=target_df.index, columns=features_df.columns)
    output_te_xgb = pd.DataFrame(index=target_df.index, columns=['INS_TE_XGB', 'OOS_TE_XGB'])   
    
    # Running the main rolling loop, and using models to construct portfolios
    for i in range(len(bond_returns) - rolling_window):
        # Getting train and test splits for bond returns
        sliced_bonds_train = features_df.iloc[i : (i+rolling_window)]
        sliced_bonds_test = features_df.iloc[[i+rolling_window]]
        
        # Filtering bonds to get investable universe
        investable_dict = select_bond_universe(sliced_bonds_train,
                                              sliced_bonds_test,
                                              min_cover)
        train_returns = investable_dict['investable_train']
        test_returns = investable_dict['investable_test']
        
        # Getting train and test splits for index returns
        train_index = target_df.iloc[i : (i+rolling_window)]
        test_index = target_df.iloc[[i+rolling_window]]
        
        # Running XGBoost
        xgb_model = xgboost_model(bond_returns = train_returns,
                                 index_returns = train_index)
        xgb_weights = xgb_model['portfolio_weights']
        xgb_bonds = xgb_model['selected_bonds']
        
        # Evaluating XGB Tracking Error
        ins_te_xgb, oos_te_xgb = te_eval(train_returns, test_returns,
                                train_index, test_index,
                                xgb_model)
        
        # Appending results to output DataFrames
        output_weights_xgb.loc[target_df.index[i + rolling_window],xgb_bonds] = xgb_weights
        output_te_xgb.loc[target_df.index[i + rolling_window-1],['INS_TE_XGB']] = ins_te_xgb
        output_te_xgb.loc[target_df.index[i + rolling_window], ['OOS_TE_XGB']] = oos_te_xgb
        
    # Dropping initial training rows from output variables
    output_weights_xgb = output_weights_xgb.iloc[rolling_window:]
    output_te_xgb = output_te_xgb.iloc[rolling_window-1:]

    # Dropping those bonds which were not used in portfolio construction
    invested_mask_xgb = (output_weights_xgb != 0.0).any()
    output_weights_xgb = output_weights_xgb.loc[:, invested_mask_xgb]
    
    return {
        'xgb_weights': output_weights_xgb,
        'xgb_te': output_te_xgb
    }

In [57]:
%%time
# Running the entire framework (takes about 1100 seconds / 18.5 minutes)
rolling_dict = rolling_framework(processed_bonds_ret,
                                processed_index)

CPU times: total: 34min 43s
Wall time: 21min 26s


In [61]:
result_weights_xgb = rolling_dict['xgb_weights']
result_te_xgb = rolling_dict['xgb_te']

In [65]:
result_weights_xgb

Unnamed: 0_level_0,00077QAG5,00080QAB1,001055AC6,001055AF9,001055AJ1,001055AM4,00184AAC9,00184AAF2,00184AAG0,001957BD0,...,984121CK7,98420EAC9,988498AB7,988498AC5,988498AD3,988498AH4,98920AAA6,98956PAC6,98956PAF9,989701AM9
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-08-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-09-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-10-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-11-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-12-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-07-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-08-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-09-30,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-10-31,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [63]:
result_te_xgb

Unnamed: 0_level_0,INS_TE_XGB,OOS_TE_XGB
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2004-07-31,0.004706,
2004-08-31,0.004905,0.000541
2004-09-30,0.004729,0.00191
2004-10-31,0.003706,0.002026
2004-11-30,0.003879,0.00151
...,...,...
2021-07-31,0.006648,0.000095
2021-08-31,0.006613,0.003745
2021-09-30,0.00485,0.001068
2021-10-31,0.005893,0.003476


### Saving Model Results
- We save the XGB results, which will be used in the post-implementation analysis, which is the next step in this pipeline.

In [66]:
result_weights_xgb.to_csv("xgb_weights_output.csv")
result_te_xgb.to_csv("xgb_te_output.csv")