## Importing Libraries

In [35]:
# Run in the system if never ran before
#pip install xgboost shap
#pip install pykalman

# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas_datareader.data as web
from datetime import datetime
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
import shap
from sklearn.inspection import permutation_importance
from pykalman import KalmanFilter
from joblib import Parallel, delayed

## Importing Variables

In [36]:
clean_bonds_long = pd.read_csv("clean_bond_returns_long.csv", index_col=0, parse_dates=True)
tbill = pd.read_csv("clean_tbill.csv", index_col=0, parse_dates=True)
processed_bonds_ret = pd.read_csv("processed_bond_returns.csv", index_col=0, parse_dates=True)
processed_index = pd.read_csv("processed_index_returns.csv", index_col=0, parse_dates=True)
processed_bonds_mcap = pd.read_csv("processed_bonds_mcap.csv", index_col=0, parse_dates=True)

## Model Development
**Processed Variable Overview**

| Variable Name           | Description                                                                 |
|-------------------------|-----------------------------------------------------------------------------|
| `clean_bonds_long`      | Long-format DataFrame of bond data — each row is a (date, CUSIP) pair.      |
| `tbill`                 | Series or DataFrame containing 1-month Treasury bill rates (risk-free rate).|
| `processed_bonds_ret`   | Pivoted DataFrame of bond **excess returns** — rows = dates, cols = CUSIPs. |
| `processed_bonds_mcap`  | Pivoted DataFrame of bond **market values** — same shape as above.          |
| `processed_index`       | DataFrame of the **corporate bond index** — includes price, return, mcap.   |


In [37]:
processed_bonds_ret

Unnamed: 0_level_0,000336AE7,00037BAA0,00037BAB8,00037BAC6,00037BAD4,00037BAE2,00037BAF9,00077QAA8,00077QAB6,00077QAC4,...,98978VAK9,98978VAL7,98978VAM5,98978VAN3,98978VAP8,98978VAQ6,98978VAS2,98978VAT0,989822AA9,U36964AK7
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-08-31,0.005084,,,,,,,0.008741,0.030161,,...,,,,,,,,,-0.013686,
2002-09-30,-0.056349,,,,,,,0.007913,0.033869,,...,,,,,,,,,0.062894,
2002-10-31,0.050924,,,,,,,0.004529,-0.043209,-0.003631,...,,,,,,,,,-0.131872,
2002-11-30,0.079503,,,,,,,-0.023597,0.026622,,...,,,,,,,,,0.151989,
2002-12-31,,,,,,,,0.028813,-0.029565,,...,,,,,,,,,0.047762,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-07-31,,,,,,,,,,,...,0.006461,0.013297,0.029045,0.014169,0.029238,,0.020666,0.028684,0.006188,
2021-08-31,,,,,,,,,,,...,-0.001388,-0.002873,-0.009335,-0.007120,-0.007815,,-0.005161,-0.007925,-0.000338,
2021-09-30,,,,,,,-0.009161,,,,...,-0.004144,-0.007522,-0.037348,-0.008975,-0.028702,,-0.013836,-0.029913,-0.004600,
2021-10-31,,,,,,,,,,,...,-0.007151,-0.007088,0.031418,-0.003406,0.021799,,-0.005331,0.032354,0.001179,


In [38]:
processed_index

Unnamed: 0_level_0,Idx_Price,Idx_Returns,Idx_Exc,Idx_MCap,Idx_Count
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2002-08-31,1257.87,0.026523,0.025106,1666173.29,3580
2002-09-30,1281.86,0.019072,0.017739,1701668.57,3586
2002-10-31,1264.65,-0.013426,-0.014659,1673274.91,3574
2002-11-30,1284.43,0.015641,0.014599,1685374.02,3535
2002-12-31,1323.64,0.030527,0.029527,1736694.47,3530
...,...,...,...,...,...
2021-07-31,3563.68,0.013688,0.013646,6962832.37,6838
2021-08-31,3552.90,-0.003025,-0.003050,6957292.32,6902
2021-09-30,3515.50,-0.010527,-0.010585,6876915.12,6938
2021-10-31,3524.22,0.002480,0.002430,6929726.61,6977


### Dynamic Bond Universe
- Writing a function to select the dynamic universe made up of investable bonds, for specific train and test windows.

In [39]:
def select_bond_universe (train_features: pd.DataFrame,
                         test_features: pd.DataFrame,
                         coverage_percent: float=0.8):
    '''
    A function that returns a DataFrame including bonds that are investable based on availability.
    Availability, in this context, means that a bond must have return values at least once in the
    test set, and should atleast have coverage_percent returns in training set.
    '''
    # Creating copies
    train_df = train_features.copy()
    test_df = test_features.copy()
    
    # Creating necessary variables
    n_rows = train_df.shape[0]
    
    # Dropping bonds which do not have any returns in the dataframes
    train_drop_list = train_df.columns[train_df.count() == 0].to_list()
    test_drop_list = test_df.columns[test_df.count() == 0].to_list()
    train_df = train_df.drop(columns=train_drop_list)
    test_df = test_df.drop(columns=test_drop_list)

    # Getting list of train bonds which have minimum coverage
    coverage_ratio = train_features.count() / n_rows
    train_cols = coverage_ratio[coverage_ratio >= coverage_percent].index
    
    # Using sets to get bonds common to both train and test datasets
    train_cols = set(train_cols)
    test_cols = set(test_df.columns)

    # Creating investable list using set intersection
    investable_list = list(train_cols & test_cols)
        
    # Getting the output dataframes with the selected bonds
    # Filling nan values in the train dataset with 0.0
    output_train = train_df[investable_list].copy()
    output_train.fillna(0.0, inplace=True)
    output_test = test_df[investable_list].copy()
    
    # Returning the outputs
    return {
        "investable_train" : output_train,
        'investable_test' : output_test
    }

In [40]:
features_dict = select_bond_universe(
    processed_bonds_ret.iloc[0:24],
    processed_bonds_ret.iloc[[24]]
)
index_train = processed_index.iloc[0:24]
index_test = processed_index.iloc[[24]]
invest_train = features_dict['investable_train']
invest_test = features_dict['investable_test']
invest_train

Unnamed: 0_level_0,852060AF9,616880AU4,44181EJM1,345397TS2,369622CN3,743263AG0,36962GLF9,437076AJ1,441812KA1,382388AL0,...,455434BB5,44181ELG1,44181EHS0,743263AH8,035229CD3,22541LAB9,27746QAC8,00184AAE5,345397SM6,3454024U2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-08-31,0.14589,0.021714,0.0,-0.001748,0.030609,0.127355,0.02244,-0.003006,0.0,0.100976,...,0.221743,0.0,-0.052557,0.038634,0.02281,0.049712,0.040856,0.089026,-0.00441,0.021013
2002-09-30,-0.207187,0.009946,0.0,-0.031599,0.026057,-0.025017,-0.002175,0.02365,0.0,-0.004442,...,0.00814,0.0,0.049953,0.003701,0.041649,0.012891,0.000996,-0.015001,-0.027359,-0.035216
2002-10-31,0.197529,0.007572,-0.199195,-0.053924,-0.022865,-0.06596,0.046019,0.004393,0.0,-0.092224,...,-0.117257,-0.221749,-0.251296,-0.042923,-0.02189,-0.010457,0.022421,0.086932,-0.044325,-0.059067
2002-11-30,0.090728,0.002725,0.30519,0.095502,0.005402,0.125537,-6.1e-05,-0.005961,0.0,0.035857,...,-0.032034,0.299497,0.289234,0.052844,0.001461,-0.010904,-0.018998,0.031359,0.077483,0.090927
2002-12-31,0.108754,0.022894,0.010162,0.022247,0.007271,0.03,0.006776,0.019001,0.04481,0.036028,...,0.061298,0.037688,0.060697,0.02227,0.01976,0.044636,0.026785,0.02849,0.030002,0.018297
2003-01-31,-0.021739,0.021834,0.006604,-0.01963,-0.011868,0.005902,-0.013067,-0.030708,0.018863,-0.051244,...,0.102226,0.008338,0.020176,-0.001365,0.000311,0.007646,0.016447,-0.004545,-0.00769,-0.004648
2003-02-28,0.042055,0.00759,0.037682,0.012922,0.033654,0.032385,0.044607,0.040787,0.027441,0.091827,...,0.0,0.022667,-0.00248,0.016573,0.017682,0.01325,0.010439,0.029016,0.01218,0.011343
2003-03-31,0.083284,-0.019333,0.015978,-0.049343,0.016011,-0.002058,0.004045,0.003991,0.006449,-0.082072,...,0.0,0.003534,0.035667,0.002381,0.008448,-0.00091,-0.008802,0.002706,-0.049237,-0.04189
2003-04-30,0.016175,0.003703,-0.010096,0.101811,-0.00191,0.040386,0.003681,4.6e-05,0.013002,0.102499,...,-0.078248,0.02748,0.027572,0.019703,-0.002744,0.033782,0.013066,0.019614,0.093666,0.063653
2003-05-31,0.024135,0.037437,0.05293,0.0305,0.038129,0.039292,0.029378,0.016451,0.032805,0.118454,...,0.096853,0.027581,-0.001578,0.018045,0.05463,0.031966,0.022745,0.031114,0.028089,0.029913


### Kalman Filter Framework

In [41]:
def kalman_model(bond_returns: pd.DataFrame,
                 index_returns: pd.DataFrame,
                 state_cov_scale: float = 1e-5,
                 obs_cov_scale: float = 1e-3):
    '''
    Fits a time-varying linear model using the Kalman Filter.
    This replicates index_returns using bond_returns with dynamically estimated weights.

    Inputs:
    - bond_returns: DataFrame of shape (T, N) = returns of N bonds over T time steps.
    - index_returns: Series of shape (T,) = target excess return.
    - state_cov_scale: scaling parameter for the state covariance (controls weight flexibility).
    - obs_cov_scale: scaling parameter for the observation noise (controls fit to target).
    '''

    # Align inputs
    X = bond_returns.values  # shape (T, N)
    y = index_returns.values  # shape (T,)

    n_timesteps, n_bonds = X.shape

    # --- Kalman Filter Setup ---

    # Observation matrix Z_t = X_t
    # At each time t, we observe: y_t = X_t @ beta_t + epsilon_t

    # Design matrix: time-varying observation matrices
    observation_matrices = X.reshape((n_timesteps, 1, n_bonds))  # shape (T, 1, N)

    # Transition matrix: beta_t = beta_{t-1} + eta_t, so transition matrix is identity
    transition_matrices = np.eye(n_bonds)

    # Observation covariance: scalar or 1x1 matrix (assumes homoscedastic error)
    observation_covariance = obs_cov_scale * np.eye(1)

    # State (weight) covariance: controls how much beta_t is allowed to vary
    transition_covariance = state_cov_scale * np.eye(n_bonds)

    # Initial state estimate (weights)
    initial_state_mean = np.zeros(n_bonds)

    # Initial state covariance
    initial_state_covariance = np.eye(n_bonds)

    # --- Fit Kalman Filter ---
    kf = KalmanFilter(
        transition_matrices=transition_matrices,
        observation_matrices=observation_matrices,
        transition_covariance=transition_covariance,
        observation_covariance=observation_covariance,
        initial_state_mean=initial_state_mean,
        initial_state_covariance=initial_state_covariance
    )

    # Estimate weights (beta_t) over time using EM algorithm and Kalman smoothing
    state_means, state_covs = kf.smooth(y)  # state_means shape: (T, N)

    # Final estimated weights (most recent step)
    final_weights = pd.Series(state_means[-1], index=bond_returns.columns)
    final_weights = final_weights[final_weights.abs() > 0.001]  # remove negligible weights
    final_weights /= final_weights.sum()  # normalize

    # Return results
    return {
        'portfolio_weights': final_weights,
        'weight_path': pd.DataFrame(state_means, index=bond_returns.index, columns=bond_returns.columns)
    }

In [42]:
kalman_dict = kalman_model(
    bond_returns = invest_train,
    index_returns = index_train['Idx_Exc']
)
kalman_weights = kalman_dict['portfolio_weights']
kalman_weights

852060AF9    0.002343
36962GLF9    0.003128
441812KA1    0.004559
962166BR4    0.004145
652478BA5    0.003055
               ...   
042735AK6    0.003313
126408BM4    0.003286
694032AT0    0.003177
893502AP9    0.002850
22541LAB9    0.002534
Length: 358, dtype: float64

In [43]:
kalman_weights = kalman_weights.to_frame().T
kalman_weights

Unnamed: 0,852060AF9,36962GLF9,441812KA1,962166BR4,652478BA5,852060AT9,16161ABX5,459200AS0,87612EAF3,650094CJ2,...,216831AB3,362333AF3,33738MAE1,46625HAT7,191219AY0,042735AK6,126408BM4,694032AT0,893502AP9,22541LAB9
0,0.002343,0.003128,0.004559,0.004145,0.003055,0.003485,0.002918,0.002236,0.003095,0.002674,...,0.004056,0.002219,0.002241,0.003243,0.00299,0.003313,0.003286,0.003177,0.00285,0.002534


In [44]:
total_weights_df = pd.DataFrame(
    np.tile(kalman_weights.values, (len(invest_train),1)),
    index=invest_train.index,
    columns=kalman_weights.columns
)
total_weights_df

Unnamed: 0_level_0,852060AF9,36962GLF9,441812KA1,962166BR4,652478BA5,852060AT9,16161ABX5,459200AS0,87612EAF3,650094CJ2,...,216831AB3,362333AF3,33738MAE1,46625HAT7,191219AY0,042735AK6,126408BM4,694032AT0,893502AP9,22541LAB9
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-08-31,0.002343,0.003128,0.004559,0.004145,0.003055,0.003485,0.002918,0.002236,0.003095,0.002674,...,0.004056,0.002219,0.002241,0.003243,0.00299,0.003313,0.003286,0.003177,0.00285,0.002534
2002-09-30,0.002343,0.003128,0.004559,0.004145,0.003055,0.003485,0.002918,0.002236,0.003095,0.002674,...,0.004056,0.002219,0.002241,0.003243,0.00299,0.003313,0.003286,0.003177,0.00285,0.002534
2002-10-31,0.002343,0.003128,0.004559,0.004145,0.003055,0.003485,0.002918,0.002236,0.003095,0.002674,...,0.004056,0.002219,0.002241,0.003243,0.00299,0.003313,0.003286,0.003177,0.00285,0.002534
2002-11-30,0.002343,0.003128,0.004559,0.004145,0.003055,0.003485,0.002918,0.002236,0.003095,0.002674,...,0.004056,0.002219,0.002241,0.003243,0.00299,0.003313,0.003286,0.003177,0.00285,0.002534
2002-12-31,0.002343,0.003128,0.004559,0.004145,0.003055,0.003485,0.002918,0.002236,0.003095,0.002674,...,0.004056,0.002219,0.002241,0.003243,0.00299,0.003313,0.003286,0.003177,0.00285,0.002534
2003-01-31,0.002343,0.003128,0.004559,0.004145,0.003055,0.003485,0.002918,0.002236,0.003095,0.002674,...,0.004056,0.002219,0.002241,0.003243,0.00299,0.003313,0.003286,0.003177,0.00285,0.002534
2003-02-28,0.002343,0.003128,0.004559,0.004145,0.003055,0.003485,0.002918,0.002236,0.003095,0.002674,...,0.004056,0.002219,0.002241,0.003243,0.00299,0.003313,0.003286,0.003177,0.00285,0.002534
2003-03-31,0.002343,0.003128,0.004559,0.004145,0.003055,0.003485,0.002918,0.002236,0.003095,0.002674,...,0.004056,0.002219,0.002241,0.003243,0.00299,0.003313,0.003286,0.003177,0.00285,0.002534
2003-04-30,0.002343,0.003128,0.004559,0.004145,0.003055,0.003485,0.002918,0.002236,0.003095,0.002674,...,0.004056,0.002219,0.002241,0.003243,0.00299,0.003313,0.003286,0.003177,0.00285,0.002534
2003-05-31,0.002343,0.003128,0.004559,0.004145,0.003055,0.003485,0.002918,0.002236,0.003095,0.002674,...,0.004056,0.002219,0.002241,0.003243,0.00299,0.003313,0.003286,0.003177,0.00285,0.002534


In [45]:
kalman_bonds_list = kalman_weights.columns.to_list()
kalman_ret = invest_train.loc[:, kalman_bonds_list]
kalman_ret

Unnamed: 0_level_0,852060AF9,36962GLF9,441812KA1,962166BR4,652478BA5,852060AT9,16161ABX5,459200AS0,87612EAF3,650094CJ2,...,216831AB3,362333AF3,33738MAE1,46625HAT7,191219AY0,042735AK6,126408BM4,694032AT0,893502AP9,22541LAB9
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-08-31,0.14589,0.02244,0.0,0.0,0.118916,0.118719,0.016734,0.062318,0.053449,0.036259,...,0.188119,0.120603,0.024794,0.0,-0.027877,-0.073411,0.042636,0.059651,-0.015656,0.049712
2002-09-30,-0.207187,-0.002175,0.0,0.0,0.011694,-0.114149,0.015978,0.036156,0.017962,0.059543,...,-0.032021,0.073781,0.03759,0.0,0.096398,-0.010111,0.0158,-0.037044,0.096524,0.012891
2002-10-31,0.197529,0.046019,0.0,0.0,-0.021386,0.118203,0.02274,-0.001141,-0.029015,-0.014173,...,-0.025214,-0.026361,-0.013432,0.0,-0.033054,-0.113713,-0.033843,0.031029,-0.050658,-0.010457
2002-11-30,0.090728,-6.1e-05,0.0,0.0,0.088741,0.163831,-0.004685,0.023743,0.013562,0.07002,...,-0.073924,0.028947,-0.021919,0.0,-0.025913,0.160437,0.031578,0.010804,0.020623,-0.010904
2002-12-31,0.108754,0.006776,0.04481,0.031191,0.045689,0.094807,0.020323,-0.006843,0.012632,-0.013376,...,0.120888,0.032694,0.049866,0.055629,0.07423,0.07884,0.036838,-0.011153,0.025509,0.044636
2003-01-31,-0.021739,-0.013067,0.018863,-0.007926,0.052652,-0.029021,0.008275,0.025355,0.00944,0.045295,...,0.011589,0.005202,-0.007028,-0.00655,0.000806,0.01015,0.008452,0.04495,-0.007614,0.007646
2003-02-28,0.042055,0.044607,0.027441,0.0457,0.066364,0.050106,0.028061,0.012862,0.020296,0.023517,...,0.011446,0.064369,0.024428,0.019745,0.004927,0.003098,0.022998,-0.006023,0.038118,0.01325
2003-03-31,0.083284,0.004045,0.006449,-0.015454,-0.021978,0.072026,-0.015066,0.00841,-0.016852,0.027274,...,0.002641,-0.052062,0.002188,0.008307,0.011044,-0.012872,-0.01321,0.021935,-0.027483,-0.00091
2003-04-30,0.016175,0.003681,0.013002,0.043879,0.059284,0.069328,0.049757,0.03863,0.04568,-0.004736,...,-0.044811,0.061788,0.001132,0.013951,0.025271,0.132401,0.024649,0.050905,0.010609,0.033782
2003-05-31,0.024135,0.029378,0.032805,0.04213,0.094085,0.076687,0.009277,0.047889,0.050997,0.048533,...,0.123487,-0.026133,0.040742,0.035331,0.057027,0.04162,0.050334,0.028502,0.08661,0.031966


### Tracking Error Evaluation

In [46]:
# Writing a function that calculates the in-sample and OOS tracking error
def te_eval (train_features: pd.DataFrame,
            test_features: pd.DataFrame,
            train_target: pd.DataFrame,
            test_target: pd.DataFrame,
            model_dict): 
    '''
    A function that calculates the in-sample and out-of-sample tracking error of the weights
    computed by the respective model.
    All the inputs (except model_dict) are in DataFrame format, with dates as rows and bond cusip
    as columns respectively.
    '''
    # Creating necessary variables
    weights = model_dict['portfolio_weights'].to_frame().T
    # weights now has structure of columns being cusip, and the values being weights
    # which has been assigned to each bond (cusip) which is used to create portfolio
    bonds_list = weights.columns.to_list()
    sliced_train_features = train_features[bonds_list].copy()
    sliced_test_features = test_features[bonds_list].copy()
    
    # Calculating OOS TE for the portfolio
    oos_weighted_returns = (sliced_test_features.values @ weights.values.T)[0][0] # Using matrix multiplication
    # As we only have one value in OOS (1 month), the average and sqrt are not required
    oos_te = np.sqrt(((test_target.values[0][0] - oos_weighted_returns)**2).mean()) # Both are now in np.array format
    
    # Calculating in-sample TE for the portfolio
    # Creating a new dataframe that consists of portfolio weights for each day
    weights_df = pd.DataFrame(
        np.tile(weights.values,(len(sliced_train_features), 1)),
        index = sliced_train_features.index,
        columns = weights.columns
    )

    # Calculating weighted returns
    ins_weighted_returns = (weights_df * sliced_train_features).sum(axis=1).to_frame()
    ins_weighted_returns.columns = train_target.columns.to_list()
    
    # Calculating tracking error
    ins_te = np.sqrt(((train_target - ins_weighted_returns)**2).mean().values[0])
    
    # Returning calculated TE values
    return ins_te, oos_te

### Rolling Window Pipeline

In [47]:
# Writing a function to perform rolling-window evaluation for given models
def rolling_framework (bond_returns : pd.DataFrame,
                       index_returns : pd.DataFrame,
                       min_cover: float=0.8,
                       rolling_window : float = 24):
    '''
    The main rolling window function, which performs rolling-window evaluation for all the
    models which are passed as a list to the function. Also constructs the portfolios at 
    each step, and computes OOS tracking-error estimates.
    Inputs:
    bond_returns: DataFrame having rows as dates, and columns as bond returns.
    index_returns: DataFrame having rows as dates, Index values as columns.
    '''
    # Making sure bond and index rows match
    target_df = index_returns['Idx_Exc'].to_frame()
    features_df = bond_returns.reindex(target_df.index)

    # Creating necessary variables
    # Kalman Output variables
    output_weights_kalman = pd.DataFrame(0.0, index=target_df.index, columns=features_df.columns)
    output_te_kalman = pd.DataFrame(index=target_df.index, columns=['INS_TE_KLM', 'OOS_TE_KLM'])    
    
    # Running the main rolling loop, and using models to construct portfolios
    for i in range(len(bond_returns) - rolling_window):
        # Getting train and test splits for bond returns
        sliced_bonds_train = features_df.iloc[i : (i+rolling_window)]
        sliced_bonds_test = features_df.iloc[[i+rolling_window]]
        
        # Filtering bonds to get investable universe
        investable_dict = select_bond_universe(sliced_bonds_train,
                                              sliced_bonds_test,
                                              min_cover)
        train_returns = investable_dict['investable_train']
        test_returns = investable_dict['investable_test']
        
        # Getting train and test splits for index returns
        train_index = target_df.iloc[i : (i+rolling_window)]
        test_index = target_df.iloc[[i+rolling_window]]

        # Running Kalman Model
        kalman_dict = kalman_model(bond_returns = train_returns,
                                   index_returns = train_index)
        kalman_weights = kalman_dict['portfolio_weights']
        kalman_bonds = kalman_weights.to_frame().T.columns.to_list()

        # Evaluating Kalman Tracking Error
        ins_te_kalman, oos_te_kalman = te_eval(train_returns, test_returns,
                                              train_index, test_index,
                                              kalman_dict)
        # Appending results to output DataFrames
        output_weights_kalman.loc[target_df.index[i + rolling_window],kalman_bonds] = kalman_weights
        output_te_kalman.loc[target_df.index[i + rolling_window-1],['INS_TE_KLM']] = ins_te_kalman
        output_te_kalman.loc[target_df.index[i + rolling_window], ['OOS_TE_KLM']] = oos_te_kalman
        
    # Dropping initial training rows from output variables
    output_weights_kalman = output_weights_kalman.iloc[rolling_window:]
    output_te_kalman = output_te_kalman.iloc[rolling_window-1:]
    
    # Dropping those bonds which were not used in portfolio construction
    invested_mask_kalman = (output_weights_kalman != 0.0).any()
    output_weights_kalman = output_weights_kalman.loc[:, invested_mask_kalman]
    
    return {
        'kalman_weights': output_weights_kalman,
        'kalman_te': output_te_kalman
    }

In [48]:
# def run_kalman_iteration(i, features_df, target_df, min_cover, rolling_window):
#     try:
#         # Slicing bonds
#         sliced_bonds_train = features_df.iloc[i : (i+rolling_window)]
#         sliced_bonds_test = features_df.iloc[[i+rolling_window]]

#         investable_dict = select_bond_universe(sliced_bonds_train, sliced_bonds_test, min_cover)
#         train_returns = investable_dict['investable_train']
#         test_returns = investable_dict['investable_test']

#         train_index = target_df.iloc[i : (i+rolling_window)]
#         test_index = target_df.iloc[[i+rolling_window]]

#         kalman_dict = kalman_model(train_returns, train_index)
#         kalman_weights = kalman_dict['portfolio_weights']
#         kalman_bonds = kalman_weights.to_frame().T.columns.to_list()

#         ins_te, oos_te = te_eval(train_returns, test_returns, train_index, test_index, kalman_dict)

#         return {
#             'date_ins': target_df.index[i + rolling_window - 1],
#             'date_oos': target_df.index[i + rolling_window],
#             'weights': kalman_weights,
#             'bonds': kalman_bonds,
#             'ins_te': ins_te,
#             'oos_te': oos_te
#         }
#     except Exception as e:
#         print(f"Iteration {i} failed: {e}")
#         return None

In [49]:
# # Main rolling framework
# def rolling_framework(bond_returns: pd.DataFrame,
#                       index_returns: pd.DataFrame,
#                       min_cover: float = 0.8,
#                       rolling_window: int = 24):
#     '''
#     Performs parallelized rolling-window evaluation using Kalman filter.
#     '''
#     # Align index
#     target_df = index_returns['Idx_Exc'].to_frame()
#     features_df = bond_returns.reindex(target_df.index)
    
#     # Initialize outputs
#     output_weights_kalman = pd.DataFrame(0.0, index=target_df.index, columns=features_df.columns)
#     output_te_kalman = pd.DataFrame(index=target_df.index, columns=['INS_TE_KLM', 'OOS_TE_KLM'])

#     # Run parallel loop
#     results = Parallel(n_jobs=-1, backend='loky')(
#         delayed(run_kalman_iteration)(i, features_df, target_df, min_cover, rolling_window)
#         for i in range(len(bond_returns) - rolling_window)
#     )

#     # Aggregate results
#     for res in results:
#         if res is None:
#             continue
#         output_weights_kalman.loc[res['date_oos'], res['bonds']] = res['weights']
#         output_te_kalman.loc[res['date_ins'], 'INS_TE_KLM'] = res['ins_te']
#         output_te_kalman.loc[res['date_oos'], 'OOS_TE_KLM'] = res['oos_te']

#     # Post-process outputs
#     output_weights_kalman = output_weights_kalman.iloc[rolling_window:]
#     output_te_kalman = output_te_kalman.iloc[rolling_window-1:]

#     invested_mask_kalman = (output_weights_kalman != 0.0).any()
#     output_weights_kalman = output_weights_kalman.loc[:, invested_mask_kalman]

#     return {
#         'kalman_weights': output_weights_kalman,
#         'kalman_te': output_te_kalman
#     }

In [50]:
%%time
# Running Kalman Model
rolling_dict = rolling_framework(processed_bonds_ret,
                                processed_index)

CPU times: total: 1d 5h 16min 11s
Wall time: 3h 41min 59s


In [51]:
result_weights_kalman = rolling_dict['kalman_weights']
result_te_kalman = rolling_dict['kalman_te']

In [57]:
result_weights_kalman

Unnamed: 0_level_0,00080QAB1,001055AC6,001055AF9,001055AJ1,00105DAB1,00184AAB1,00184AAC9,00184AAF2,00184AAG0,00185AAG9,...,984121CJ0,98412JAZ7,988498AA9,988498AB7,988498AC5,988498AD3,98956PAG7,989701AJ6,98978VAH6,98978VAM5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-08-31,0.0,0.0,0.0,0.0,0.0,-0.004847,0.002803,0.0,0.003367,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-09-30,0.0,0.0,0.0,0.0,0.0,-0.004798,0.002810,0.0,0.003467,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-10-31,0.0,0.0,0.0,0.0,0.0,-0.004562,0.002663,0.0,0.003438,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-11-30,0.0,0.0,0.0,0.0,0.0,-0.003954,0.003757,0.0,0.004609,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-12-31,0.0,0.0,0.0,0.0,0.0,-0.003236,0.002736,0.0,0.003582,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-07-31,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-08-31,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-09-30,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-10-31,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [53]:
result_weights_kalman

Unnamed: 0_level_0,00080QAB1,001055AC6,001055AF9,001055AJ1,00105DAB1,00184AAB1,00184AAC9,00184AAF2,00184AAG0,00185AAG9,...,984121CJ0,98412JAZ7,988498AA9,988498AB7,988498AC5,988498AD3,98956PAG7,989701AJ6,98978VAH6,98978VAM5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-08-31,0.0,0.0,0.0,0.0,0.0,-0.004847,0.002803,0.0,0.003367,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-09-30,0.0,0.0,0.0,0.0,0.0,-0.004798,0.002810,0.0,0.003467,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-10-31,0.0,0.0,0.0,0.0,0.0,-0.004562,0.002663,0.0,0.003438,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-11-30,0.0,0.0,0.0,0.0,0.0,-0.003954,0.003757,0.0,0.004609,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2004-12-31,0.0,0.0,0.0,0.0,0.0,-0.003236,0.002736,0.0,0.003582,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-07-31,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-08-31,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-09-30,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2021-10-31,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [58]:
result_te_kalman

Unnamed: 0_level_0,INS_TE_KLM,OOS_TE_KLM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2004-07-31,0.006543,
2004-08-31,0.006465,0.007403
2004-09-30,0.00624,0.004616
2004-10-31,0.006849,0.001516
2004-11-30,0.005124,0.000952
...,...,...
2021-07-31,0.018845,0.007333
2021-08-31,0.019963,0.002125
2021-09-30,0.019013,0.00614
2021-10-31,0.018235,0.010041


### Saving Model Results
- We now save the results we got from the Kalman model, and use it in post-implementation analysis, which is the next part of the pipeline.

In [55]:
result_weights_kalman.to_csv("kalman_weights_output.csv")
result_te_kalman.to_csv("kalman_te_output.csv")