In [None]:
#Improve plotting of PCA loadings
#Create a sample fixed income portfolio using QuantLib
#Run regression to find sensitivity to yield curve changes - i.e. duration
#Hedge overlays
#Alpha generation - How to leverage PCA for generating alpha - if the 10Yr is moving idiosyncratically, how much to buy/sell based on PC3 (Butterfly)

In [49]:
from openbb import obb
import pandas as pd
import numpy as np
from scipy.stats import norm
import datetime as dt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import QuantLib as ql
import matplotlib.pyplot as plt
import plotly as pl
import plotly.graph_objects as go

In [62]:
def FetchRates(Start_Date=None, End_Date=None):
    """Fetch Treasury rates plus the effr and sofr rate series as one tidy frame.

    All three series are requested through ``obb`` from the
    ``federal_reserve`` provider for the same date range, outer-joined on
    their index, and reshaped so the index becomes a ``Date`` column.

    Parameters
    ----------
    Start_Date, End_Date : optional
        Forwarded unchanged as ``start_date`` / ``end_date`` to every request.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date``, ``FedFunds``, ``SOFR`` followed by the tenors
        ``1Mo`` ... ``30Yr``, sorted by ``Date``. Rows without ``FedFunds`` or
        ``1Mo`` are removed first; remaining gaps in ``SOFR`` are then
        forward-filled.
    """
    request = dict(start_date=Start_Date, end_date=End_Date, provider="federal_reserve")

    curve = obb.fixedincome.government.treasury_rates(**request).to_df()
    effr = obb.fixedincome.rate.effr(**request).to_df()[['rate']].rename(columns={'rate': 'FedFunds'})
    sofr = obb.fixedincome.rate.sofr(**request).to_df()[['rate']].rename(columns={'rate': 'SOFR'})

    # Provider column name -> short tenor label
    tenor_labels = {
        "month_1": "1Mo", "month_2": "2Mo", "month_3": "3Mo", "month_6": "6Mo",
        "year_1": "1Yr", "year_2": "2Yr", "year_3": "3Yr",
        "year_5": "5Yr", "year_7": "7Yr", "year_10": "10Yr", "year_30": "30Yr"
    }
    # Columns kept in the output, in display order (2Mo is intentionally not listed)
    column_order = ['Date', 'FedFunds', 'SOFR', '1Mo', '3Mo', '6Mo', '1Yr', '2Yr', '3Yr', '5Yr', '7Yr', '10Yr', '30Yr']

    # Merge on the shared index, then expose that index as a 'Date' column
    combined = (
        curve.join([effr, sofr], how='outer')
        .rename_axis('Date')
        .reset_index()
        .rename(columns=tenor_labels)
    )

    # Drop rows where the primary benchmarks are missing (presumably weekends/holidays)
    tidy = (
        combined[column_order]
        .sort_values('Date')
        .dropna(subset=['FedFunds', '1Mo'])
    )

    # Forward-fill SOFR only after the row filter, so dropped rows never feed the fill
    return tidy.assign(SOFR=tidy['SOFR'].ffill())

def calculate_rolling_pca_multi(df, window):
    """Run a 3-component PCA on standardized tenor changes over a rolling window.

    For every window of ``window`` consecutive row-to-row differences of the
    tenor columns, the data are standardized and a 3-component PCA is fitted.
    Each result is labelled with the ``Date`` of the last row in its window.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``Date`` column and the ten tenor columns
        ``1Mo`` ... ``30Yr``; rows are taken in their existing order.
    window : int
        Number of differenced rows per window. Must be at least 3 because
        three components are extracted.

    Returns
    -------
    (rolling_var_df, rolling_loadings_df) : tuple of pandas.DataFrame
        ``rolling_var_df`` holds the explained-variance ratio of each
        component; ``rolling_loadings_df`` holds the loadings under a
        two-level column index (``Component``, ``Tenor``). Both are indexed by
        the window end date. The names Level/Slope/Curvature are assigned by
        component position only; the code does not check the loading shapes.

    Raises
    ------
    ValueError
        If ``window`` is smaller than the number of components.

    Notes
    -----
    Windows that keep fewer than three complete rows after ``dropna()`` are
    skipped, so their end dates are absent from both outputs.
    NOTE(review): the sign of a principal component is arbitrary, so loadings
    may flip sign between adjacent windows - confirm whether downstream plots
    need sign alignment.
    """
    tenors = ['1Mo', '3Mo', '6Mo', '1Yr', '2Yr', '3Yr', '5Yr', '7Yr', '10Yr', '30Yr']
    component_names = ['PC1_Level', 'PC2_Slope', 'PC3_Curvature']
    n_components = len(component_names)

    if window < n_components:
        raise ValueError(
            f"window must be at least {n_components} to extract {n_components} components, got {window}"
        )

    diff_df = df[tenors].diff()

    results = []
    loadings_flat = []  # One flattened (components x tenors) vector per date
    dates = []

    # i is the exclusive end of the positional slice. Starting at window + 1
    # keeps the all-NaN first diff row out of the first window; ending at
    # len(df) (inclusive) lets the final window finish on the most recent row.
    for i in range(window + 1, len(df) + 1):
        window_data = diff_df.iloc[i - window:i].dropna()

        # Too few complete observations to fit the requested components.
        if len(window_data) < n_components:
            continue

        scaled_window = StandardScaler().fit_transform(window_data)

        pca = PCA(n_components=n_components)
        pca.fit(scaled_window)

        # 1. Store explained variance
        results.append(pca.explained_variance_ratio_)

        # 2. Flatten loadings row by row:
        #    PC1_1Mo...PC1_30Yr, PC2_1Mo...PC2_30Yr, PC3_1Mo...PC3_30Yr
        loadings_flat.append(pca.components_.flatten())

        # Label the window with the date of its last row
        dates.append(df['Date'].iloc[i - 1])

    # Explained-variance frame
    rolling_var_df = pd.DataFrame(
        results,
        columns=component_names,
        index=dates
    )

    # 3. Two-level columns matching the flatten order above: Component, then Tenor
    multi_col = pd.MultiIndex.from_product(
        [component_names, tenors],
        names=['Component', 'Tenor']
    )

    # Loadings frame
    rolling_loadings_df = pd.DataFrame(
        loadings_flat,
        columns=multi_col,
        index=dates
    )

    return rolling_var_df, rolling_loadings_df

def plot_loadings_by_date(rolling_loadings_df, selected_date):
    """Plot the three components' loadings across tenors for one snapshot date.

    The row whose index value is nearest to ``selected_date`` is used, so the
    requested date does not have to be present in the index.

    Parameters
    ----------
    rolling_loadings_df : pandas.DataFrame
        Loadings with a date-like index and two-level columns
        (component, tenor) that include the components ``PC1_Level``,
        ``PC2_Slope`` and ``PC3_Curvature``.
    selected_date : anything ``pd.to_datetime`` accepts
        Target snapshot date.

    Returns
    -------
    plotly.graph_objects.Figure
        One line per component, a dashed zero reference line, and the
        snapshot date in the title.
    """
    # Resolve the requested date to the closest date present in the index
    target_dt = pd.to_datetime(selected_date)
    row_position = rolling_loadings_df.index.get_indexer([target_dt], method='nearest')[0]
    nearest_date = rolling_loadings_df.index[row_position]

    # Loadings for that date: a Series keyed by (component, tenor)
    loadings = rolling_loadings_df.loc[nearest_date]

    fig = go.Figure()

    # (column key, legend label, line colour) per component
    components = [
        ('PC1_Level', 'Level (Parallel Shift)', '#1f77b4'),
        ('PC2_Slope', 'Slope (Twist)', '#ff7f0e'),
        ('PC3_Curvature', 'Curvature (Butterfly)', '#2ca02c')
    ]

    for col, label, color in components:
        component_loadings = loadings[col]
        fig.add_trace(go.Scatter(
            # Tenor labels come from the data itself, so the x-axis always
            # matches the values plotted, whatever tenor set the frame holds.
            x=component_loadings.index.tolist(),
            y=component_loadings,
            mode='lines+markers',
            name=label,
            line=dict(color=color, width=3),
            marker=dict(size=8),
            hovertemplate='<b>Tenor:</b> %{x}<br><b>Loading:</b> %{y:.4f}<extra></extra>'
        ))

    date_str = pd.to_datetime(nearest_date).strftime('%Y-%m-%d')

    # Dashboard styling
    fig.update_layout(
        title=dict(
            text=f"<b>Yield Curve Factor Loadings</b><br>Snapshot: {date_str}",
            x=0.5, font=dict(size=18)
        ),
        xaxis_title="Maturity (Tenor)",
        yaxis_title="Sensitivity (Loading Weight)",
        template="plotly_white",
        hovermode="x unified",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
        margin=dict(l=50, r=50, t=100, b=50),
        height=600
    )

    # Zero line for reference
    fig.add_hline(y=0, line_dash="dash", line_color="black", line_width=1)

    return fig

def plot_single_tenor_loadings(rolling_loadings_df, tenor=None):
    """Chart how one tenor's loadings on the three components change over time.

    Parameters
    ----------
    rolling_loadings_df : pandas.DataFrame
        Loadings whose columns carry a level named ``Tenor`` and, once that
        level is fixed, the columns ``PC1_Level``, ``PC2_Slope`` and
        ``PC3_Curvature``.
    tenor : str
        Tenor label to select, e.g. ``'30Yr'``. It is passed straight to
        ``DataFrame.xs``.

    Returns
    -------
    plotly.graph_objects.Figure
        One line per component against the frame's index, with a dashed zero
        reference line.
    """
    # Cross-section on the 'Tenor' column level leaves one column per component
    per_component = rolling_loadings_df.xs(tenor, level='Tenor', axis=1)

    # component column -> (legend label, line colour)
    series_style = {
        'PC1_Level': ('Level Sensitivity', '#1f77b4'),
        'PC2_Slope': ('Slope Sensitivity', '#ff7f0e'),
        'PC3_Curvature': ('Curvature Sensitivity', '#2ca02c'),
    }

    fig = go.Figure()
    for component, (legend_name, hex_color) in series_style.items():
        history_trace = go.Scatter(
            x=per_component.index,
            y=per_component[component],
            mode='lines',
            name=legend_name,
            line={'width': 2, 'color': hex_color},
            hovertemplate=f'<b>{legend_name}:</b> %{{y:.4f}}<extra></extra>',
        )
        fig.add_trace(history_trace)

    # Fine-grained y grid: a tick every 0.05, shown with two decimals
    fig.update_yaxes(dtick=0.05, tickformat=".2f", gridcolor='LightGrey')

    # Titles, theme and legend placement
    heading = {
        'text': f"<b>Historical Factor Loadings for the {tenor} Tenor</b>",
        'x': 0.5,
        'font': {'size': 18},
    }
    legend_box = {'orientation': "h", 'yanchor': "bottom", 'y': 1.02, 'xanchor': "center", 'x': 0.5}
    fig.update_layout(
        title=heading,
        xaxis_title="Date",
        yaxis_title="Loading Weight (Sensitivity)",
        template="plotly_white",
        hovermode="x unified",
        legend=legend_box,
        height=600,
    )

    # Dashed zero line for perspective
    fig.add_hline(y=0, line_dash="dash", line_color="black", line_width=1)

    return fig

In [30]:
# Filter for rows that contain any NaN values
#null_rows = rates_data[rates_data.isnull().any(axis=1)]
#null_rows

In [None]:
# Sample period: fixed start, running through today's date (YYYY-MM-DD strings)
start_date = "2023-12-01"
end_date = f"{dt.datetime.today():%Y-%m-%d}"
rates_data = FetchRates(Start_Date=start_date, End_Date=end_date)

# Rolling PCA over a 20-row window
rolling_window = 20
rolling_var, rolling_loadings = calculate_rolling_pca_multi(df=rates_data, window=rolling_window)

In [None]:
# Rolling explained-variance chart: one line per principal component
fig = go.Figure()

# Colours and component columns are paired by position
colors = ['#1f77b4', '#ff7f0e', '#2ca02c']
components = ['PC1_Level', 'PC2_Slope', 'PC3_Curvature']

for col, color in zip(components, colors):
    fig.add_trace(
        go.Scatter(
            x=rolling_var.index,
            y=rolling_var[col],
            mode='lines',
            name=col.replace('_', ': '),
            line={'width': 2, 'color': color},
            hovertemplate='%{y:.2%}',  # hover value shown as a percentage
        )
    )

# Layout and y-axis tick format (e.g. 95.0%) in a single call; as the last
# expression of the cell, the returned figure is what gets displayed.
fig.update_layout(
    title={
        'text': f'<b>{rolling_window}-Day Rolling PCA: Explained Variance</b>',
        'x': 0.5,
        'font': {'size': 20},
    },
    xaxis_title="Date",
    yaxis_title="Percentage of Total Variance",
    template="plotly_white",   # clean white background
    hovermode="x unified",     # one hover box listing all three values per date
    legend={
        'orientation': "h",
        'yanchor': "bottom", 'y': 1.02,
        'xanchor': "right", 'x': 1,
    },
    margin={'l': 50, 'r': 50, 't': 100, 'b': 50},
    height=600,
    yaxis_tickformat='.1%',
)

In [None]:
# Factor-loading snapshot for the available date nearest to my_date
my_date = '2025-04-02'
fig = plot_loadings_by_date(rolling_loadings_df=rolling_loadings, selected_date=my_date)
fig.show()

In [63]:
# Loading history of the 30Yr tenor across all rolling windows
fig = plot_single_tenor_loadings(rolling_loadings_df=rolling_loadings, tenor='30Yr')
fig.show()