In [1]:
from typing import Optional, List
from importlib import reload

from numpy import sqrt
import pandas as pd
import xarray as xr
xr.set_options(keep_attrs=True,
               display_expand_data=False)

import plotly.express as px
import plotly.io as pio

from data import build_dataset_with_composites, get_factor_master, build_factor_data2
from stats import total_return

In [None]:
# Half-life values handed to build_factor_data2. Later cells select `corr_type=63` /
# `vol_type=63` and pass this same set of values to the drawing helpers, so these
# presumably become the `corr_type` / `vol_type` coordinates of the dataset (in trading
# days) — TODO confirm against data.build_factor_data2.
halflifes = [21, 63, 126, 252]
# Dataset used by every cell below (variables referenced later: cret, corr, vol, ret).
factor_data = build_factor_data2(halflifes)

In [3]:
def _get_color_map(df, color, template, color_map_override):
    """
    Build the ``color_discrete_map`` passed to ``plotly.express.scatter``.
    Parameters
    ----------
    df : pandas.DataFrame
        The data frame being plotted.
    color : str or None
        Name of the column that drives the marker colour.
    template : str
        Name of the plotly template whose ``colorway`` supplies the default colours.
    color_map_override : dict or None
        Category -> colour entries that take precedence over the template colours.
    Returns
    -------
    dict or None
        ``None`` when there is no colour column or no override (plotly then applies its own
        defaults); otherwise the template colours assigned to the categories in order of
        first appearance, updated with the override entries.
    """
    if color is None or color_map_override is None:
        return None

    colorway = pio.templates[template]['layout']['colorway']
    # zip stops at the shorter input: categories beyond the length of the colorway get no
    # explicit entry here and are left for plotly to colour.
    default_map = dict(zip(df[color].unique(), colorway))
    return {**default_map, **color_map_override}


def px_scatter(df, x, y, color_map_override=None, **kwargs):
    """
    Create a scatter plot using Plotly with customized color mapping and formatting.
    Parameters
    ----------
    df : pandas.DataFrame
        The data frame containing the data to be plotted.
    x : str
        The column name to be used for the x-axis.
    y : str
        The column name to be used for the y-axis.
    color_map_override : dict, optional
        A dictionary to override the default color sequence for specific categories.
        Its keys are also the legend groups whose text labels are drawn in the marker
        colour; every other label stays light gray.
    **kwargs : dict
        Additional keyword arguments to be passed to the plotly.express.scatter function. Typical arguments include:
        `text`, `color`, `size`, and `symbol`, which contain column names for those attributes.
        `template`, `height`, `width`, `size_max` and `color_discrete_map` are set by this
        function and must not be passed here.
    Returns
    -------
    plotly.graph_objs._figure.Figure
        The generated scatter plot figure.
    Notes
    -----
    The function uses the 'plotly_white' template and sets the figure height and width to 750.
    If 'color' is provided in kwargs and 'color_map_override' is not None, a custom color mapping is applied.
    When 'color_map_override' is None (the default) all text labels are light gray.
    """
    args_format = {'template': 'plotly_white', 'height': 750, 'width': 750}

    color_discrete_map = _get_color_map(df, kwargs.get('color'),
                                        args_format['template'], color_map_override)

    fig = (px.scatter(df, x=x, y=y, **kwargs,
                      color_discrete_map=color_discrete_map,
                      size_max=20,
                      **args_format)
           .update_traces(textposition='middle right', textfont_color='lightgray')
           .update_layout(legend_title_text=None)
           )

    # The original called color_map_override.keys() unconditionally, which raised
    # AttributeError for the default color_map_override=None. With no override there is
    # nothing to highlight, so every label keeps the light-gray default.
    highlighted_groups = set(color_map_override) if color_map_override else set()

    def get_trace_color(trace):
        return trace.marker.color if trace.legendgroup in highlighted_groups else 'lightgray'

    fig.for_each_trace(lambda t: t.update(textfont_color=get_trace_color(t)))

    return fig

In [None]:
def format_date(date_str):
    """Return ``date_str`` formatted as zero-padded ``MM/DD`` (e.g. ``'2025-01-28'`` -> ``'01/28'``).

    The input is parsed with ``pandas.to_datetime``, so anything that function accepts as a
    single date works here.
    """
    timestamp = pd.to_datetime(date_str)
    return timestamp.strftime('%m/%d')


# --- Return window ---------------------------------------------------------------------
# Exactly one window is active. The alternatives below are earlier presets kept for
# reference (the first two use the older return_date / return_date2 variable names).

# return_title =  'Return on Fed Announcement (std)'
# return_date = '2024-12-18'
# return_date2 = '2024-12-19'

# return_title =  'Return on Jan 10 (std)'
# return_date = '2025-01-10'
# return_date2 = None

# return_start = '2024-12-31'
# return_end = '2025-01-28'
# return_title = f'Month-to-Date Return (std, as of {format_date(return_end)})'

# return_title = 'Return on Jan 10 (std)'
# return_start = '2024-01-08'
# return_end = '2025-01-10'

return_start = '2025-01-21'
return_end = '2025-01-28'
return_title = f'5-day Return (std, {format_date(return_start)} - {format_date(return_end)})'

corr_type = 63
corr_asset = 'IEF'
vol_type = 63

# Number of date steps between the first and last date of the window.
ndays = factor_data['cret'].sel(date=slice(return_start, return_end)).date.size - 1

# shift(date=1) moves every estimate one row later, so the value picked at (or, via
# ffill, just before) return_start is the estimate from the preceding date.
corr = (factor_data['corr']
        .shift(date=1)
        .sel(corr_type=corr_type, factor_name_1=corr_asset)
        .sel(date=return_start, method='ffill')
        .to_series()
        )
# Scale the volatility to the length of the window. The sqrt(ndays / 252) * 100 factor
# presumably converts an annualised decimal volatility into percent over the window —
# TODO confirm units against build_factor_data2 / total_return.
vol = (factor_data['vol']
       .shift(date=1)
       .sel(vol_type=vol_type)
       .sel(date=return_start, method='ffill')
       .to_series()
       .mul(sqrt(ndays / 252) * 100)
       )
ret = total_return(factor_data['cret'], return_start, return_end)
zscore = ret.div(vol).rename('zscore')

factor_master = get_factor_master()

# A single replace after reset_index covers the new 'asset' column as well as the value
# columns, so the extra replace that used to precede reset_index was redundant.
df = (pd.concat([corr, zscore, factor_master.asset_class, factor_master.hyper_factor], axis=1)
      .rename_axis('asset').reset_index()
      .replace('MWTIX', 'TCW')
      )

color_map_override = {'Portfolio': 'black',
                      'Theme':     'red'}
fig = (px_scatter(df, x='corr', y='zscore', text='asset', color='asset_class',
                  color_map_override=color_map_override)
       .update_layout(yaxis_title=return_title,
                      xaxis_title=f'Correlation with {corr_asset}'
                      ))

# Reference line through the reference asset's own point and that point scaled by the
# smallest correlation on the chart. The anchor follows corr_asset instead of a
# hard-coded 'IEF', so the line stays consistent with the x-axis if corr_asset changes.
corr_min = df['corr'].min()
anchor_corr, anchor_zscore = df.loc[df['asset'] == corr_asset, ['corr', 'zscore']].iloc[0]
fig.add_shape(
    type="line",
    x0=anchor_corr * corr_min,
    y0=anchor_zscore * corr_min,
    x1=anchor_corr,
    y1=anchor_zscore,
    line=dict(color="lightgray", width=2)
)

fig.show(renderer='png')
fig.write_html('fed_feedback.html')
fig.write_image('fed_feedback.png')


In [19]:
def draw_market_feedback_scatter(factor_data, return_start, return_end, vol_type, corr_type, corr_asset,
                                 return_title=None):
    """
    Scatter each asset's standardised return over a window against its correlation with a reference asset.
    Parameters
    ----------
    factor_data : xarray.Dataset
        Dataset providing the `cret`, `corr` and `vol` variables and a `factor_name`
        coordinate whose attrs hold per-asset metadata (`asset_class`, `hyper_factor`).
    return_start, return_end : str
        First and last date of the return window.
    vol_type : int
        Value of the `vol_type` coordinate used to pick the volatility estimate.
    corr_type : int
        Value of the `corr_type` coordinate used to pick the correlation estimate.
    corr_asset : str
        Reference asset: correlations are taken against it (`factor_name_1`) and the
        reference line is anchored at its point.
    return_title : str, optional
        Y-axis title. When omitted, a notebook-level `return_title` variable is used if
        one exists (what this function relied on before the parameter was added);
        otherwise the default axis title is left untouched.
    Returns
    -------
    plotly.graph_objs._figure.Figure
        The scatter plot with a light-gray reference line.
    Raises
    ------
    ValueError
        If the window contains fewer than two dates, which would scale the volatility to zero.
    Notes
    -----
    Correlation and volatility are shifted by one row along `date` before being read at
    (or, via ffill, just before) `return_start`, so the estimate from the preceding date is used.
    The z-score is `total_return / (vol * sqrt(ndays / 252) * 100)`; this presumably assumes
    an annualised decimal volatility and a return in percent — TODO confirm units.
    """
    if return_title is None:
        # Backward compatibility: earlier callers set `return_title` at notebook level.
        return_title = globals().get('return_title')

    # Number of date steps between the first and last date of the window.
    ndays = factor_data['cret'].sel(date=slice(return_start, return_end)).date.size - 1
    if ndays < 1:
        raise ValueError(
            f'Return window {return_start!r} - {return_end!r} must span at least two dates in factor_data'
        )

    corr = (factor_data['corr']
            .shift(date=1)
            .sel(corr_type=corr_type, factor_name_1=corr_asset)
            .sel(date=return_start, method='ffill')
            .to_series()
            )
    vol = (factor_data['vol']
           .shift(date=1)
           .sel(vol_type=vol_type)
           .sel(date=return_start, method='ffill')
           .to_series()
           .mul(sqrt(ndays / 252) * 100)
           )
    ret = total_return(factor_data['cret'], return_start, return_end)
    zscore = ret.div(vol).rename('zscore')

    factor_master = pd.DataFrame(factor_data['factor_name'].attrs).T

    # A single replace after reset_index covers the new 'asset' column as well as the
    # value columns, so the extra replace that used to precede reset_index was redundant.
    df = (pd.concat([corr, zscore, factor_master['asset_class'], factor_master['hyper_factor']], axis=1)
          .rename_axis('asset').reset_index()
          .replace('MWTIX', 'TCW')
          )

    color_map_override = {'Portfolio': 'black',
                          'Theme':     'red'}
    layout = {'xaxis_title': f'Correlation with {corr_asset}'}
    if return_title is not None:
        layout['yaxis_title'] = return_title
    fig = (px_scatter(df, x='corr', y='zscore', text='asset', color='asset_class',
                      color_map_override=color_map_override)
           .update_layout(**layout))

    # Reference line through the reference asset's own point and that point scaled by the
    # smallest correlation on the chart. Anchored on corr_asset rather than a hard-coded
    # 'IEF' so the line matches the x-axis for any reference asset.
    corr_min = df['corr'].min()
    anchor_corr, anchor_zscore = df.loc[df['asset'] == corr_asset, ['corr', 'zscore']].iloc[0]
    fig.add_shape(
        type="line",
        x0=anchor_corr * corr_min,
        y0=anchor_zscore * corr_min,
        x1=anchor_corr,
        y1=anchor_zscore,
        line=dict(color='lightgray', width=2)
        )
    return fig


In [None]:
# Inputs for the 5-day market-feedback scatter.
# `return_title` is not passed in the call below: draw_market_feedback_scatter picks it up
# from notebook scope, so it has to be assigned before the call.
return_start, return_end = '2025-01-21', '2025-01-28'
return_title = f'5-day Return (std, {format_date(return_start)} - {format_date(return_end)})'
vol_type = corr_type = 63
corr_asset = 'IEF'

fig = draw_market_feedback_scatter(
    factor_data=factor_data,
    return_start=return_start,
    return_end=return_end,
    vol_type=vol_type,
    corr_type=corr_type,
    corr_asset=corr_asset,
)

# Render as a static image in the notebook, then export both formats.
fig.show(renderer='png')
fig.write_html('fed_feedback.html')
fig.write_image('fed_feedback.png')


# Correlation Matrix

In [6]:
def sort_assets(corr: pd.DataFrame, sorting_factor: str, factor_master: pd.DataFrame) -> pd.Index:
    """Order the assets of a correlation matrix for display.

    Parameters
    ----------
    corr : pd.DataFrame
        Correlation matrix indexed by asset; must contain a `sorting_factor` column.
    sorting_factor : str
        Asset whose correlation column drives the final ordering.
    factor_master : pd.DataFrame
        Per-asset metadata with `asset_class` and `hyper_factor` columns, indexed like `corr`.

    Returns
    -------
    pd.Index
        Asset labels ordered by, in priority: the sorting factor itself first, then
        descending `hyper_factor`, then non-'Theme' assets before 'Theme' assets, then
        descending correlation with the sorting factor. Every key is compared by
        absolute value.
    """
    sort_columns = ['is_sorting_factor', 'hyper_factor', 'is_theme', sorting_factor]
    ascending_flags = [False, False, True, False]

    table = corr[[sorting_factor]].join(factor_master[['asset_class', 'hyper_factor']])
    table = table.assign(is_theme=table['asset_class'] == 'Theme')
    table = table.assign(is_sorting_factor=table.index == sorting_factor)

    ordered = table.sort_values(by=sort_columns, ascending=ascending_flags, key=abs)
    return ordered.index


In [None]:
# Correlation matrix on the most recent date, for the `corr_type` chosen in an earlier cell.
date_latest = factor_data['date'].max().values
corr = (factor_data['corr']
        .sel(corr_type=corr_type, date=date_latest)
        .to_pandas()
        )

corr



def format_corr_matrix(corr: pd.DataFrame): # -> pd.io.formats.style.Styler:
    """
    Format the correlation matrix by adding asset class information and sorting.
    
    Parameters
    ----------
    corr : pd.DataFrame
        The correlation matrix with assets as both rows and columns.
    
    Returns
    -------
    pd.io.formats.style.Styler
        The formatted correlation matrix as a pandas Styler object.
    """
    # Sort the correlation matrix by asset class
    corr = corr
    
    # Style the correlation matrix
    styled_corr = corr.style.background_gradient(cmap='coolwarm', vmin=-1, vmax=1).format(precision=2)
    
    return styled_corr

# Style the latest correlation matrix (unsorted) and display it as the cell output.
formatted_corr = format_corr_matrix(corr)
formatted_corr



In [None]:
# Compare the latest correlation matrix with an earlier one.
# NOTE(review): date[-63] is 62 rows before the last date, not 63 — confirm the intended lag.
date_t0 = factor_data.date.max().values
date_t1 = factor_data.date[-63].values

corr_t0 = factor_data.corr.sel(corr_type=corr_type, date=date_t0).to_pandas()
corr_t1 = factor_data.corr.sel(corr_type=corr_type, date=date_t1).to_pandas()
corr_diff = corr_t0 - corr_t1
sorted_index = sort_assets(corr_t0, '2s10s', factor_master)
sorted_index_chg = sort_assets(corr_diff, '2s10s', factor_master)

# Only a cell's last expression is rendered automatically, so the level matrix used to be
# silently dropped. display() shows both. The level matrix is taken from corr_t0 (computed
# here) rather than from the `corr` variable left behind by another cell.
display(format_corr_matrix(corr_t0.loc[sorted_index, sorted_index]))
display(format_corr_matrix(corr_diff.loc[sorted_index_chg, sorted_index_chg]))

# Draw correlation time series

In [None]:
# Show the dataset's repr to check which variables and coordinates are available.
factor_data

In [None]:
# Chart helpers used by this cell and by the correlation-pairs loop further down.
from chart import draw_correlation, draw_volatility

asset1 = 'SPY'
asset2 = 'IEF'
start_date = None  # set to e.g. '2020' to restrict the charts to dates from 2020 onwards

# Long-format view of the pair's correlation from 2024 onwards. It is not used by the
# charts below; kept for ad-hoc inspection.
df1  = factor_data.sel(factor_name=asset1, factor_name_1=asset2, date=slice('2024', None)).corr.to_pandas().T.stack()

corr_window = factor_data.corr.sel(date=slice(start_date, None))
vol_window = factor_data.vol.sel(date=slice(start_date, None))

# Reuse the half-lives the dataset was built with instead of repeating the literal list.
fig1 = draw_correlation(corr_window, asset1, asset2, halflifes)
fig2 = draw_volatility(vol_window, asset1, halflifes)
fig3 = draw_volatility(vol_window, asset2, halflifes)

# Each figure is rendered once (fig1 used to be shown twice).
fig1.show(renderer='png')
fig2.show()
fig3.show()
# fig1.write_html('corr_ts.html')
# fig1.write_image('corr_ts.png')


In [None]:
# No lower bound on the date range; set to e.g. '2015' to start the charts there.
start_date = None

# Pairs to chart. Other pairs charted previously (re-add to the list as needed):
#   ('SHY', 'IEF'), ('2s10s', 'SPY'), ('2s10s', 'IEF'), ('2s10s', 'HYGH'),
#   ('2s10s', 'HYG'), ('2s10s', 'LQD'), ('2s10s', 'VMBS'), ('2s10s', 'AGNC')
corr_pairs = [
    ('SPY', 'IEF'),
    ('2s10s', 'LQDH'),
]

# One correlation chart per pair; `fig`, `asset1` and `asset2` keep the values of the
# last iteration after the loop finishes.
for asset1, asset2 in corr_pairs:
    corr_history = factor_data.corr.sel(date=slice(start_date, None))
    fig = draw_correlation(corr_history, asset1, asset2, [21, 63, 126, 252])
    fig.show(renderer='png')
    # fig.write_html(f'corr_{asset1}_{asset2}.html')
    # fig.write_image(f'corr_{asset1}_{asset2}.png')

In [15]:
# Export the correlation time-series chart held in fig1.
fig1.write_html('corr_ts.html')
fig1.write_image('corr_ts.png')

# The 'fed_feedback' files are deliberately not written here. When the notebook runs top
# to bottom, the correlation-pairs loop has rebound `fig` to a correlation chart, so
# writing `fig` at this point would overwrite the scatter exported by its own cell.

In [None]:
# factor_data.ret.sel(asset=[asset1, asset2]).plot(hue='asset')

# Plot the return series of each asset in turn, all from the same cell.
for ticker in ('IWM', 'IEF', 'SHY'):
    factor_data.ret.sel(asset=ticker).plot()

# Kept as the cell's final expression so its return value is still the cell output.
factor_data.ret.sel(asset='2s10s').plot()