In [None]:
import numpy as np
from numpy import sqrt
import pandas as pd
from risk_data import get_factor_data
from risk_stats import get_vix_regime, summarize, summarize_regime

In [None]:
# Load the factor dataset and derive the two inputs used by every cell below:
# `ret` (a pandas object restricted to the selected factors) and `vix_regime`
# (the grouping key passed to the regime summaries).
# NOTE(review): read_cache=True presumably reads a local cache instead of
# re-downloading, and check=None presumably skips validation — confirm against
# risk_data.get_factor_data.
factor_data = get_factor_data(read_cache=True, check=None)
factor_list = ['SPY', 'IEF']
# Select the two factors along the `factor_name` dimension, then convert to pandas.
ret = factor_data.ret.sel(factor_name=factor_list).to_pandas()
# Regime labels are derived from `factor_data.cret`, not from `ret`.
vix_regime = get_vix_regime(factor_data.cret)

In [None]:
# Summary table for the selected factors. Run top to bottom, this resolves to
# the `summarize` imported from risk_stats; a later cell defines a local
# function of the same name that shadows it for any cell executed afterwards.
summarize(ret)

In [None]:
# Same summary, grouped by `vix_regime`. Run top to bottom, this is the
# risk_stats version of `summarize_regime`; a later cell redefines the name.
summarize_regime(ret, vix_regime)

In [None]:
def summarize_walrus(df, ann=252):
    """Summarize a DataFrame of returns, one row per statistic.

    The statistics dict is bound with the walrus operator while it is being
    passed to ``pd.concat`` so that its keys can be reused later in the same
    expression to build an ordered categorical index.

    Parameters
    ----------
    df : pd.DataFrame
        Returns, one column per series.
    ann : int or float, default 252
        Annualization factor: the mean is scaled by ``ann`` and the standard
        deviation and Sharpe ratio by ``sqrt(ann)``.

    Returns
    -------
    pd.DataFrame
        Rows ``count, mean, std, sharpe, min, max`` (an ordered
        ``CategoricalIndex`` in that order); columns are the columns of ``df``.
        ``sharpe`` is NaN for any column whose standard deviation is zero.
    """
    # Compute the moments once; the original recomputed them for every statistic.
    mean = df.mean()
    std = df.std()
    return (
        pd.concat(
            stats := {
                'count': df.count(),
                'mean': mean * ann,
                'std': std * sqrt(ann),
                # Guard against zero volatility so the ratio is NaN rather than inf.
                'sharpe': pd.Series(
                    np.where(std != 0, (mean / std) * sqrt(ann), np.nan),
                    index=df.columns,
                ),
                'min': df.min(),
                'max': df.max(),
            },
            axis=1,
        )
        .T
        # `stats` is already bound here: the arguments of pd.concat are
        # evaluated before the rest of the method chain.
        .set_index(pd.CategoricalIndex(list(stats),
                                       categories=list(stats),
                                       ordered=True))
        .sort_index()
    )

# Display the walrus-based summary for the same returns frame used above.
summarize_walrus(ret)

In [None]:
# factor_names  = ['SPY', 'IEF']
# ret = factor_data['ret'].sel(factor_name=factor_names).to_pandas()

# def summarize(df, rfr=0.0, ann=252):
#     """ Summarize returns statistics.
#     Parameters
#     ----------
#     df : pd.DataFrame
#         DataFrame of returns.
#     rfr : float
#         Risk-free rate.
#     ann : int
#         Annualization factor.
#     """
#     stats = {'count':  df.count(),
#              'mean':   df.mean() * ann,
#              'std':    df.std() * sqrt(ann), 
#              'sharpe': (df.mean() * ann - rfr)  / (df.std() * sqrt(ann)),
#              'min':    df.min(), 
#              'max':    df.max()}
    
#     summary = (pd.concat(stats, axis=1)
#                .T)
#     summary.index = pd.CategoricalIndex(summary.index, categories=stats.keys(), ordered=True, name='statistic')
#     return summary


# def summarize_regime(df, groups=None, rfr=None):
#     summary = df.groupby(groups).apply(summarize)
#     summary.index.names = ['regime', 'statistic']
#     summary = summary.reorder_levels([1, 0], axis=0).sort_index()
#     return summary

# summarize_regime(ret, groups=vix_regime)
# # summarize(ret)

In [None]:
from typing import Optional
import pandas as pd
import numpy as np
from math import sqrt

def _percentile_label(p):
    """Return the row label for quantile ``p`` (a fraction), e.g. 0.05 -> 'p05'."""
    # Round before truncating: 0.29 * 100 evaluates to 28.999999999999996, so a
    # bare int() would label the 29th percentile 'p28'.
    pct = round(float(p) * 100, 6)
    if pct == int(pct):
        return f"p{int(pct):02d}"
    # Fractional percentiles (e.g. 0.995 -> 'p99.5') keep their decimals so they
    # do not collide with the neighbouring whole percentile.
    return f"p{pct:g}"


def summarize(df, rfr=0.0, percentiles: Optional[list[float]]=[0.05, 0.5, 0.85], freq=252):
    """Summarize a DataFrame of returns, one row per statistic.

    Parameters
    ----------
    df : pd.DataFrame
        Returns, one column per series.
    rfr : float, default 0.0
        Rate subtracted from the annualized mean in the Sharpe ratio.
    percentiles : list of float or None
        Quantiles (as fractions) to report between ``min`` and ``max``, in the
        order given. ``None`` omits the quantile rows. The default list is
        never mutated.
    freq : int or float, default 252
        Annualization factor: the mean is scaled by ``freq`` and the standard
        deviation by ``sqrt(freq)``.

    Returns
    -------
    pd.DataFrame
        Rows ``sharpe, mean, std, min, <quantiles>, max, count`` held in an
        ordered ``CategoricalIndex`` named ``'statistic'``; columns are the
        columns of ``df``. Quantile rows are labelled ``pNN`` (e.g. ``p05``).
    """
    ann_mean = df.mean() * freq
    ann_std = df.std() * sqrt(freq)

    if percentiles is None:
        quantile_rows = {}
    else:
        quantiles = df.quantile(percentiles)
        quantiles.index = [_percentile_label(p) for p in percentiles]
        # {label: {column: value}} so each quantile becomes one row of the summary.
        quantile_rows = quantiles.T.to_dict()

    # Insertion order of this dict defines the row (category) order of the result.
    stats = {
        'sharpe':     (ann_mean - rfr) / ann_std,
        'mean':       ann_mean,
        'std':        ann_std,
        # 'skew':       df.skew(),
        # 'kurtosis':   df.kurtosis(),
        'min':        df.min(),
        **quantile_rows,
        'max':        df.max(),
        'count':      df.count(),
    }

    summary = pd.DataFrame(stats).T
    summary.index = pd.CategoricalIndex(
        summary.index,
        categories=list(stats.keys()),
        ordered=True,
        name='statistic'
    )
    return summary

def summarize_regime(df, groups=None, include_total=True, **kwargs):
    """Summarize returns by regime, optionally adding a whole-sample 'Total'.

    Parameters
    ----------
    df : pd.DataFrame
        Returns, one column per series.
    groups
        Grouping key handed straight to ``df.groupby``. The ``None`` default is
        forwarded unchanged, so a key must be supplied in practice.
    include_total : bool, default True
        When true, append the summary of the full, ungrouped ``df`` under the
        regime label ``'Total'``.
    **kwargs
        Forwarded to ``summarize`` (for both the per-regime and total calls).

    Returns
    -------
    pd.DataFrame
        Indexed by a two-level MultiIndex ``(<statistic>, 'regime')``, sorted.
        The first level keeps the index name produced by ``summarize`` and
        falls back to ``'statistic'`` when that index is unnamed.
    """
    by_regime = df.groupby(groups).apply(summarize, **kwargs)

    # groupby/apply yields (group key, statistic). Read the statistic level's
    # name from the LAST level: the first one carries the grouper's name.
    statistic_name = by_regime.index.names[-1] or 'statistic'
    level_names = ['regime', statistic_name]
    by_regime = by_regime.rename_axis(level_names)

    parts = [by_regime]
    if include_total:
        total_summary = summarize(df, **kwargs)
        # Same (regime, statistic) layout and the same level names, so that
        # concatenation does not discard the names.
        total_summary.index = pd.MultiIndex.from_product(
            [['Total'], total_summary.index],
            names=level_names,
        )
        parts.append(total_summary)

    # Swap by position so the result is (statistic, regime) even if the two
    # level names happen to coincide.
    return (pd.concat(parts)
            .reorder_levels([1, 0])
            .sort_index())

# summarize(ret)
# Regime breakdown using the local definitions from this cell, with the
# quantile rows switched off: percentiles=None is forwarded to `summarize`
# through **kwargs.
summarize_regime(ret, groups=vix_regime, percentiles=None)

In [None]:
# Recompute the regime labels (same call as in the data-loading cell).
vix_regime = get_vix_regime(factor_data.cret)
# NOTE(review): a bare expression that is not the last statement of a cell is
# not rendered, so this line shows nothing — move it to its own cell if the
# labels are meant to be inspected.
vix_regime

# Per-regime summary via a plain groupby/apply, i.e. without the 'Total' rows
# or the level reordering that summarize_regime adds.
ret.groupby(vix_regime).apply(summarize)