In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline


作者：HY Scatterer
链接：https://www.zhihu.com/question/266917275/answer/903838564
来源：知乎
著作权归作者所有。商业转载请联系作者获得授权，非商业转载请注明出处。

In [2]:
def bayesian_shrinkage_for_idio_vol(idio_vol_est, market_cap, empirical_coef, num_bins=10):
    """
    Adjust idiosyncratic volatility estimates by Bayesian shrinkage.

    Stocks are grouped into market-cap bins; the prior for each stock is the
    market-cap-weighted mean of the estimated volatilities in its bin. Each
    estimate is then pulled towards that prior with the weight

        v = q * |vol_est - vol_prior| / (dist_std + q * |vol_est - vol_prior|)

    where ``dist_std`` is the root-mean-square distance between estimate and
    prior inside the bin, and the shrunk volatility is
    ``v * vol_prior + (1 - v) * vol_est``.

    :param idio_vol_est: sample estimates of idiosyncratic volatility, a series with index
                         of stock tickers
    :param market_cap: the stock market capital at the time of volatility estimation, a series with index
                       of stock tickers
    :param empirical_coef: this coefficient (variable q in BARRA note) balances the distance between
                           estimate and prior against the dispersion of that distance within the bin;
                           0 means no shrinkage
    :param num_bins: number of market-cap bins passed to ``pd.cut`` (default 10, the value that used
                     to be hard-coded)
    :return: a series of shrunk volatilities indexed by the union of the tickers of both inputs.
             Stocks lacking either a volatility estimate or a market cap get NaN. If no market cap
             is available at all (including empty input) an all-NaN series is returned.

    NOTE(review): ``pd.cut`` with an integer builds equal-width bins over the market-cap range, not
    equal-population quantile groups -- confirm this is the intended grouping.
    """
    frame = pd.DataFrame({'market_cap': market_cap, 'vol_est': idio_vol_est})

    # pd.cut cannot derive bin edges without at least one non-missing market cap
    if not frame['market_cap'].notna().any():
        return pd.Series(np.nan, index=frame.index, dtype=float)

    # classify stocks into categories by market cap, labeled by an integer bin id
    # (labels=False yields the integer codes directly; missing caps give a NaN bin)
    bin_id = pd.cut(frame['market_cap'], bins=num_bins, labels=False)

    # only stocks with both a cap and a volatility estimate may contribute to the prior,
    # otherwise the cap of a stock without an estimate would dilute the weighted mean
    usable = frame['vol_est'].notna() & frame['market_cap'].notna()
    weights = frame['market_cap'].where(usable)

    # get the prior volatility in each bin by market-cap-weighted average; the sums are
    # computed explicitly instead of via DataFrameGroupBy.agg(callable), which hands the
    # callable one column at a time on current pandas and cannot see both columns
    weighted_vol_sum = (frame['vol_est'] * weights).groupby(bin_id).sum()
    weight_sum = weights.groupby(bin_id).sum()
    vol_prior = bin_id.map(weighted_vol_sum / weight_sum)

    # root-mean-square distance between estimate and prior in each bin
    gap = frame['vol_est'] - vol_prior
    dist_std = bin_id.map(np.sqrt((gap ** 2).groupby(bin_id).mean()))

    # shrinkage coefficient, algebraically equal to (1 + dist_std / (q * |gap|)) ** -1 but
    # written as a single fraction so that only the fully degenerate case divides by zero
    scaled_gap = empirical_coef * gap.abs()
    denominator = dist_std + scaled_gap
    # denominator == 0 means the estimate already coincides with the prior (e.g. a stock that
    # is alone in its bin): nothing to shrink, so use 0 instead of the 0/0 NaN. Missing
    # values compare unequal to 0 and therefore stay NaN.
    bayesian_shrinkage_coef = (scaled_gap / denominator).where(denominator != 0, 0.0)

    # the better estimate of the idiosyncratic vol is a linear combination of the prior vol
    # and the sample estimated vol
    vol_sh = bayesian_shrinkage_coef * vol_prior + (1 - bayesian_shrinkage_coef) * frame['vol_est']
    return vol_sh