# 五因子复制

## 数据处理及因子构建方法
### 样本数据选取
1. 考虑到我国境内机构/组织/个人一般主要参与A股交易，本文股票样本只选取A股股票。
2. 从数据完整性上，本文选取A股上市公司1994年7月至2015年8月共254个月的月度数据为样本
3. 本文使用的数据库是北京量邦科技金融数据库，复制采用CSMAR数据库数据
3. 无风险收益率采用人民银行公布的人民币三个月整存整取利率
4. 月度收益率采用考虑现金红利再投资的月度收益率
### 数据处理
1. 以流通股计算股票市值加权权重
2. 剔除所有股票上市之后最初120个交易日的交易数据
3. 财务数据取自合并报表
### 因子构建
* 本文在第t年6月底根据指标分位点进行分组，用于确认第t年7月至t+1年6月的股票组合。对于股票i， 以其在第t年6月底的流通市值(size);账面市值比(B/M)用第t-1年末的账面价值除以第t-1年12月底股票的流通市值;使用“营业利润/股东权益合计”反映A股市场的“营运利润率”指标;投资风格(Inv)用第t-1年末相对于第t-2年末的总资产增加额除以t-2年末的总资产。
* 分组方法一共有三种：
1. 2x3分组，分成SH/SN/SL/BH/BN/BL/SR/SN/SW/BR/BN/BW/SC/SN/SA/BC/BN/BA 18组
2. 2x2分组，分成SH/SL/BH/BL/SR/SW/BR/BW/SC/SA/BA/BA 12组
3. 2x2x2x2分组，12组
![blob](https://i.loli.net/2020/06/11/cM6ICwUq1NXjBSE.png)

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import os
import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy.stats as st
from numpy.linalg import inv
from pandas.tseries.offsets import *
from WindPy import *
w.start()
os.chdir(r'D:\Postgraduate\2020\asset pricing\data\中国市场')

def get_month1(table, key):
    table.loc[:, key] = pd.to_datetime(table[key])
    table.loc[:, 'year'] = table[key].dt.year
    table.loc[:, 'month'] = table[key].dt.month
    return table

Welcome to use Wind Quant API for Python (WindPy)!

COPYRIGHT (C) 2017 WIND INFORMATION CO., LTD. ALL RIGHTS RESERVED.
IN NO CIRCUMSTANCE SHALL WIND BE RESPONSIBLE FOR ANY DAMAGES OR LOSSES CAUSED BY USING WIND QUANT API FOR Python.


In [2]:
# 读取需要的数据
# 流通市值和收益率
monthly = pd.read_csv('TRD_Mnth1.csv')  
monthly.columns = ['code', 'date', 'mktcap', 'ret', 'type']
# book value
balance_sheet = pd.read_csv('balance sheet1.csv')  
balance_sheet.columns = ['code', 'date', 'type', 'defer assets', 'defer debut', 'prior stock', 'equity']
# 权益/资产/盈利
equity = pd.read_csv('FS_Combas1.csv')
profit = pd.read_csv('FS_Comins1.csv')
# IPO时间
ipo = pd.read_csv('IPO_Cobasic1.csv')
ipo.columns = ['code', 'ipo']

In [3]:
# 从wind中获取交易日数据
wt = w.tdays("1990-12-01", "2020-06-09", "")
trade = pd.DataFrame(wt.Data).T.rename(columns={0: 'ipo'})
trade.loc[:, '120day'] = trade['ipo'].shift(-120)

def get_120day(df1, df2, df3):
    '''
    * 去掉上市前120个交易日的数据
    @param df1: ipo时间
    @param df2: 交易日时间
    @param df3: 需要筛选IPO的数据-收益率
    @return: 
    '''
    df1.loc[:, 'ipo'] = pd.to_datetime(df1['ipo'])
    ipo1 = pd.merge(df1, df2, on='ipo')
    dd = pd.merge(df3, ipo1, on='code')
    dm = dd[pd.to_datetime(dd['date']) > dd['120day']]
    return dm

amonthly = get_120day(ipo, trade, monthly)

In [5]:
def get_mkt(df):
    df.loc[:, 'mktcap'] = df['mktcap'] * 1000
    df1 = df[(df['type'] == 1) | (df['type'] == 4)]  # 筛选A股
    df1 = get_month1(df1, 'date')
    df1.loc[:, 'size'] = np.log(df1['mktcap'])
    return df1

def get_bs(df):
    df = df[df['type'] == 'A']
    df = get_month1(df, 'date')
    df = df[df['month'] == 12]
    df = df.fillna(0)
    df['be'] = df['equity'] + df['defer assets'] - df['defer debut'] - df['prior stock']
    #    be = pd.pivot(df,index='code',columns='year',values='be')
    return df

bmkt = get_mkt(amonthly)
bsheet = get_bs(balance_sheet)

def get_bm(df1, df2):
    '''
    * 计算BM
    @param df1: 流通市值size
    @param df2: 账面市值比be
    @return: 
    '''
    df1 = df1[df1['month'] == 12]
    bs = df1[['code', 'year', 'month', 'be']]
    mkt = df2[['code', 'mktcap', 'year', 'month']]
    bm = pd.merge(bs, mkt, on=['code', 'year', 'month'])
    bm.loc[:, 'be'] = bm.groupby(['code'])['be'].fillna(axis=0, method='ffill')
    bm['bm'] = bm['be'] / bm['mktcap']
    bm.loc[:, 'date'] = bm['year'] * 100 + bm['month']
    bm.loc[:, 'date'] = pd.to_datetime(bm['date'], format='%Y%m') + MonthEnd(7)
    return bm


def get_gpinv(df1, df2):
    '''
    * 计算Op = profit/equity   Inv = (Invt-Invt-1)/Invt-1
    @param df1: 流通市值size
    @param df2: 账面市值比be
    @return: 
    '''
    df1.columns = ['code', 'date', 'type', 'prof']
    df2.columns = ['code', 'date', 'type', 'asset', 'equity']
    df = pd.merge(df1, df2, on=['code', 'date', 'type'])
    df = get_month1(df, 'date')
    df = df[(df['type'] == 'A') & (df['month'] == 12)]
    df.loc[:, 'date'] = df['date'] + MonthEnd(6)
    df['profit'] = df['prof'] / df['equity']
    df['asset-1'] = df.groupby(['code'])['asset'].shift(1)
    df['inv'] = (df['asset'] - df['asset-1']) / df['asset-1']
    df.loc[:, 'profit'] = df.groupby(['code'])['profit'].fillna(axis=0, method='ffill')
    return df

def get_mom(df):
    '''
    * 计算MOM：前2-12个月的累积收益率
    @param df: 收益率数据
    @return: 
    '''
    df['ret+1'] = df['ret'] + 1
    df = df.sort_values(by=['code', 'date'])
    mome = df.groupby(['code'])['ret+1'].rolling(window=11).apply(np.prod, raw=False).reset_index()
    mom1 = mome.set_index('level_1').rename(columns={'ret+1': 'cumret'})
    dff = pd.concat([df, mom1['cumret']], axis=1)
    dff['mom'] = dff['cumret'] - 1
    dff.loc[:, 'mom'] = dff.groupby(['code'])['mom'].shift(2)
    return dff

def combine(df1, df2, df3, df4):
    '''
    将计算出的五个因子进行合并
    @param df1: size
    @param df2: bm
    @param df3: op/inv
    @param df4: mom
    @return: 
    '''
    df1 = df1[df1['month'] == 6]
    df1.loc[:, 'date'] = df1['date'] + MonthEnd(1)
    f1 = df1.loc[:, ['code', 'date', 'size']]
    f2 = df2.loc[:, ['code', 'date', 'bm']]
    f3 = df3.loc[:, ['code', 'date', 'profit', 'inv']]
    df4.loc[:, 'date'] = pd.to_datetime(df4['date']) + MonthEnd(1)
    f4 = df4.loc[:, ['code', 'date', 'mom']]
    ff1 = pd.merge(f1, f2, on=['code', 'date'])
    ff2 = pd.merge(ff1, f3, on=['code', 'date'])
    ff3 = pd.merge(ff2, f4, on=['code', 'date'])
    return ff3

bm1 = get_bm(bsheet, bmkt)
gpinv = get_gpinv(profit, equity)
mom = get_mom(amonthly)

cc = combine(bmkt, bm1, gpinv, mom)
cc.head()

Unnamed: 0,code,date,size,bm,profit,inv,mom
0,600601,1992-06-30,26.356309,0.000195,0.062655,-0.060308,2.644466
1,600601,1993-06-30,26.419016,0.000426,0.044449,4.735329,0.801119
2,600601,1994-06-30,26.968442,0.000111,0.138002,0.342467,0.129205
3,600601,1995-06-30,27.197157,0.000325,0.367572,1.931432,0.004231
4,600601,1996-06-30,27.352449,0.000417,0.152748,0.291067,0.15653


In [6]:
ff5 = pd.read_csv('fivefactor_monthly.csv').rename(columns={'trdmn': 'date'})
ff = ff5[(ff5['date'] >= 199407) & (ff5['date'] <= 201508)]

# 收益率合并
def get_ret(df1, df2, df3):
    '''
    将因子与收益率进行合并，以便分组
    @param df1: 收益率数据
    @param df2: 计算出的因子数据
    @param df3: 无风险收益率数据
    @return: 
    '''
    ret1 = df1[['code', 'date', 'mktcap', 'ret']]
    ret1.loc[:, 'date'] = pd.to_datetime(ret1['date']) + MonthEnd(1)
    df = pd.merge(ret1, df2, how='outer', on=['code', 'date'])
    for i in df.columns[4:]:
        df[i] = df.groupby('code')[i].fillna(axis=0, method='ffill')
    df3.loc[:, 'date'] = pd.to_datetime(df3['date'], format='%Y%m') + MonthEnd(1)
    df3 = df3[['date', 'rf']]
    dff = pd.merge(df, df3, on='date')
    dff['eret'] = dff['ret'] - dff['rf']
    #    dff['eret'] = dff.sort_values(by = ['code', 'date']).groupby('code')['eret'].shift(-1)
    dff1 = dff[(dff['date'] >= '1994-07-31 00:00:00') & (dff['date'] <= '2015-08-31 00:00:00')]
    return dff1

fac = get_ret(amonthly, cc, ff5)

In [8]:
def get_decile(data, x1_name, x2_name):
    '''
    数据2*3分组分位点
    @param data: 股票收益率及因子数据
    @param x1_name: 分组变量名称
    @param x2_name: 分组变量名称
    @return: 
    '''
    x1 = data.groupby(['date'])[x1_name].describe(percentiles=[0.3, 0.7]).reset_index()
    x1 = x1[['date', '30%', '70%']]
    x2 = data.groupby(['date'])[x2_name].describe(percentiles=[0.5]).reset_index()
    x2 = x2[['date', '50%']]
    df = pd.merge(x1, x2, how='inner', on=['date'])
    df.columns = ['date', 'B1_1t', 'B1_2t', 'B2_1t']
    return df

def portfolio_ind(alldata, breakpoints, X1_name, X2_name):
    '''
    根据分位点分组，独立2*3
    @param alldata: 总数据
    @param breakpoints: 分位点
    @param X1_name: 分组变量
    @param X2_name: 分组变量
    @return: 
    '''
    data = alldata.copy()
    date = data[['date']].drop_duplicates()
    X = pd.DataFrame()
    for t in date['date']:
        temp_data = data[data['date'] == t]
        temp_breakpoint = breakpoints[breakpoints['date'] == t].iloc[0, :]
        B1 = [-np.inf, temp_breakpoint[1], temp_breakpoint[2], np.inf]
        B2 = [-np.inf, temp_breakpoint[3], np.inf]
        temp = pd.DataFrame()
        for i in range(3):
            x = temp_data[(temp_data[X1_name] >= B1[i]) & (temp_data[X1_name] <= B1[i + 1])]
            x.loc[:, 'X1_group'] = X1_name + str(i + 1)
            temp = pd.concat([temp, x])
        temp2 = pd.DataFrame()
        for j in range(2):
            x = temp[(temp[X2_name] >= B2[j]) & (temp[X2_name] <= B2[j + 1])]
            x.loc[:, 'X2_group'] = X2_name + str(j + 1)
            temp2 = pd.concat([temp2, x])
        X = pd.concat([X, temp2])
    return X

# 市值加权
def wavg(group, avg_name, weight_name):
    d = group[avg_name]
    w = group[weight_name]
    try:
        return (d * w).sum() / w.sum()
    except ZeroDivisionError:
        return np.nan

# 流通股
ffs = pd.read_csv('ffsA.csv')
def get_ffs(df1, df2):
    '''
    根据流通股进行市值加权
    @param df1: 流通股数据
    @param df2: 因子收益数据
    @return: 
    '''
    ffs1 = df1.iloc[:, 3:].rename(columns={'Date': 'code'}).set_index('code')
    ffsA = ffs1.stack().reset_index()
    ffsA.columns = ['code', 'date', 'ffs']
    ffsA.loc[:, 'code'] = ffsA['code'].str[:6]
    ffsA.loc[:, 'date'] = pd.to_datetime(ffsA['date'].str[:7], format='%Y-%m') + MonthEnd(1)
    df2.loc[:, 'code'] = df2['code'].apply(lambda x: str(x).zfill(6))
    df = pd.merge(df2, ffsA, on=['code', 'date'])
    return df


fac1 = get_ffs(ffs, fac)

# create SMB and HML factors
def get_factor1(df, key, key1, key2):
    '''
    @param df: 因子和收益数据
    @param key: 最终的因子名称smb/hml
    @param key1: 3分组分组变量
    @param key2: 2分组分组变量
    @return: 
    '''
    df = df.dropna()
    decile = get_decile(df, key1, key2)
    class_1 = portfolio_ind(df, decile, key1, key2)
    vwret = class_1.groupby(['date', 'X2_group', 'X1_group']).apply(wavg, 'eret',
                                                                    'ffs').to_frame().reset_index().rename(
        columns={0: 'vwret'})
    vwret.loc[:, 'sbport'] = vwret['X2_group'] + vwret['X1_group']
    ff = vwret.pivot(index='date', columns='sbport', values='vwret')
    ff['WB'] = (ff.iloc[:, -3] + ff.iloc[:, -2] + ff.iloc[:, -1]) / 3
    ff['WS'] = (ff.iloc[:, 0] + ff.iloc[:, 1] + ff.iloc[:, 2]) / 3
    ff['WH'] = (ff.iloc[:, 5] + ff.iloc[:, 2]) / 2
    ff['WL'] = (ff.iloc[:, 3] + ff.iloc[:, 0]) / 2
    ff.loc[:, 'SMB' + key] = ff['WS'] - ff['WB']
    ff.loc[:, key] = ff['WH'] - ff['WL']
    return ff


def get_five():
    hml = get_factor1(fac1, 'hml', 'bm', 'size')
    rmw = get_factor1(fac1, 'rmw', 'profit', 'size')
    cma = get_factor1(fac1, 'cma', 'inv', 'size')
    ff5s = pd.concat([hml.iloc[:, -2:], rmw.iloc[:, -2:], cma.iloc[:, -2:]], axis=1)
    ff5s.loc[:, 'smb'] = (ff5s['SMBhml'] + ff5s['SMBrmw'] + ff5s['SMBcma']) / 3
    umd = get_factor1(fac1, 'umd', 'mom', 'size')
    ff.index = ff5s.index
    five = pd.concat([ff['mkt_rf'], ff5s, umd['umd']], axis=1)
    five = five[['mkt_rf', 'smb', 'hml', 'umd', 'rmw', 'cma']]
    return five

ff5st = get_five()
ff5st

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0_level_0,mkt_rf,smb,hml,umd,rmw,cma
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1994-07-31,-0.245340,-0.128499,-0.034557,-0.154988,-0.015712,-0.021407
1994-08-31,1.126362,0.332739,0.359644,0.946154,-0.140694,-0.215747
1994-09-30,0.050760,0.205678,-0.152699,-0.013798,0.056253,-0.015849
1994-10-31,-0.209376,0.013961,0.050675,0.062098,-0.025381,-0.016795
1994-11-30,0.028550,0.023728,0.005689,0.141247,0.090484,-0.005015
...,...,...,...,...,...,...
2015-04-30,0.199267,0.002995,0.030643,0.019716,-0.039268,-0.067870
2015-05-31,0.112851,0.190012,-0.130860,-0.063959,-0.073150,-0.034751
2015-06-30,-0.090956,-0.050468,0.072417,-0.117176,-0.026957,-0.040234
2015-07-31,-0.161367,-0.036094,0.019109,-0.024708,0.072261,0.020151


In [9]:
# 因子收益均值
def nwttest_1samp(a, lags=6):
    adj_a = np.array(a)
    # 对常数回归
    model = sm.OLS(adj_a, [1] * len(adj_a)).fit(cov_type='HAC', cov_kwds={'maxlags': lags})
    return adj_a.mean(), float(model.tvalues)

def excess_return(data):
    X = pd.DataFrame()
    for i in range(0, 6):
        a = data.iloc[:, i].dropna()
        df = nwttest_1samp(a)
        x = pd.Series(df)
        X = pd.concat([X, x], axis=1)
    X.index = ['Average', 't-statistic']
    X.columns = ['mkt_rf', 'smb', 'hml', 'umd', 'rmw', 'cma']
    X = X.applymap(lambda x: round(x, 4))
    return X

disc1 = excess_return(ff5st)
disc1

Unnamed: 0,mkt_rf,smb,hml,umd,rmw,cma
Average,0.0136,0.0089,0.0011,0.0114,-0.0004,-0.0027
t-statistic,1.7437,2.8158,0.4628,2.2864,-0.203,-1.4627


![blob](https://i.loli.net/2020/06/11/jeJX75koFYDdVvp.png)

In [10]:
def ffc1(df):
    """
    * A股市场风格检验
    @param df:五因子收益率
    @return:
    """
    FFC = pd.DataFrame()
    for i in df.columns[3:]:
        ffc = smf.ols(i + '~mkt_rf+smb+hml', df).fit(cov_type='HAC', cov_kwds={'maxlags': 6})
        X = []
        X.append(ffc.params[0])
        X.append(ffc.tvalues[0])
        X = pd.Series(X, name=i, index=['Alpha', 'tvalues'])
        FFC = pd.concat([FFC, X], axis=1, sort=True)
    FFC = FFC.applymap(lambda x: round(x, 4))
    return FFC

c1 = ffc1(ff5st)
c1

Unnamed: 0,umd,rmw,cma
Alpha,0.0052,0.0041,0.0009
tvalues,1.3553,2.41,0.5926


In [12]:
def ffc2(df, yz):
    """
    * 冗余检验
    @param df:
    @param yz:
    @return:
    """
    FFC = pd.DataFrame()
    for i in df.columns:
        a = yz.copy()
        a.remove(i)
        x = '+'.join(a)
        ffc = smf.ols(i + '~' + x, df).fit(cov_type='HAC', cov_kwds={'maxlags': 6})
        X = []
        X.append(ffc.params[0])
        X.append(ffc.tvalues[0])
        X = pd.Series(X, name=i, index=['Alpha', 'tvalues'])
        FFC = pd.concat([FFC, X], axis=1, sort=False)
    FFC = FFC.applymap(lambda x: round(x, 4))
    return FFC

ryyz = ['mkt_rf', 'smb', 'hml', 'rmw', 'cma']
f = ff5st.drop('umd', axis=1)
c2 = ffc2(f, ryyz)
c2

Unnamed: 0,mkt_rf,smb,hml,rmw,cma
Alpha,0.0055,0.0057,0.0007,0.0033,-0.001
tvalues,0.5604,2.6091,0.2962,2.7247,-0.9427


In [13]:
# 股改前后
ggry = ['mkt_rf', 'smb', 'hml', 'rmw', 'cma', 'umd']
f1 = ff5st.iloc[:-98, :]
f2 = ff5st.iloc[-98:, :]
cf1 = ffc2(f1, ggry)
cf2 = ffc2(f2, ggry)
cf1

Unnamed: 0,mkt_rf,smb,hml,umd,rmw,cma
Alpha,0.0052,0.0025,0.0005,0.0054,0.0015,0.0004
tvalues,0.4706,0.7722,0.1724,1.0475,0.8514,0.2981


In [14]:
cf2

Unnamed: 0,mkt_rf,smb,hml,umd,rmw,cma
Alpha,0.003,0.0094,0.005,0.0006,0.006,-0.0045
tvalues,0.2301,4.1652,2.1535,0.0976,2.446,-3.8168


In [15]:
def get_decile1(data, x1_name, x2_name):
    x1 = data.groupby(['date'])[x1_name].describe(percentiles=[0.2, 0.4, 0.6, 0.8]).reset_index()
    x1 = x1[['date', '20%', '40%', '60%', '80%']]
    x2 = data.groupby(['date'])[x2_name].describe(percentiles=[0.2, 0.4, 0.6, 0.8]).reset_index()
    x2 = x2[['date', '20%', '40%', '60%', '80%']]
    df = pd.merge(x1, x2, how='inner', on=['date'])
    df.columns = ['date', 'B1_1t', 'B1_2t', 'B1_3t', 'B1_4t', 'B2_1t', 'B2_2t', 'B2_3t', 'B2_4t']
    return df

def portfolio_ind_5X5(alldata, breakpoints, X1_name, X2_name):
    data = alldata.copy()
    date = data[['date']].drop_duplicates()
    X = pd.DataFrame()
    for t in date['date']:
        temp_data = data[data['date'] == t]
        temp_breakpoint = breakpoints[breakpoints['date'] == t].iloc[0, :]
        B1 = [-np.inf, temp_breakpoint[1], temp_breakpoint[2], temp_breakpoint[3], temp_breakpoint[4], np.inf]
        B2 = [-np.inf, temp_breakpoint[5], temp_breakpoint[6], temp_breakpoint[7], temp_breakpoint[8], np.inf]
        temp = pd.DataFrame()
        for i in range(5):
            x = temp_data[(temp_data[X1_name] >= B1[i]) & (temp_data[X1_name] <= B1[i + 1])]
            x.loc[:, 'X1_group'] = X1_name + str(i + 1)
            temp = pd.concat([temp, x])
        temp2 = pd.DataFrame()
        for j in range(5):
            x = temp[(temp[X2_name] >= B2[j]) & (temp[X2_name] <= B2[j + 1])]
            x.loc[:, 'X2_group'] = X2_name + str(j + 1)
            temp2 = pd.concat([temp2, x])
        X = pd.concat([X, temp2])
    return X

def get_vwret(df, key1, key2):
    decile = get_decile1(fac1, key1, key2)
    class_1 = portfolio_ind_5X5(fac1, decile, key1, key2)
    vwret = class_1.groupby(['date', 'X2_group', 'X1_group']).apply(wavg, 'eret',
                                                                    'ffs').to_frame().reset_index().rename(
        columns={0: 'eret'})
    dfs = pd.pivot_table(vwret, index=['date', 'X2_group'], columns='X1_group')['eret'].reset_index()
    return dfs

# 根据size/inv分组后的收益
rett = get_vwret(fac1, 'size', 'inv')
rett

X1_group,date,X2_group,size1,size2,size3,size4,size5
0,1994-07-31,inv1,-0.273195,-0.340606,,-0.221179,-0.171877
1,1994-07-31,inv2,-0.278404,-0.357568,-0.442469,-0.172143,-0.058160
2,1994-07-31,inv3,-0.234910,-0.347835,,-0.215241,-0.105424
3,1994-07-31,inv4,-0.062945,-0.315378,-0.236658,-0.240441,-0.140164
4,1994-07-31,inv5,-0.438349,,-0.241758,-0.206763,-0.179832
...,...,...,...,...,...,...,...
1265,2015-08-31,inv1,-0.110738,-0.108847,-0.125164,-0.084807,-0.100105
1266,2015-08-31,inv2,-0.082551,-0.142621,-0.127251,-0.133042,-0.083000
1267,2015-08-31,inv3,-0.121646,-0.124832,-0.132586,-0.144924,-0.114280
1268,2015-08-31,inv4,0.816880,-0.139991,-0.146190,-0.131992,-0.107603


In [16]:
def get_ffc(data, ff, ols, num):
    '''
    计算cma因子系数
    @param data: 回归数据
    @param ff: 5因子数据
    @param ols: 回归方程（三因子/四因子/五因子）
    @param num: 系数（intercpt/params）
    @return: 
    '''
    df = pd.DataFrame()
    date = data[['date']].drop_duplicates()
    for i in data['X2_group'][:5]:
        X = pd.DataFrame(columns=data.columns[2:])
        x = pd.DataFrame(columns=data.columns)
        for t in date['date']:
            temp = data[(data['date'] == t) & (data['X2_group'] == i)]
            x = x.append(temp, ignore_index=True)
        x.index = x['date']
        x = x[x.columns[2:]]
        x = x.astype(np.float64)
        x = x.dropna()
        temp_c = []
        temp_tv = []
        for j in x.columns:
            temp_data = pd.concat([x[j], ff], axis=1)
            model = smf.ols(str(j) + ols, temp_data).fit(cov_type='HAC', cov_kwds={'maxlags': 6})
            temp_c.append(model.params[num])
            temp_tv.append(model.tvalues[num])
        temp_c = pd.Series(temp_c, index=x.columns)
        temp_tv = pd.Series(temp_tv, index=x.columns)
        X = X.append(temp_c, ignore_index=True)
        X = X.append(temp_tv, ignore_index=True)
        X.index = [[i, i], ['params', 't_value_alpha']]
        X.index.name = ['active_share', 'Coefficient']
        df = pd.concat([df, X])
        df = df.apply(lambda x: round(x, 4))
    df = df.T
    return df


fc = get_ffc(rett, ff5st, '~mkt_rf+smb+hml+cma', 4)
fc

Unnamed: 0_level_0,inv1,inv1,inv2,inv2,inv3,inv3,inv4,inv4,inv5,inv5
Unnamed: 0_level_1,params,t_value_alpha,params,t_value_alpha,params,t_value_alpha,params,t_value_alpha,params,t_value_alpha
X1_group,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
size1,-1.2147,-2.3934,-0.3143,-2.8603,-0.4387,-2.8404,-0.074,-0.2686,0.3797,1.1671
size2,-0.5907,-4.5709,-0.2966,-1.6704,-0.2011,-2.7296,-0.2541,-1.9724,0.3055,2.3214
size3,-0.8846,-7.599,-0.1176,-0.6441,-0.0963,-0.9204,-0.4489,-2.0645,0.3016,2.606
size4,-0.7924,-5.9255,-0.3688,-2.2322,-0.3007,-3.294,-0.0436,-0.2975,0.2695,2.133
size5,-0.5496,-3.3514,-0.7355,-5.0714,-0.3779,-5.7894,-0.1198,-1.0938,0.2504,2.196


In [18]:
def get_cmao(df):
    """
    构造正交CMAO，回归截距项+残差项
    @param ret:
    @param df:
    @return:
    """
    ffc = smf.ols('cma~mkt_rf+smb+hml+rmw', ff5st).fit(cov_type='HAC', cov_kwds={'maxlags': 6})
    para = pd.DataFrame(ffc.resid)
    para['intercept'] = ffc.params[0]
    para['cmao'] = para['intercept'] + para[0]
    dff = pd.concat([df, para['cmao']], axis=1)
    return dff

cmao = get_cmao(ff5st)

fc1 = get_ffc(rett, cmao, '~mkt_rf+smb+hml+rmw+cmao', 4)
fc1

Unnamed: 0_level_0,inv1,inv1,inv2,inv2,inv3,inv3,inv4,inv4,inv5,inv5
Unnamed: 0_level_1,params,t_value_alpha,params,t_value_alpha,params,t_value_alpha,params,t_value_alpha,params,t_value_alpha
X1_group,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
size1,-0.432,-1.8886,-0.4571,-6.1421,-0.3249,-3.4722,-0.5646,-1.7794,0.1317,0.7125
size2,-0.3686,-5.3699,-0.2479,-1.828,-0.2599,-3.5352,-0.4111,-4.9056,0.0154,0.1469
size3,-0.7736,-4.8061,-0.1813,-1.6143,-0.037,-0.4197,-0.4553,-4.0549,0.1237,1.1844
size4,-0.5334,-5.019,-0.4828,-4.3072,-0.2798,-4.353,0.126,0.7497,-0.0167,-0.1515
size5,-0.1345,-1.2131,-0.407,-4.5028,-0.2626,-2.7552,-0.1924,-2.6299,0.0508,0.5233


In [19]:
fc2 = get_ffc(rett, cmao, '~mkt_rf+smb+hml+rmw+cmao', 5)
fc2

Unnamed: 0_level_0,inv1,inv1,inv2,inv2,inv3,inv3,inv4,inv4,inv5,inv5
Unnamed: 0_level_1,params,t_value_alpha,params,t_value_alpha,params,t_value_alpha,params,t_value_alpha,params,t_value_alpha
X1_group,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
size1,-1.3669,-2.4422,0.2023,1.2138,-0.2541,-1.6708,0.791,2.595,0.432,1.3188
size2,-0.4399,-3.2563,-0.1076,-0.5361,0.0407,0.4422,0.2313,1.5178,0.4759,4.4444
size3,-0.3445,-1.6055,0.0924,0.52,-0.1044,-0.8121,-0.0324,-0.1412,0.3161,1.8068
size4,-0.5349,-2.785,0.1503,0.8675,-0.0933,-0.9985,-0.2802,-0.9935,0.4628,4.6663
size5,-0.7048,-3.5944,-0.6046,-3.0678,-0.2435,-2.0003,0.1067,0.8146,0.336,3.0741


In [21]:
def GRS_test(factor, resid, alpha):
    """
    Function GRS_test(factor, resid, alpha) is to conduct GRS test according to Gibbons, Ross & Shanken(1989)
    # H0: alpha1=alpha2=...=alphaN
    @param factor: matrix of FF factors with shape (T, L)
    @param resid: matrix of residuals with shape (T, N)
    @param alpha: matrix of intercepts with shape (N, 1)
    @return:GRS-statistic P-value
    """
    resid = resid.fillna(0)
    T, N = resid.shape
    L = factor.shape[1]
    mu_mean = factor.mean(0)
    cov_e = np.cov(resid.T)
    cov_f = np.cov(factor.T).reshape((L, L))
    alpha = np.asmatrix(alpha).reshape(N, 1)
    mu_mean = np.asmatrix(mu_mean).reshape(L, 1)
    # matrix operation with np.ndarray
    GRS = float((T / N) * ((T - N - L) / (T - L - 1))) * (alpha.T @ inv(cov_e) @ alpha) / (
            1 + mu_mean.T @ inv(cov_f) @ mu_mean)
    GRS = GRS[0, 0]
    GRSp = st.f.sf(GRS, N, (T - N - L))
    grs = [GRS, GRSp]
    return grs


def get_test(df1, df2):
    """
    股票实际收益率中不能被模型解释得部分（一阶/二阶）
    A / AR / AR2 test:  A |alpha|   A|alpha|/A|r_bar|  A|alpha^2|/A|r_bar^2|
    @param df1:alpha数据
    @param df2:因子收益数据
    @return:
    """
    a1 = list(df1.abs().mean())
    df2['mean'] = df2.T.mean()
    for i in df2.columns:
        df2[i] = (df2[i] - df2['mean']).abs()
    df2 = df2.drop('mean', axis=1)
    rt = df2.mean()
    df1 = pd.concat([df1, rt], axis=1)
    a2 = (df1.iloc[:, 0] / df1.iloc[:, 1]).mean()
    a3 = (df1.iloc[:, 0] ** 2 / df1.iloc[:, 1] ** 2).mean()
    a1.append(a2)
    a1.append(a3)
    return a1

# size and op portfolio
def get_alpha(df1, df2, key1, key2, ols):
    decile = get_decile1(df1, key1, key2)
    class_1 = portfolio_ind_5X5(df1, decile, key1, key2)
    vwret = class_1.groupby(['date', 'X2_group', 'X1_group']).apply(wavg, 'eret',
                                                                    'ffs').to_frame().reset_index().rename(
        columns={0: 'eret'})
    vwret.loc[:, 'sbport'] = vwret['X2_group'] + vwret['X1_group']
    wret = vwret.pivot(index='date', columns='sbport', values='eret')
    temp = []
    re = pd.DataFrame()
    for j in wret.columns:
        temp_data = pd.concat([wret[j], ff], axis=1)
        temp_data = temp_data.dropna()
        model = smf.ols(str(j) + ols, temp_data).fit(cov_type='HAC', cov_kwds={'maxlags': 6})
        temp.append(model.params[0])
        residual = pd.DataFrame(model.resid)
        re = pd.concat([re, residual], axis=1)
    df = pd.DataFrame(temp, index=wret.columns, columns=['alpha'])
    re.columns = wret.columns
    re.index = df2.index
    return df, re, wret

# 三因子/四因子/五因子/五因子+mom回归
alpha2, resid2, wwret = get_alpha(fac1, ff5st, 'size', 'profit', '~mkt_rf')
alpha3, resid3, wwret = get_alpha(fac1, ff5st, 'size', 'profit', '~mkt_rf+smb+hml')
alpha4, resid4, wwret = get_alpha(fac1, ff5st, 'size', 'profit', '~mkt_rf+smb+hml+umd')
alpha5, resid5, wwret = get_alpha(fac1, ff5st, 'size', 'profit', '~mkt_rf+smb+hml+rmw+cma')
alpha6, resid6, wwret = get_alpha(fac1, ff5st, 'size', 'profit', '~mkt_rf+smb+hml+umd+rmw+cma')

def get_final(alpha_list, resid_list, df, df2, independent_list):
    """
    @param alpha_list: 三四五因子alpha
    @param resid_list: 三四五因子残差
    @param resid_list: 五因子数据
    @param independent: 回归变量名 
    @return:Test results
    """
    result = pd.DataFrame()
    for i in range(len(alpha_list)):
        alpha1 = alpha_list[i]
        resid1 = resid_list[i]
        independent = independent_list[i]
        tt = get_test(alpha1, df2)
        fact = df[independent]
        a1 = GRS_test(fact, resid1, alpha1)
        tt = tt + a1
        tt = pd.Series(tt)
        result = pd.concat([result, tt], axis = 1)
    result.columns = ['CAPM', 'FF3', 'FFC', 'FF5', 'FFT+Mom']
    result.index = ['A', 'AR', 'AR2', 'GRS', 'Pvalue']
    return result

alphalist = [alpha2, alpha3, alpha4, alpha5, alpha6]
residlist = [resid2, resid3, resid4, resid5, resid6]
indlist = [['mkt_rf'],['mkt_rf', 'smb', 'hml'],['mkt_rf', 'smb', 'hml', 'umd'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma', 'umd']]

test = get_final(alphalist, residlist, ff5st, wwret, indlist)
test

Unnamed: 0,CAPM,FF3,FFC,FF5,FFT+Mom
A,0.005779,0.003279,0.003508,0.002636,0.002622
AR,0.197802,0.019562,0.077193,0.199764,0.283826
AR2,0.062185,0.045537,0.141399,0.190846,0.429543
GRS,2.084282,1.988117,1.94544,1.697641,1.685584
Pvalue,0.002647,0.004704,0.006044,0.024018,0.025643


In [22]:
# 第二种分组方法，加上股改时间划分
def get_decile2X2(data, x1_name, x2_name):
    x1 = data.groupby(['date'])[x1_name].describe(percentiles=[0.5]).reset_index()
    x1 = x1[['date', '50%']]
    x2 = data.groupby(['date'])[x2_name].describe(percentiles=[0.5]).reset_index()
    x2 = x2[['date', '50%']]
    df = pd.merge(x1, x2, how='inner', on=['date'])
    df.columns = ['date', 'B1_1t', 'B2_1t']
    return df

def portfolio_ind2X2(alldata, breakpoints, X1_name, X2_name):
    data = alldata.copy()
    date = data[['date']].drop_duplicates()
    X = pd.DataFrame()
    for t in date['date']:
        temp_data = data[data['date'] == t]
        temp_breakpoint = breakpoints[breakpoints['date'] == t].iloc[0, :]
        B1 = [-np.inf, temp_breakpoint[1], np.inf]
        B2 = [-np.inf, temp_breakpoint[2], np.inf]
        temp = pd.DataFrame()
        for i in range(2):
            x = temp_data[(temp_data[X1_name] >= B1[i]) & (temp_data[X1_name] <= B1[i + 1])]
            x.loc[:, 'X1_group'] = X1_name + str(i + 1)
            temp = pd.concat([temp, x])
        temp2 = pd.DataFrame()
        for j in range(2):
            x = temp[(temp[X2_name] >= B2[j]) & (temp[X2_name] <= B2[j + 1])]
            x.loc[:, 'X2_group'] = X2_name + str(j + 1)
            temp2 = pd.concat([temp2, x])
        X = pd.concat([X, temp2])
    return X

def get_factor2(df, key, key1, key2):
    decile = get_decile2X2(df, key1, key2)
    class_1 = portfolio_ind2X2(df, decile, key1, key2)
    vwret = class_1.groupby(['date', 'X2_group', 'X1_group']).apply(wavg, 'eret',
                                                                    'ffs').to_frame().reset_index().rename(
        columns={0: 'vwret'})
    vwret.loc[:, 'sbport'] = vwret['X2_group'] + vwret['X1_group']
    ff = vwret.pivot(index='date', columns='sbport', values='vwret')
    ff['WB'] = (ff.iloc[:, -2] + ff.iloc[:, -1]) / 2
    ff['WS'] = (ff.iloc[:, 0] + ff.iloc[:, 1]) / 2
    ff['WH'] = (ff.iloc[:, 3] + ff.iloc[:, 1]) / 2
    ff['WL'] = (ff.iloc[:, 2] + ff.iloc[:, 0]) / 2
    ff.loc[:, 'SMB' + key] = ff['WS'] - ff['WB']
    ff.loc[:, key] = ff['WH'] - ff['WL']
    return ff

def get_five1(fac):
    hml = get_factor2(fac, 'hml', 'bm', 'size')
    rmw = get_factor2(fac, 'rmw', 'profit', 'size')
    cma = get_factor2(fac, 'cma', 'inv', 'size')
    ff5s = pd.concat([hml.iloc[:, -2:], rmw.iloc[:, -2:], cma.iloc[:, -2:]], axis=1)
    ff5s.loc[:, 'smb'] = (ff5s['SMBhml'] + ff5s['SMBrmw'] + ff5s['SMBcma']) / 3
    umd = get_factor2(fac, 'umd', 'mom', 'size')
    ff.index = ff5s.index
    ffst = pd.concat([ff['mkt_rf'], ff5s, umd['umd']], axis=1)
    ffst = ffst[['mkt_rf', 'smb', 'hml', 'umd', 'rmw', 'cma']]
    return ffst

ff5st22 = get_five1(fac1)
ff5st21 = ff5st22.iloc[:-98, :]
ff5st22 = ff5st22.iloc[-98:, :]

fac221 = fac1[fac1['date']<='2007-06-30 00:00:00']
fac222= fac1[fac1['date']>'2007-06-30 00:00:00']


def get_list(fac, fft, key):
    alpha2, resid2, wwret = get_alpha(fac, fft, 'size', key, '~mkt_rf')
    alpha3, resid3, wwret = get_alpha(fac, fft, 'size', key, '~mkt_rf+smb+hml')
    alpha4, resid4, wwret = get_alpha(fac, fft, 'size', key, '~mkt_rf+smb+hml+umd')
    alpha5, resid5, wwret = get_alpha(fac, fft, 'size', key, '~mkt_rf+smb+hml+rmw+cma')
    alpha6, resid6, wwret = get_alpha(fac, fft, 'size', key, '~mkt_rf+smb+hml+umd+rmw+cma')
    allist = [alpha2, alpha3, alpha4, alpha5, alpha6]
    relist = [resid2, resid3, resid4, resid5, resid6]
    return allist, relist, wwret

alphalist, residlist, wret = get_list(fac221, ff5st21, 'inv')
indlist = [['mkt_rf'],['mkt_rf', 'smb', 'hml'],['mkt_rf', 'smb', 'hml', 'umd'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma', 'umd']]
test1 = get_final(alphalist, residlist, ff5st21, wret, indlist)   
test1

Unnamed: 0,CAPM,FF3,FFC,FF5,FFT+Mom
A,0.002627,0.003584,0.002888,0.002894,0.002769
AR,0.082119,-0.129609,-0.064964,-0.173487,-0.158068
AR2,0.010339,0.06517,0.126767,0.36766,0.805215
GRS,0.6528,1.268971,1.157399,1.183275,1.16205
Pvalue,0.892422,0.195733,0.292055,0.267483,0.287862


In [33]:
alphalist, residlist, wret = get_list(fac222, ff5st22, 'inv')
indlist = [['mkt_rf'],['mkt_rf', 'smb', 'hml'],['mkt_rf', 'smb', 'hml', 'umd'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma', 'umd']]
test2 = get_final(alphalist, residlist, ff5st22, wret, indlist)  
test2

Unnamed: 0,CAPM,FF3,FFC,FF5,FFT+Mom
A,0.011268,0.004864,0.005013,0.004771,0.004728
AR,0.489974,0.063917,0.000627,0.278276,0.13307
AR2,0.329527,0.116658,0.297455,0.429054,0.832311
GRS,2.522029,1.817448,1.865728,1.696941,1.709171
Pvalue,0.001217,0.026715,0.022144,0.04463,0.042943


In [40]:
# 第三种分组 2x2x2x2
def get_decile2222_dep(df, x1_name, x2_name, x3_name, x4_name):
    # X1=beta,X2=mrtcap
    # date=1988
    decile = pd.DataFrame()
    for i in ff5st.index:
        x = df[df['date'] == i]
        x1_name = x.columns[4]
        x2_name = x.columns[5]
        x3_name = x.columns[6]
        x4_name = x.columns[7]
        x1 = x[x1_name].dropna()
        B1 = np.percentile(x1, 50)
        B1_range = [-np.inf, B1, np.inf]
        for j in range(2):
            down = B1_range[j]
            up = B1_range[j + 1]
            temp = x[(x[x1_name] <= up) & (x[x1_name] >= down)]
            x2 = temp[x2_name].dropna()
            B2 = np.percentile(x2, 50)
            B2_range = [-np.inf, B2, np.inf]
            for k in range(2):
                down = B2_range[k]
                up = B2_range[k + 1]
                temp1 = temp[(temp[x2_name] <= up) & (temp[x2_name] >= down)]
                x3 = temp1[x3_name].dropna()
                B3 = np.percentile(x3, 50)
                B3_range = [-np.inf, B3, np.inf]
                for l in range(2):
                    down = B3_range[l]
                    up = B3_range[l + 1]
                    temp2 = temp1[(temp1[x3_name] <= up) & (temp1[x3_name] >= down)]
                    x4 = temp2[x4_name].dropna()
                    B4 = np.percentile(x4, 50)
                    B4_range = [-np.inf, B4, np.inf]
                    for m in range(2):
                        down = B4_range[m]
                        up = B4_range[m + 1]
                        temp3 = temp2[(temp2[x4_name] <= up) & (temp2[x4_name] >= down)]
                        temp3.loc[:, 'X_group'] = (x1_name + str(j + 1)) + (x2_name + str(k + 1)) + (
                                x3_name + str(l + 1)) + (x4_name + str(m + 1))
                        decile = pd.concat([decile, temp3])

    return decile

def get_factor3(df, key1, key2, key3, key4):
    dd = get_decile2222_dep(df, key1, key2, key3, key4)
    vwret = dd.groupby(['date', 'X_group']).apply(wavg, 'eret', 'ffs').to_frame().reset_index().rename(
        columns={0: 'vwret'})
    ff = vwret.pivot(index='date', columns='X_group', values='vwret')
    x = pd.DataFrame()
    a1 = ff.iloc[:, 0::2]
    a2 = ff.iloc[:, 1::2]
    a2.columns = a1.columns
    a = (a2 - a1).T.mean()  # inv
    x = pd.concat([x, a], axis=1)
    a1 = ff.iloc[:, :8]
    a2 = ff.iloc[:, -8:]
    a2.columns = a1.columns
    a = (a2 - a1).T.mean()  # size
    x = pd.concat([x, a], axis=1)
    a1 = pd.concat([ff.iloc[:, 0:20:4], ff.iloc[:, 1:20:4]], axis=1)
    a2 = pd.concat([ff.iloc[:, 2:20:4], ff.iloc[:, 3:20:4]], axis=1)
    a2.columns = a1.columns
    a = (a2 - a1).T.mean()  # profit
    x = pd.concat([x, a], axis=1)
    a1 = pd.concat([ff.iloc[:, :4], ff.iloc[:, 8:12]], axis=1)
    a2 = pd.concat([ff.iloc[:, -4:], ff.iloc[:, 4:8]], axis=1)
    a2.columns = a1.columns
    a = (a2 - a1).T.mean()  # bm
    x = pd.concat([x, a], axis=1)
    x.columns = ['cma', 'smb', 'rmw', 'hml']
    x.index = ff.index
    return x

five2222 = get_factor3(fac1, 'size', 'bm', 'profit', 'inv')
five2222 = pd.concat([five2222, ff5st.loc[:, ['mkt_rf', 'umd']]], axis=1)

five1 = five2222.iloc[:-98, :]
five2 = five2222.iloc[-98:, :]

alphalist, residlist, wret = get_list(fac221, five1, 'inv')
indlist = [['mkt_rf'],['mkt_rf', 'smb', 'hml'],['mkt_rf', 'smb', 'hml', 'umd'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma', 'umd']]
test3 = get_final(alphalist, residlist, five1, wret, indlist) 
test3

Unnamed: 0,CAPM,FF3,FFC,FF5,FFT+Mom
A,0.002628,0.003587,0.00289,0.002897,0.002772
AR,0.082158,-0.129552,-0.064811,-0.173339,-0.157888
AR2,0.010348,0.06526,0.127014,0.368517,0.805528
GRS,0.653199,1.281455,1.149206,1.206292,1.155517
Pvalue,0.892092,0.186681,0.300207,0.246724,0.294276


In [41]:
alphalist, residlist, wret = get_list(fac222, five2, 'inv')
indlist = [['mkt_rf'],['mkt_rf', 'smb', 'hml'],['mkt_rf', 'smb', 'hml', 'umd'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma', 'umd']]
test4 = get_final(alphalist, residlist, five2, wret, indlist)  
test4

Unnamed: 0,CAPM,FF3,FFC,FF5,FFT+Mom
A,0.011268,0.004864,0.005013,0.004771,0.004728
AR,0.489974,0.063917,0.000627,0.278276,0.13307
AR2,0.329527,0.116658,0.297455,0.429054,0.832311
GRS,2.522029,1.770661,1.828596,1.64924,1.660413
Pvalue,0.001217,0.032404,0.025829,0.053944,0.052102


In [42]:
# MOM
alphalist, residlist, wret = get_list(fac221, five1, 'mom')
indlist = [['mkt_rf'],['mkt_rf', 'smb', 'hml'],['mkt_rf', 'smb', 'hml', 'umd'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma', 'umd']]
test5 = get_final(alphalist, residlist, five1, wret, indlist)   
test5

Unnamed: 0,CAPM,FF3,FFC,FF5,FFT+Mom
A,0.004078,0.005184,0.003216,0.005161,0.004334
AR,0.068732,-0.039107,0.009373,0.044201,0.122701
AR2,0.019959,0.098893,0.140099,0.816361,1.475779
GRS,1.005963,1.486508,1.408224,1.528737,1.563376
Pvalue,0.464495,0.080385,0.11259,0.067044,0.057553


In [44]:
alphalist, residlist, wret = get_list(fac222, five2, 'mom')
indlist = [['mkt_rf'],['mkt_rf', 'smb', 'hml'],['mkt_rf', 'smb', 'hml', 'umd'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma', 'umd']]
test6 = get_final(alphalist, residlist, five2, wret, indlist) 
test6

Unnamed: 0,CAPM,FF3,FFC,FF5,FFT+Mom
A,0.011987,0.005289,0.005481,0.004597,0.004356
AR,0.454123,0.080055,0.03091,0.33293,0.215589
AR2,0.313386,0.12945,0.362102,0.46103,0.906402
GRS,2.552518,1.809891,1.903634,1.57555,1.572397
Pvalue,0.001064,0.027564,0.018906,0.071932,0.073379


In [45]:
alphalist, residlist, wret = get_list(fac221, ff5st21, 'mom')
indlist = [['mkt_rf'],['mkt_rf', 'smb', 'hml'],['mkt_rf', 'smb', 'hml', 'umd'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma', 'umd']]
test7 = get_final(alphalist, residlist, ff5st21, wret, indlist)   
test7

Unnamed: 0,CAPM,FF3,FFC,FF5,FFT+Mom
A,0.004078,0.005184,0.003216,0.005161,0.004334
AR,0.068732,-0.039107,0.009373,0.044201,0.122701
AR2,0.019959,0.098893,0.140099,0.816361,1.475779
GRS,1.005963,1.486508,1.408224,1.528737,1.563376
Pvalue,0.464495,0.080385,0.11259,0.067044,0.057553


In [46]:
alphalist, residlist, wret = get_list(fac222, ff5st22, 'mom')
indlist = [['mkt_rf'],['mkt_rf', 'smb', 'hml'],['mkt_rf', 'smb', 'hml', 'umd'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma'],['mkt_rf', 'smb', 'hml', 'rmw', 'cma', 'umd']]
test7 = get_final(alphalist, residlist, ff5st22, wret, indlist)   
test7

Unnamed: 0,CAPM,FF3,FFC,FF5,FFT+Mom
A,0.011987,0.005289,0.005481,0.004597,0.004356
AR,0.454123,0.080055,0.03091,0.33293,0.215589
AR2,0.313386,0.12945,0.362102,0.46103,0.906402
GRS,2.552518,1.857714,1.942289,1.62112,1.618571
Pvalue,0.001064,0.022594,0.016077,0.060251,0.061382
