In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
# import tushare as ts
# import QUANTAXIS as QA
import talib as ta
import datetime, time
from dateutil.relativedelta import *
from pandas.tseries.offsets import *
from tqdm import tqdm_notebook
from scipy.stats import mstats

In [3]:
plt.rcParams['figure.figsize'] = [12, 8]
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
plt.style.use('ggplot')

In [76]:
def calc_exf(df):
    ret = df.ret_p1
    dret = df.dret_p1
    exf = df.mv - df.mv.shift(1) * df.retx_p1
    exf_to_mv = exf / df.mv.shift(1)
    ret_roll_1 = ((ret[::-1]).rolling(1, min_periods=1).apply(np.prod, raw=True)[::-1])
    ret_roll_2 = ((ret[::-1]).rolling(2, min_periods=1).apply(np.prod, raw=True)[::-1])
    ret_roll_3 = ((ret[::-1]).rolling(3, min_periods=1).apply(np.prod, raw=True)[::-1])
    ret_roll_4 = ((ret[::-1]).rolling(4, min_periods=1).apply(np.prod, raw=True)[::-1])
    ret_roll_5 = ((ret[::-1]).rolling(5, min_periods=1).apply(np.prod, raw=True)[::-1])
    ret_roll_6 = ((ret[::-1]).rolling(6, min_periods=1).apply(np.prod, raw=True)[::-1])
    ret_roll_7 = ((ret[::-1]).rolling(7, min_periods=1).apply(np.prod, raw=True)[::-1])
    ret_roll_8 = ((ret[::-1]).rolling(8, min_periods=1).apply(np.prod, raw=True)[::-1])
    
    dret_roll_1 = ((dret[::-1]).rolling(1, min_periods=1).apply(np.prod, raw=True)[::-1])
    dret_roll_2 = ((dret[::-1]).rolling(2, min_periods=1).apply(np.prod, raw=True)[::-1])
    dret_roll_3 = ((dret[::-1]).rolling(3, min_periods=1).apply(np.prod, raw=True)[::-1])
    dret_roll_4 = ((dret[::-1]).rolling(4, min_periods=1).apply(np.prod, raw=True)[::-1])
    dret_roll_5 = ((dret[::-1]).rolling(5, min_periods=1).apply(np.prod, raw=True)[::-1])
    dret_roll_6 = ((dret[::-1]).rolling(6, min_periods=1).apply(np.prod, raw=True)[::-1])
    dret_roll_7 = ((dret[::-1]).rolling(7, min_periods=1).apply(np.prod, raw=True)[::-1])
    dret_roll_8 = ((dret[::-1]).rolling(8, min_periods=1).apply(np.prod, raw=True)[::-1])
    
    dret_roll_5_bar = ((dret[::-1]).rolling(5, min_periods=1).\
                       apply(np.prod, raw=True)[::-1]).mean()
    
    car1 = ret_roll_1 - dret_roll_1
    car2 = ret_roll_2 - dret_roll_2
    car3 = ret_roll_3 - dret_roll_3
    car4 = ret_roll_4 - dret_roll_4
    car5 = ret_roll_5 - dret_roll_5
    car6 = ret_roll_6 - dret_roll_6
    car7 = ret_roll_7 - dret_roll_7
    car8 = ret_roll_8 - dret_roll_8
    
    wr1 = ret_roll_1 / dret_roll_1
    wr2 = ret_roll_2 / dret_roll_2
    wr3 = ret_roll_3 / dret_roll_3
    wr4 = ret_roll_4 / dret_roll_4
    wr5 = ret_roll_5 / dret_roll_5
    
    wr5_bar = ret_roll_5 / dret_roll_5_bar
    
    ret_n1 = car1
    ret_n2 = car2 - car1
    ret_n3 = car3 - car2
    ret_n4 = car4 - car3
    ret_n5 = car5 - car4
    ret_n6 = car6 - car5
    ret_n7 = car7 - car6
    ret_n8 = car8 - car7
    
    res = pd.DataFrame({
        'date':df.date,
        'mv':df.mv,
        'retx_p1':df.retx_p1,
        'exf':exf,
        'exf_to_mv':exf_to_mv,
        'car1':car1.shift(-1),
        'car3':car3.shift(-1),
        'car5':car5.shift(-1),
        'wr1':wr1.shift(-1),
        'wr2':wr2.shift(-1),
        'wr3':wr3.shift(-1),
        'wr4':wr4.shift(-1),
        'wr5':wr5.shift(-1),
        'wr5_bar':wr5_bar,
        'ret_n1':ret_n1.shift(-1),
        'ret_n2':ret_n2.shift(-1),
        'ret_n3':ret_n3.shift(-1),
        'ret_n4':ret_n4.shift(-1),
        'ret_n5':ret_n5.shift(-1),
        'ret_n6':ret_n6.shift(-1),
        'ret_n7':ret_n7.shift(-1),
        'ret_n8':ret_n8.shift(-1)
        
    })
    return res

load data

In [4]:
crsp_fa = pd.read_csv('data/crsp_fa_filtered.zip',
                     parse_dates=['date'], infer_datetime_format=True)

In [5]:
crsp_fa.head()

Unnamed: 0,date,permno,ret_p1,retx_p1,dret_p1,mv,mv_adj
0,1986-10-31,10000,0.178571,0.178571,1.059979,3002.34375,5796.726244
1,1986-06-30,10001,1.03245,1.0,1.252963,6033.125,12067.131553
2,1987-06-30,10001,1.023885,0.959184,1.018842,5822.125,11205.083735
3,1988-06-30,10001,1.140535,1.063829,0.827947,6200.0,11459.064979
4,1989-06-30,10001,1.199811,1.12,1.005611,7007.0,12326.466763


In [6]:
crsp_filter = crsp_fa.set_index('date')[:'2008']
filter_permno = crsp_filter[(crsp_filter.mv_adj.shift(1) * crsp_filter.retx_p1) >= 5e+4].\
permno.unique().tolist()

crsp_fa_filtered = crsp_fa.set_index('permno').loc[filter_permno].reset_index()

In [26]:
# crsp_fa_filtered = crsp_fa_filtered.assign(year = crsp_fa_filtered.date.dt.year)

In [29]:
# crsp_fa_filtered.head()

In [10]:
test = crsp_fa_filtered[crsp_fa_filtered.permno == 10002]

In [20]:
test.head(10)

Unnamed: 0,date,permno,ret_p1,retx_p1,dret_p1,mv,mv_adj
0,1986-12-31,10002,1.064515,1.064515,1.005044,14540.625,27694.277996
1,1987-12-31,10002,0.808082,0.808082,0.840417,11750.0,21491.762931
2,1988-12-31,10002,0.8375,0.8375,1.173377,9840.625,17227.102052
3,1989-12-31,10002,0.731343,0.731343,1.084321,7196.875,11976.390245
4,1990-12-31,10002,0.816327,0.816327,0.689016,5875.0,9254.062732
5,1991-12-31,10002,1.2,1.2,1.453275,7050.0,10815.81128
6,1992-12-31,10002,1.499999,1.499999,1.401752,12978.0,19282.872353
7,1993-12-31,10002,1.450353,1.430557,1.349054,38612.125,55997.874298
8,1994-12-31,10002,1.039473,1.019418,0.92785,39361.875,55492.135988
9,1995-12-31,10002,1.092129,1.066668,1.373491,42028.0,57642.203439


In [77]:
calc_exf(test)

Unnamed: 0,date,mv,retx_p1,exf,exf_to_mv,car1,car3,car5,wr1,wr2,...,wr5,wr5_bar,ret_n1,ret_n2,ret_n3,ret_n4,ret_n5,ret_n6,ret_n7,ret_n8
0,1986-12-31,14540.625,1.064515,,,-0.032335,-0.574328,-0.58585,0.961525,0.68629,...,0.452834,0.190785,-0.032335,-0.277023,-0.26497,0.241619,-0.253141,-0.187731,-0.196349,0.187721
1,1987-12-31,11750.0,0.808082,-0.013582,-9.341005e-07,-0.335878,-0.376648,-0.885846,0.713751,0.481404,...,0.503963,0.215067,-0.335878,-0.323941,0.283171,-0.297362,-0.211836,-0.218039,0.22535,-0.709889
2,1988-12-31,9840.625,0.8375,0.00493,4.195653e-07,-0.352979,-0.369345,-0.494632,0.67447,0.799094,...,0.759094,0.399217,-0.352979,0.202879,-0.219245,-0.077999,-0.047288,0.209663,-0.562271,-0.382646
3,1989-12-31,7196.875,0.731343,0.006905,7.017218e-07,0.127311,0.065773,0.458322,1.184772,0.978292,...,1.260865,0.69135,0.127311,-0.149048,0.08751,0.171806,0.220743,-0.45211,-0.387604,1.534011
4,1990-12-31,5875.0,0.816327,-0.002175,-3.02171e-07,-0.253274,-0.137564,-0.538593,0.825722,0.883596,...,0.846217,0.982631,-0.253274,0.016143,0.099567,0.301334,-0.702362,-0.53841,1.722299,0.014003
5,1991-12-31,7050.0,1.2,-0.001615,-2.748649e-07,0.098247,0.506804,-0.329636,1.070089,1.150441,...,0.877482,1.31462,0.098247,0.186242,0.222314,-0.446987,-0.389453,1.581327,-0.02927,-1.98653
6,1992-12-31,12978.0,1.499999,2403.003647,0.3408516,0.101299,-0.072728,0.676404,1.075089,1.204426,...,1.2803,1.047229,0.101299,0.154585,-0.328612,-0.272746,1.021879,-0.01045,-1.38016,-0.267716
7,1993-12-31,38612.125,1.430557,20046.358763,1.544642,0.111624,-0.337573,0.341395,1.120304,0.890807,...,1.202465,1.370446,0.111624,-0.250778,-0.198419,0.67901,-4.1e-05,-0.995715,-0.171914,-0.380032
8,1994-12-31,39361.875,1.019418,-0.008577,-2.221426e-07,-0.281362,0.121447,-0.89772,0.795148,0.68083,...,0.640625,0.899389,-0.281362,-0.208056,0.610866,0.011832,-1.030999,-0.144386,-0.427715,0.539492
9,1995-12-31,42028.0,1.066668,41.93746,0.001065433,-0.160509,0.462912,-0.522323,0.85623,1.336853,...,0.688416,0.709845,-0.160509,0.633324,-0.009902,-0.81635,-0.168886,-0.283138,0.438215,-0.50608


In [75]:
calc_exf(test)

Unnamed: 0,date,mv,retx_p1,exf,exf_to_mv,car1,car3,car5,wr1,wr2,...,wr4,wr5,ret_n1,ret_n2,ret_n3,ret_n4,ret_n5,ret_n6,ret_n7,ret_n8
0,1986-12-31,14540.625,1.064515,,,-0.032335,-0.574328,-0.58585,0.961525,0.68629,...,0.54841,0.452834,-0.032335,-0.277023,-0.26497,0.241619,-0.253141,-0.187731,-0.196349,0.187721
1,1987-12-31,11750.0,0.808082,-0.013582,-9.341005e-07,-0.335878,-0.376648,-0.885846,0.713751,0.481404,...,0.470954,0.503963,-0.335878,-0.323941,0.283171,-0.297362,-0.211836,-0.218039,0.22535,-0.709889
2,1988-12-31,9840.625,0.8375,0.00493,4.195653e-07,-0.352979,-0.369345,-0.494632,0.67447,0.799094,...,0.706076,0.759094,-0.352979,0.202879,-0.219245,-0.077999,-0.047288,0.209663,-0.562271,-0.382646
3,1989-12-31,7196.875,0.731343,0.006905,7.017218e-07,0.127311,0.065773,0.458322,1.184772,0.978292,...,1.125467,1.260865,0.127311,-0.149048,0.08751,0.171806,0.220743,-0.45211,-0.387604,1.534011
4,1990-12-31,5875.0,0.816327,-0.002175,-3.02171e-07,-0.253274,-0.137564,-0.538593,0.825722,0.883596,...,1.064226,0.846217,-0.253274,0.016143,0.099567,0.301334,-0.702362,-0.53841,1.722299,0.014003
5,1991-12-31,7050.0,1.2,-0.001615,-2.748649e-07,0.098247,0.506804,-0.329636,1.070089,1.150441,...,1.024821,0.877482,0.098247,0.186242,0.222314,-0.446987,-0.389453,1.581327,-0.02927,-1.98653
6,1992-12-31,12978.0,1.499999,2403.003647,0.3408516,0.101299,-0.072728,0.676404,1.075089,1.204426,...,0.820009,1.2803,0.101299,0.154585,-0.328612,-0.272746,1.021879,-0.01045,-1.38016,-0.267716
7,1993-12-31,38612.125,1.430557,20046.358763,1.544642,0.111624,-0.337573,0.341395,1.120304,0.890807,...,1.190878,1.202465,0.111624,-0.250778,-0.198419,0.67901,-4.1e-05,-0.995715,-0.171914,-0.380032
8,1994-12-31,39361.875,1.019418,-0.008577,-2.221426e-07,-0.281362,0.121447,-0.89772,0.795148,0.68083,...,1.073338,0.640625,-0.281362,-0.208056,0.610866,0.011832,-1.030999,-0.144386,-0.427715,0.539492
9,1995-12-31,42028.0,1.066668,41.93746,0.001065433,-0.160509,0.462912,-0.522323,0.85623,1.336853,...,0.805667,0.688416,-0.160509,0.633324,-0.009902,-0.81635,-0.168886,-0.283138,0.438215,-0.50608


In [19]:
# 通过上面的预览，查看next return
calc_exf(test)

Unnamed: 0,date,mv,retx_p1,exf,exf_to_mv,car1,car3,car5,wr5,ret_n1,ret_n2,ret_n3,ret_n4,ret_n5,ret_n6,ret_n7,ret_n8
0,1986-12-31,14540.625,1.064515,,,-0.032335,-0.574328,-0.58585,0.452834,0.808082,0.8375,0.731343,0.816327,1.2,1.499999,1.450353,1.039473
1,1987-12-31,11750.0,0.808082,-0.013582,-9.341005e-07,-0.335878,-0.376648,-0.885846,0.503963,0.8375,0.731343,0.816327,1.2,1.499999,1.450353,1.039473,1.092129
2,1988-12-31,9840.625,0.8375,0.00493,4.195653e-07,-0.352979,-0.369345,-0.494632,0.759094,0.731343,0.816327,1.2,1.499999,1.450353,1.039473,1.092129,0.955922
3,1989-12-31,7196.875,0.731343,0.006905,7.017218e-07,0.127311,0.065773,0.458322,1.260865,0.816327,1.2,1.499999,1.450353,1.039473,1.092129,0.955922,1.96296
4,1990-12-31,5875.0,0.816327,-0.002175,-3.02171e-07,-0.253274,-0.137564,-0.538593,0.846217,1.2,1.499999,1.450353,1.039473,1.092129,0.955922,1.96296,0.951829
5,1991-12-31,7050.0,1.2,-0.001615,-2.748649e-07,0.098247,0.506804,-0.329636,0.877482,1.499999,1.450353,1.039473,1.092129,0.955922,1.96296,0.951829,0.820407
6,1992-12-31,12978.0,1.499999,2403.003647,0.3408516,0.101299,-0.072728,0.676404,1.2803,1.450353,1.039473,1.092129,0.955922,1.96296,0.951829,0.820407,0.787577
7,1993-12-31,38612.125,1.430557,20046.358763,1.544642,0.111624,-0.337573,0.341395,1.202465,1.039473,1.092129,0.955922,1.96296,0.951829,0.820407,0.787577,1.119581
8,1994-12-31,39361.875,1.019418,-0.008577,-2.221426e-07,-0.281362,0.121447,-0.89772,0.640625,1.092129,0.955922,1.96296,0.951829,0.820407,0.787577,1.119581,1.171632
9,1995-12-31,42028.0,1.066668,41.93746,0.001065433,-0.160509,0.462912,-0.522323,0.688416,0.955922,1.96296,0.951829,0.820407,0.787577,1.119581,1.171632,1.492715


In [21]:
tic = time.perf_counter()
result_exf = crsp_fa_filtered.set_index('permno').groupby('permno').apply(calc_exf)
toc = time.perf_counter()
print(toc-tic)

185.4494121


In [23]:
result_exf.head()

Unnamed: 0_level_0,date,mv,retx_p1,exf,exf_to_mv,car1,car3,car5,wr5,ret_n1,ret_n2,ret_n3,ret_n4,ret_n5,ret_n6,ret_n7,ret_n8
permno,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
10002,1986-12-31,14540.625,1.064515,,,-0.032335,-0.574328,-0.58585,0.452834,0.808082,0.8375,0.731343,0.816327,1.2,1.499999,1.450353,1.039473
10002,1987-12-31,11750.0,0.808082,-0.013582,-9.341005e-07,-0.335878,-0.376648,-0.885846,0.503963,0.8375,0.731343,0.816327,1.2,1.499999,1.450353,1.039473,1.092129
10002,1988-12-31,9840.625,0.8375,0.00493,4.195653e-07,-0.352979,-0.369345,-0.494632,0.759094,0.731343,0.816327,1.2,1.499999,1.450353,1.039473,1.092129,0.955922
10002,1989-12-31,7196.875,0.731343,0.006905,7.017218e-07,0.127311,0.065773,0.458322,1.260865,0.816327,1.2,1.499999,1.450353,1.039473,1.092129,0.955922,1.96296
10002,1990-12-31,5875.0,0.816327,-0.002175,-3.02171e-07,-0.253274,-0.137564,-0.538593,0.846217,1.2,1.499999,1.450353,1.039473,1.092129,0.955922,1.96296,0.951829


In [22]:
res1 = result_exf.copy()
res1 = res1.dropna(subset=['exf_to_mv']) # 只是剔除exf_to_mv中的异常值
# res1 = res1.dropna() # 只要有异常值，就全部剔除，两者差距不是很大
res1 = res1.reset_index().set_index('date')[:'2008'] # 筛选出到08年的数据
# res1 = res1.loc['1973':'2001']
# 可以自行调整limits大小
res1.exf_to_mv = mstats.winsorize(res1.exf_to_mv, limits=[0.05, 0.05]) # 这里也可以调整

per1 = np.linspace(0,1,11).tolist()
bins = res1.describe(percentiles=per1).exf_to_mv.iloc[4:-1].tolist()
bins = [bins[0]-1] + bins[1:]
# bins = bins[:-1] + [bins[-1]-1]
res1.groupby(pd.cut(res1.exf_to_mv,bins, right=True)).mean().drop(['mv', 'retx_p1', 'exf'], axis=1)

Unnamed: 0_level_0,permno,exf_to_mv,car1,car3,car5,wr5,ret_n1,ret_n2,ret_n3,ret_n4,ret_n5,ret_n6,ret_n7,ret_n8
exf_to_mv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
"(-1.066, -0.0261]",56231.114253,-0.054192,0.053575,0.174042,0.261691,1.163329,1.200047,1.2039,1.215397,1.191313,1.182051,1.174958,1.156115,1.165902
"(-0.0261, -0.00114]",56067.244276,-0.010753,0.043743,0.125188,0.237322,1.134676,1.212704,1.189755,1.172387,1.173705,1.182688,1.160627,1.165607,1.168867
"(-0.00114, 5.6e-08]",53933.99728,-9.2e-05,0.046179,0.116728,0.191206,1.128983,1.253339,1.222459,1.205264,1.22199,1.216564,1.205986,1.204022,1.174335
"(5.6e-08, 0.000466]",54328.340399,6e-05,0.046644,0.099444,0.137528,1.100556,1.251164,1.221318,1.201822,1.213521,1.213174,1.189737,1.212871,1.183618
"(0.000466, 0.00379]",55713.870438,0.001938,0.038457,0.072648,0.101784,1.056119,1.225395,1.21254,1.178176,1.173523,1.195828,1.162011,1.175164,1.171222
"(0.00379, 0.0102]",56242.428814,0.006644,0.036466,0.069184,0.087717,1.054752,1.197335,1.208298,1.178215,1.158998,1.176728,1.16998,1.172332,1.172859
"(0.0102, 0.0236]",57251.515529,0.015937,0.023408,0.045566,0.079703,1.048517,1.174601,1.186735,1.185521,1.173122,1.173364,1.169687,1.167454,1.188733
"(0.0236, 0.0625]",58577.015189,0.039091,0.021788,0.043701,0.004793,1.008651,1.165848,1.195076,1.173926,1.157725,1.170193,1.181117,1.165156,1.171004
"(0.0625, 0.205]",59350.63353,0.11777,-0.007937,-0.048625,-0.113072,0.945136,1.132263,1.177443,1.169817,1.162963,1.185592,1.191166,1.173365,1.175545
"(0.205, 0.453]",61933.97852,0.379414,-0.030501,-0.168517,-0.266185,0.838642,1.086715,1.108028,1.134426,1.170932,1.136396,1.196443,1.168301,1.187897


为了按照每年进行分组计算，先添加year字段

In [49]:
# 进行复制操作，避免破坏数据
res1 = result_exf.copy()
res1 = res1.reset_index().set_index('date')[:'2008'] # 筛选出到08年的数据
res1.exf_to_mv = mstats.winsorize(res1.exf_to_mv, limits=[0.05, 0.05]) 

In [50]:
# 如果exf_to_mv中有异常值，则drop这一行数据
res1 = res1.dropna(subset=['exf_to_mv'])
# res1 = res1.dropna()

In [51]:
#重置index, 并添加year字段
res1 = res1.reset_index()
res1 = res1.assign(year = res1.date.dt.year)

In [62]:
res1.date.iloc[0]

Timestamp('1987-12-31 00:00:00')

In [52]:
res1.head()

Unnamed: 0,date,permno,mv,retx_p1,exf,exf_to_mv,car1,car3,car5,wr5,ret_n1,ret_n2,ret_n3,ret_n4,ret_n5,ret_n6,ret_n7,ret_n8,year
0,1987-12-31,10002,11750.0,0.808082,-0.013582,-9.341005e-07,-0.335878,-0.376648,-0.885846,0.503963,0.8375,0.731343,0.816327,1.2,1.499999,1.450353,1.039473,1.092129,1987
1,1988-12-31,10002,9840.625,0.8375,0.00493,4.195653e-07,-0.352979,-0.369345,-0.494632,0.759094,0.731343,0.816327,1.2,1.499999,1.450353,1.039473,1.092129,0.955922,1988
2,1989-12-31,10002,7196.875,0.731343,0.006905,7.017218e-07,0.127311,0.065773,0.458322,1.260865,0.816327,1.2,1.499999,1.450353,1.039473,1.092129,0.955922,1.96296,1989
3,1990-12-31,10002,5875.0,0.816327,-0.002175,-3.02171e-07,-0.253274,-0.137564,-0.538593,0.846217,1.2,1.499999,1.450353,1.039473,1.092129,0.955922,1.96296,0.951829,1990
4,1991-12-31,10002,7050.0,1.2,-0.001615,-2.748649e-07,0.098247,0.506804,-0.329636,0.877482,1.499999,1.450353,1.039473,1.092129,0.955922,1.96296,0.951829,0.820407,1991


In [53]:
def group_calc(g):
#     g.exf_to_mv = mstats.winsorize(g.exf_to_mv, limits=[0.05, 0.05])
    per = np.linspace(0,1,11).tolist()
    bins = g.describe(percentiles=per).exf_to_mv.iloc[4:-1].tolist()
    bins = [bins[0]-1] + bins[1:]
    res = g.groupby(pd.cut(g.exf_to_mv,bins, right=True)).mean().\
    drop(['mv', 'retx_p1', 'exf', 'permno', 'year'], axis=1)
    res.index = list(range(1,11))
    res.index.name = 'decile'
    return res

In [47]:
# year_list = res1.year.sort_values().unique().tolist()

In [54]:
# 按年分组，然后对每一组数据，
res2 = res1.groupby('year').apply(group_calc)

In [55]:
res2

Unnamed: 0_level_0,Unnamed: 1_level_0,exf_to_mv,car1,car3,car5,wr5,ret_n1,ret_n2,ret_n3,ret_n4,ret_n5,ret_n6,ret_n7,ret_n8
year,decile,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1973,1,1.490950e-02,0.140180,-0.353762,-0.244304,0.829888,0.783673,0.822917,1.215189,1.052083,1.445544,,,
1973,2,2.772924e-02,-0.111225,0.629499,-0.414555,0.798857,0.643581,2.025243,1.433643,0.808768,1.089435,1.484961,1.466641,
1973,3,5.078518e-02,0.116064,10.731742,23.111763,12.473473,0.853333,6.117179,2.379310,1.310993,1.543136,0.796611,0.784474,1.136963
1973,4,,,,,,,,,,,,,
1973,5,7.666662e-02,0.289559,-0.400718,-1.060146,0.551640,1.000001,1.391304,0.937500,1.066667,0.937501,1.133335,1.147060,
1973,6,7.868620e-02,0.042125,0.042125,0.042125,1.056841,0.783227,,,,,,,
1973,7,,,,,,,,,,,,,
1973,8,1.097972e-01,-0.398057,-0.988046,-1.561999,0.227510,0.363637,1.322582,0.585366,1.541664,1.059937,1.439725,1.782746,1.022196
1973,9,2.787971e-01,-0.054490,0.338521,-0.044877,0.966168,0.762954,1.814968,1.111983,0.919689,0.904982,1.197566,1.395591,1.261297
1973,10,6.787886e-01,-0.359616,-0.726670,-0.908424,0.546756,0.289903,1.500002,1.148147,1.516128,1.447677,1.999913,1.981451,2.279778


In [56]:
res2.groupby('decile').mean()

Unnamed: 0_level_0,exf_to_mv,car1,car3,car5,wr5,ret_n1,ret_n2,ret_n3,ret_n4,ret_n5,ret_n6,ret_n7,ret_n8
decile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,-0.04638,0.05832,0.187645,0.31618,1.172453,1.220058,1.206037,1.215133,1.189342,1.188221,1.177922,1.157732,1.187488
2,-0.009271,0.03231,0.121786,0.16664,1.112564,1.196416,1.221533,1.190604,1.162454,1.176945,1.194847,1.184936,1.166463
3,0.000503,0.04832,0.416327,0.877436,1.464626,1.219065,1.338685,1.219173,1.195415,1.218704,1.167397,1.193052,1.185462
4,0.000531,0.041132,0.086031,0.11943,1.081335,1.229043,1.20844,1.186116,1.192529,1.196802,1.178373,1.188573,1.160457
5,0.004791,0.041275,0.057708,0.047096,1.039136,1.210775,1.214204,1.189225,1.175196,1.189708,1.173848,1.20893,1.175717
6,0.009279,0.037188,0.07235,0.099338,1.062136,1.198377,1.211191,1.182769,1.177751,1.181094,1.17478,1.174705,1.169574
7,0.016491,0.018401,0.057591,0.067558,1.045496,1.18584,1.205384,1.189865,1.176063,1.179972,1.190878,1.179734,1.199814
8,0.042272,0.003651,-0.00264,0.000527,1.00077,1.152291,1.199334,1.160026,1.189256,1.180303,1.186776,1.195566,1.182571
9,0.12173,-0.000225,-0.014239,-0.044608,0.972452,1.148714,1.189835,1.173852,1.177046,1.176689,1.202299,1.195981,1.196653
10,1.343707,-0.034549,-0.161582,-0.280415,0.843247,1.099028,1.146372,1.132333,1.171036,1.156306,1.229085,1.205042,1.212562


In [65]:
test2 = res1[res1.year == 1973]
group_calc(test2)

Unnamed: 0_level_0,exf_to_mv,car1,car3,car5,wr5,ret_n1,ret_n2,ret_n3,ret_n4,ret_n5,ret_n6,ret_n7,ret_n8
decile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,0.014909,0.14018,-0.353762,-0.244304,0.829888,0.783673,0.822917,1.215189,1.052083,1.445544,,,
2,0.027729,-0.111225,0.629499,-0.414555,0.798857,0.643581,2.025243,1.433643,0.808768,1.089435,1.484961,1.466641,
3,0.050785,0.116064,10.731742,23.111763,12.473473,0.853333,6.117179,2.37931,1.310993,1.543136,0.796611,0.784474,1.136963
4,,,,,,,,,,,,,
5,0.076667,0.289559,-0.400718,-1.060146,0.55164,1.000001,1.391304,0.9375,1.066667,0.937501,1.133335,1.14706,
6,0.078686,0.042125,0.042125,0.042125,1.056841,0.783227,,,,,,,
7,,,,,,,,,,,,,
8,0.109797,-0.398057,-0.988046,-1.561999,0.22751,0.363637,1.322582,0.585366,1.541664,1.059937,1.439725,1.782746,1.022196
9,0.278797,-0.05449,0.338521,-0.044877,0.966168,0.762954,1.814968,1.111983,0.919689,0.904982,1.197566,1.395591,1.261297
10,0.678789,-0.359616,-0.72667,-0.908424,0.546756,0.289903,1.500002,1.148147,1.516128,1.447677,1.999913,1.981451,2.279778


In [61]:
test2

Unnamed: 0,date,permno,mv,retx_p1,exf,exf_to_mv,car1,car3,car5,wr5,ret_n1,ret_n2,ret_n3,ret_n4,ret_n5,ret_n6,ret_n7,ret_n8,year
15123,1973-12-31,13370,21804.0,0.383333,3633.997757,0.076667,0.289559,-0.400718,-1.060146,0.55164,1.000001,1.391304,0.9375,1.066667,0.937501,1.133335,1.14706,,1973
45835,1973-06-30,36003,188263.0,0.421768,74921.227314,0.278797,-0.05449,0.338521,-0.044877,0.966168,0.762954,1.814968,1.111983,0.919689,0.904982,1.197566,1.395591,1.261297,1973
52831,1973-12-31,41486,32406.0,0.1875,11968.119832,0.109797,-0.398057,-0.988046,-1.561999,0.22751,0.363637,1.322582,0.585366,1.541664,1.059937,1.439725,1.782746,1.022196,1973
66313,1973-12-31,49913,13474.125,0.637363,1480.662765,0.078686,0.042125,0.042125,0.042125,1.056841,0.783227,,,,,,,,1973
74927,1973-07-31,54893,38872.0,0.573333,1793.308567,0.027729,-0.111225,0.629499,-0.414555,0.798857,0.643581,2.025243,1.433643,0.808768,1.089435,1.484961,1.466641,,1973
100253,1973-12-31,70077,7256.25,0.237676,1277.501168,0.050785,0.116064,10.731742,23.111763,12.473473,0.853333,6.117179,2.37931,1.310993,1.543136,0.796611,0.784474,1.136963,1973
103387,1973-10-31,73235,24820.0,1.454545,7897.280703,0.678789,-0.359616,-0.72667,-0.908424,0.546756,0.289903,1.500002,1.148147,1.516128,1.447677,1.999913,1.981451,2.279778,1973
117643,1973-10-31,76989,194070.625,0.772872,3672.968276,0.014909,0.14018,-0.353762,-0.244304,0.829888,0.783673,0.822917,1.215189,1.052083,1.445544,,,,1973


In [59]:
test.describe(percentiles=per1)

Unnamed: 0,permno,ret_p1,retx_p1,dret_p1,mv,mv_adj
count,26.0,26.0,26.0,26.0,26.0,26.0
mean,10002.0,1.03377,1.012388,1.118589,89033.847981,101336.55987
std,0.0,0.383126,0.369814,0.259993,89206.419254,92222.988526
min,10002.0,0.195867,0.194444,0.581334,5875.0,9254.062732
0%,10002.0,0.195867,0.194444,0.581334,5875.0,9254.062732
10%,10002.0,0.610229,0.60274,0.851979,8518.75,14601.746149
20%,10002.0,0.808082,0.79918,0.921717,12978.0,20748.170292
30.0%,10002.0,0.828954,0.816534,0.942202,30446.2225,36435.659767
40%,10002.0,0.930313,0.928571,1.041902,39361.875,51946.690866
50%,10002.0,0.997698,0.976545,1.100376,48852.855,56820.038868


In [68]:
test3 = result_exf.loc[77418]

In [71]:
test3 = test3.reset_index().set_index('date')

In [72]:
test3

Unnamed: 0_level_0,permno,mv,retx_p1,exf,exf_to_mv,car1,car3,car5,wr5,ret_n1,ret_n2,ret_n3,ret_n4,ret_n5,ret_n6,ret_n7,ret_n8
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1992-06-30,77418,73993.5,0.885246,,,1.485581,11.499109,30.486357,13.248084,2.741783,1.540542,3.087717,1.988631,1.27143,3.823007,4.185493,0.957955
1993-06-30,77418,218004.0,2.740738,15207.21,0.205521,0.494495,7.960438,43.39794,17.811951,1.540542,3.087717,1.988631,1.27143,3.823007,4.185493,0.957955,0.922413
1994-06-30,77418,412965.0,1.540542,77120.78,0.353759,1.91357,5.912793,121.919548,41.619246,3.087717,1.988631,1.27143,3.823007,4.185493,0.957955,0.922413,0.408099
1995-06-30,77418,1652420.0,3.087717,377301.0,0.913639,0.768163,7.564375,35.965321,13.885897,1.988631,1.27143,3.823007,4.185493,0.957955,0.922413,0.408099,1.373283
1996-06-30,77418,3935138.0,1.988631,649084.0,0.392808,-0.050407,18.249801,16.001558,9.100687,1.27143,3.823007,4.185493,0.957955,0.922413,0.408099,1.373283,1.081156
1997-06-30,77418,5443073.0,1.27143,439823.2,0.111768,2.520221,13.598321,4.597881,4.922155,3.823007,4.185493,0.957955,0.922413,0.408099,1.373283,1.081156,0.901661
1998-06-30,77418,22728240.0,3.822978,1919487.0,0.352648,2.96919,2.55136,0.903554,1.772812,4.185493,0.957955,0.922413,0.408099,1.373283,1.081156,0.901661,1.263697
1999-06-30,77418,119515800.0,4.185493,24386900.0,1.072978,-0.133864,-0.379197,-0.537914,0.498832,0.957955,0.922413,0.408099,1.373283,1.081156,0.901661,1.263697,0.767318
2000-06-30,77418,121650700.0,0.957955,7159994.0,0.059908,0.058645,-0.363463,-0.549944,0.478177,0.922413,0.408099,1.373283,1.081156,0.901661,1.263697,0.767318,0.620778
2001-12-31,77418,136599900.0,0.922413,24387660.0,0.200473,-0.37636,-0.532187,-0.725796,0.487503,0.408099,1.373283,1.081156,0.901661,1.263697,0.767318,0.620778,1.341498
