In [12]:

import pandas as pd
import numpy as np
from pandas.tseries.offsets import MonthEnd
from numpy_ext import rolling_apply
from scipy import stats
%config IPCompleter.use_jedi = False
from copy import copy

In [13]:
#fdm_orig = pd.read_csv('Fundamental Data (With Tickers).csv')
fdm_orig = pd.read_csv('fundamentals2010c.csv')

#crsp_raw = pd.read_csv('CRSP stock price421.csv')
crsp_raw = pd.read_csv('stockdata.csv')
crsp_raw = crsp_raw.drop(columns = 'Unnamed: 0')
crsp_raw

In [14]:
%%time
price_orig = pd.read_csv('stockdata.csv')

CPU times: user 85.2 ms, sys: 25.2 ms, total: 110 ms
Wall time: 115 ms


In [716]:
ff = pd.read_csv('F-F_Research_Data_5_Factors_2x3_daily.CSV', skiprows=2)
ff = ff.rename({'Unnamed: 0': 'Date'}, axis=1)
ff['Date'] = pd.to_datetime(ff['Date'], format='%Y%m%d')

In [15]:
def clean_data(df, type_dict):
    print('Cleaning date variables:')
    for v in type_dict['date_vars']:
        print(v)
        df[v] = pd.to_datetime(df[v], format = '%Y%m%d', errors = 'coerce')
        
    print('Cleaning numeric variables:')
    for v in type_dict['float_vars']:
        print(v)
        df[v] = pd.to_numeric(df[v], errors = 'coerce')
    
    print('Cleaning integer variables:')
    for v in type_dict['int_vars']:
        print(v)
        df[v] = pd.to_numeric(df[v], downcast = 'signed', errors = 'coerce')
        
    print('Final data types:')
    print(df.dtypes)
    
    return df

compustat_raw = pd.read_csv('fundamentals2010c.csv')
crsp_datatypes = {'date_vars': ['date'],
                 'float_vars': ['PRC', 'DIVAMT', 'BID', 'ASK', 'CFACPR', 'CFACSHR', "RET"],
                 'int_vars': ['SHROUT', 'VOL']}
crsp = clean_data(copy(crsp_raw), crsp_datatypes)

# Choose the right variables
crsp_names = {
              "RET" : "Return",
              'SHROUT': 'Shares Outstanding on Trading Day', 
              'COMNAM': 'Company Name',\
              'date': 'datadate', 
              'NCUSIP': 'cusip',
              "TICKER": "Ticker",
              'DIVAMT': 'Dividend Cash Amount',
              'PRC': 'Price',
              'BID': 'Bid',
              'ASK': 'Ask',
              'VOL': 'Volume on Trading Day',
              'CFACPR': 'Price Adjustment Factor',
              'CFACSHR': 'Share Adjustment Factor'}

crsp = crsp.rename(index = str, columns = crsp_names)
crsp = crsp[list(crsp_names.values())]

NameError: name 'crsp_raw' is not defined

In [10]:
# Select columns
fdm = fdm_orig[['datadate', 'tic', 'Current Assets', 'Total Assets',
                'Book Value Per Share', 'Cash',
                'Common shares outstanding', 'Current debt changes',
                'Depreciation and Amortization', 'Total Debt', 'Dividends',
                'Income Before Extraordinary Items', 'Current Liabilities', 'Sales',
                'Income Taxes Payable', 
                'Common Shares Traded - Annual - Fiscal',
                'Dividends per Share - Pay Date - Fiscal']].copy()

price = price_orig[['date', 'TICKER', 'PRC', 'SHROUT', 'VOL']].copy()

KeyError: "['Current Assets', 'Common shares outstanding', 'Book Value Per Share', 'Sales', 'Current Liabilities', 'Dividends per Share - Pay Date - Fiscal', 'Depreciation and Amortization', 'Total Assets', 'Total Debt', 'Income Before Extraordinary Items', 'Income Taxes Payable', 'Cash', 'Dividends', 'Common Shares Traded - Annual - Fiscal', 'Current debt changes'] not in index"

In [718]:
%%time
price = price.sort_values('date')
price['date'] = pd.to_datetime(price['date'])
price = price.rename({'TICKER':'tic'}, axis=1)
price = price.sort_values(['tic', 'date'])
price = price.reset_index(drop=True)

fdm['date'] = pd.to_datetime(fdm['datadate'])
fdm = fdm.drop('datadate', axis=1)
fdm = fdm.sort_values(['tic', 'date'])
fdm = fdm.reset_index(drop=True)

CPU times: user 9.39 s, sys: 396 ms, total: 9.79 s
Wall time: 10 s


In [719]:
price.PRC = price.PRC.astype(np.float64)
price.SHROUT = price.SHROUT.astype(np.float64)
price = price[(price.tic.notna()) & (price.PRC >= 0)]
fdm = fdm[(fdm.tic.notna()) & (fdm['Book Value Per Share'] >= 0)]
fdm = fdm.fillna(0)
price = price.fillna(0)

#### Get monthly features for price

In [720]:
price1 = price.copy()

In [721]:
price['date_eom'] = price.date - pd.Timedelta('1 days') + MonthEnd(1)

In [722]:
price1['date_eom'] = price1.date - pd.Timedelta('1 days') + MonthEnd(1)
price1['m_1'] = price1.date - pd.Timedelta('1 days') + MonthEnd(-1)
price1['m_2'] = price1.date - pd.Timedelta('1 days') + MonthEnd(-2)
price1['m_12'] = price1.date - pd.Timedelta('1 days') + MonthEnd(-12)
price1['m_13'] = price1.date - pd.Timedelta('1 days') + MonthEnd(-13)
# price1['m_36'] = price1.date - pd.Timedelta('1 days') + MonthEnd(-36)

In [None]:
%%time
# get price of the end of current month
price_monthly = price1.groupby(['tic', 'date_eom']).apply(lambda x: x.tail(1))
price_monthly = price_monthly.reset_index(level=[0,1], drop=True)

In [None]:
price_monthly_agg = price_monthly.copy()

In [None]:
%%time
for i in [1, 2, 12, 13]:
    price_monthly_agg = pd.merge(price_monthly_agg, price_monthly[['tic', 'date_eom', 'PRC', 'SHROUT', 'VOL']], 
                                 left_on=['tic', f'm_{i}'], right_on=['tic', 'date_eom'], 
                                 suffixes=['', f'_{i}']).drop(f'date_eom_{i}', axis=1)

In [None]:
price_monthly_agg['log_size'] = np.log1p(price_monthly_agg.PRC_1*price_monthly_agg.SHROUT_1)
price_monthly_agg['return'] = (price_monthly_agg.PRC_2 - price_monthly_agg.PRC_12) / price_monthly_agg.PRC_12
# price_monthly_agg['log_issues_36'] = np.log1p(price_monthly_agg.SHROUT_1/price_monthly_agg.SHROUT_36)
price_monthly_agg['log_issues_12'] = np.log1p(price_monthly_agg.SHROUT_1/price_monthly_agg.SHROUT_12)
# price_monthly_agg['log_return'] = np.log1p(price_monthly_agg.PRC_13/price_monthly_agg.PRC_36)
price_monthly_agg['turnover'] = price_monthly_agg.VOL / price_monthly_agg.SHROUT

In [501]:
price['year'] = price.date.dt.year
prc_std = price.groupby(['tic', 'date_eom'])['PRC'].std()
prc_std = prc_std.reset_index(level=[0,1])
prc_std = prc_std.rename({'PRC':'stddev'}, axis=1)

In [506]:
price_monthly_agg = pd.merge(price_monthly_agg, prc_std, on=['tic', 'date_eom'])

#### Beta

In [508]:
ff['date'] = ff.Date - pd.Timedelta('1 days') + MonthEnd(1)
ff = ff.groupby(['date']).apply(lambda x: x.tail(1))
ff = ff.reset_index(drop=True)

In [509]:
%%time
price_beta = price1.groupby(['tic', 'date_eom']).apply(lambda x: x.tail(1))

CPU times: user 1min 52s, sys: 2.74 s, total: 1min 55s
Wall time: 2min 4s


In [510]:
price_beta = price_beta.reset_index(level=[0,1], drop=True)

In [511]:
price_beta = pd.merge(price_beta, price_beta[['tic', 'date_eom', 'PRC', 'SHROUT', 'VOL']], 
                      left_on=['tic', f'm_1'], right_on=['tic', 'date_eom'], 
                      suffixes=['', f'_1']).drop(f'date_eom_1', axis=1)

In [512]:
price_beta['return'] = (price_beta.PRC - price_beta.PRC_1) / price_beta.PRC_1

In [513]:
price_beta = pd.merge(price_beta, ff[['date', 'RF', 'Mkt-RF']], left_on='date_eom', right_on='date', how='left') \
    .drop('date_y', axis=1)

In [514]:
price_beta['RF'] = price_beta['RF'].bfill().ffill()
price_beta['Mkt-RF'] = price_beta['Mkt-RF'].bfill().ffill()

In [515]:
def get_beta(x, window=36):
    def beta(r, rf, mkt_return):
        excess_returns = r - rf
        excess_market = mkt_return - rf
        beta, alpha, _, _, _ = stats.linregress(excess_returns, excess_market)
        return beta

    if len(x) < window:
        betas = np.nan
    else:
        betas = rolling_apply(beta, window, x['return'].shift(1).values, x['RF'].values, x['Mkt-RF'].values)

    df = pd.DataFrame({
        'tic': x['tic'],
        'date': x['date_eom'],
        'beta': betas
    })
    return df

In [516]:
%%time
df_list = []
for tic, data in price_beta.groupby('tic'):
    df_list.append(get_beta(data))

CPU times: user 33.1 s, sys: 303 ms, total: 33.4 s
Wall time: 35.4 s


In [517]:
beta_df = pd.concat(df_list)

In [518]:
price_monthly_agg = pd.merge(price_monthly_agg, beta_df, on=['tic', 'date'], how='left').bfill().ffill()

#### Calculations

In [519]:
fdm['date_eom'] = fdm.date - pd.Timedelta('1 days') + MonthEnd(1)
fdm['year'] = fdm['date'].dt.year

In [521]:
price_monthly_agg['year'] = price_monthly_agg['date'].dt.year
price_monthly_agg['year_1'] = price_monthly_agg['year'] - 1

In [523]:
df = pd.merge(price_monthly_agg.drop('date',axis=1), fdm, 
              left_on=['tic', 'year_1'], right_on=['tic', 'year'], suffixes=['', '_y'],
              how='inner').drop(['date_eom_y', 'year_y'], axis=1)
df['logb/m'] = np.log1p(df['Book Value Per Share'] / df['PRC_1'])

In [524]:
df = pd.merge(df, fdm[['tic','year', 'Total Assets']], 
              left_on=['tic', 'year'], right_on=['tic', 'year'], suffixes=['', '_curr'],
              how='inner')
df['logAG'] = np.log1p(df['Total Assets_curr']/df['Total Assets'])

In [525]:
%%time
# DY
div = price_orig[['date', 'TICKER', 'DIVAMT']]
div = div[div.TICKER.notna()].fillna(0)
div.date = pd.to_datetime(div.date)
div = div.rename({'TICKER':'tic'}, axis=1)
div['date_eom'] = div.date - pd.Timedelta('2 days') + MonthEnd(1)
div['year'] = div.date.apply(lambda x: x.year)
div_by_year = div.groupby(['tic', 'year'])['DIVAMT'].sum()
div_by_year = div_by_year.reset_index(level=[0,1])
df = pd.merge(df, div_by_year, left_on=['tic', 'year'], right_on=['tic', 'year'], suffixes=['', '_2'], how='left')
df['DY'] = df['DIVAMT'] / df['PRC_1']

CPU times: user 13 s, sys: 663 ms, total: 13.6 s
Wall time: 14.3 s


In [526]:
df['debt/price'] = df['Total Debt'] / (df['PRC_1'] * df['SHROUT_1'])

In [527]:
df['sales/price'] = df['Sales'] / (df['PRC_1'] * df['SHROUT_1'])

In [528]:
df['avg total assets'] = df.groupby('tic').apply(lambda x: x['Total Assets'].rolling(2, min_periods=1).mean()).values

In [529]:
df['ROA'] = df['Income Before Extraordinary Items'] / df['avg total assets']

In [530]:
delta_ca = df.groupby('tic').apply(lambda x: x['Current Assets'].diff().fillna(0)).values
delta_cash = df.groupby('tic').apply(lambda x: x['Cash'].diff().fillna(0)).values
delta_cl = df.groupby('tic').apply(lambda x: x['Current Liabilities'].diff().fillna(0)).values
delta_debt = df.groupby('tic').apply(lambda x: x['Total Debt'].diff().fillna(0)).values
delta_itp = df.groupby('tic').apply(lambda x: x['Income Taxes Payable'].diff().fillna(0)).values

In [531]:
df['Accruals'] = delta_ca - delta_cash - (delta_cl-delta_debt-delta_itp) - df['Depreciation and Amortization']

In [532]:
df['return_pct'] = (df['PRC'] - df['PRC_1']) / df['PRC_1']

In [533]:
df['mktval'] = df['PRC'] * df['SHROUT']

In [534]:
df.to_csv('final_df.csv', index=False)

In [1]:
import pandas as pd
import numpy as np
from pandas.tseries.offsets import MonthEnd
from numpy_ext import rolling_apply
from scipy import stats
%config IPCompleter.use_jedi = False

df = pd.read_csv('final_df.csv')

In [2]:
df.rename({
    'logb/m':'logbm',
    'debt/price':'debt_price',
    'sales/price':'sales_price',
    'return':'Return'
}, inplace=True, axis=1)

In [3]:
columns = ['return_pct', 'log_size', 'logbm', 'Return', 'log_issues_12', 
           'ROA', 'logAG', 'DY', 'beta', 'stddev', 'turnover', 'debt_price', 'sales_price', 'mktval']

In [4]:
mktval = df['mktval']

In [5]:
df_by_tic = df.groupby('tic').mean()

In [6]:
mktval = df_by_tic['mktval']
df_all_but_tiny = df_by_tic[df_by_tic.mktval >= np.percentile(mktval, q=20)]
df_large = df_by_tic[df_by_tic.mktval >= np.median(mktval)]

In [7]:
df_by_tic[columns].describe().transpose()[['mean', 'std', 'count']]

Unnamed: 0,mean,std,count
return_pct,0.01351435,0.02243466,473.0
log_size,15.65765,1.636699,473.0
logbm,0.3835821,0.2948106,473.0
Return,0.1410832,0.2622649,473.0
log_issues_12,0.7271252,0.05571943,473.0
ROA,0.04859831,0.07444163,473.0
logAG,0.7490607,0.0643981,473.0
DY,0.02941159,0.05507465,473.0
beta,0.5577607,1.311763,473.0
stddev,4.484879,4.505702,473.0


In [8]:
df_all_but_tiny[columns].describe().transpose()[['mean', 'std', 'count']]

Unnamed: 0,mean,std,count
return_pct,0.01238581,0.0212463,378.0
log_size,16.22687,1.103515,378.0
logbm,0.34898,0.1926366,378.0
Return,0.1350418,0.25444,378.0
log_issues_12,0.7260021,0.05305018,378.0
ROA,0.0541795,0.06275628,378.0
logAG,0.7458702,0.05804201,378.0
DY,0.02721167,0.04241666,378.0
beta,0.5751616,1.347316,378.0
stddev,4.857237,4.747069,378.0


In [9]:
df_large[columns].describe().transpose()[['mean', 'std', 'count']]

Unnamed: 0,mean,std,count
return_pct,0.01178677,0.0250005,237.0
log_size,16.83675,0.8976899,237.0
logbm,0.3474341,0.1917301,237.0
Return,0.1294438,0.3021852,237.0
log_issues_12,0.7253242,0.05596458,237.0
ROA,0.06223174,0.0481376,237.0
logAG,0.7414214,0.04909288,237.0
DY,0.03143115,0.04770205,237.0
beta,0.6617118,1.416672,237.0
stddev,4.755393,4.041653,237.0


In [10]:
df_large_full = df[df.tic.isin(df_large.index)].copy()
df_abt_full = df[df.tic.isin(df_all_but_tiny.index)].copy()

In [11]:
# df = df_abt_full.copy()

## Model

Model 1 includes `size, B/M, and past 12-month stock returns`

Model 2 adds `three-year share issuance` and `one-year accruals`, `profitability`, and `asset growth`

Model 3 includes eight additional characteristics that have a weaker relation historically to subsequent returns, including `beta, dividend yield, one-year share issuance, three-year stock returns, 12-month volatility, 12-month turnover, market leverage, and the sales-to-price ratio`

In [12]:
import statsmodels.formula.api as smf
from linearmodels import FamaMacBeth

In [13]:
# def ols_coef(x,formula):
#     params = smf.ols(formula,data=x).fit().params
#     return params

In [14]:
df['target_return'] = (df['PRC'] - df['PRC_1'])/df['PRC_1']

In [15]:
df_train = df[df.date_eom < '2013-10-31'].copy()

In [16]:
df_test = df[df.date_eom >= '2013-10-31'].copy()

#### Linear model

In [17]:
# df['Intercept'] = 1

In [18]:
ols_1 = ['log_size', 'logbm', 'Return']
ols_2 = ols_1 + ['ROA', 'logAG']
ols_3 = ols_2 + ['DY', 'log_issues_12', 'beta', 'turnover', 'stddev', 'debt_price', 'sales_price']

In [19]:
# df.date_eom = pd.to_datetime(df.date_eom)

In [20]:
# df_fm = df.set_index(['tic','date_eom'])

In [21]:
# model1 = FamaMacBeth(df_fm.target_return, df_fm[ols_1])
# model2 = FamaMacBeth(df_fm.target_return, df_fm[ols_2])
# model3 = FamaMacBeth(df_fm.target_return, df_fm[ols_3])

In [22]:
# result1 = model1.fit()
# result1.summary         

In [23]:
# result2 = model2.fit()
# result2.summary         

In [24]:
# result3 = model3.fit()
# result3.summary         

In [25]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from sklearn.preprocessing import StandardScaler

In [26]:
def ols_coef(x, formula, return_value='slope'):
    result = smf.ols(formula, data=x).fit()
    if return_value == 'slope':
        return result.params
    if return_value == 'tvalues':
        return result.tvalues
    if return_value == 'r2':
        return result.rsquared

formula1 = 'target_return ~ 1 + log_size + logbm + Return'
formula2 = formula1 + '+ ROA + logAG'
formula3 = formula2 + '+ DY  + log_issues_12 + beta + turnover + stddev + debt_price + sales_price'

In [27]:
def fm_summary(slopes, tvals, rsq):
    ret = pd.DataFrame(slopes.mean(), columns=['slope'])
    tvals = tvals
    ret['tstat'] = tvals.mean()
    ret['R2'] = rsq.mean()
    return ret

In [28]:
# scaler = StandardScaler()
# df[columns] = scaler.fit_transform(df[columns])

by = 'year'

slope1 = df.groupby(by).apply(ols_coef, formula1)
slope2 = df.groupby(by).apply(ols_coef, formula2)
slope3 = df.groupby(by).apply(ols_coef, formula3)

tval1 = df.groupby(by).apply(ols_coef, formula1, 'tvalues')
tval2 = df.groupby(by).apply(ols_coef, formula2, 'tvalues')
tval3 = df.groupby(by).apply(ols_coef, formula3, 'tvalues')

rsq1 = df.groupby(by).apply(ols_coef, formula1, 'r2')
rsq2 = df.groupby(by).apply(ols_coef, formula2, 'r2')
rsq3 = df.groupby(by).apply(ols_coef, formula3, 'r2')

In [29]:
fm_summary(slope1, tval1, rsq1)

Unnamed: 0,slope,tstat,R2
Intercept,0.047016,2.813366,0.012301
log_size,-0.003819,-2.93425,0.012301
logbm,0.068699,4.182077,0.012301
Return,-0.001879,-0.594745,0.012301


In [30]:
fm_summary(slope2, tval2, rsq2)

Unnamed: 0,slope,tstat,R2
Intercept,-0.003041,0.538521,0.014866
log_size,-0.003735,-2.669153,0.014866
logbm,0.07348,4.443534,0.014866
Return,-0.003337,-0.939788,0.014866
ROA,0.017157,-0.144868,0.014866
logAG,0.061632,2.833797,0.014866


In [31]:
fm_summary(slope3, tval3, rsq3)

Unnamed: 0,slope,tstat,R2
Intercept,-0.012031,0.25222,0.099479
log_size,-0.002431,-2.105139,0.099479
logbm,0.04926,3.294468,0.099479
Return,-0.003359,-1.304455,0.099479
ROA,0.013749,0.233247,0.099479
logAG,0.058299,2.770075,0.099479
DY,0.025576,1.046715,0.099479
log_issues_12,-0.004568,-0.564141,0.099479
beta,-0.000591,-0.549656,0.099479
turnover,9.7e-05,1.037746,0.099479


## Rolling vs Cumulative

In [32]:
from sklearn.metrics import mean_squared_error
from statsmodels.regression.linear_model import OLS
from statsmodels.api import add_constant

In [33]:
def backtest(df, slopes, test_dates, model, rolling_window=5, method='rolling'):
    slope_df = pd.DataFrame()
    if method == 'rolling':
        rolling_slope = slopes.rolling(window=rolling_window).mean()
        rolling_slope = rolling_slope.reset_index()
#         rolling_slope.date_eom = pd.to_datetime(rolling_slope.date_eom) - pd.Timedelta('1 days') + MonthEnd(1)
        slope_df = rolling_slope.copy()
    elif method == 'cumulative':
        cum_slope = slopes.divide(pd.Series(np.arange(1, len(slopes)+1), slopes.index), axis=0)
        cum_slope = cum_slope.reset_index()
#         cum_slope.date_eom = pd.to_datetime(cum_slope.date_eom) - pd.Timedelta('1 days') + MonthEnd(1)
        slope_df = cum_slope.copy()
    
    df['last_year'] = df.year - 1
    df = pd.merge(df, slope_df, left_on='last_year', right_on='year', suffixes=['', '_slope'])

    test_df = df[df.date_eom.isin(test_dates)]
    for f in ols_1:
        pred = test_df.Intercept.copy()
        pred += test_df[f+'_slope']*test_df[f]
    mean = pred.mean()
    std = pred.std()
    p10 = np.quantile(pred, q=0.1)
    p90 = np.quantile(pred, q=0.9)
    # predictive result
    pred.name = 'Pred'
    X = add_constant(pred)
    result = OLS(test_df.target_return, X).fit()
    slope = result.params['Pred']
    se = result.bse['Pred']
    tstat = result.tvalues['Pred']
    r2 = result.rsquared
    print(f"{model:^15} | {mean:^8.3f} | {std:^8.3f} | {p10:^8.3f} | {p90:^8.3f} | {slope:^8.3f} | {se:^8.3f} |" +
          f" {tstat:^8.3f} | {r2:^8.3f} ")
    return (mean, std, p10, p90)

In [34]:
test_dates = pd.date_range(start='2011-01-31', end='2013-12-31', freq='1m')

In [35]:
dfs = {
    'All-stocks': df,
    'All-but-tiny-stocks': df_abt_full,
    'Large-stocks': df_large_full
}

In [36]:
for d in dfs:
    dfs[d]['target_return'] = (dfs[d]['PRC'] - dfs[d]['PRC_1'])/dfs[d]['PRC_1']
    dfs[d]['date_eom'] = pd.to_datetime(dfs[d].date_eom)

In [37]:
by = 'year'
slope1 = {m: df.groupby(by).apply(ols_coef, formula1) for m, df in dfs.items()}
slope2 = {m: df.groupby(by).apply(ols_coef, formula2) for m, df in dfs.items()}
slope3 = {m: df.groupby(by).apply(ols_coef, formula3) for m, df in dfs.items()}

In [38]:
for d in dfs:
    print('#'*30)
    print(d)
    print('#'*30)
    print('Rolling 5 year')
    print(f"{' ':^15} | {'Mean':^8} | {'Std':^8} | {'p10':^8} | {'p90':^8} | {'P. Slope':^8} | {'S.E.':^8} |" +
          f" {'t-stat':^8} | {'R2':^8}")
    rolling1 = backtest(dfs[d], slope1[d], test_dates, 'Model 1')
    rolling2 = backtest(dfs[d], slope2[d], test_dates, 'Model 2')
    rolling3 = backtest(dfs[d], slope3[d], test_dates, 'Model 3')

    print()
    print()


    print('Cumulative')
    print(f"{' ':^15} | {'Mean':^8} | {'Std':^8} | {'p10':^8} | {'p90':^8} | {'P. Slope':^8} | {'S.E.':^8} |" +
          f" {'t-stat':^8} | {'R2':^8}")
    cum1 = backtest(dfs[d], slope1[d], test_dates, 'Model 1', method='cumulative')
    cum2 = backtest(dfs[d], slope2[d], test_dates, 'Model 2', method='cumulative')
    cum3 = backtest(dfs[d], slope3[d], test_dates, 'Model 3', method='cumulative')

##############################
All-stocks
##############################
Rolling 5 year
                |   Mean   |   Std    |   p10    |   p90    | P. Slope |   S.E.   |  t-stat  |    R2   
    Model 1     |  0.024   |  0.005   |  0.018   |  0.031   |  1.458   |  0.157   |  9.299   |  0.005   
    Model 2     |  -0.010  |  0.009   |  -0.023  |  -0.001  |  0.925   |  0.092   |  10.084  |  0.006   
    Model 3     |  -0.023  |  0.002   |  -0.025  |  -0.020  |  0.690   |  0.424   |  1.627   |  0.000   


Cumulative
                |   Mean   |   Std    |   p10    |   p90    | P. Slope |   S.E.   |  t-stat  |    R2   
    Model 1     |  0.006   |  0.002   |  0.004   |  0.009   |  -2.023  |  0.412   |  -4.911  |  0.001   
    Model 2     |  0.003   |  0.002   |  0.002   |  0.006   |  -4.814  |  0.510   |  -9.433  |  0.005   
    Model 3     |  0.002   |  0.002   |  0.000   |  0.005   |  -3.742  |  0.369   | -10.135  |  0.006   
##############################
All-but-tiny-stocks
##########