# Task1 : Reproduce Table 1

## 1. merged data

* First, according to the research ideas of the paper, we integrate the factor return data with Fama’s 3-factor and 5-factor models.

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy.stats import trim_mean

In [2]:
table1_data = pd.read_csv('data/merged_data.csv')
table1_ff3 = pd.read_csv('data/FF3_daily.csv')
table1_ff5 = pd.read_csv('data/FF5_daily.csv')

In [3]:
table1_ff3.columns =['date','Mkt-RF','SMB','HML','RF']
table1_ff5.columns =['date','Mkt-RF','SMB','HML','RMW','CMA','RF']
print(table1_data.columns.tolist())
print(table1_ff3.columns.tolist())
print(table1_ff5.columns.tolist())

table1_ff3.dtypes

['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor']
['date', 'Mkt-RF', 'SMB', 'HML', 'RF']
['date', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']


date        int64
Mkt-RF    float64
SMB       float64
HML       float64
RF        float64
dtype: object

In [4]:
table1_data['date']= pd.to_datetime(table1_data['date']).dt.strftime('%Y-%m-%d')

In [5]:
table1_data

Unnamed: 0,date,port01,port02,port03,port04,port05,portLS,predictor
0,1952-07-01,0.124831,0.336154,0.839240,0.637685,0.656345,0.531514,Accruals
1,1952-07-02,-0.246625,-0.244496,-0.049348,-0.189462,-0.104565,0.142060,Accruals
2,1952-07-03,0.023978,0.084967,-0.142413,-0.223899,-0.035177,-0.059155,Accruals
3,1952-07-07,-0.171018,-0.076185,-0.263179,-0.661418,-0.503844,-0.332827,Accruals
4,1952-07-08,-0.209479,0.235904,-0.025679,0.218441,0.140033,0.349512,Accruals
...,...,...,...,...,...,...,...,...
1237143,2023-12-22,1.857859,0.723824,0.498934,0.233261,0.428393,-1.429466,std_turn
1237144,2023-12-26,3.266919,0.875071,1.303056,0.540725,0.561904,-2.705015,std_turn
1237145,2023-12-27,2.341186,0.200744,0.285204,0.159429,0.059149,-2.282037,std_turn
1237146,2023-12-28,-1.669881,-0.263774,-0.543158,-0.266308,-0.110725,1.559157,std_turn


In [6]:
table1_ff3['date'] = pd.to_datetime(table1_ff3['date'].astype(str), format='%Y%m%d').dt.strftime('%Y-%m-%d')
table1_ff3

Unnamed: 0,date,Mkt-RF,SMB,HML,RF
0,1926-07-01,0.10,-0.25,-0.27,0.009
1,1926-07-02,0.45,-0.33,-0.06,0.009
2,1926-07-06,0.17,0.30,-0.39,0.009
3,1926-07-07,0.09,-0.58,0.02,0.009
4,1926-07-08,0.21,-0.38,0.19,0.009
...,...,...,...,...,...
25896,2024-12-24,1.11,-0.09,-0.05,0.017
25897,2024-12-26,0.02,1.04,-0.19,0.017
25898,2024-12-27,-1.17,-0.66,0.56,0.017
25899,2024-12-30,-1.09,0.12,0.74,0.017


In [7]:
table1_ff5['date'] = pd.to_datetime(table1_ff5['date'].astype(str), format='%Y%m%d').dt.strftime('%Y-%m-%d')
table1_ff5

Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA,RF
0,1963-07-01,-0.67,0.02,-0.35,0.03,0.13,0.012
1,1963-07-02,0.79,-0.28,0.28,-0.08,-0.21,0.012
2,1963-07-03,0.63,-0.18,-0.10,0.13,-0.25,0.012
3,1963-07-05,0.40,0.09,-0.28,0.07,-0.30,0.012
4,1963-07-08,-0.63,0.07,-0.20,-0.27,0.06,0.012
...,...,...,...,...,...,...,...
15476,2024-12-24,1.11,-0.12,-0.05,-0.13,-0.37,0.017
15477,2024-12-26,0.02,1.09,-0.19,-0.44,0.35,0.017
15478,2024-12-27,-1.17,-0.44,0.56,0.41,0.03,0.017
15479,2024-12-30,-1.09,0.24,0.74,0.55,0.14,0.017


In [8]:
table1_ff3.head()
table1_ff3.dtypes

date       object
Mkt-RF    float64
SMB       float64
HML       float64
RF        float64
dtype: object

## 2.Divided the time & dataset

* The data analysis timeline of the paper is from 1963 to 2016. Combined with the data obtained by the team, we divided the research time into three groups: 2000_2016, 2017-2023, and 2000-2023, in order to explore the impact of long, medium and short time on factor regression.

In [9]:
# all period from 2000 Jan 1 to 2023 Dec 31
start_date = '2000-01-01'
end_date = '2023-12-31'

In [10]:
# sample period from 2000 Jan 1 to 2016 Dec 31
start_date_1 = '2000-01-01'
end_date_1 = '2016-12-31'

In [11]:
# post sample period from 2017 Jan 1 to 2023 Dec 31
start_date_2 = '2017-01-01'
end_date_2 = '2023-12-31'

In [121]:
# original sample period from 1963 July 1 to 2016 Dec 31
start_date_3 = '1963-07-01'
end_date_3 = '2016-12-31'

In [122]:
filtered_data_1 = table1_data[(table1_data['date'] >= start_date) & (table1_data['date'] <= end_date)]
filtered_ff3_1 = table1_ff3[(table1_ff3['date'] >= start_date) & (table1_ff3['date'] <= end_date)]
filtered_ff5_1 = table1_ff5[(table1_ff5['date'] >= start_date) & (table1_ff5['date'] <= end_date)]
filtered_data_2 = table1_data[(table1_data['date'] >= start_date_1) & (table1_data['date'] <= end_date_1)]
filtered_ff3_2 = table1_ff3[(table1_ff3['date'] >= start_date_1) & (table1_ff3['date'] <= end_date_1)]
filtered_ff5_2 = table1_ff5[(table1_ff5['date'] >= start_date_1) & (table1_ff5['date'] <= end_date_1)]
filtered_data_3 = table1_data[(table1_data['date'] >= start_date_2) & (table1_data['date'] <= end_date_2)]
filtered_ff3_3 = table1_ff3[(table1_ff3['date'] >= start_date_2) & (table1_ff3['date'] <= end_date_2)]
filtered_ff5_3 = table1_ff5[(table1_ff5['date'] >= start_date_2) & (table1_ff5['date'] <= end_date_2)]
filtered_data_4 = table1_data[(table1_data['date'] >= start_date_3) & (table1_data['date'] <= end_date_3)]
filtered_ff3_4 = table1_ff3[(table1_ff3['date'] >= start_date_3) & (table1_ff3['date'] <= end_date_3)]
filtered_ff5_4 = table1_ff5[(table1_ff5['date'] >= start_date_3) & (table1_ff5['date'] <= end_date_3)]

In [123]:
merged_data_1 = pd.merge(filtered_data_1, filtered_ff3_1, on='date', how='inner')
merged_data_2 = pd.merge(filtered_data_1, filtered_ff5_1, on='date', how='inner')
merged_data_3 = pd.merge(filtered_data_2, filtered_ff3_2, on='date', how='inner')
merged_data_4 = pd.merge(filtered_data_2, filtered_ff5_2, on='date', how='inner')
merged_data_5 = pd.merge(filtered_data_3, filtered_ff3_3, on='date', how='inner')
merged_data_6 = pd.merge(filtered_data_3, filtered_ff5_3, on='date', how='inner')
merged_data_7 = pd.merge(filtered_data_4, filtered_ff3_4, on='date', how='inner')
merged_data_8 = pd.merge(filtered_data_4, filtered_ff5_4, on='date', how='inner')

print(merged_data_1.columns.tolist())
print(merged_data_2.columns.tolist())
print(merged_data_3.columns.tolist())
print(merged_data_4.columns.tolist())
print(merged_data_5.columns.tolist())
print(merged_data_6.columns.tolist())
print(merged_data_7.columns.tolist())
print(merged_data_8.columns.tolist())

['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RF']
['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']
['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RF']
['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']
['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RF']
['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']
['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RF']
['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']


In [124]:
merged_data_1.to_csv('data/merged_data_with_ff3_1.csv', index=False)
merged_data_2.to_csv('data/merged_data_with_ff5_1.csv', index=False)
merged_data_3.to_csv('data/merged_data_with_ff3_2.csv', index=False)
merged_data_4.to_csv('data/merged_data_with_ff5_2.csv', index=False)
merged_data_5.to_csv('data/merged_data_with_ff3_3.csv', index=False)
merged_data_6.to_csv('data/merged_data_with_ff5_3.csv', index=False)
merged_data_7.to_csv('data/merged_data_with_ff3_4.csv', index=False)
merged_data_8.to_csv('data/merged_data_with_ff5_4.csv', index=False)

In [30]:
accounting_factors = [
    'Accruals', 'AnalystValue', 'AssetGrowth', 'BM', 'BPEBM', 'BookLeverage',
    'CBOperProf', 'CF', 'ChAssetTurnover', 'ChNWC', 'CompEquIss', 'CompositeDebtIssuance',
    'EBM', 'EP', 'EarningsSurprise', 'FirmAge', 'GP', 'Herf', 'InvGrowth',
    'NOA', 'OperProf', 'PS', 'RDAbility', 'RoE', 'SP', 'ShareIss1Y', 'ShareIss5Y',
    'XFIN', 'cfp', 'roaq'
]

return_factors = [
    'Beta', 'BetaLiquidityPS', 'CPVolSpread', 'Coskewness', 'CustomerMomentum',
    'DolVol', 'High52', 'IdioVol3F', 'Illiquidity', 'IntMom', 'LRreversal', 'MaxRet',
    'Mom12m', 'Mom6m', 'Mom6mJunk', 'MomOffSeason', 'MomOffSeason06YrPlus',
    'MomOffSeason11YrPlus', 'MomOffSeason16YrPlus', 'MomSeason', 'MomSeason06YrPlus',
    'MomSeason11YrPlus', 'MomSeason16YrPlus', 'MomSeasonShort', 'RIVolSpread',
    'ResidualMomentum', 'STreversal', 'Size', 'VolMkt', 'VolSD', 'std_turn', 'Frontier'
]

## 3. regression in the ‘all' time period

* In this section, calculations and regression studies will be conducted for the timeline 2000-2023.
* In the first step, we will calculate the first two columns of Table 1 to obtain the average return rate and t value of each factor.
* The second step is to calculate the regression results of each factor and the FF3 factor model, and output the excess return and the corresponding t value.
* The third step is to calculate the regression results of each factor and the FF5 factor model. The overall idea is similar to the second step.

+ <span style="color:red">The Newey-West standard error can effectively deal with heteroscedasticity by adjusting the covariance matrix. At the same time, the Newey-West method introduces a lag term to correct the autocorrelation and provide a more accurate standard error.</span>

### 3.1 Preliminary analysis

In [76]:
# Read the merged data
all_data = pd.read_csv('data/merged_data_with_ff3_1.csv')
# Divide by 100
port_columns_1 = ['port01', 'port02', 'port03', 'port04', 'port05']
all_data[port_columns_1] = all_data[port_columns_1] / 100
# only retain columns that are needed
all_data = all_data[['date', 'predictor'] + port_columns_1]

# Separate accounting and return data
accounting_data_1 = all_data[all_data['predictor'].isin(accounting_factors)]
return_data_1 = all_data[all_data['predictor'].isin(return_factors)]

In [79]:
# Calculate the dynamic factor return
def calculate_dynamic_factor_return(row):
    high_ports = row[['port04', 'port05']].values
    low_ports = row[['port01', 'port02']].values
    return np.mean(high_ports) - np.mean(low_ports)

# Define a function to compound returns
def compound_returns(x):
    return (1 + x).prod() - 1

# Resample and calculate dynamic factor return for each period
def resample_and_calculate(df, freq):
    df['date'] = pd.to_datetime(df['date'])
    df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
    return df_resampled

# Calculate Newey-West t-value
def calculate_newey_west_t_value(df, max_lag):
    results = []

    for factor, group in df.groupby('predictor'):
        returns = group['factor_return'].dropna()
        mean_returns = returns.mean() * 100  # Calculate mean returns for each factor

        # Regression to compute t-stat using Newey-West standard errors
        X = sm.add_constant(np.ones(len(returns)))  # Constant term
        model = sm.OLS(returns, X).fit(cov_type='HAC', cov_kwds={'maxlags': max_lag})

        t_value = model.tvalues.iloc[0]  # T-Value

        results.append({
            'Factor': factor,
            'Average Returns': mean_returns,
            'T-Value': t_value
        })

    return pd.DataFrame(results)

In [81]:
# Resample accounting data annually
accounting_data_resampled = resample_and_calculate(accounting_data_1, 'A')

# Resample return data monthly
return_data_resampled = resample_and_calculate(return_data_1, 'M')

# Calculate dynamic factor return
accounting_data_resampled['factor_return'] = accounting_data_resampled.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled['factor_return'] = return_data_resampled.apply(calculate_dynamic_factor_return, axis=1)

# Calculate stats for accounting data
accounting_results_1 = calculate_newey_west_t_value(accounting_data_resampled, max_lag=1)

# Calculate stats for return data
return_results_1 = calculate_newey_west_t_value(return_data_resampled, max_lag=12)

# Display results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_1.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_1.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing):
                   Factor  Average Returns  T-Value
0                Accruals           1.8641   1.3816
1            AnalystValue           3.3016   0.8501
2             AssetGrowth           2.2233   0.9308
3                      BM           3.2310   1.0727
4                   BPEBM           2.5179   1.5679
5            BookLeverage          -2.1442  -1.1015
6              CBOperProf           3.4958   2.1242
7                      CF           2.6408   0.9466
8         ChAssetTurnover          -0.5460  -0.6002
9                   ChNWC          -1.4564  -1.6484
10             CompEquIss           2.8295   1.3150
11  CompositeDebtIssuance           0.4813   0.2808
12                    EBM           0.2185   0.0780
13                     EP           3.7061   1.4373
14       EarningsSurprise           1.4133   1.4741
15                FirmAge           1.3193   0.4778
16                     GP           4.7328   2.2386
17                   H

### 3.2 Regression 1 - FF3

In [92]:
# read the merged data
all_data_2 = pd.read_csv('data/merged_data_with_ff3_1.csv')
# divide by 100
port_columns = ['port01', 'port02', 'port03', 'port04', 'port05', 'Mkt-RF', 'SMB', 'HML', 'RF']
all_data_2[port_columns] = all_data_2[port_columns] / 100
# only retain columns that are needed
all_data_2 = all_data_2[['date', 'predictor'] + port_columns]

# Separate accounting and return data
accounting_data_2 = all_data_2[all_data_2['predictor'].isin(accounting_factors)]
return_data_2 = all_data_2[all_data_2['predictor'].isin(return_factors)]

In [97]:
# Calculate alpha and Newey-West t-value
def calculate_ff3_alpha_and_tvalues(df, max_lag):
    results = []

    for factor, group in df.groupby('predictor'):
        group = group.dropna(subset=['factor_return', 'RF', 'Mkt-RF', 'SMB', 'HML'])
        y = group['factor_return'] - group['RF']
        X = sm.add_constant(group[['Mkt-RF', 'SMB', 'HML']])  # FF3 factors

        # Perform regression with Newey-West standard errors
        model = sm.OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': max_lag})

        alpha = model.params['const'] * 100  # FF3 Alpha
        t_value = model.tvalues['const']  # T-Value

        results.append({
            'Factor': factor,
            'FF3 Alpha': alpha,
            'T-Value': t_value
        })

    return pd.DataFrame(results)

In [98]:
# Resample accounting data annually
accounting_data_resampled_2 = resample_and_calculate(accounting_data_2, 'A')

# Resample return data monthly
return_data_resampled_2 = resample_and_calculate(return_data_2, 'M')

# Calculate dynamic factor return
accounting_data_resampled_2['factor_return'] = accounting_data_resampled_2.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled_2['factor_return'] = return_data_resampled_2.apply(calculate_dynamic_factor_return, axis=1)

# Calculate alpha and t-values for accounting data
accounting_results_2 = calculate_ff3_alpha_and_tvalues(accounting_data_resampled_2, max_lag=1)
# Calculate stats for return data
return_results_2 = calculate_ff3_alpha_and_tvalues(return_data_resampled_2, max_lag=12)

# Dsiplay results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_2.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_2.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing):
                   Factor  FF3 Alpha  T-Value
0                Accruals     1.5755   1.5579
1            AnalystValue     0.2005   0.1095
2             AssetGrowth    -1.1236  -0.8602
3                      BM    -2.2018  -1.7850
4                   BPEBM    -0.7369  -0.5519
5            BookLeverage    -3.1817  -1.7677
6              CBOperProf     3.7576   2.7390
7                      CF    -0.5388  -0.2611
8         ChAssetTurnover    -2.2165  -2.7621
9                   ChNWC    -3.3191  -2.4363
10             CompEquIss     2.7804   2.0430
11  CompositeDebtIssuance    -1.3495  -0.6716
12                    EBM    -5.6587  -4.9423
13                     EP    -0.9596  -0.7964
14       EarningsSurprise     0.0569   0.0881
15                FirmAge    -1.9017  -1.3101
16                     GP     4.5335   2.0606
17                   Herf    -3.2390  -2.8039
18              InvGrowth    -2.7385  -1.4090
19                    NOA     0.0817  

### 3.3 Regression 2 - FF5

In [109]:
# read the merged data
all_data_3 = pd.read_csv('data/merged_data_with_ff5_1.csv')
# divide by 100
port_columns = ['port01', 'port02', 'port03', 'port04', 'port05', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']
all_data_3[port_columns] = all_data_3[port_columns] / 100
# only retain columns that are needed
all_data_3 = all_data_3[['date', 'predictor'] + port_columns]

# Separate accounting and return data
accounting_data_3 = all_data_3[all_data_3['predictor'].isin(accounting_factors)]
return_data_3 = all_data_3[all_data_3['predictor'].isin(return_factors)]

In [111]:
# Calculate alpha and Newey-West t-value
def calculate_ff5_alpha_and_tvalues(df, max_lag):
    results = []

    for factor, group in df.groupby('predictor'):
        group = group.dropna(subset=['factor_return', 'RF', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA'])
        y = group['factor_return'] - group['RF']
        X = sm.add_constant(group[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']])  # FF5 factors

        # Perform regression with Newey-West standard errors
        model = sm.OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': max_lag})

        alpha = model.params['const'] * 100  # FF5 Alpha
        t_value = model.tvalues['const']  # T-Value

        results.append({
            'Factor': factor,
            'FF5 Alpha': alpha,
            'T-Value': t_value
        })

    return pd.DataFrame(results)

In [112]:
# Resample accounting data annually
accounting_data_resampled_3 = resample_and_calculate(accounting_data_3, 'A')

# Resample return data monthly
return_data_resampled_3 = resample_and_calculate(return_data_3, 'M')

# Calculate dynamic factor return
accounting_data_resampled_3['factor_return'] = accounting_data_resampled_3.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled_3['factor_return'] = return_data_resampled_3.apply(calculate_dynamic_factor_return, axis=1)

# Calculate alpha and t-values for accounting data for FF5
accounting_results_3 = calculate_ff5_alpha_and_tvalues(accounting_data_resampled_3, max_lag=1)
# Calculate stats for return data for FF5
return_results_3 = calculate_ff5_alpha_and_tvalues(return_data_resampled_3, max_lag=12)

# Display results for FF5
print("\nAccounting factors (annual rebalancing) - FF5:")
print(accounting_results_3.round(4))

print("\nReturn factors (monthly rebalancing) - FF5:")
print(return_results_3.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing) - FF5:
                   Factor  FF5 Alpha  T-Value
0                Accruals     1.3280   0.9866
1            AnalystValue    -1.6779  -0.7135
2             AssetGrowth    -0.4162  -0.3102
3                      BM    -0.1508  -0.1114
4                   BPEBM    -2.4170  -1.1428
5            BookLeverage    -2.4197  -1.6582
6              CBOperProf    -0.6778  -0.3477
7                      CF    -2.2022  -1.0842
8         ChAssetTurnover    -0.6244  -0.6621
9                   ChNWC    -4.4258  -2.7449
10             CompEquIss     3.1409   1.8614
11  CompositeDebtIssuance     0.1916   0.0809
12                    EBM    -4.7145  -2.8754
13                     EP    -2.1844  -1.4730
14       EarningsSurprise    -0.6025  -0.7805
15                FirmAge     0.1260   0.0568
16                     GP    -0.4867  -0.1634
17                   Herf    -2.9240  -2.5937
18              InvGrowth    -5.9937  -2.3858
19                    NOA    -0.

### 3.4 Combine all the regressions

In [116]:
# combine the results of accounting_results_1, accounting_results_2, accounting_results_3 according to the factor
accounting_results_all = accounting_results_1.merge(accounting_results_2, on='Factor', suffixes=('_1', '_2')).merge(accounting_results_3, on='Factor', suffixes=('', '_3'))

# combine the results of return_results_1, return_results_2, return_results_3
return_results_all = return_results_1.merge(return_results_2, on='Factor', suffixes=('_1', '_2')).merge(return_results_3, on='Factor', suffixes=('', '_3'))

# display the results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_all.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_all.round(4))


Accounting factors (annual rebalancing):
                   Factor  Average Returns  T-Value_1  FF3 Alpha  T-Value_2  \
0                Accruals           1.8641     1.3816     1.5755     1.5579   
1            AnalystValue           3.3016     0.8501     0.2005     0.1095   
2             AssetGrowth           2.2233     0.9308    -1.1236    -0.8602   
3                      BM           3.2310     1.0727    -2.2018    -1.7850   
4                   BPEBM           2.5179     1.5679    -0.7369    -0.5519   
5            BookLeverage          -2.1442    -1.1015    -3.1817    -1.7677   
6              CBOperProf           3.4958     2.1242     3.7576     2.7390   
7                      CF           2.6408     0.9466    -0.5388    -0.2611   
8         ChAssetTurnover          -0.5460    -0.6002    -2.2165    -2.7621   
9                   ChNWC          -1.4564    -1.6484    -3.3191    -2.4363   
10             CompEquIss           2.8295     1.3150     2.7804     2.0430   
11  Compos

In [120]:
# save the results
accounting_results_all.to_csv('accounting_factors_annual_all_2000_2023.csv', index=False)
return_results_all.to_csv('return_factors_monthly_all_2000_2023.csv', index=False)

## 4. Regression in the 2000-2016 time priod

### 4.1 Preliminary analysis