# Task1 : Reproduce Table 1

## 1. merged data

* First, according to the research ideas of the paper, we integrate the factor return data with Fama’s 3-factor and 5-factor models.

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy.stats import trim_mean

In [2]:
table1_data = pd.read_csv('data/merged_data.csv')
table1_ff3 = pd.read_csv('data/FF3_daily.csv')
table1_ff5 = pd.read_csv('data/FF5_daily.csv')

In [3]:
table1_ff3.columns =['date','Mkt-RF','SMB','HML','RF']
table1_ff5.columns =['date','Mkt-RF','SMB','HML','RMW','CMA','RF']
print(table1_data.columns.tolist())
print(table1_ff3.columns.tolist())
print(table1_ff5.columns.tolist())

table1_ff3.dtypes

['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor']
['date', 'Mkt-RF', 'SMB', 'HML', 'RF']
['date', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']


date        int64
Mkt-RF    float64
SMB       float64
HML       float64
RF        float64
dtype: object

In [4]:
table1_data['date']= pd.to_datetime(table1_data['date']).dt.strftime('%Y-%m-%d')

In [5]:
table1_data

Unnamed: 0,date,port01,port02,port03,port04,port05,portLS,predictor
0,1952-07-01,0.124831,0.336154,0.839240,0.637685,0.656345,0.531514,Accruals
1,1952-07-02,-0.246625,-0.244496,-0.049348,-0.189462,-0.104565,0.142060,Accruals
2,1952-07-03,0.023978,0.084967,-0.142413,-0.223899,-0.035177,-0.059155,Accruals
3,1952-07-07,-0.171018,-0.076185,-0.263179,-0.661418,-0.503844,-0.332827,Accruals
4,1952-07-08,-0.209479,0.235904,-0.025679,0.218441,0.140033,0.349512,Accruals
...,...,...,...,...,...,...,...,...
1237143,2023-12-22,1.857859,0.723824,0.498934,0.233261,0.428393,-1.429466,std_turn
1237144,2023-12-26,3.266919,0.875071,1.303056,0.540725,0.561904,-2.705015,std_turn
1237145,2023-12-27,2.341186,0.200744,0.285204,0.159429,0.059149,-2.282037,std_turn
1237146,2023-12-28,-1.669881,-0.263774,-0.543158,-0.266308,-0.110725,1.559157,std_turn


In [6]:
table1_ff3['date'] = pd.to_datetime(table1_ff3['date'].astype(str), format='%Y%m%d').dt.strftime('%Y-%m-%d')
table1_ff3

Unnamed: 0,date,Mkt-RF,SMB,HML,RF
0,1926-07-01,0.10,-0.25,-0.27,0.009
1,1926-07-02,0.45,-0.33,-0.06,0.009
2,1926-07-06,0.17,0.30,-0.39,0.009
3,1926-07-07,0.09,-0.58,0.02,0.009
4,1926-07-08,0.21,-0.38,0.19,0.009
...,...,...,...,...,...
25896,2024-12-24,1.11,-0.09,-0.05,0.017
25897,2024-12-26,0.02,1.04,-0.19,0.017
25898,2024-12-27,-1.17,-0.66,0.56,0.017
25899,2024-12-30,-1.09,0.12,0.74,0.017


In [7]:
table1_ff5['date'] = pd.to_datetime(table1_ff5['date'].astype(str), format='%Y%m%d').dt.strftime('%Y-%m-%d')
table1_ff5

Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA,RF
0,1963-07-01,-0.67,0.02,-0.35,0.03,0.13,0.012
1,1963-07-02,0.79,-0.28,0.28,-0.08,-0.21,0.012
2,1963-07-03,0.63,-0.18,-0.10,0.13,-0.25,0.012
3,1963-07-05,0.40,0.09,-0.28,0.07,-0.30,0.012
4,1963-07-08,-0.63,0.07,-0.20,-0.27,0.06,0.012
...,...,...,...,...,...,...,...
15476,2024-12-24,1.11,-0.12,-0.05,-0.13,-0.37,0.017
15477,2024-12-26,0.02,1.09,-0.19,-0.44,0.35,0.017
15478,2024-12-27,-1.17,-0.44,0.56,0.41,0.03,0.017
15479,2024-12-30,-1.09,0.24,0.74,0.55,0.14,0.017


In [8]:
table1_ff3.head()
table1_ff3.dtypes

date       object
Mkt-RF    float64
SMB       float64
HML       float64
RF        float64
dtype: object

## 2.Divided the time & dataset

* The data analysis timeline of the paper is from 1963 to 2016. Combined with the data obtained by the team, we divided the research time into four groups: 2000_2016, 2017-2023, 2000-2023, and 1963-2016, in order to explore the impact of long, medium and short time on factor regression.

In [9]:
# all period from 2000 Jan 1 to 2023 Dec 31
start_date = '2000-01-01'
end_date = '2023-12-31'

In [10]:
# sample period from 2000 Jan 1 to 2016 Dec 31
start_date_1 = '2000-01-01'
end_date_1 = '2016-12-31'

In [11]:
# post sample period from 2017 Jan 1 to 2023 Dec 31
start_date_2 = '2017-01-01'
end_date_2 = '2023-12-31'

In [12]:
# original sample period from 1963 July 1 to 2016 Dec 31
start_date_3 = '1963-07-01'
end_date_3 = '2016-12-31'

In [13]:
# Adjust for the time
filtered_data_1 = table1_data[(table1_data['date'] >= start_date) & (table1_data['date'] <= end_date)]
filtered_ff3_1 = table1_ff3[(table1_ff3['date'] >= start_date) & (table1_ff3['date'] <= end_date)]
filtered_ff5_1 = table1_ff5[(table1_ff5['date'] >= start_date) & (table1_ff5['date'] <= end_date)]
filtered_data_2 = table1_data[(table1_data['date'] >= start_date_1) & (table1_data['date'] <= end_date_1)]
filtered_ff3_2 = table1_ff3[(table1_ff3['date'] >= start_date_1) & (table1_ff3['date'] <= end_date_1)]
filtered_ff5_2 = table1_ff5[(table1_ff5['date'] >= start_date_1) & (table1_ff5['date'] <= end_date_1)]
filtered_data_3 = table1_data[(table1_data['date'] >= start_date_2) & (table1_data['date'] <= end_date_2)]
filtered_ff3_3 = table1_ff3[(table1_ff3['date'] >= start_date_2) & (table1_ff3['date'] <= end_date_2)]
filtered_ff5_3 = table1_ff5[(table1_ff5['date'] >= start_date_2) & (table1_ff5['date'] <= end_date_2)]
filtered_data_4 = table1_data[(table1_data['date'] >= start_date_3) & (table1_data['date'] <= end_date_3)]
filtered_ff3_4 = table1_ff3[(table1_ff3['date'] >= start_date_3) & (table1_ff3['date'] <= end_date_3)]
filtered_ff5_4 = table1_ff5[(table1_ff5['date'] >= start_date_3) & (table1_ff5['date'] <= end_date_3)]

In [14]:
merged_data_1 = pd.merge(filtered_data_1, filtered_ff3_1, on='date', how='inner')
merged_data_2 = pd.merge(filtered_data_1, filtered_ff5_1, on='date', how='inner')
merged_data_3 = pd.merge(filtered_data_2, filtered_ff3_2, on='date', how='inner')
merged_data_4 = pd.merge(filtered_data_2, filtered_ff5_2, on='date', how='inner')
merged_data_5 = pd.merge(filtered_data_3, filtered_ff3_3, on='date', how='inner')
merged_data_6 = pd.merge(filtered_data_3, filtered_ff5_3, on='date', how='inner')
merged_data_7 = pd.merge(filtered_data_4, filtered_ff3_4, on='date', how='inner')
merged_data_8 = pd.merge(filtered_data_4, filtered_ff5_4, on='date', how='inner')

print(merged_data_1.columns.tolist())
print(merged_data_2.columns.tolist())
print(merged_data_3.columns.tolist())
print(merged_data_4.columns.tolist())
print(merged_data_5.columns.tolist())
print(merged_data_6.columns.tolist())
print(merged_data_7.columns.tolist())
print(merged_data_8.columns.tolist())

['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RF']
['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']
['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RF']
['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']
['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RF']
['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']
['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RF']
['date', 'port01', 'port02', 'port03', 'port04', 'port05', 'portLS', 'predictor', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']


In [15]:
merged_data_1.to_csv('data/merged_data_with_ff3_1.csv', index=False)
merged_data_2.to_csv('data/merged_data_with_ff5_1.csv', index=False)
merged_data_3.to_csv('data/merged_data_with_ff3_2.csv', index=False)
merged_data_4.to_csv('data/merged_data_with_ff5_2.csv', index=False)
merged_data_5.to_csv('data/merged_data_with_ff3_3.csv', index=False)
merged_data_6.to_csv('data/merged_data_with_ff5_3.csv', index=False)
merged_data_7.to_csv('data/merged_data_with_ff3_4.csv', index=False)
merged_data_8.to_csv('data/merged_data_with_ff5_4.csv', index=False)

In [16]:
# Define for accounting factors and return factors
accounting_factors = [
    'Accruals', 'AnalystValue', 'AssetGrowth', 'BM', 'BPEBM', 'BookLeverage',
    'CBOperProf', 'CF', 'ChAssetTurnover', 'ChNWC', 'CompEquIss', 'CompositeDebtIssuance',
    'EBM', 'EP', 'EarningsSurprise', 'GP', 'Herf', 'InvGrowth',
    'NOA', 'OperProf', 'PS', 'RDAbility', 'RoE', 'SP', 'Size', 'ShareIss1Y', 'ShareIss5Y',
    'XFIN', 'cfp', 'roaq'
]

return_factors = [
    'Beta', 'BetaLiquidityPS', 'CPVolSpread', 'Coskewness', 'CustomerMomentum',
    'DolVol', 'FirmAge', 'High52', 'IdioVol3F', 'Illiquidity', 'IntMom', 'LRreversal', 'MaxRet',
    'Mom12m', 'Mom6m', 'Mom6mJunk', 'MomOffSeason', 'MomOffSeason06YrPlus',
    'MomOffSeason11YrPlus', 'MomOffSeason16YrPlus', 'MomSeason', 'MomSeason06YrPlus',
    'MomSeason11YrPlus', 'MomSeason16YrPlus', 'MomSeasonShort', 'RIVolSpread',
    'ResidualMomentum', 'STreversal', 'VolMkt', 'VolSD', 'std_turn', 'Frontier'
]

## 3. regression in the ‘all' time period

* In this section, calculations and regression studies will be conducted for the timeline 2000-2023.
* In the first step, we will calculate the first two columns of Table 1 to obtain the average return rate and t value of each factor.
* The second step is to calculate the regression results of each factor and the FF3 factor model, and output the excess return and the corresponding t value.
* The third step is to calculate the regression results of each factor and the FF5 factor model. The overall idea is similar to the second step.

+ <span style="color:red">The Newey-West standard error can effectively deal with heteroscedasticity by adjusting the covariance matrix. At the same time, the Newey-West method introduces a lag term to correct the autocorrelation and provide a more accurate standard error.</span>

### 3.1 Preliminary analysis

In [17]:
# Read the merged data
all_data = pd.read_csv('data/merged_data_with_ff3_1.csv')
# Divide by 100
port_columns_1 = ['port01', 'port02', 'port03', 'port04', 'port05']
all_data[port_columns_1] = all_data[port_columns_1] / 100
# only retain columns that are needed
all_data = all_data[['date', 'predictor'] + port_columns_1]

# Separate accounting and return data
accounting_data_1 = all_data[all_data['predictor'].isin(accounting_factors)]
return_data_1 = all_data[all_data['predictor'].isin(return_factors)]

In [18]:
# Calculate the dynamic factor return
def calculate_dynamic_factor_return(row):
    high_ports = row[['port04', 'port05']].values
    low_ports = row[['port01', 'port02']].values
    return np.mean(high_ports) - np.mean(low_ports)

# Define a function to compound returns
def compound_returns(x):
    return (1 + x).prod() - 1

# Resample and calculate dynamic factor return for each period
def resample_and_calculate(df, freq):
    df['date'] = pd.to_datetime(df['date'])
    df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
    return df_resampled

# Calculate Newey-West t-value
def calculate_newey_west_t_value(df, max_lag):
    results = []

    for factor, group in df.groupby('predictor'):
        returns = group['factor_return'].dropna()
        mean_returns = returns.mean() * 100  # Calculate mean returns for each factor

        # Regression to compute t-stat using Newey-West standard errors
        X = sm.add_constant(np.ones(len(returns)))  # Constant term
        model = sm.OLS(returns, X).fit(cov_type='HAC', cov_kwds={'maxlags': max_lag})

        t_value = model.tvalues.iloc[0]  # T-Value

        results.append({
            'Factor': factor,
            'Average Returns': mean_returns,
            'T-Value': t_value
        })

    return pd.DataFrame(results)

In [19]:
# Resample accounting data annually
accounting_data_resampled = resample_and_calculate(accounting_data_1, 'A')

# Resample return data monthly
return_data_resampled = resample_and_calculate(return_data_1, 'M')

# Calculate dynamic factor return
accounting_data_resampled['factor_return'] = accounting_data_resampled.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled['factor_return'] = return_data_resampled.apply(calculate_dynamic_factor_return, axis=1)

# Calculate stats for accounting data
accounting_results_1 = calculate_newey_west_t_value(accounting_data_resampled, max_lag=1)

# Calculate stats for return data
return_results_1 = calculate_newey_west_t_value(return_data_resampled, max_lag=12)

# Display results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_1.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_1.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing):
                   Factor  Average Returns  T-Value
0                Accruals           1.8641   1.3816
1            AnalystValue           3.3016   0.8501
2             AssetGrowth           2.2233   0.9308
3                      BM           3.2310   1.0727
4                   BPEBM           2.5179   1.5679
5            BookLeverage          -2.1442  -1.1015
6              CBOperProf           3.4958   2.1242
7                      CF           2.6408   0.9466
8         ChAssetTurnover          -0.5460  -0.6002
9                   ChNWC          -1.4564  -1.6484
10             CompEquIss           2.8295   1.3150
11  CompositeDebtIssuance           0.4813   0.2808
12                    EBM           0.2185   0.0780
13                     EP           3.7061   1.4373
14       EarningsSurprise           1.4133   1.4741
15                     GP           4.7328   2.2386
16                   Herf          -1.4233  -1.0353
17              InvGro

### 3.2 Regression 1 - FF3

In [20]:
# read the merged data
all_data_2 = pd.read_csv('data/merged_data_with_ff3_1.csv')
# divide by 100
port_columns = ['port01', 'port02', 'port03', 'port04', 'port05', 'Mkt-RF', 'SMB', 'HML', 'RF']
all_data_2[port_columns] = all_data_2[port_columns] / 100
# only retain columns that are needed
all_data_2 = all_data_2[['date', 'predictor'] + port_columns]

# Separate accounting and return data
accounting_data_2 = all_data_2[all_data_2['predictor'].isin(accounting_factors)]
return_data_2 = all_data_2[all_data_2['predictor'].isin(return_factors)]

In [21]:
# Calculate alpha and Newey-West t-value
def calculate_ff3_alpha_and_tvalues(df, max_lag):
    results = []

    for factor, group in df.groupby('predictor'):
        group = group.dropna(subset=['factor_return', 'RF', 'Mkt-RF', 'SMB', 'HML'])
        y = group['factor_return'] - group['RF']
        X = sm.add_constant(group[['Mkt-RF', 'SMB', 'HML']])  # FF3 factors

        # Perform regression with Newey-West standard errors
        model = sm.OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': max_lag})

        alpha = model.params['const'] * 100  # FF3 Alpha
        t_value = model.tvalues['const']  # T-Value

        results.append({
            'Factor': factor,
            'FF3 Alpha': alpha,
            'T-Value': t_value
        })

    return pd.DataFrame(results)

In [22]:
# Resample accounting data annually
accounting_data_resampled_2 = resample_and_calculate(accounting_data_2, 'A')

# Resample return data monthly
return_data_resampled_2 = resample_and_calculate(return_data_2, 'M')

# Calculate dynamic factor return
accounting_data_resampled_2['factor_return'] = accounting_data_resampled_2.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled_2['factor_return'] = return_data_resampled_2.apply(calculate_dynamic_factor_return, axis=1)

# Calculate alpha and t-values for accounting data
accounting_results_2 = calculate_ff3_alpha_and_tvalues(accounting_data_resampled_2, max_lag=1)
# Calculate stats for return data
return_results_2 = calculate_ff3_alpha_and_tvalues(return_data_resampled_2, max_lag=12)

# Dsiplay results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_2.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_2.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing):
                   Factor  FF3 Alpha  T-Value
0                Accruals     1.5755   1.5579
1            AnalystValue     0.2005   0.1095
2             AssetGrowth    -1.1236  -0.8602
3                      BM    -2.2018  -1.7850
4                   BPEBM    -0.7369  -0.5519
5            BookLeverage    -3.1817  -1.7677
6              CBOperProf     3.7576   2.7390
7                      CF    -0.5388  -0.2611
8         ChAssetTurnover    -2.2165  -2.7621
9                   ChNWC    -3.3191  -2.4363
10             CompEquIss     2.7804   2.0430
11  CompositeDebtIssuance    -1.3495  -0.6716
12                    EBM    -5.6587  -4.9423
13                     EP    -0.9596  -0.7964
14       EarningsSurprise     0.0569   0.0881
15                     GP     4.5335   2.0606
16                   Herf    -3.2390  -2.8039
17              InvGrowth    -2.7385  -1.4090
18                    NOA     0.0817   0.0376
19               OperProf     4.4293  

### 3.3 Regression 2 - FF5

In [23]:
# read the merged data
all_data_3 = pd.read_csv('data/merged_data_with_ff5_1.csv')
# divide by 100
port_columns = ['port01', 'port02', 'port03', 'port04', 'port05', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']
all_data_3[port_columns] = all_data_3[port_columns] / 100
# only retain columns that are needed
all_data_3 = all_data_3[['date', 'predictor'] + port_columns]

# Separate accounting and return data
accounting_data_3 = all_data_3[all_data_3['predictor'].isin(accounting_factors)]
return_data_3 = all_data_3[all_data_3['predictor'].isin(return_factors)]

In [24]:
# Calculate alpha and Newey-West t-value
def calculate_ff5_alpha_and_tvalues(df, max_lag):
    results = []

    for factor, group in df.groupby('predictor'):
        group = group.dropna(subset=['factor_return', 'RF', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA'])
        y = group['factor_return'] - group['RF']
        X = sm.add_constant(group[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']])  # FF5 factors

        # Perform regression with Newey-West standard errors
        model = sm.OLS(y, X).fit(cov_type='HAC', cov_kwds={'maxlags': max_lag})

        alpha = model.params['const'] * 100  # FF5 Alpha
        t_value = model.tvalues['const']  # T-Value

        results.append({
            'Factor': factor,
            'FF5 Alpha': alpha,
            'T-Value': t_value
        })

    return pd.DataFrame(results)

In [25]:
# Resample accounting data annually
accounting_data_resampled_3 = resample_and_calculate(accounting_data_3, 'A')

# Resample return data monthly
return_data_resampled_3 = resample_and_calculate(return_data_3, 'M')

# Calculate dynamic factor return
accounting_data_resampled_3['factor_return'] = accounting_data_resampled_3.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled_3['factor_return'] = return_data_resampled_3.apply(calculate_dynamic_factor_return, axis=1)

# Calculate alpha and t-values for accounting data for FF5
accounting_results_3 = calculate_ff5_alpha_and_tvalues(accounting_data_resampled_3, max_lag=1)
# Calculate stats for return data for FF5
return_results_3 = calculate_ff5_alpha_and_tvalues(return_data_resampled_3, max_lag=12)

# Display results for FF5
print("\nAccounting factors (annual rebalancing) - FF5:")
print(accounting_results_3.round(4))

print("\nReturn factors (monthly rebalancing) - FF5:")
print(return_results_3.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing) - FF5:
                   Factor  FF5 Alpha  T-Value
0                Accruals     1.3280   0.9866
1            AnalystValue    -1.6779  -0.7135
2             AssetGrowth    -0.4162  -0.3102
3                      BM    -0.1508  -0.1114
4                   BPEBM    -2.4170  -1.1428
5            BookLeverage    -2.4197  -1.6582
6              CBOperProf    -0.6778  -0.3477
7                      CF    -2.2022  -1.0842
8         ChAssetTurnover    -0.6244  -0.6621
9                   ChNWC    -4.4258  -2.7449
10             CompEquIss     3.1409   1.8614
11  CompositeDebtIssuance     0.1916   0.0809
12                    EBM    -4.7145  -2.8754
13                     EP    -2.1844  -1.4730
14       EarningsSurprise    -0.6025  -0.7805
15                     GP    -0.4867  -0.1634
16                   Herf    -2.9240  -2.5937
17              InvGrowth    -5.9937  -2.3858
18                    NOA    -0.5617  -0.2208
19               OperProf    -1.

### 3.4 Combine all the regressions

In [26]:
# combine the results of accounting_results_1, accounting_results_2, accounting_results_3 according to the factor
accounting_results_all = accounting_results_1.merge(accounting_results_2, on='Factor', suffixes=('_1', '_2')).merge(accounting_results_3, on='Factor', suffixes=('', '_3'))

# combine the results of return_results_1, return_results_2, return_results_3
return_results_all = return_results_1.merge(return_results_2, on='Factor', suffixes=('_1', '_2')).merge(return_results_3, on='Factor', suffixes=('', '_3'))

# display the results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_all.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_all.round(4))


Accounting factors (annual rebalancing):
                   Factor  Average Returns  T-Value_1  FF3 Alpha  T-Value_2  \
0                Accruals           1.8641     1.3816     1.5755     1.5579   
1            AnalystValue           3.3016     0.8501     0.2005     0.1095   
2             AssetGrowth           2.2233     0.9308    -1.1236    -0.8602   
3                      BM           3.2310     1.0727    -2.2018    -1.7850   
4                   BPEBM           2.5179     1.5679    -0.7369    -0.5519   
5            BookLeverage          -2.1442    -1.1015    -3.1817    -1.7677   
6              CBOperProf           3.4958     2.1242     3.7576     2.7390   
7                      CF           2.6408     0.9466    -0.5388    -0.2611   
8         ChAssetTurnover          -0.5460    -0.6002    -2.2165    -2.7621   
9                   ChNWC          -1.4564    -1.6484    -3.3191    -2.4363   
10             CompEquIss           2.8295     1.3150     2.7804     2.0430   
11  Compos

In [27]:
# save the results
accounting_results_all.to_csv('accounting_factors_annual_all_2000_2023.csv', index=False)
return_results_all.to_csv('return_factors_monthly_all_2000_2023.csv', index=False)

## 4. Regression in the 2000-2016 'sample' time period

* In this section, calculations and regression studies will be conducted for the timeline 2000-2016.
* The calculation steps are the same as the previous section, and the results are output.

### 4.1 Preliminary analysis

In [28]:
# Read the merged data
sample_data = pd.read_csv('data/merged_data_with_ff3_2.csv')
# Divide by 100
port_columns_1 = ['port01', 'port02', 'port03', 'port04', 'port05']
sample_data[port_columns_1] = sample_data[port_columns_1] / 100
# only retain columns that are needed
sample_data = sample_data[['date', 'predictor'] + port_columns_1]

# Separate accounting and return data
accounting_data_4 = sample_data[sample_data['predictor'].isin(accounting_factors)]
return_data_4 = sample_data[sample_data['predictor'].isin(return_factors)]
# Resample accounting data annually
accounting_data_resampled_4 = resample_and_calculate(accounting_data_4, 'A')

# Resample return data monthly
return_data_resampled_4 = resample_and_calculate(return_data_4, 'M')

# Calculate dynamic factor return
accounting_data_resampled_4['factor_return'] = accounting_data_resampled_4.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled_4['factor_return'] = return_data_resampled_4.apply(calculate_dynamic_factor_return, axis=1)

# Calculate stats for accounting data
accounting_results_4 = calculate_newey_west_t_value(accounting_data_resampled_4, max_lag=1)

# Calculate stats for return data
return_results_4 = calculate_newey_west_t_value(return_data_resampled_4, max_lag=12)

# Display results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_4.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_4.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing):
                   Factor  Average Returns  T-Value
0                Accruals           1.3672   0.7513
1            AnalystValue           5.2369   1.3515
2             AssetGrowth           4.5956   1.9041
3                      BM           5.5090   1.8247
4                   BPEBM           0.4651   0.3839
5            BookLeverage          -2.3794  -0.8972
6              CBOperProf           1.2786   0.7626
7                      CF           5.5339   2.4908
8         ChAssetTurnover          -0.2058  -0.2138
9                   ChNWC          -2.0116  -2.8594
10             CompEquIss           1.7482   0.6605
11  CompositeDebtIssuance           3.4452   3.1524
12                    EBM           2.5826   0.8227
13                     EP           4.9772   1.5964
14       EarningsSurprise           1.6227   1.2572
15                     GP           1.0183   0.5508
16                   Herf          -2.5717  -2.5392
17              InvGro

### 4.2 Regression 1 - FF3

In [29]:
# read the merged data
sample_data_2 = pd.read_csv('data/merged_data_with_ff3_2.csv')
# divide by 100
port_columns = ['port01', 'port02', 'port03', 'port04', 'port05', 'Mkt-RF', 'SMB', 'HML', 'RF']
sample_data_2[port_columns] = sample_data_2[port_columns] / 100
# only retain columns that are needed
sample_data_2 = sample_data_2[['date', 'predictor'] + port_columns]

# Separate accounting and return data
accounting_data_5 = sample_data_2[sample_data_2['predictor'].isin(accounting_factors)]
return_data_5 = sample_data_2[sample_data_2['predictor'].isin(return_factors)]

In [30]:
# Resample accounting data annually
accounting_data_resampled_5 = resample_and_calculate(accounting_data_5, 'A')

# Resample return data monthly
return_data_resampled_5 = resample_and_calculate(return_data_5, 'M')

# Calculate dynamic factor return
accounting_data_resampled_5['factor_return'] = accounting_data_resampled_5.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled_5['factor_return'] = return_data_resampled_5.apply(calculate_dynamic_factor_return, axis=1)

# Calculate alpha and t-values for accounting data
accounting_results_5 = calculate_ff3_alpha_and_tvalues(accounting_data_resampled_5, max_lag=1)
# Calculate stats for return data
return_results_5 = calculate_ff3_alpha_and_tvalues(return_data_resampled_5, max_lag=12)

# Dsiplay results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_5.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_5.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing):
                   Factor  FF3 Alpha  T-Value
0                Accruals     2.0269   1.6537
1            AnalystValue     1.3451   0.6741
2             AssetGrowth    -0.5732  -0.4429
3                      BM    -1.1189  -0.6940
4                   BPEBM    -2.1438  -1.1774
5            BookLeverage    -1.3604  -0.7662
6              CBOperProf     2.4849   2.7927
7                      CF     2.5372   1.3782
8         ChAssetTurnover    -2.2388  -2.6820
9                   ChNWC    -3.1211  -2.3622
10             CompEquIss     4.5607   3.6458
11  CompositeDebtIssuance     2.6408   2.2855
12                    EBM    -5.1105  -3.7984
13                     EP    -1.1724  -0.6967
14       EarningsSurprise    -0.3017  -0.4078
15                     GP     1.8384   0.6041
16                   Herf    -4.5666  -4.0404
17              InvGrowth    -2.8158  -1.6999
18                    NOA    -2.3752  -0.8449
19               OperProf     3.9567  

### 4.3 Regression 2 - FF5

In [31]:
# read the merged data
sample_data_3 = pd.read_csv('data/merged_data_with_ff5_2.csv')
# divide by 100
port_columns = ['port01', 'port02', 'port03', 'port04', 'port05', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']
sample_data_3[port_columns] = sample_data_3[port_columns] / 100
# only retain columns that are needed
sample_data_3 = sample_data_3[['date', 'predictor'] + port_columns]

# Separate accounting and return data
accounting_data_6 = sample_data_3[sample_data_3['predictor'].isin(accounting_factors)]
return_data_6 = sample_data_3[sample_data_3['predictor'].isin(return_factors)]

In [32]:
# Resample accounting data annually
accounting_data_resampled_6 = resample_and_calculate(accounting_data_6, 'A')

# Resample return data monthly
return_data_resampled_6 = resample_and_calculate(return_data_6, 'M')

# Calculate dynamic factor return
accounting_data_resampled_6['factor_return'] = accounting_data_resampled_6.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled_6['factor_return'] = return_data_resampled_6.apply(calculate_dynamic_factor_return, axis=1)

# Calculate alpha and t-values for accounting data for FF5
accounting_results_6 = calculate_ff5_alpha_and_tvalues(accounting_data_resampled_6, max_lag=1)
# Calculate stats for return data for FF5
return_results_6 = calculate_ff5_alpha_and_tvalues(return_data_resampled_6, max_lag=12)

# Display results for FF5
print("\nAccounting factors (annual rebalancing) - FF5:")
print(accounting_results_6.round(4))

print("\nReturn factors (monthly rebalancing) - FF5:")
print(return_results_6.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing) - FF5:
                   Factor  FF5 Alpha  T-Value
0                Accruals     1.3354   0.6928
1            AnalystValue    -1.4842  -0.6677
2             AssetGrowth     2.4952   1.4692
3                      BM     2.1416   1.3354
4                   BPEBM     0.3251   0.1028
5            BookLeverage    -0.4248  -0.1493
6              CBOperProf     1.5739   1.2092
7                      CF    -0.5907  -0.2185
8         ChAssetTurnover    -1.2591  -1.1160
9                   ChNWC    -4.6118  -2.5678
10             CompEquIss     5.5441   5.0769
11  CompositeDebtIssuance     4.1725   1.9266
12                    EBM    -2.4192  -1.0973
13                     EP    -2.1530  -0.7294
14       EarningsSurprise     1.4258   1.0428
15                     GP     0.8561   0.1872
16                   Herf    -1.3029  -0.7080
17              InvGrowth    -1.1852  -0.4215
18                    NOA    -1.0558  -0.2155
19               OperProf    -2.

### 4.4 Combine all the regressions

In [33]:
# combine the results
accounting_results_sample = accounting_results_4.merge(accounting_results_5, on='Factor', suffixes=('_1', '_2')).merge(accounting_results_6, on='Factor', suffixes=('', '_3'))

# combine the results of return_results_1, return_results_2, return_results_3
return_results_sample = return_results_4.merge(return_results_5, on='Factor', suffixes=('_1', '_2')).merge(return_results_6, on='Factor', suffixes=('', '_3'))

# display the results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_sample.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_sample.round(4))


Accounting factors (annual rebalancing):
                   Factor  Average Returns  T-Value_1  FF3 Alpha  T-Value_2  \
0                Accruals           1.3672     0.7513     2.0269     1.6537   
1            AnalystValue           5.2369     1.3515     1.3451     0.6741   
2             AssetGrowth           4.5956     1.9041    -0.5732    -0.4429   
3                      BM           5.5090     1.8247    -1.1189    -0.6940   
4                   BPEBM           0.4651     0.3839    -2.1438    -1.1774   
5            BookLeverage          -2.3794    -0.8972    -1.3604    -0.7662   
6              CBOperProf           1.2786     0.7626     2.4849     2.7927   
7                      CF           5.5339     2.4908     2.5372     1.3782   
8         ChAssetTurnover          -0.2058    -0.2138    -2.2388    -2.6820   
9                   ChNWC          -2.0116    -2.8594    -3.1211    -2.3622   
10             CompEquIss           1.7482     0.6605     4.5607     3.6458   
11  Compos

In [34]:
# save the results
accounting_results_sample.to_csv('accounting_factors_annual_sample_2000_2016.csv', index=False)
return_results_sample.to_csv('return_factors_monthly_sample_2000_2016.csv', index=False)

## 5. Regression in the 2017-2023 'post' time period

* In this section, calculations and regression studies will be conducted for the timeline 2017-2023.
* The calculation steps are the same as the previous section, and the results are output.

### 5.1 Preliminary analysis

In [35]:
# Read the merged data
post_data = pd.read_csv('data/merged_data_with_ff3_3.csv')
# Divide by 100
port_columns_1 = ['port01', 'port02', 'port03', 'port04', 'port05']
post_data[port_columns_1] = post_data[port_columns_1] / 100
# only retain columns that are needed
post_data = post_data[['date', 'predictor'] + port_columns_1]

# Separate accounting and return data
accounting_data_7 = post_data[post_data['predictor'].isin(accounting_factors)]
return_data_7 = post_data[post_data['predictor'].isin(return_factors)]
# Resample accounting data annually
accounting_data_resampled_7 = resample_and_calculate(accounting_data_7, 'A')

# Resample return data monthly
return_data_resampled_7 = resample_and_calculate(return_data_7, 'M')

# Calculate dynamic factor return
accounting_data_resampled_7['factor_return'] = accounting_data_resampled_7.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled_7['factor_return'] = return_data_resampled_7.apply(calculate_dynamic_factor_return, axis=1)

# Calculate stats for accounting data
accounting_results_7 = calculate_newey_west_t_value(accounting_data_resampled_7, max_lag=1)

# Calculate stats for return data
return_results_7 = calculate_newey_west_t_value(return_data_resampled_7, max_lag=12)

# Display results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_7.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_7.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing):
                   Factor  Average Returns  T-Value
0                Accruals           3.0707   2.3000
1            AnalystValue          -1.3983  -0.1554
2             AssetGrowth          -3.5381  -0.6802
3                      BM          -2.3012  -0.3411
4                   BPEBM           7.5033   1.8866
5            BookLeverage          -1.5729  -0.7963
6              CBOperProf           8.8805   3.6424
7                      CF          -4.3853  -0.6471
8         ChAssetTurnover          -1.3721  -0.6764
9                   ChNWC          -0.1080  -0.0441
10             CompEquIss           5.4558   1.4655
11  CompositeDebtIssuance          -6.7167  -2.1431
12                    EBM          -5.5230  -1.0961
13                     EP           0.6192   0.1521
14       EarningsSurprise           0.9045   0.8006
15                     GP          13.7538   5.1291
16                   Herf           1.3655   0.3726
17              InvGro

### 5.2 Regression 1 - FF3

In [36]:
# read the merged data
post_data_2 = pd.read_csv('data/merged_data_with_ff3_3.csv')
# divide by 100
port_columns = ['port01', 'port02', 'port03', 'port04', 'port05', 'Mkt-RF', 'SMB', 'HML', 'RF']
post_data_2[port_columns] = post_data_2[port_columns] / 100
# only retain columns that are needed
post_data_2 = post_data_2[['date', 'predictor'] + port_columns]

# Separate accounting and return data
accounting_data_8 = post_data_2[post_data_2['predictor'].isin(accounting_factors)]
return_data_8 = post_data_2[post_data_2['predictor'].isin(return_factors)]

In [37]:
# Resample accounting data annually
accounting_data_resampled_8 = resample_and_calculate(accounting_data_8, 'A')

# Resample return data monthly
return_data_resampled_8 = resample_and_calculate(return_data_8, 'M')

# Calculate dynamic factor return
accounting_data_resampled_8['factor_return'] = accounting_data_resampled_8.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled_8['factor_return'] = return_data_resampled_8.apply(calculate_dynamic_factor_return, axis=1)

# Calculate alpha and t-values for accounting data
accounting_results_8 = calculate_ff3_alpha_and_tvalues(accounting_data_resampled_8, max_lag=1)
# Calculate stats for return data
return_results_8 = calculate_ff3_alpha_and_tvalues(return_data_resampled_8, max_lag=12)

# Dsiplay results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_8.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_8.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing):
                   Factor  FF3 Alpha  T-Value
0                Accruals     2.2930   1.3350
1            AnalystValue    -6.8087  -2.8550
2             AssetGrowth     3.0637   3.1404
3                      BM    -3.1577  -8.9570
4                   BPEBM    -0.3409  -0.1605
5            BookLeverage    -5.3331 -22.7514
6              CBOperProf     2.7114   0.8592
7                      CF   -10.2273  -4.1887
8         ChAssetTurnover    -1.5310  -1.7689
9                   ChNWC    -2.7247  -1.0012
10             CompEquIss     2.0206   0.8748
11  CompositeDebtIssuance    -7.3449  -6.4830
12                    EBM    -7.8155  -5.3184
13                     EP    -3.9997  -6.7482
14       EarningsSurprise     1.9416   3.1652
15                     GP     8.4199   3.6093
16                   Herf     2.0073   4.1105
17              InvGrowth     1.1586   0.3521
18                    NOA     3.8384  10.8106
19               OperProf     0.5956  

### 5.3 Regression 2 - FF5

In [38]:
# read the merged data
post_data_3 = pd.read_csv('data/merged_data_with_ff5_3.csv')
# divide by 100
port_columns = ['port01', 'port02', 'port03', 'port04', 'port05', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']
post_data_3[port_columns] = post_data_3[port_columns] / 100
# only retain columns that are needed
post_data_3 = post_data_3[['date', 'predictor'] + port_columns]

# Separate accounting and return data
accounting_data_9 = post_data_3[post_data_3['predictor'].isin(accounting_factors)]
return_data_9 = post_data_3[post_data_3['predictor'].isin(return_factors)]

In [39]:
# Resample accounting data annually
accounting_data_resampled_9 = resample_and_calculate(accounting_data_9, 'A')

# Resample return data monthly
return_data_resampled_9 = resample_and_calculate(return_data_9, 'M')

# Calculate dynamic factor return
accounting_data_resampled_9['factor_return'] = accounting_data_resampled_9.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled_9['factor_return'] = return_data_resampled_9.apply(calculate_dynamic_factor_return, axis=1)

# Calculate alpha and t-values for accounting data for FF5
accounting_results_9 = calculate_ff5_alpha_and_tvalues(accounting_data_resampled_9, max_lag=1)
# Calculate stats for return data for FF5
return_results_9 = calculate_ff5_alpha_and_tvalues(return_data_resampled_9, max_lag=12)

# Display results for FF5
print("\nAccounting factors (annual rebalancing) - FF5:")
print(accounting_results_9.round(4))

print("\nReturn factors (monthly rebalancing) - FF5:")
print(return_results_9.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing) - FF5:
                   Factor  FF5 Alpha  T-Value
0                Accruals     1.0552  15.8763
1            AnalystValue    -4.2300  -1.0039
2             AssetGrowth    -6.9771 -18.8303
3                      BM    -2.8743 -14.6652
4                   BPEBM    -0.9187  -1.2294
5            BookLeverage    -4.5181  -3.3281
6              CBOperProf    -4.6434  -3.4164
7                      CF   -21.8642  -6.9187
8         ChAssetTurnover    -4.2547  -1.2284
9                   ChNWC   -17.3889 -10.0382
10             CompEquIss    -8.2143  -2.7239
11  CompositeDebtIssuance   -17.9511  -7.4080
12                    EBM   -15.4486  -6.9946
13                     EP     0.4341   0.1699
14       EarningsSurprise    -3.0109  -1.4495
15                     GP     9.1686  62.5820
16                   Herf     3.0969   3.8416
17              InvGrowth   -29.6955 -11.4080
18                    NOA     6.1038   9.3501
19               OperProf    -4.

### 5.4 Combine all the regressions

In [40]:
# combine the results
accounting_results_post = accounting_results_7.merge(accounting_results_8, on='Factor', suffixes=('_1', '_2')).merge(accounting_results_9, on='Factor', suffixes=('', '_3'))

# combine the results
return_results_post = return_results_7.merge(return_results_8, on='Factor', suffixes=('_1', '_2')).merge(return_results_9, on='Factor', suffixes=('', '_3'))

# display the results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_post.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_post.round(4))


Accounting factors (annual rebalancing):
                   Factor  Average Returns  T-Value_1  FF3 Alpha  T-Value_2  \
0                Accruals           3.0707     2.3000     2.2930     1.3350   
1            AnalystValue          -1.3983    -0.1554    -6.8087    -2.8550   
2             AssetGrowth          -3.5381    -0.6802     3.0637     3.1404   
3                      BM          -2.3012    -0.3411    -3.1577    -8.9570   
4                   BPEBM           7.5033     1.8866    -0.3409    -0.1605   
5            BookLeverage          -1.5729    -0.7963    -5.3331   -22.7514   
6              CBOperProf           8.8805     3.6424     2.7114     0.8592   
7                      CF          -4.3853    -0.6471   -10.2273    -4.1887   
8         ChAssetTurnover          -1.3721    -0.6764    -1.5310    -1.7689   
9                   ChNWC          -0.1080    -0.0441    -2.7247    -1.0012   
10             CompEquIss           5.4558     1.4655     2.0206     0.8748   
11  Compos

In [41]:
# save the results
accounting_results_post.to_csv('accounting_factors_annual_post_2017_2023.csv', index=False)
return_results_post.to_csv('return_factors_monthly_post_2017_2023.csv', index=False)

## 6. Regression in the 1963-2016 'original' time period

* In this section, calculations and regression studies will be conducted for the timeline 1963-2016.
* The calculation steps are the same as the previous section, and the results are output.

### 6.1 Preliminary analysis

In [42]:
# Read the merged data
original_data = pd.read_csv('data/merged_data_with_ff3_4.csv')
# Divide by 100
port_columns_1 = ['port01', 'port02', 'port03', 'port04', 'port05']
original_data[port_columns_1] = original_data[port_columns_1] / 100
# only retain columns that are needed
original_data = original_data[['date', 'predictor'] + port_columns_1]

# Separate accounting and return data
accounting_data_10 = original_data[original_data['predictor'].isin(accounting_factors)]
return_data_10 = original_data[original_data['predictor'].isin(return_factors)]
# Resample accounting data annually
accounting_data_resampled_10 = resample_and_calculate(accounting_data_10, 'A')

# Resample return data monthly
return_data_resampled_10 = resample_and_calculate(return_data_10, 'M')

# Calculate dynamic factor return
accounting_data_resampled_10['factor_return'] = accounting_data_resampled_10.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled_10['factor_return'] = return_data_resampled_10.apply(calculate_dynamic_factor_return, axis=1)

# Calculate stats for accounting data
accounting_results_10 = calculate_newey_west_t_value(accounting_data_resampled_10, max_lag=1)

# Calculate stats for return data
return_results_10 = calculate_newey_west_t_value(return_data_resampled_10, max_lag=12)

# Display results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_10.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_10.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing):
                   Factor  Average Returns  T-Value
0                Accruals           2.1615   1.9205
1            AnalystValue           3.1375   1.4509
2             AssetGrowth           2.8832   2.0183
3                      BM           4.0902   2.7008
4                   BPEBM           2.1676   2.2706
5            BookLeverage          -0.7899  -0.5196
6              CBOperProf           2.8133   2.6004
7                      CF           3.3870   2.0365
8         ChAssetTurnover           2.0172   2.2672
9                   ChNWC           1.4237   1.8564
10             CompEquIss           2.7986   1.8020
11  CompositeDebtIssuance           1.6606   2.0889
12                    EBM           2.5648   1.6827
13                     EP           3.7493   2.3048
14       EarningsSurprise           1.7377   1.9847
15                     GP           2.5004   1.7160
16                   Herf           1.3938   1.0963
17              InvGro

### 6.2 Regression 1 - FF3

In [43]:
# read the merged data
original_data_2 = pd.read_csv('data/merged_data_with_ff3_4.csv')
# divide by 100
port_columns = ['port01', 'port02', 'port03', 'port04', 'port05', 'Mkt-RF', 'SMB', 'HML', 'RF']
original_data_2[port_columns] = original_data_2[port_columns] / 100
# only retain columns that are needed
original_data_2 = original_data_2[['date', 'predictor'] + port_columns]

# Separate accounting and return data
accounting_data_11 = original_data_2[original_data_2['predictor'].isin(accounting_factors)]
return_data_11 = original_data_2[original_data_2['predictor'].isin(return_factors)]

In [44]:
# Resample accounting data annually
accounting_data_resampled_11 = resample_and_calculate(accounting_data_11, 'A')

# Resample return data monthly
return_data_resampled_11 = resample_and_calculate(return_data_11, 'M')

# Calculate dynamic factor return
accounting_data_resampled_11['factor_return'] = accounting_data_resampled_11.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled_11['factor_return'] = return_data_resampled_11.apply(calculate_dynamic_factor_return, axis=1)

# Calculate alpha and t-values for accounting data
accounting_results_11 = calculate_ff3_alpha_and_tvalues(accounting_data_resampled_11, max_lag=1)
# Calculate stats for return data
return_results_11 = calculate_ff3_alpha_and_tvalues(return_data_resampled_11, max_lag=12)

# Dsiplay results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_11.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_11.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing):
                   Factor  FF3 Alpha  T-Value
0                Accruals    -1.2268  -0.8509
1            AnalystValue    -3.5375  -1.7627
2             AssetGrowth    -4.3966  -3.9094
3                      BM    -5.7981  -5.1434
4                   BPEBM    -2.6225  -1.9449
5            BookLeverage    -1.6373  -1.0017
6              CBOperProf     0.9729   1.0191
7                      CF    -4.9085  -3.1751
8         ChAssetTurnover    -3.1395  -3.1794
9                   ChNWC    -2.8150  -3.1404
10             CompEquIss     0.2845   0.1880
11  CompositeDebtIssuance    -2.9208  -2.2690
12                    EBM    -7.7047  -9.3412
13                     EP    -6.3619  -5.4565
14       EarningsSurprise    -2.3650  -3.3660
15                     GP     0.0339   0.0180
16                   Herf    -3.4756  -2.2343
17              InvGrowth    -2.9281  -2.9424
18                    NOA    -3.7608  -2.6158
19               OperProf    -0.6302  

### 6.3 Regression 2 - FF5

In [45]:
# read the merged data
original_data_3 = pd.read_csv('data/merged_data_with_ff5_4.csv')
# divide by 100
port_columns = ['port01', 'port02', 'port03', 'port04', 'port05', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']
original_data_3[port_columns] = original_data_3[port_columns] / 100
# only retain columns that are needed
original_data_3 = original_data_3[['date', 'predictor'] + port_columns]

# Separate accounting and return data
accounting_data_12 = original_data_3[original_data_3['predictor'].isin(accounting_factors)]
return_data_12 = original_data_3[original_data_3['predictor'].isin(return_factors)]

In [46]:
# Resample accounting data annually
accounting_data_resampled_12 = resample_and_calculate(accounting_data_12, 'A')

# Resample return data monthly
return_data_resampled_12 = resample_and_calculate(return_data_12, 'M')

# Calculate dynamic factor return
accounting_data_resampled_12['factor_return'] = accounting_data_resampled_12.apply(calculate_dynamic_factor_return, axis=1)
return_data_resampled_12['factor_return'] = return_data_resampled_12.apply(calculate_dynamic_factor_return, axis=1)

# Calculate alpha and t-values for accounting data for FF5
accounting_results_12 = calculate_ff5_alpha_and_tvalues(accounting_data_resampled_12, max_lag=1)
# Calculate stats for return data for FF5
return_results_12 = calculate_ff5_alpha_and_tvalues(return_data_resampled_12, max_lag=12)

# Display results for FF5
print("\nAccounting factors (annual rebalancing) - FF5:")
print(accounting_results_12.round(4))

print("\nReturn factors (monthly rebalancing) - FF5:")
print(return_results_12.round(4))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'])
  df_resampled = df.set_index('date').groupby('predictor').resample(freq).apply(compound_returns).reset_index()



Accounting factors (annual rebalancing) - FF5:
                   Factor  FF5 Alpha  T-Value
0                Accruals     0.7805   0.5325
1            AnalystValue    -9.3033  -3.8593
2             AssetGrowth    -5.7178  -4.2401
3                      BM    -4.2823  -3.6382
4                   BPEBM    -1.0727  -0.7271
5            BookLeverage     0.7618   0.4053
6              CBOperProf    -0.2815  -0.3106
7                      CF    -4.1535  -2.2421
8         ChAssetTurnover    -3.3104  -2.3520
9                   ChNWC    -2.3667  -2.3427
10             CompEquIss     0.8863   0.4077
11  CompositeDebtIssuance    -2.5527  -1.9976
12                    EBM    -7.4816  -6.6767
13                     EP    -7.2165  -5.5511
14       EarningsSurprise    -2.5270  -2.0586
15                     GP    -2.3743  -1.3106
16                   Herf    -2.1972  -1.0904
17              InvGrowth    -3.0900  -3.1815
18                    NOA    -5.6445  -3.5354
19               OperProf    -5.

### 6.4 Combine all the regressions

In [47]:
# combine the results
accounting_results_original = accounting_results_10.merge(accounting_results_11, on='Factor', suffixes=('_1', '_2')).merge(accounting_results_12, on='Factor', suffixes=('', '_3'))

# combine the results
return_results_original = return_results_10.merge(return_results_11, on='Factor', suffixes=('_1', '_2')).merge(return_results_12, on='Factor', suffixes=('', '_3'))

# display the results
print("\nAccounting factors (annual rebalancing):")
print(accounting_results_original.round(4))

print("\nReturn factors (monthly rebalancing):")
print(return_results_original.round(4))


Accounting factors (annual rebalancing):
                   Factor  Average Returns  T-Value_1  FF3 Alpha  T-Value_2  \
0                Accruals           2.1615     1.9205    -1.2268    -0.8509   
1            AnalystValue           3.1375     1.4509    -3.5375    -1.7627   
2             AssetGrowth           2.8832     2.0183    -4.3966    -3.9094   
3                      BM           4.0902     2.7008    -5.7981    -5.1434   
4                   BPEBM           2.1676     2.2706    -2.6225    -1.9449   
5            BookLeverage          -0.7899    -0.5196    -1.6373    -1.0017   
6              CBOperProf           2.8133     2.6004     0.9729     1.0191   
7                      CF           3.3870     2.0365    -4.9085    -3.1751   
8         ChAssetTurnover           2.0172     2.2672    -3.1395    -3.1794   
9                   ChNWC           1.4237     1.8564    -2.8150    -3.1404   
10             CompEquIss           2.7986     1.8020     0.2845     0.1880   
11  Compos

In [48]:
# save the results
accounting_results_original.to_csv('accounting_factors_annual_original_1963_2016.csv', index=False)
return_results_original.to_csv('return_factors_monthly_original_1963_2016.csv', index=False)