In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt
from itertools import combinations
from datetime import datetime

In [2]:
table2_data = pd.read_csv("data/merged_data.csv")
# only need the rows with the data, first 4 rows and last 1 roe are not needed
table2_ff3 = pd.read_csv("data/FF3_daily.csv", skiprows=4, skipfooter=1, engine='python')
table2_ff5 = pd.read_csv("data/FF5_daily.csv", skiprows=4, skipfooter=1, engine='python')

# Rename columns to ensure 'date' column is correctly named
table2_ff3.columns = ['date', 'Mkt-RF', 'SMB', 'HML', 'RF']
table2_ff5.columns = ['date', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']

table2_data['date'] = pd.to_datetime(table2_data['date'])
table2_ff3['date'] = pd.to_datetime(table2_ff3['date'], format='%Y%m%d')
table2_ff5['date'] = pd.to_datetime(table2_ff5['date'], format='%Y%m%d')

In [3]:
# Time frame for the data
start_date = '1999-07-01'
end_date = '2023-12-31'
table2_data = table2_data[(table2_data['date'] >= start_date) & (table2_data['date'] <= end_date)]

# Get the remaining factors belonging to the time frame
unique_predictors = table2_data['predictor'].unique()
print(f'The list of predictors is: \n{unique_predictors}')
print(f'The number of predictors we selected are: {len(unique_predictors)}')

The list of predictors is: 
['Accruals' 'AnalystValue' 'AssetGrowth' 'BM' 'BPEBM' 'Beta'
 'BetaLiquidityPS' 'BookLeverage' 'CBOperProf' 'CF' 'CPVolSpread'
 'ChAssetTurnover' 'ChNWC' 'CompEquIss' 'CompositeDebtIssuance'
 'Coskewness' 'CustomerMomentum' 'DolVol' 'EBM' 'EP' 'EarningsSurprise'
 'FirmAge' 'Frontier' 'GP' 'Herf' 'High52' 'IdioVol3F' 'Illiquidity'
 'IntMom' 'InvGrowth' 'LRreversal' 'MaxRet' 'Mom12m' 'Mom6m' 'Mom6mJunk'
 'MomOffSeason' 'MomOffSeason06YrPlus' 'MomOffSeason11YrPlus'
 'MomOffSeason16YrPlus' 'MomSeason' 'MomSeason06YrPlus'
 'MomSeason11YrPlus' 'MomSeason16YrPlus' 'MomSeasonShort' 'NOA' 'OperProf'
 'PS' 'RDAbility' 'RIVolSpread' 'ResidualMomentum' 'RoE' 'SP' 'STreversal'
 'ShareIss1Y' 'ShareIss5Y' 'Size' 'VolMkt' 'VolSD' 'XFIN' 'cfp' 'roaq'
 'std_turn']
The number of predictors we selected are: 62


In [4]:
# first get the monthly returns for each stock, sum of the daily returns by month
table2_data['month'] = table2_data['date'].dt.month
table2_data['year'] = table2_data['date'].dt.year
table2_data['month_year'] = table2_data['date'].dt.to_period('M')
# sum the daily returns by month
table2_data_monthly = table2_data.groupby(['month_year', 'predictor'])[['port01','port02','port03','port04','port05','portLS']].sum().reset_index()
table2_data_monthly['date'] = table2_data_monthly['month_year'].dt.to_timestamp()
table2_data_monthly = table2_data_monthly.drop(['month_year'], axis=1)
table2_data_monthly = table2_data_monthly.sort_values(by=['predictor','date'])
table2_data_monthly = table2_data_monthly.reset_index(drop=True)
# put date in first column
cols = table2_data_monthly.columns.tolist()
cols = cols[-1:] + cols[:-1]
table2_data_monthly = table2_data_monthly[cols]
table2_data_monthly.set_index('date', inplace=True)
# divide 100
table2_data_monthly[['port01', 'port02', 'port03', 'port04', 'port05', 'portLS']] = table2_data_monthly[['port01', 'port02', 'port03', 'port04', 'port05', 'portLS']] / 100
table2_data_monthly['portLS2'] = table2_data_monthly['port05'] + table2_data_monthly['port04'] - table2_data_monthly['port02'] - table2_data_monthly['port01']
table2_data_monthly

Unnamed: 0_level_0,predictor,port01,port02,port03,port04,port05,portLS,portLS2
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1999-07-01,Accruals,-0.042332,-0.033650,-0.024488,-0.026960,-0.020499,0.021833,0.028523
1999-08-01,Accruals,-0.041417,-0.026728,-0.004585,-0.000784,0.009333,0.050751,0.076694
1999-09-01,Accruals,-0.007618,-0.058277,-0.029534,-0.020111,-0.003396,0.004222,0.042388
1999-10-01,Accruals,0.054138,0.078025,0.054857,0.054379,0.063649,0.009511,-0.014135
1999-11-01,Accruals,0.020576,0.023920,0.005167,0.038106,0.046697,0.026122,0.040308
...,...,...,...,...,...,...,...,...
2023-08-01,std_turn,-0.188815,-0.068048,-0.025704,-0.032045,-0.022582,0.166233,0.202236
2023-09-01,std_turn,-0.067067,-0.050836,-0.034021,-0.052222,-0.044330,0.022738,0.021352
2023-10-01,std_turn,-0.138222,-0.100143,-0.061419,-0.033710,-0.044634,0.093587,0.160021
2023-11-01,std_turn,0.103273,0.050170,0.109865,0.095393,0.092045,-0.011228,0.033995


## Table 2
### Panel A

In [None]:
# get the data for table2 Panel A
# Pivot the data to have each predictor's portLS as a column
table2_LS1 = table2_data_monthly.pivot(columns='predictor', values='portLS') # long top 20%, short bottom 20%
table2_LS2 = table2_data_monthly.pivot(columns='predictor', values='portLS2') # Long top 40%, short bottom 40%

In [6]:

# Step 2: Define Factor Momentum Strategy Function
def factor_momentum_strategy(returns, L, H, n_long_short):

    strategy_returns = pd.Series(index=returns.index, dtype=float)
    N = returns.shape[1]  
    for t in range(L, len(returns)):
        # Formation period: t-L to t-1 (using full data history up to t)
        past_returns = returns.iloc[t-L:t].mean()  # Average return over last L months
        # Rank factors
        ranked_factors = past_returns.sort_values(ascending=False)
        # Select top and bottom n_long_short factors
        long_factors = ranked_factors.index[:n_long_short]
        short_factors = ranked_factors.index[-n_long_short:]
        
        # Holding period: t to t+H-1
        if H == 1:
            # For H=1, take the next month's return
            long_returns = returns.iloc[t][long_factors].mean()
            short_returns = returns.iloc[t][short_factors].mean()
            strategy_returns.iloc[t] = long_returns - short_returns
        else:
            # For H>1, average over H sub-strategies (Jegadeesh and Titman 1993)
            holding_returns = []
            for h in range(H):
                if t-h >= 0 and t-h+L <= len(returns):
                    past_h = returns.iloc[:t-h].iloc[-L:].mean()
                    ranked_h = past_h.sort_values(ascending=False)
                    long_h = ranked_h.index[:n_long_short]
                    short_h = ranked_h.index[-n_long_short:]
                    if t+H-1 < len(returns):
                        future_h = returns.iloc[t:t+H][long_h].mean().mean() - returns.iloc[t:t+H][short_h].mean().mean()
                        holding_returns.append(future_h)
            strategy_returns.iloc[t] = np.mean(holding_returns) if holding_returns else np.nan
    
    return strategy_returns

In [7]:
# Step 3: Compute Strategies
N = len(table2_data_monthly['predictor'].unique())  # Fixed number of factors
n = max(round(3/20 * N), 1)  # n = 9

# Long TOP 20% and Short BOTTOM 20% of factors
# L=1, H=1
LS1_1_1 = factor_momentum_strategy(table2_LS1, L=1, H=1, n_long_short=n)
# L=6, H=6
LS1_6_6 = factor_momentum_strategy(table2_LS1, L=6, H=6, n_long_short=n)

# Long TOP 40% and Short BOTTOM 40% of factors
# L=1, H=1
LS2_1_1 = factor_momentum_strategy(table2_LS2, L=1, H=1, n_long_short=n)
# L=6, H=6
LS2_6_6 = factor_momentum_strategy(table2_LS2, L=6, H=6, n_long_short=n)


In [17]:
# Step 4: Panel A - Compute Annualized Returns, Std Dev, and t-values
def compute_statistics(returns, H):
    returns = returns.dropna()
    # Annualized return: Monthly mean * 12
    ann_return = returns.mean() * 12 * 100  # In percentage
    # Annualized standard deviation: Monthly std * sqrt(12)
    ann_std = returns.std() * np.sqrt(12) *100 
    # t-value
    t_value = returns.mean() / ( (returns.std() / np.sqrt(len(returns) )))
    return ann_return, ann_std, t_value

In [19]:

def filter_returns(returns, start_date, end_date):
    return returns[(returns.index >= start_date) & (returns.index <= end_date)].dropna()

results = []
strategies = {
    'LS1_1_1': {'L': 1, 'H': 1, 'Factor': 'LS1'},
    'LS1_6_6': {'L': 6, 'H': 6, 'Factor': 'LS1'},
    'LS2_1_1': {'L': 1, 'H': 1, 'Factor': 'LS2'},
    'LS2_6_6': {'L': 6, 'H': 6, 'Factor': 'LS2'}
}

time_periods = [
    ('all', datetime(2000, 1, 1), datetime(2023, 12, 31)),
    ('sample', datetime(2000, 1, 1), datetime(2016, 12, 31)),
    ('post', datetime(2017, 1, 1), datetime(2023, 12, 31))
]

for period_name, start_date, end_date in time_periods:
    for strategy_name, params in strategies.items():
        L, H, factor = params['L'], params['H'], params['Factor']
        filtered_returns = filter_returns(globals()[strategy_name], start_date, end_date)
        ann_return, ann_std, t_value = compute_statistics(filtered_returns, H)
        results.append({
            'Factor': factor,
            'Formation period': L,
            'Holding period': H,
            'Period': period_name,
            'Annualized return': ann_return,
            'Standard deviation': ann_std,
            't-value': t_value
        })


table2_pa_results = pd.DataFrame(results)
table2_pa_results['Period'] = pd.Categorical(table2_pa_results['Period'], categories=['all', 'sample', 'post'], ordered=True)
table2_pa_results = table2_pa_results.sort_values(by=['Period', 'Factor', 'Formation period', 'Holding period'])
table2_pa_results = table2_pa_results[['Period', 'Factor', 'Formation period', 'Holding period', 'Annualized return', 'Standard deviation', 't-value']]
table2_pa_results = table2_pa_results.round(2) 

print("Panel A: Annualized percent returns and standard deviations")
table2_pa_results

Panel A: Annualized percent returns and standard deviations


Unnamed: 0,Period,Factor,Formation period,Holding period,Annualized return,Standard deviation,t-value
0,all,LS1,1,1,5.64,25.44,1.09
1,all,LS1,6,6,0.58,6.99,0.4
2,all,LS2,1,1,12.12,38.69,1.54
3,all,LS2,6,6,1.94,11.32,0.83
4,sample,LS1,1,1,5.25,27.5,0.79
5,sample,LS1,6,6,0.98,7.46,0.54
6,sample,LS2,1,1,11.46,41.51,1.14
7,sample,LS2,6,6,1.94,11.97,0.67
8,post,LS1,1,1,6.61,19.69,0.89
9,post,LS1,6,6,-0.45,5.6,-0.21


### Panel B

In [10]:
start_date = '2000-01-01'
end_date = '2023-12-31'
table2_ff3 = table2_ff3[(table2_ff3['date'] >= start_date) & (table2_ff3['date'] <= end_date)]
table2_ff5 = table2_ff5[(table2_ff5['date'] >= start_date) & (table2_ff5['date'] <= end_date)]
