In [5]:
import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt
from itertools import combinations

In [6]:
table2_data = pd.read_csv("data/merged_data.csv")
# only need the rows with the data, first 4 rows and last 1 roe are not needed
table2_ff3 = pd.read_csv("data/FF3_daily.csv", skiprows=4, skipfooter=1, engine='python')
table2_ff5 = pd.read_csv("data/FF5_daily.csv", skiprows=4, skipfooter=1, engine='python')

# Rename columns to ensure 'date' column is correctly named
table2_ff3.columns = ['date', 'Mkt-RF', 'SMB', 'HML', 'RF']
table2_ff5.columns = ['date', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']

table2_data['date'] = pd.to_datetime(table2_data['date'])
table2_ff3['date'] = pd.to_datetime(table2_ff3['date'], format='%Y%m%d')
table2_ff5['date'] = pd.to_datetime(table2_ff5['date'], format='%Y%m%d')

In [None]:
# Time frame for the data
start_date = '1999-07-01'
end_date = '2023-12-31'
table2_data = table2_data[(table2_data['date'] >= start_date) & (table2_data['date'] <= end_date)]

# Get the remaining factors belonging to the time frame
unique_predictors = table2_data['predictor'].unique()
print(f'The list of predictors is: \n{unique_predictors}')
print(f'The number of predictors we selected are: {len(unique_predictors)}')

The list of predictors is: 
['Accruals' 'AnalystValue' 'AssetGrowth' 'BM' 'BPEBM' 'Beta'
 'BetaLiquidityPS' 'BookLeverage' 'CBOperProf' 'CF' 'CPVolSpread'
 'ChAssetTurnover' 'ChNWC' 'CompEquIss' 'CompositeDebtIssuance'
 'Coskewness' 'CustomerMomentum' 'DolVol' 'EBM' 'EP' 'EarningsSurprise'
 'FirmAge' 'Frontier' 'GP' 'Herf' 'High52' 'IdioVol3F' 'Illiquidity'
 'IntMom' 'InvGrowth' 'LRreversal' 'MaxRet' 'Mom12m' 'Mom6m' 'Mom6mJunk'
 'MomOffSeason' 'MomOffSeason06YrPlus' 'MomOffSeason11YrPlus'
 'MomOffSeason16YrPlus' 'MomSeason' 'MomSeason06YrPlus'
 'MomSeason11YrPlus' 'MomSeason16YrPlus' 'MomSeasonShort' 'NOA' 'OperProf'
 'PS' 'RDAbility' 'RIVolSpread' 'ResidualMomentum' 'RoE' 'SP' 'STreversal'
 'ShareIss1Y' 'ShareIss5Y' 'Size' 'VolMkt' 'VolSD' 'XFIN' 'cfp' 'roaq'
 'std_turn']
The number of predictors we selected are: 62


In [8]:
# first get the monthly returns for each stock, sum of the daily returns by month
table2_data['month'] = table2_data['date'].dt.month
table2_data['year'] = table2_data['date'].dt.year
table2_data['month_year'] = table2_data['date'].dt.to_period('M')
# sum the daily returns by month
table2_data_monthly = table2_data.groupby(['month_year', 'predictor'])[['port01','port02','port03','port04','port05','portLS']].sum().reset_index()
table2_data_monthly['date'] = table2_data_monthly['month_year'].dt.to_timestamp()
table2_data_monthly = table2_data_monthly.drop(['month_year'], axis=1)
table2_data_monthly = table2_data_monthly.sort_values(by=['predictor','date'])
table2_data_monthly = table2_data_monthly.reset_index(drop=True)
# put date in first column
cols = table2_data_monthly.columns.tolist()
cols = cols[-1:] + cols[:-1]
table2_data_monthly = table2_data_monthly[cols]
table2_data_monthly.set_index('date', inplace=True)
# all the data divide by 100
table2_data_monthly[['port01','port02','port03','port04','port05','portLS']] = table2_data_monthly[['port01','port02','port03','port04','port05','portLS']].div(100)
table2_data_monthly

Unnamed: 0_level_0,predictor,port01,port02,port03,port04,port05,portLS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1999-07-01,Accruals,-0.042332,-0.033650,-0.024488,-0.026960,-0.020499,0.021833
1999-08-01,Accruals,-0.041417,-0.026728,-0.004585,-0.000784,0.009333,0.050751
1999-09-01,Accruals,-0.007618,-0.058277,-0.029534,-0.020111,-0.003396,0.004222
1999-10-01,Accruals,0.054138,0.078025,0.054857,0.054379,0.063649,0.009511
1999-11-01,Accruals,0.020576,0.023920,0.005167,0.038106,0.046697,0.026122
...,...,...,...,...,...,...,...
2023-08-01,std_turn,-0.188815,-0.068048,-0.025704,-0.032045,-0.022582,0.166233
2023-09-01,std_turn,-0.067067,-0.050836,-0.034021,-0.052222,-0.044330,0.022738
2023-10-01,std_turn,-0.138222,-0.100143,-0.061419,-0.033710,-0.044634,0.093587
2023-11-01,std_turn,0.103273,0.050170,0.109865,0.095393,0.092045,-0.011228


## Table 2
### Panel A

In [9]:
# get the data for table2 Panel A
# Pivot the data to have each predictor's portLS as a column
factor_returns = table2_data_monthly.pivot(columns='predictor', values='portLS')

In [10]:
# Define the target date range
start_date = '2000-01-01'
end_date = '2023-12-31'
factor_returns = factor_returns.loc[start_date:end_date]

# Step 2: Define Factor Momentum Strategy Function
def factor_momentum_strategy(returns, L, H, n_long_short):

    strategy_returns = pd.Series(index=returns.index, dtype=float)
    N = returns.shape[1]  # Fixed at 62
    
    for t in range(L + H - 1, len(returns)):
        # Formation period: t-L to t-1 (using full data history up to t)
        past_returns = returns.iloc[:t].iloc[-L:].mean()  # Average return over last L months
        # Rank factors
        ranked_factors = past_returns.sort_values(ascending=False)
        # Select top and bottom n_long_short factors
        long_factors = ranked_factors.index[:n_long_short]
        short_factors = ranked_factors.index[-n_long_short:]
        
        # Holding period: t to t+H-1
        if H == 1:
            # For H=1, take the next month's return
            long_returns = returns.iloc[t][long_factors].mean()
            short_returns = returns.iloc[t][short_factors].mean()
            strategy_returns.iloc[t] = long_returns - short_returns
        else:
            # For H>1, average over H sub-strategies (Jegadeesh and Titman 1993)
            holding_returns = []
            for h in range(H):
                if t-h >= 0 and t-h+L <= len(returns):
                    past_h = returns.iloc[:t-h].iloc[-L:].mean()
                    ranked_h = past_h.sort_values(ascending=False)
                    long_h = ranked_h.index[:n_long_short]
                    short_h = ranked_h.index[-n_long_short:]
                    if t+H-1 < len(returns):
                        future_h = returns.iloc[t:t+H][long_h].mean().mean() - returns.iloc[t:t+H][short_h].mean().mean()
                        holding_returns.append(future_h)
            strategy_returns.iloc[t] = np.mean(holding_returns) if holding_returns else np.nan
    
    return strategy_returns

In [11]:
# Step 3: Compute Strategies
N = 62  # Fixed number of factors
n = max(round(3/20 * N), 1)  # n = 9

# L=1, H=1
strat_1_1 = factor_momentum_strategy(factor_returns, L=1, H=1, n_long_short=n)
# L=6, H=6
strat_6_6 = factor_momentum_strategy(factor_returns, L=6, H=6, n_long_short=n)

# Step 4: Panel A - Compute Annualized Returns, Std Dev, and t-values
def compute_statistics(returns, H=1):
    returns = returns.dropna()
    # Annualized return: Monthly mean * 12
    ann_return = returns.mean() * 12 * 100  # In percentage
    # Annualized standard deviation: Monthly std * sqrt(12)
    ann_std = returns.std() * np.sqrt(12) * 100  # In percentage
    # t-value: Rough adjustment for overlap
    effective_n = len(returns) if H == 1 else len(returns) / H
    t_value = returns.mean() / (returns.std() / np.sqrt(effective_n))
    return ann_return, ann_std, t_value

ann_ret_1_1, ann_std_1_1, t_val_1_1 = compute_statistics(strat_1_1, H=1)
ann_ret_6_6, ann_std_6_6, t_val_6_6 = compute_statistics(strat_6_6, H=6)

# Store results
results = {
    'L=1, H=1': {'Return': ann_ret_1_1, 'Std': ann_std_1_1, 't-value': t_val_1_1},
    'L=6, H=6': {'Return': ann_ret_6_6, 'Std': ann_std_6_6, 't-value': t_val_6_6}
}

# Print Panel A Results
print("\nPanel A: Factor Momentum Strategy Returns")
for strategy in ['L=1, H=1', 'L=6, H=6']:
    stats = results[strategy]
    print(f"{strategy}: Return = {stats['Return']:.2f}%, Std = {stats['Std']:.2f}%, t-value = {stats['t-value']:.2f}")


Panel A: Factor Momentum Strategy Returns
L=1, H=1: Return = 5.66%, Std = 25.49%, t-value = 1.09
L=6, H=6: Return = 1.52%, Std = 6.63%, t-value = 0.45


### Panel B

In [None]:
start_date = '2000-01-01'
end_date = '2023-12-31'
table2_ff3 = table2_ff3[(table2_ff3['date'] >= start_date) & (table2_ff3['date'] <= end_date)]
table2_ff5 = table2_ff5[(table2_ff5['date'] >= start_date) & (table2_ff5['date'] <= end_date)]


Unnamed: 0,date,Mkt-RF,SMB,HML,RMW,CMA,RF
9063,1999-07-01,0.55,-0.82,-0.34,-0.20,0.57,0.018
9064,1999-07-02,0.73,-0.03,-0.58,-0.06,-0.28,0.018
9065,1999-07-06,-0.08,0.28,-0.06,-0.51,-0.15,0.018
9066,1999-07-07,0.22,-0.64,-0.30,0.36,0.56,0.018
9067,1999-07-08,0.03,0.43,-0.42,-0.79,-0.27,0.018
...,...,...,...,...,...,...,...
15223,2023-12-22,0.20,0.61,0.10,-0.65,0.20,0.021
15224,2023-12-26,0.48,0.83,0.43,-0.33,-0.16,0.021
15225,2023-12-27,0.16,0.17,0.10,-0.32,-0.14,0.021
15226,2023-12-28,-0.01,-0.38,0.02,-0.32,0.15,0.021
