### Packages

In [None]:
import pandas as pd
import numpy as np

import statsmodels.api as sm

from tqdm import tqdm

from datetime import datetime

### Data

In [81]:
# 48 Industry Portfolios, monthly and daily

monthly = pd.read_csv('48_Industry_Portfolios.csv', skiprows=11, nrows=1182, index_col=0)
monthly.index = pd.to_datetime(monthly.index, format='%Y%m')

monthly.replace(-99.99, np.nan, inplace=True)
monthly.replace(-999, np.nan, inplace=True)

daily = pd.read_csv('48_Industry_Portfolios_Daily.csv', skiprows=9, nrows=25901, index_col=0)
daily.index = pd.to_datetime(daily.index, format='%Y%m%d')

daily.replace(-99.99, np.nan, inplace=True)
daily.replace(-999, np.nan, inplace=True)

# 48 Industry Portfolios, number of firms and avg firm size

num_firms = pd.read_csv('48_Industry_Portfolios.csv', skiprows=2587, nrows=1182, index_col=0)
num_firms.index = pd.to_datetime(num_firms.index, format='%Y%m')

num_firms.replace(-99.99, np.nan, inplace=True)
num_firms.replace(-999, np.nan, inplace=True)

firm_size = pd.read_csv('48_Industry_Portfolios.csv', skiprows=3773, nrows=1182, index_col=0)
firm_size.index = pd.to_datetime(firm_size.index, format='%Y%m')

firm_size.replace(-99.99, np.nan, inplace=True)
firm_size.replace(-999, np.nan, inplace=True)

# Sum of BE / sum of ME

b2m = pd.read_csv('48_Industry_Portfolios.csv', skiprows=4959, nrows=99, index_col=0)
b2m.index = pd.to_datetime(b2m.index, format='%Y')

# Factor data, monthly and daily

factor_m = pd.read_csv('F-F_Research_Data_Factors.csv', skiprows=3, nrows=1182, index_col=0)
factor_m.index = pd.to_datetime(factor_m.index, format='%Y%m')

factor_d = pd.read_csv('F-F_Research_Data_Factors_daily.csv', skiprows=3, nrows=25901, index_col=0)
factor_d.index = pd.to_datetime(factor_d.index, format='%Y%m%d')

factor_4 = pd.read_csv('F-F_Momentum_Factor.csv', skiprows=13, nrows=1176, index_col=0)
factor_4.index = pd.to_datetime(factor_4.index, format='%Y%m')
factor_4 = pd.concat([factor_m, factor_4], axis=1)

factor_5 = pd.read_csv('F-F_Research_Data_5_Factors_2x3.csv', skiprows=3, nrows=738, index_col=0)
factor_5.index = pd.to_datetime(factor_5.index, format='%Y%m')

# Risk-free rate
monthly_rf = factor_m[['RF']]
daily_rf = factor_d[['RF']]

### Computations

In [82]:
# Calculating market capitalization
mkt_cap = num_firms * firm_size

# Book to market ratio
if b2m.index[-1] < pd.Timestamp('2024-12-01'):
    b2m.index += pd.DateOffset(months=6)
    b2m = b2m.resample('MS').ffill()

    extra_index = pd.date_range(start=b2m.index[-1] + pd.DateOffset(months=1),
                                end='2024-12-01', freq='MS')
    if not extra_index.empty:
        extra_b2m = pd.DataFrame(np.tile(b2m.iloc[-1].values, (len(extra_index), 1)),
                                 index=extra_index, columns=b2m.columns)
        b2m = pd.concat([b2m, extra_b2m])

b2m.replace(-99.99, np.nan, inplace=True)
b2m.replace(-999, np.nan, inplace=True)

# Momentum
mom = monthly.rolling(12).mean()

In [None]:
# Beta

indices = pd.date_range("1926-07-30", "2024-12-31", freq = "ME")
betas = pd.DataFrame(index=indices, columns=daily.columns)

for industry, industry_timeseries in tqdm(daily.items()):
    # skip first 12 months since we need a 12 month lookback window
    for i in range(12, len(indices)):
        
        # start and end date
        # this ensures ranges of following form for example: Sep 1 to Aug 31 
        start = indices[i-12] + pd.Timedelta(days = 1)
        end = indices[i]
        
        X = factor_d.loc[start:end,["Mkt-RF"]]
        y = industry_timeseries.loc[start:end] - factor_d.loc[start:end, "RF"]
        
        X = sm.add_constant(X)
        model = sm.OLS(y, X, missing='none').fit()
        beta = model.params["Mkt-RF"]
        
        betas.loc[end, industry] = beta
    
# convert the indices from month ends to starts of months
betas = betas.resample("MS").last()

0it [00:00, ?it/s]

48it [02:12,  2.75s/it]


In [84]:
# Idiosyncratic risk

# Merge the daily returns with Fama-French factor data (including RF)
data = daily.join(factor_d[['RF', 'Mkt-RF', 'SMB', 'HML']], how='inner')

# Compute idiosyncratic volatility for each industry within a month
def calc_ivol(month_df):
    
    if month_df.empty:
        return pd.Series({industry: np.nan for industry in daily.columns})
    idio_vol = {}
    for industry in daily.columns:
        y = month_df[industry] - month_df['RF']  # Excess return
        X = month_df[['Mkt-RF', 'SMB', 'HML']]

        X = sm.add_constant(X)
        valid = X.notna().all(axis=1) & y.notna()

        if valid.sum() == 0:
            idio_vol[industry] = np.nan
        else:
            model = sm.OLS(y[valid], X[valid])
            results = model.fit()
            idio_vol[industry] = results.resid.std(ddof=1)  # Sample std deviation of residuals
    return pd.Series(idio_vol)

ivol_monthly = data.groupby(pd.Grouper(freq='ME')).apply(calc_ivol)
ivol_monthly.index = ivol_monthly.index.to_period('M').to_timestamp(how='start')

### Top/bottom 5 performance

In [85]:
def rank(df, n=5):
    ranks = df.rank(axis=1, ascending=False, method='first')
    positions = pd.DataFrame(0, index=df.index, columns=df.columns)
    
    positions[ranks <= n] = 1
    
    bottom_threshold = ranks.max(axis=1) - n
    positions[ranks.ge(bottom_threshold, axis=0)] = -1
    
    return positions


In [86]:
# long-short portfolio positions
pos_mkt_cap = rank(mkt_cap, 5)
pos_b2m = rank(b2m, 5)
pos_mom = rank(mom, 5)
pos_betas = rank(betas, 5)
pos_ivol = rank(ivol_monthly, 5)

In [87]:
def ls_returns(returns, pos, equal_weighted=True, mkt_cap=None):
    """
    Compute long-short portfolio returns by first calculating weights from positions
    and (optionally) market capitalization, and then applying these weights on returns.
    
    Parameters:
      returns: DataFrame
          DataFrame of asset returns (indexed by date).
      pos: DataFrame
          DataFrame of portfolio positions (1 for long, -1 for short, 0 for neutral).
      equal_weighted: bool, default True
          If True, assign equal weights to all long/short positions.
          If False, use market capitalization weights.
      mkt_cap: DataFrame, optional
          DataFrame of market capitalizations. Required if equal_weighted is False.
          
    Returns:
      Series
          Series of portfolio returns indexed by date.
    """
    if not equal_weighted and mkt_cap is None:
        raise ValueError("For value-weighted returns, 'mkt_cap' must be provided.")
    
    # Compute weights based on positions
    weights = pd.DataFrame(index=pos.index, columns=pos.columns)
    
    for date in pos.index:
        longs = pos.loc[date] == 1
        shorts = pos.loc[date] == -1
        
        if equal_weighted:
            if longs.sum() > 0:
                weights.loc[date, longs] = 1 / longs.sum()
            if shorts.sum() > 0:
                weights.loc[date, shorts] = -1 / shorts.sum()
        else:
            # Value weighted using market capitalization
            long_cap = mkt_cap.loc[date, longs]
            short_cap = mkt_cap.loc[date, shorts]
            
            if long_cap.sum() > 0:
                weights.loc[date, longs] = long_cap / long_cap.sum()
            if short_cap.sum() > 0:
                weights.loc[date, shorts] = - short_cap / short_cap.sum()
    
    # Calculate portfolio returns using weights lagged by one period
    shifted_weights = weights.shift(1)
    portfolio_returns = (shifted_weights * returns).sum(axis=1)
    
    return portfolio_returns

In [88]:
# Calculate long-short portfolio returns
ls_mkt_cap = ls_returns(monthly, pos_mkt_cap, equal_weighted=True)
ls_b2m = ls_returns(monthly, pos_b2m, equal_weighted=True)
ls_mom = ls_returns(monthly, pos_mom, equal_weighted=True)
ls_betas = ls_returns(monthly, pos_betas, equal_weighted=True)
ls_ivol = ls_returns(monthly, pos_ivol, equal_weighted=True)

# Calculate long-short portfolio returns for value-weighted portfolios
ls_mkt_cap_vw = ls_returns(monthly, pos_mkt_cap, equal_weighted=False, mkt_cap=mkt_cap)
ls_b2m_vw = ls_returns(monthly, pos_b2m, equal_weighted=False, mkt_cap=mkt_cap)
ls_mom_vw = ls_returns(monthly, pos_mom, equal_weighted=False, mkt_cap=mkt_cap)
ls_betas_vw = ls_returns(monthly, pos_betas, equal_weighted=False, mkt_cap=mkt_cap)
ls_ivol_vw = ls_returns(monthly, pos_ivol, equal_weighted=False, mkt_cap=mkt_cap)

# Collect returns in a dataframe for equal-weighted returns
ls_returns_ew = pd.DataFrame({
    'EW Market Capitalization': ls_mkt_cap,
    'EW Book-to-Market': ls_b2m,
    'EW Momentum': ls_mom,
    'EW Beta': ls_betas,
    'EW Idiosyncratic Volatility': ls_ivol},
    index = monthly.index)

# Collect returns in a dataframe for value-weighted returns
ls_returns_vw = pd.DataFrame({
    'VW Market Capitalization': ls_mkt_cap_vw,
    'VW Book-to-Market': ls_b2m_vw,
    'VW Momentum': ls_mom_vw,
    'VW Beta': ls_betas_vw,
    'VW Idiosyncratic Volatility': ls_ivol_vw},
    index = monthly.index)

returns = pd.concat([ls_returns_ew, ls_returns_vw], axis=1)

returns.index.name = 'Date'
returns = returns.apply(pd.to_numeric, errors='coerce')

### Performance Comparison

In [89]:
# Functions for Sharpe ratio, and alpha according to Fama-French models
def avg_return(returns):
    return returns.mean() * 12

def sharpe_ratio(returns, risk_free_rate):
    excess_returns = returns - risk_free_rate
    return np.sqrt(12) * excess_returns.mean() / excess_returns.std()

def alpha_ff(returns, factors):
    X = sm.add_constant(factors)
    model = sm.OLS(returns, X).fit()
    return 12 * model.params[0]

def alpha_p_val(returns, factors):
    X = sm.add_constant(factors)
    model = sm.OLS(returns, X).fit()
    return model.pvalues[0]

In [None]:
start_date = [datetime(year = 1950, month = 1, day = 1), datetime(year = 1990, month = 1, day = 1), datetime(year = 2000, month = 1, day = 1)]
results = [np.nan, np.nan, np.nan]

for i in range(len(start_date)):
    
    start = start_date[i]
    # select current period for returns
    mask = returns.index >= start
    current_portfolio = returns[mask]
    
    # select current period for alpha
    current_ff3 = factor_m.loc[factor_m.index >= start, ["Mkt-RF", "SMB", "HML", "RF"]]
    current_factor_4 = factor_4.loc[factor_4.index >= start, ["Mkt-RF", "SMB", "HML", "Mom   ", "RF"]]
    current_factor_5 = factor_5.loc[(factor_5.index >= max(start, datetime(1963, 7, 1))), ["Mkt-RF", "SMB", "RMW", "CMA", "RF"]]
    
    mean_rets= current_portfolio.apply(avg_return, axis = 0)
    sharpe_ratios = current_portfolio.apply(lambda x: sharpe_ratio(x, factor_m["RF"]), axis = 0)

    alpha_ff3 = current_portfolio.apply(lambda x: alpha_ff(x - current_ff3["RF"], current_ff3[["Mkt-RF", "SMB", "HML"]]), axis = 0)
    p_vals_ff3 = current_portfolio.apply(lambda x: alpha_p_val(x - current_ff3["RF"], current_ff3[["Mkt-RF", "SMB", "HML"]]), axis = 0)

    alpha_factor_4 = current_portfolio.apply(lambda x: alpha_ff(x - current_factor_4["RF"], current_factor_4[["Mkt-RF", "SMB", "HML", "Mom   "]]), axis = 0)
    p_vals_factor_4 = current_portfolio.apply(lambda x: alpha_p_val(x - current_ff3["RF"], current_ff3[["Mkt-RF", "SMB", "HML"]]), axis = 0)
    
    alpha_factor_5 = current_portfolio.loc[(current_portfolio.index >= datetime(1963, 7, 1))].apply(lambda x: alpha_ff(x - current_factor_5["RF"], current_factor_5[["Mkt-RF", "SMB", "RMW", "CMA"]]), axis = 0)
    p_vals_factor_5 = current_portfolio.apply(lambda x: alpha_p_val(x - current_ff3["RF"], current_ff3[["Mkt-RF", "SMB", "HML"]]), axis = 0)

    results[i] = pd.concat([mean_rets, sharpe_ratios, alpha_ff3, p_vals_ff3, alpha_factor_4, p_vals_factor_4, alpha_factor_5, p_vals_factor_5], axis = 1)
    results[i].columns = ["Mean Return", "Sharpe Ratio", "Alpha 3 Factor Fama French", "P-Value of Alpha 3 Factor Fama French", "Alpha 4 Factor Fama French", "P-Value of Alpha 4 Factor Fama French", "Alpha 5 Factor Fama French", "P-Value of Alpha 5 Factor Fama French"]
    

In [None]:
print("Results for period starting in 1950 (5 factor data is only available from 1963):")
results[0]


Results for period starting in 1950 (5 factor data is only available from 1963):


Unnamed: 0,Mean Return,Sharpe Ratio,Alpha 3 Factor Fama French,P-Value of Alpha 3 Factor Fama French,Alpha 4 Factor Fama French,P-Value of Alpha 4 Factor Fama French,Alpha 5 Factor Fama French,P-Value of Alpha 5 Factor Fama French
EW Market Capitalization,-1.649533,-0.378802,-2.218466,0.1175752,-3.600854,0.1175752,0.847325,0.1175752
EW Book-to-Market,1.076058,-0.190996,-7.788765,4.583592e-09,-5.01396,4.583592e-09,-7.990233,4.583592e-09
EW Momentum,12.372484,0.448771,10.791959,8.291191e-07,-0.27166,8.291191e-07,9.747585,8.291191e-07
EW Beta,1.30136,-0.136736,-7.225341,7.548546e-05,-5.441252,7.548546e-05,-2.222862,7.548546e-05
EW Idiosyncratic Volatility,-1.484431,-0.383372,-6.867978,1.96482e-05,-6.222562,1.96482e-05,-7.312824,1.96482e-05
VW Market Capitalization,-1.845469,-0.412008,-2.860184,0.038463,-4.393855,0.038463,0.135803,0.038463
VW Book-to-Market,0.399906,-0.220531,-8.702763,8.946047e-12,-6.332006,8.946047e-12,-9.648713,8.946047e-12
VW Momentum,11.479354,0.3705,9.659394,4.742945e-05,-2.772649,4.742945e-05,6.380648,4.742945e-05
VW Beta,1.048601,-0.142631,-7.117604,0.0001709319,-5.872158,0.0001709319,-1.989581,0.0001709319
VW Idiosyncratic Volatility,-0.927675,-0.287514,-6.679166,0.0005292403,-5.296892,0.0005292403,-6.463775,0.0005292403


In [None]:
print("Results for period starting in 1990:")
results[1]


Results for period starting in 1990:


Unnamed: 0,Mean Return,Sharpe Ratio,Alpha 3 Factor Fama French,P-Value of Alpha 3 Factor Fama French,Alpha 4 Factor Fama French,P-Value of Alpha 4 Factor Fama French,Alpha 5 Factor Fama French,P-Value of Alpha 5 Factor Fama French
EW Market Capitalization,-1.015171,-0.219213,-1.793794,0.441654,-3.528773,0.441654,3.355298,0.441654
EW Book-to-Market,0.568448,-0.119504,-5.687184,0.011478,-3.191082,0.011478,-8.469701,0.011478
EW Momentum,11.258781,0.393157,11.573554,0.001639,2.332412,0.001639,12.131624,0.001639
EW Beta,6.083276,0.145722,-3.926945,0.201108,-1.974117,0.201108,1.615939,0.201108
EW Idiosyncratic Volatility,-1.840238,-0.273901,-5.532125,0.046099,-4.627262,0.046099,-5.849842,0.046099
VW Market Capitalization,-1.243559,-0.254322,-2.619118,0.228557,-4.408355,0.228557,2.348187,0.228557
VW Book-to-Market,-0.571623,-0.168535,-6.386812,0.002658,-4.178799,0.002658,-9.688912,0.002658
VW Momentum,7.917167,0.222223,8.066373,0.042887,-2.246946,0.042887,8.249234,0.042887
VW Beta,3.116203,0.01968,-6.477242,0.036846,-4.560241,0.036846,-0.316401,0.036846
VW Idiosyncratic Volatility,-2.182359,-0.240259,-6.902286,0.041843,-4.637361,0.041843,-6.665494,0.041843


In [100]:
print("Results for period starting in 2000:")
results[2]

Results for period starting in 2000:


Unnamed: 0,Mean Return,Sharpe Ratio,Alpha 3 Factor Fama French,P-Value of Alpha 3 Factor Fama French,Alpha 4 Factor Fama French,P-Value of Alpha 4 Factor Fama French,Alpha 5 Factor Fama French,P-Value of Alpha 5 Factor Fama French
EW Market Capitalization,-4.94056,-0.370075,-4.059576,0.166922,-5.125386,0.166922,2.403355,0.166922
EW Book-to-Market,2.679773,0.048824,-3.445735,0.223267,-1.782463,0.223267,-7.445943,0.223267
EW Momentum,7.829627,0.25512,9.136745,0.04722,3.093582,0.04722,8.572504,0.04722
EW Beta,-1.466027,-0.128419,-10.384252,0.003737,-8.621385,0.003737,-4.730704,0.003737
EW Idiosyncratic Volatility,1.91688,0.008467,-1.330588,0.702981,-0.987842,0.702981,-2.040561,0.702981
VW Market Capitalization,-4.316541,-0.370682,-4.239298,0.114443,-5.326561,0.114443,1.989654,0.114443
VW Book-to-Market,1.995662,0.011256,-4.216542,0.11334,-2.781081,0.11334,-8.449673,0.11334
VW Momentum,3.530168,0.069009,4.756349,0.337931,-1.862815,0.337931,3.907416,0.337931
VW Beta,-3.808925,-0.220169,-11.82823,0.002199,-10.110171,0.002199,-5.03016,0.002199
VW Idiosyncratic Volatility,1.742912,-0.00117,-2.514566,0.548329,-1.466825,0.548329,-1.984067,0.548329
