In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from datetime import datetime

In [2]:
crsp_data = pd.read_csv("data/cleaned_crsp.csv")
crsp_data['date'] = pd.to_datetime(crsp_data['date'])
crsp_data['RET'] = crsp_data['RET'].str.replace('C', '')
crsp_data['RET'] = pd.to_numeric(crsp_data['RET'], errors='coerce')

In [3]:
# Calculate market value of equity (ME) for each stock
# crsp_data['mkt_cap'] = np.abs(crsp_data['PRC']) * crsp_data['SHROUT']

start_date = '1926-01-01'
end_date = '2020-12-31'

# get cumulative returns
def get_cum(stock, start_date):
    cum_ret = []
    stock_yr = pd.date_range(start_date, periods=2020-1926+1, freq="Y").dt.year
    for y in len(stock_yr):
        ind = stock['date'].startswith(stock_yr[y])
        cum_ret.append(sum(stock.loc[ind]['RET']))
    stock['cum_ret'] = cum_ret
    return cum_ret


# Define a function to assign deciles based on market cap
def assign_deciles(data):
    data['decile'] = pd.qcut(data['cum_ret'], 10, labels=False) + 1
    return data

# Apply the function to the data
crsp_data = crsp_data.groupby('date').apply(assign_deciles).reset_index(drop=True)

# get equal- and value-weighted portfolios
def calculate_portfolio_returns(data):
    ew_ret = data['RET'].mean()
    vw_ret = np.average(data['RET'], weights=data['cum_ret'])
    return pd.Series({'ew_ret': ew_ret, 'vw_ret': vw_ret})

# Group the data by date and decile and calculate the returns for each group
portfolio_returns = crsp_data.groupby(['date', 'decile']).apply(calculate_portfolio_returns).reset_index()

# Pivot the data to get a wide format with deciles as columns
ew_returns = portfolio_returns.pivot_table(values='ew_ret', index='date', columns='decile')
vw_returns = portfolio_returns.pivot_table(values='vw_ret', index='date', columns='decile')


In [4]:
# Calculate mean returns for each decile
mean_ew_returns = ew_returns.mean()
mean_vw_returns = vw_returns.mean()

# Check if the returns are monotonic
is_monotonic_ew = mean_ew_returns.is_monotonic_decreasing
is_monotonic_vw = mean_vw_returns.is_monotonic_decreasing

print("Mean equal-weighted returns:")
print(mean_ew_returns)
print("Is monotonic:", is_monotonic_ew)
print("\nMean value-weighted returns:")
print(mean_vw_returns)
print("Is monotonic:", is_monotonic_vw)

Mean equal-weighted returns:
decile
1.0    -0.004884
2.0     0.011973
3.0     0.013357
4.0     0.015108
5.0     0.017202
6.0     0.017586
7.0     0.017644
8.0     0.017627
9.0     0.016295
10.0    0.014873
dtype: float64
Is monotonic: False

Mean value-weighted returns:
decile
1.0     0.005202
2.0     0.014845
3.0     0.017260
4.0     0.018287
5.0     0.019333
6.0     0.018718
7.0     0.016182
8.0     0.015942
9.0     0.014830
10.0    0.012607
dtype: float64
Is monotonic: False


In [5]:
ew_smb = ew_returns[1] - ew_returns[10]
vw_smb = vw_returns[1] - vw_returns[10]

# Calculate mean returns
mean_ew_smb = ew_smb.mean()
mean_vw_smb = vw_smb.mean()

# Calculate volatility
vol_ew_smb = ew_smb.std()
vol_vw_smb = vw_smb.std()

# Calculate Sharpe ratio (assuming a risk-free rate of 0)
sharpe_ew_smb = mean_ew_smb / vol_ew_smb
sharpe_vw_smb = mean_vw_smb / vol_vw_smb

print("Equal-weighted SMB portfolio:")
print(f"Mean: {mean_ew_smb:.6f}")
print(f"Volatility: {vol_ew_smb:.6f}")
print(f"Sharpe Ratio: {sharpe_ew_smb:.6f}")

print("\nValue-weighted SMB portfolio:")
print(f"Mean: {mean_vw_smb:.6f}")
print(f"Volatility: {vol_vw_smb:.6f}")
print(f"Sharpe Ratio: {sharpe_vw_smb:.6f}")

Equal-weighted SMB portfolio:
Mean: -0.019758
Volatility: 0.088860
Sharpe Ratio: -0.222348

Value-weighted SMB portfolio:
Mean: -0.004710
Volatility: 0.097155
Sharpe Ratio: -0.048481


In [6]:
import pandas_datareader as pdr

start_date = '1926-01-01'
end_date = '2020-12-31'

# Download Fama-French 3-factor data
ff3_factors = pdr.get_data_famafrench('F-F_Research_Data_Factors', start=start_date, end=end_date)[0]
ff3_factors = ff3_factors / 100  # Convert to decimal
ff3_factors.index = ff3_factors.index.to_timestamp('M')  # Convert index to monthly-end dates

# FF5 - FIX DATA SOURCE
ff5_factors = pdr.get_data_famafrench('F-F_Research_Data_5_Factors_2x3', start=start_date, end=end_date)[0]
ff5_factors = ff5_factors / 100  # Convert to decimal
ff5_factors.index = ff5_factors.index.to_timestamp('M')  # Convert index to monthly-end dates

In [7]:
ff5_factors

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1963-07-31,-0.0039,-0.0041,-0.0097,0.0068,-0.0118,0.0027
1963-08-31,0.0507,-0.0080,0.0180,0.0036,-0.0035,0.0025
1963-09-30,-0.0157,-0.0052,0.0013,-0.0071,0.0029,0.0027
1963-10-31,0.0253,-0.0139,-0.0010,0.0280,-0.0201,0.0029
1963-11-30,-0.0085,-0.0088,0.0175,-0.0051,0.0224,0.0027
...,...,...,...,...,...,...
2020-08-31,0.0763,-0.0089,-0.0296,0.0426,-0.0120,0.0001
2020-09-30,-0.0363,0.0001,-0.0268,-0.0139,-0.0189,0.0001
2020-10-31,-0.0210,0.0464,0.0422,-0.0075,-0.0076,0.0001
2020-11-30,0.1247,0.0712,0.0213,-0.0220,0.0137,0.0001


In [3]:
pdr.get_data_famafrench('F-F_Research_Data_Factors', start=start_date, end=end_date)

{0:          Mkt-RF   SMB   HML    RF
 Date                             
 1926-07    2.96 -2.56 -2.43  0.22
 1926-08    2.64 -1.17  3.82  0.25
 1926-09    0.36 -1.40  0.13  0.23
 1926-10   -3.24 -0.09  0.70  0.32
 1926-11    2.53 -0.10 -0.51  0.31
 ...         ...   ...   ...   ...
 2020-08    7.63 -0.22 -2.96  0.01
 2020-09   -3.63  0.04 -2.68  0.01
 2020-10   -2.10  4.37  4.22  0.01
 2020-11   12.47  5.81  2.13  0.01
 2020-12    4.63  4.89 -1.50  0.01
 
 [1134 rows x 4 columns],
 1:       Mkt-RF    SMB    HML    RF
 Date                            
 1927   29.47  -2.04  -4.54  3.12
 1928   35.39   4.51  -6.17  3.56
 1929  -19.54 -30.70  11.67  4.75
 1930  -31.23  -5.17 -11.54  2.41
 1931  -45.11   3.70 -13.95  1.07
 ...      ...    ...    ...   ...
 2016   13.30   6.66  22.75  0.20
 2017   21.51  -4.97 -13.51  0.80
 2018   -6.95  -3.21  -9.73  1.83
 2019   28.28  -6.11 -10.34  2.15
 2020   23.66  13.18 -46.56  0.45
 
 [94 rows x 4 columns],
 'DESCR': 'F-F Research Data Factors\n-----

In [7]:
# necessary? if so - fix
def calculate_vw_returns(data):
    data['mkt_cap'] = data['PRC'] * data['SHROUT']
    data['wgt_ret'] = data['RET'] * data['mkt_cap']
    total_mkt_cap = data['mkt_cap'].sum()
    vw_ret = data['wgt_ret'].sum() / total_mkt_cap
    return vw_ret

vw_returns = crsp_data.groupby(['date', 'decile']).apply(calculate_vw_returns).reset_index()
vw_returns = vw_returns.pivot_table(values=0, index='date', columns='decile')

In [8]:
def estimate_models(returns, factors, factors5):
    # Add a constant to the factors for regression
    factors = sm.add_constant(factors)

    # Estimate the CAPM model
    capm_model = sm.OLS(returns, factors[['const', 'Mkt-RF']]).fit()

    # Estimate the FF3 model
    ff3_model = sm.OLS(returns, factors).fit()

    # estimate FF5
    ff5_model = sm.OLS(returns, factors5).fit()

    return capm_model.params, ff3_model.params, ff5_model.params

# Merge the factor data with the portfolio returns
# Add a constant column to the returns DataFrames
ew_returns['const'] = 1
vw_returns['const'] = 1

# Merge the factor data with the portfolio returns
# ew_returns = ew_returns.merge(ff3_factors, left_index=True, right_index=True, suffixes=('', '_y'))
# vw_returns = vw_returns.merge(ff3_factors, left_index=True, right_index=True, suffixes=('', '_y'))

# Merge the ff5 data with the portfolio returns
ew_returns = ew_returns.merge(ff5_factors, left_index=True, right_index=True, suffixes=('', '_y'))
vw_returns = vw_returns.merge(ff5_factors, left_index=True, right_index=True, suffixes=('', '_y'))


# Calculate the CAPM and FF3 model parameters for each decile
ew_results = pd.DataFrame()
vw_results = pd.DataFrame()

for decile in range(1, 11):
    ew_capm_params, ew_ff3_params = estimate_models(ew_returns[decile], ew_returns[['const', 'Mkt-RF', 'SMB', 'HML']], ff5_factors)
    vw_capm_params, vw_ff3_params = estimate_models(vw_returns[decile], vw_returns[['const', 'Mkt-RF', 'SMB', 'HML']], ff5_factors)

    ew_results = pd.concat([ew_results, pd.concat([ew_capm_params, ew_ff3_params], keys=['CAPM', 'FF3', 'FF5'])], axis=1)
    vw_results = pd.concat([vw_results, pd.concat([vw_capm_params, vw_ff3_params], keys=['CAPM', 'FF3', 'FF5'])], axis=1)

ew_results.columns = range(1, 11)
vw_results.columns = range(1, 11)


print("Equal-weighted portfolio results:")
print(ew_results)

print("\nValue-weighted portfolio results:")
print(vw_results)


Equal-weighted portfolio results:
                   1         2         3         4         5         6    
CAPM const  -0.015925  0.001009  0.003584  0.005511  0.007938  0.008830  \
     Mkt-RF  1.656576  1.599239  1.456843  1.432393  1.399146  1.340762   
FF3  const  -0.019065 -0.001587  0.001684  0.003979  0.006661  0.007767   
     Mkt-RF  1.134937  1.152042  1.108285  1.135423  1.140081  1.122068   
     SMB     1.621889  1.440500  1.187582  1.058033  0.954193  0.813348   
     HML     0.967517  0.750562  0.482891  0.338609  0.246224  0.195488   

                   7         8         9         10  
CAPM const   0.009166  0.009423  0.008992  0.008153  
     Mkt-RF  1.276111  1.212673  1.138387  0.989791  
FF3  const   0.008404  0.008890  0.008614  0.008164  
     Mkt-RF  1.112478  1.091643  1.063974  0.993081  
     SMB     0.626072  0.478973  0.267695 -0.014604  
     HML     0.118697  0.062730  0.080782  0.000789  

Value-weighted portfolio results:
                   1       

In [9]:
ew_results

Unnamed: 0,Unnamed: 1,1,2,3,4,5,6,7,8,9,10
CAPM,const,-0.015925,0.001009,0.003584,0.005511,0.007938,0.00883,0.009166,0.009423,0.008992,0.008153
CAPM,Mkt-RF,1.656576,1.599239,1.456843,1.432393,1.399146,1.340762,1.276111,1.212673,1.138387,0.989791
FF3,const,-0.019065,-0.001587,0.001684,0.003979,0.006661,0.007767,0.008404,0.00889,0.008614,0.008164
FF3,Mkt-RF,1.134937,1.152042,1.108285,1.135423,1.140081,1.122068,1.112478,1.091643,1.063974,0.993081
FF3,SMB,1.621889,1.4405,1.187582,1.058033,0.954193,0.813348,0.626072,0.478973,0.267695,-0.014604
FF3,HML,0.967517,0.750562,0.482891,0.338609,0.246224,0.195488,0.118697,0.06273,0.080782,0.000789


In [10]:
vw_results

Unnamed: 0,Unnamed: 1,1,2,3,4,5,6,7,8,9,10
CAPM,const,-0.00981,0.001199,0.003483,0.005589,0.007891,0.008733,0.009095,0.009279,0.008734,0.008034
CAPM,Mkt-RF,1.628531,1.589552,1.447115,1.425369,1.392176,1.327447,1.267836,1.203432,1.125723,0.93402
FF3,const,-0.01275,-0.001376,0.001603,0.004058,0.006621,0.007703,0.008348,0.008767,0.00837,0.008203
FF3,Mkt-RF,1.131423,1.146148,1.102706,1.129894,1.135195,1.11432,1.107443,1.087439,1.055901,0.968997
FF3,SMB,1.573197,1.427876,1.172379,1.049061,0.944606,0.796081,0.614235,0.459134,0.246864,-0.130755
FF3,HML,0.878563,0.744832,0.478828,0.342646,0.247255,0.185094,0.115457,0.059971,0.082598,-0.030205


In [11]:
# Set the date ranges
post_ff_paper_start = '1993-01-01'
post_ff_paper_end = '2001-12-31'
post_dotcom_start = '2002-01-01'

# Create the subsets
ew_returns_post_ff = ew_returns.loc[(ew_returns.index >= post_ff_paper_start) & (ew_returns.index <= post_ff_paper_end)]
vw_returns_post_ff = vw_returns.loc[(vw_returns.index >= post_ff_paper_start) & (vw_returns.index <= post_ff_paper_end)]

ew_returns_post_dotcom = ew_returns.loc[ew_returns.index >= post_dotcom_start]
vw_returns_post_dotcom = vw_returns.loc[vw_returns.index >= post_dotcom_start]



In [16]:
def calculate_statistics(returns):
    mean = returns.mean()
    volatility = returns.std()
    sharpe_ratio = mean / volatility
    return mean, volatility, sharpe_ratio

# Post Fama French 1992 paper
ew_mean_post_ff, ew_vol_post_ff, ew_sharpe_post_ff = calculate_statistics(ew_returns_post_ff.iloc[:, -1] - ew_returns_post_ff.iloc[:, 0])
vw_mean_post_ff, vw_vol_post_ff, vw_sharpe_post_ff = calculate_statistics(vw_returns_post_ff.iloc[:, -1] - vw_returns_post_ff.iloc[:, 0])

# Post Dot-Com Bubble
ew_mean_post_dotcom, ew_vol_post_dotcom, ew_sharpe_post_dotcom = calculate_statistics(ew_returns_post_dotcom.iloc[:, -1] - ew_returns_post_dotcom.iloc[:, 0])
vw_mean_post_dotcom, vw_vol_post_dotcom, vw_sharpe_post_dotcom = calculate_statistics(vw_returns_post_dotcom.iloc[:, -1] - vw_returns_post_dotcom.iloc[:, 0])


In [17]:
print("Post Fama French 1992 paper:")
print(f"Equal-weighted SMB portfolio - Mean: {ew_mean_post_ff}, Volatility: {ew_vol_post_ff}, Sharpe Ratio: {ew_sharpe_post_ff}")
print(f"Value-weighted SMB portfolio - Mean: {vw_mean_post_ff}, Volatility: {vw_vol_post_ff}, Sharpe Ratio: {vw_sharpe_post_ff}")

print("\nPost Dot-Com Bubble:")
print(f"Equal-weighted SMB portfolio - Mean: {ew_mean_post_dotcom}, Volatility: {ew_vol_post_dotcom}, Sharpe Ratio: {ew_sharpe_post_dotcom}")
print(f"Value-weighted SMB portfolio - Mean: {vw_mean_post_dotcom}, Volatility: {vw_vol_post_dotcom}, Sharpe Ratio: {vw_sharpe_post_dotcom}")


Post Fama French 1992 paper:
Equal-weighted SMB portfolio - Mean: 0.028723789029534253, Volatility: 0.09651448361997186, Sharpe Ratio: 0.2976111766046936
Value-weighted SMB portfolio - Mean: 0.015741403229977663, Volatility: 0.08941032442062882, Sharpe Ratio: 0.17605800372586272

Post Dot-Com Bubble:
Equal-weighted SMB portfolio - Mean: 0.02286196226482209, Volatility: 0.08119419005368132, Sharpe Ratio: 0.28157140615242254
Value-weighted SMB portfolio - Mean: 0.012345695771850811, Volatility: 0.07930922612602084, Sharpe Ratio: 0.1556653163181007
