In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import pandas_datareader.data as web
from statsmodels.api import OLS, add_constant
from linearmodels.asset_pricing import LinearFactorModel

# Fama-MacBeth Regression Class

In [2]:
class FM_regression:
    """Fama-MacBeth two-pass regression on Fama-French data sets.

    Data is fetched with ``pandas_datareader`` from the ``'famafrench'``
    source. Typical manual usage is::

        fm = FM_regression(start, end, ff_factor, ff_portfolio)
        fm.factor_data()
        fm.portfolio_data()
        fm.data_preprocess()
        fm.step1()   # betas
        fm.step2()   # lambdas

    ``linearmodel_estimation_summary`` is a shortcut that downloads the data
    and prints the ``linearmodels`` estimation summary instead.

    Parameters
    ----------
    start, end :
        Sample boundaries, passed unchanged to ``web.DataReader``.
    ff_factor :
        Name of the Fama-French factor data set.
    ff_portfolio :
        Name of the Fama-French portfolio data set.
    df_factor, df_portfolio : optional
        Already loaded factor / portfolio frames. When omitted they are
        filled by ``factor_data`` / ``portfolio_data``.
    betas, lambdas : optional
        Previously estimated results. When omitted they are filled by
        ``step1`` / ``step2``.
    """

    def __init__(self, start, end, ff_factor, ff_portfolio,
                 df_factor=None, df_portfolio=None,
                 betas=None, lambdas=None):
        self.start = start
        self.end = end
        self.ff_factor = ff_factor
        self.ff_portfolio = ff_portfolio

        self.df_portfolio = df_portfolio
        self.df_factor = df_factor
        self.betas = betas
        self.lambdas = lambdas

    # ------------------------------------------------------------------
    # Data handling
    # ------------------------------------------------------------------
    def factor_data(self):
        """Download the factor data set and store it in ``self.df_factor``.

        Element ``[0]`` of the object returned by the reader is kept.
        """
        self.df_factor = web.DataReader(self.ff_factor, 'famafrench',
                                        start=self.start, end=self.end)[0]

    def portfolio_data(self):
        """Download the portfolio data set and store it in ``self.df_portfolio``.

        Element ``[0]`` of the object returned by the reader is kept.
        """
        self.df_portfolio = web.DataReader(self.ff_portfolio, 'famafrench',
                                           start=self.start, end=self.end)[0]

    def _require_data(self):
        """Raise a clear error when the factor or portfolio frame is missing."""
        if self.df_factor is None or self.df_portfolio is None:
            raise ValueError(
                "df_factor and df_portfolio must be loaded first: call "
                "factor_data() and portfolio_data(), or pass them to __init__."
            )

    def data_preprocess(self):
        """Turn portfolio returns into excess returns and drop ``RF``.

        The ``RF`` column of ``self.df_factor`` is subtracted row-wise from
        every column of ``self.df_portfolio``; afterwards ``RF`` is removed
        from ``self.df_factor`` so only the factor columns remain.

        The method is idempotent: once ``RF`` has been removed, calling it
        again leaves both frames untouched instead of failing.

        Raises
        ------
        ValueError
            If the factor or portfolio data has not been loaded.
        """
        self._require_data()

        # RF already consumed by an earlier call -> nothing left to do.
        if 'RF' not in self.df_factor.columns:
            return

        risk_free = self.df_factor['RF']
        self.df_portfolio = self.df_portfolio.sub(risk_free, axis=0)
        self.df_factor = self.df_factor.drop('RF', axis=1)

    # ------------------------------------------------------------------
    # Hand-written Fama-MacBeth:
    #   step1 = time-series regressions (one per portfolio)  -> betas
    #   step2 = cross-sectional regressions (one per period) -> lambdas
    # ------------------------------------------------------------------
    def step1(self):
        """Time-series pass: estimate each portfolio's factor betas.

        Every column of ``self.df_portfolio`` is regressed by OLS on the
        columns of ``self.df_factor`` plus an intercept. The slope
        coefficients (intercept dropped) are stored in ``self.betas`` with
        one row per portfolio and one column per factor.

        Raises
        ------
        ValueError
            If the factor or portfolio data has not been loaded.
        """
        self._require_data()

        # The regressor matrix is the same for every portfolio, so build it once.
        exog = add_constant(self.df_factor)

        betas = [
            OLS(self.df_portfolio[portfolio], exog).fit().params.drop('const')
            for portfolio in self.df_portfolio
        ]

        self.betas = pd.DataFrame(betas,
                                  columns=self.df_factor.columns,
                                  index=self.df_portfolio.columns)

    def step2(self):
        """Cross-sectional pass: estimate per-period factor risk premia.

        For every row (period) of ``self.df_portfolio`` the portfolio returns
        are regressed by OLS on ``self.betas``; no intercept is added. The
        coefficients are stored in ``self.lambdas`` with one row per period
        and one column per factor.

        If ``self.betas`` is still ``None``, ``step1`` is run first.

        Raises
        ------
        ValueError
            If the factor or portfolio data has not been loaded.
        """
        self._require_data()
        if self.betas is None:
            self.step1()

        exog = self.betas
        lambdas = [
            OLS(self.df_portfolio.loc[period, exog.index], exog).fit().params
            for period in self.df_portfolio.index  # one cross-section per period
        ]

        self.lambdas = pd.DataFrame(lambdas,
                                    index=self.df_portfolio.index,
                                    columns=self.betas.columns.tolist())

    # ------------------------------------------------------------------
    # Library-based estimation (only needs the data set names and dates)
    # ------------------------------------------------------------------
    def linearmodel_estimation_summary(self):
        """Download the data, fit ``LinearFactorModel`` and print its summary.

        Both data sets are (re)downloaded and preprocessed on every call, so
        any frames previously stored on the instance are replaced.
        """
        self.factor_data()
        self.portfolio_data()
        self.data_preprocess()

        mod = LinearFactorModel(portfolios=self.df_portfolio,
                                factors=self.df_factor)
        res = mod.fit()
        print(res)

# 1. The Cross-Section of Expected Stock Returns - Fama-French 3
- Table V가 핵심적인 내용을 나타낸다
- Table V의 결론은 주식수익률의 결정에 있어서 사이즈 효과와 밸류 효과가 크게 존재함 <br>


### Data
- 1963-7 ~ 1990-12 기간의  NYSE, AMEX, NASDAQ 주식
- 3Factor(Mkt-Rf, SMB, HML)
- 10x10 portfolio
    - Value Factor(BE/ME)와 Size Factor(ME)를 각각 10분위 별로 쪼개서 결합한 100개의 포트폴리오

In [3]:
# Full sample (1963-07 to 1990-12): three factors against the 10x10 portfolios.
ff_factor, ff_portfolio = 'F-F_Research_Data_Factors', '100_Portfolios_10x10'
start, end = '1963-7', '1990-12'

# Build the estimator, then download the data and print the linearmodels summary.
ff3f = FM_regression(start=start, end=end,
                     ff_factor=ff_factor, ff_portfolio=ff_portfolio)
ff3f.linearmodel_estimation_summary()

                      LinearFactorModel Estimation Summary                      
No. Test Portfolios:                100   R-squared:                      0.6140
No. Factors:                          3   J-statistic:                    230.88
No. Observations:                   330   P-value                         0.0000
Date:                  Sun, Mar 12 2023   Distribution:                 chi2(97)
Time:                          23:05:23                                         
Cov. Estimator:                  robust                                         
                                                                                
                            Risk Premia Estimates                             
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Mkt-RF        -0.0608     0.3516    -0.1730     0.8627     -0.7499      0.6283
SMB            0.7581     0.3480    

SMB가 가장 유의함을 나타냄

In [4]:
# Show the factor returns kept after preprocessing (the RF column has been dropped).
ff3f.df_factor

Unnamed: 0_level_0,Mkt-RF,SMB,HML
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1963-07,-0.39,-0.45,-0.97
1963-08,5.07,-0.98,1.80
1963-09,-1.57,-0.33,0.13
1963-10,2.53,-0.58,-0.10
1963-11,-0.85,-1.17,1.75
...,...,...,...
1990-08,-10.15,-3.57,1.64
1990-09,-6.12,-3.65,0.64
1990-10,-1.92,-5.57,0.10
1990-11,6.35,0.43,-3.10


In [6]:
import numpy as np

In [9]:
# Show the portfolio returns after preprocessing (RF has been subtracted from every column).
ff3f.df_portfolio

Unnamed: 0_level_0,SMALL LoBM,ME1 BM2,ME1 BM3,ME1 BM4,ME1 BM5,ME1 BM6,ME1 BM7,ME1 BM8,ME1 BM9,SMALL HiBM,...,BIG LoBM,ME10 BM2,ME10 BM3,ME10 BM4,ME10 BM5,ME10 BM6,ME10 BM7,ME10 BM8,ME10 BM9,BIG HiBM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1963-07,0.7676,0.4988,0.8259,-0.4229,1.9449,-0.0716,-0.9791,-1.2906,-1.6425,-1.2448,...,-1.0316,1.2858,-0.1063,1.4288,1.6740,-0.1070,1.6977,-4.5817,-0.9258,-4.7683
1963-08,-1.4951,6.3199,3.7857,-2.3966,0.6833,0.9642,1.3437,3.1524,4.4905,4.9688,...,5.6329,4.9805,4.2377,3.3615,3.7365,7.1617,5.5308,12.3808,5.6909,5.4746
1963-09,3.2439,-6.5473,1.2961,2.2687,1.0818,-2.2111,0.7188,-2.7114,-3.1879,-0.4698,...,-0.7749,-2.6075,0.4141,-2.8082,-1.1668,-2.5306,-1.2146,4.0055,-3.7642,-9.6126
1963-10,-1.8578,8.1883,1.8220,-0.7995,2.8689,-2.3727,0.4168,1.8812,0.2629,0.5660,...,3.6555,8.1143,1.7244,1.6726,-1.4348,1.4739,-1.4544,4.3293,0.1871,0.4734
1963-11,0.7776,-7.8564,-2.9200,-4.2964,-2.2519,-1.8455,-2.1602,-1.5164,-1.6357,-2.0119,...,0.0076,-4.1293,2.3904,-1.4299,-2.9912,-0.1636,-2.6737,-3.1100,-0.8676,6.0936
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1990-08,-16.5997,-17.5911,-16.0006,-13.7750,-14.0735,-11.1462,-12.5793,-10.9100,-13.6711,-11.4104,...,-8.2423,-10.8032,-12.9681,-8.3523,-6.9849,-7.3521,-8.8408,-10.8851,-11.6252,-13.7967
1990-09,-10.4458,-10.7368,-8.3102,-8.5041,-9.2548,-7.2464,-8.9067,-8.9899,-6.8975,-8.6557,...,-6.2000,-4.4871,-8.5846,-2.8036,-0.9678,-0.5998,-8.3180,-4.8246,-9.6511,-9.4050
1990-10,-7.0201,-7.0128,-6.8052,-8.7560,-5.2644,-9.0429,-6.6941,-5.3189,-6.9799,-8.2145,...,1.3874,2.0442,-0.6706,-3.0884,0.1230,-3.6671,-1.2062,6.0674,0.1757,0.6993
1990-11,-1.0733,2.8010,2.5216,3.9653,2.7531,2.9678,0.2807,2.0265,1.7369,-0.1993,...,6.3188,2.8863,7.2402,4.9789,3.9043,5.4668,4.7277,4.0207,4.1600,0.7905


# 2. Other cases?

In [6]:
# Earlier sub-period (1963-07 to 1976-12), same factors and 10x10 portfolios.
ff_factor, ff_portfolio = 'F-F_Research_Data_Factors', '100_Portfolios_10x10'
start, end = '1963-7', '1976-12'

# Build the estimator, then download the data and print the linearmodels summary.
ff3f = FM_regression(start=start, end=end,
                     ff_factor=ff_factor, ff_portfolio=ff_portfolio)
ff3f.linearmodel_estimation_summary()

                      LinearFactorModel Estimation Summary                      
No. Test Portfolios:                100   R-squared:                      0.5530
No. Factors:                          3   J-statistic:                    344.43
No. Observations:                   162   P-value                         0.0000
Date:                  Sat, Mar 11 2023   Distribution:                 chi2(97)
Time:                          12:19:36                                         
Cov. Estimator:                  robust                                         
                                                                                
                            Risk Premia Estimates                             
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Mkt-RF         0.0260     0.8905     0.0292     0.9767     -1.7192      1.7713
SMB            0.1197     1.0250    

In [7]:
# Later sub-period (1977-07 to 1990-12), same factors and 10x10 portfolios.
# NOTE(review): the earlier sub-period cell ends at 1976-12, so 1977-01..1977-06
# is in neither sub-sample -- confirm this gap is intentional.
ff_factor, ff_portfolio = 'F-F_Research_Data_Factors', '100_Portfolios_10x10'
start, end = '1977-7', '1990-12'

# Build the estimator, then download the data and print the linearmodels summary.
ff3f = FM_regression(start=start, end=end,
                     ff_factor=ff_factor, ff_portfolio=ff_portfolio)
ff3f.linearmodel_estimation_summary()

                      LinearFactorModel Estimation Summary                      
No. Test Portfolios:                100   R-squared:                      0.7351
No. Factors:                          3   J-statistic:                    608.82
No. Observations:                   162   P-value                         0.0000
Date:                  Sat, Mar 11 2023   Distribution:                 chi2(97)
Time:                          12:19:40                                         
Cov. Estimator:                  robust                                         
                                                                                
                            Risk Premia Estimates                             
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Mkt-RF         0.1048     0.4088     0.2564     0.7976     -0.6963      0.9060
SMB            0.9206     0.3171    

In [8]:
# Five-factor data set against the 2x3 (six) portfolios, 1977-06 to 1990-12.
# NOTE(review): the result is still bound to the name `ff3f` although it uses
# the 5-factor data set -- consider a dedicated name once no other cell
# depends on `ff3f`.
# NOTE(review): start is '1977-6' here but '1977-7' in the previous cell --
# confirm which month is intended.
ff_factor, ff_portfolio = 'F-F_Research_Data_5_Factors_2x3', '6_Portfolios_2x3'
start, end = '1977-6', '1990-12'

# Build the estimator, then download the data and print the linearmodels summary.
ff3f = FM_regression(start=start, end=end,
                     ff_factor=ff_factor, ff_portfolio=ff_portfolio)
ff3f.linearmodel_estimation_summary()

                      LinearFactorModel Estimation Summary                      
No. Test Portfolios:                  6   R-squared:                      0.9839
No. Factors:                          5   J-statistic:                    0.0560
No. Observations:                   163   P-value                         0.8129
Date:                  Sat, Mar 11 2023   Distribution:                  chi2(1)
Time:                          12:19:43                                         
Cov. Estimator:                  robust                                         
                                                                                
                            Risk Premia Estimates                             
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Mkt-RF         0.5494     0.3840     1.4307     0.1525     -0.2032      1.3021
SMB            0.1860     0.2022    