In [5]:
import pandas as pd
import statsmodels.api as sm
import statistics

# 从 CSV 文件中读取数据
data = pd.read_csv('fama_french_data.csv')

# 获取表头名称清单
header_list = data.columns.tolist()

# 第一阶段回归
# 提取第一阶段自变量
X_first_stage = data[['Mkt-RF','SMB','HML']]  # 自变量：Mkt-RF、SMB、HML

betas1 = []
betas2 = []
betas3 = []
ri_rfs = []
resid_var = []

for col in header_list[4:]:
    
    # 因变量：Agric
    y_first_stage = data[col]
    # 添加截距项
    X_first_stage = sm.add_constant(X_first_stage)

    # 创建并拟合第一阶段线性回归模型
    model_first_stage = sm.OLS(y_first_stage, X_first_stage).fit()
    
    print(model_first_stage.summary())
    
    # 获取 SSTO（总平方和）
    SSTO = model_first_stage.centered_tss

    # 获取 SSR（回归平方和）
    SSR = model_first_stage.ess

    # 获取 SSE（残差平方和）
    SSE = model_first_stage.ssr + model_first_stage.centered_tss

    # 获取 MSR（均方回归）
    MSR = model_first_stage.mse_model

    # 获取 MSE（均方误差）
    MSE = model_first_stage.mse_resid

    print("SSTO:", SSTO)
    print("SSR:", SSR)
    print("SSE:", SSE)
    print("MSR:", MSR)
    print("MSE:", MSE)

    # 获取第一阶段回归的残差
    betas1.append( model_first_stage.params[1])
    betas2.append( model_first_stage.params[2])
    betas3.append( model_first_stage.params[3])
    ri_rfs.append(statistics.mean(y_first_stage))
    resid_var.append(statistics.variance(model_first_stage.resid))
    


                            OLS Regression Results                            
Dep. Variable:                  Agric   R-squared:                       0.406
Model:                            OLS   Adj. R-squared:                  0.374
Method:                 Least Squares   F-statistic:                     12.77
Date:                Tue, 28 Nov 2023   Prob (F-statistic):           1.81e-06
Time:                        22:24:39   Log-Likelihood:                -184.27
No. Observations:                  60   AIC:                             376.5
Df Residuals:                      56   BIC:                             384.9
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0101      0.771     -0.013      0.9

                            OLS Regression Results                            
Dep. Variable:                  Autos   R-squared:                       0.728
Model:                            OLS   Adj. R-squared:                  0.714
Method:                 Least Squares   F-statistic:                     50.01
Date:                Tue, 28 Nov 2023   Prob (F-statistic):           7.49e-16
Time:                        22:24:39   Log-Likelihood:                -162.44
No. Observations:                  60   AIC:                             332.9
Df Residuals:                      56   BIC:                             341.3
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.4649      0.536     -0.867      0.3

                            OLS Regression Results                            
Dep. Variable:                  RlEst   R-squared:                       0.809
Model:                            OLS   Adj. R-squared:                  0.798
Method:                 Least Squares   F-statistic:                     78.82
Date:                Tue, 28 Nov 2023   Prob (F-statistic):           4.33e-20
Time:                        22:24:40   Log-Likelihood:                -140.25
No. Observations:                  60   AIC:                             288.5
Df Residuals:                      56   BIC:                             296.9
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.4074      0.370     -1.100      0.2

In [3]:
# 被解釋變數
y_second_stage = ri_rfs

# 解釋變數
X_second_stage = pd.DataFrame.from_dict({'betas1': betas1, 'betas2': betas1, 'betas3': betas1, 'resid': resid_var})

# 添加截距项
X_second_stage = sm.add_constant(X_second_stage)

# 创建并拟合第二阶段线性回归模型
model_second_stage = sm.OLS(y_second_stage, X_second_stage).fit()
print(model_second_stage.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.570
Model:                            OLS   Adj. R-squared:                  0.551
Method:                 Least Squares   F-statistic:                     29.85
Date:                Sun, 26 Nov 2023   Prob (F-statistic):           5.60e-09
Time:                        16:05:01   Log-Likelihood:                -32.237
No. Observations:                  48   AIC:                             70.47
Df Residuals:                      45   BIC:                             76.09
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.8769      0.220      8.536      0.0