In [88]:
import pandas as pd
import statsmodels.api as sm
import statistics

# 从 CSV 文件中读取数据
data = pd.read_csv('fama_french_data.csv')

# 获取表头名称清单
header_list = data.columns.tolist()

# 第一阶段回归
# 提取第一阶段自变量
X_first_stage = data[['Mkt-RF']]  # 自变量：Mkt-RF

betas = []
ri_rfs = []
resid_var = []

for col in header_list[4:]:
    
    # 因变量：Agric
    y_first_stage = data[col]
    # 添加截距项
    X_first_stage = sm.add_constant(X_first_stage)

    # 创建并拟合第一阶段线性回归模型
    model_first_stage = sm.OLS(y_first_stage, X_first_stage).fit()
    
    print(model_first_stage.summary())
    # 获取均方回归（MSR）
    print('Mean Square Regression:', model_first_stage.mse_model, '\n')
    # 获取均方误差（MSE）
    print('Mean Square Error:',model_first_stage.mse_resid, '\n')

    # 获取第一阶段回归的残差
    betas.append( model_first_stage.params[1])
    ri_rfs.append(statistics.mean(y_first_stage))
    resid_var.append(statistics.variance(model_first_stage.resid))
    


                            OLS Regression Results                            
Dep. Variable:                  Agric   R-squared:                       0.406
Model:                            OLS   Adj. R-squared:                  0.396
Method:                 Least Squares   F-statistic:                     39.65
Date:                Sun, 26 Nov 2023   Prob (F-statistic):           4.39e-08
Time:                        17:29:39   Log-Likelihood:                -184.28
No. Observations:                  60   AIC:                             372.6
Df Residuals:                      58   BIC:                             376.8
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0299      0.737     -0.041      0.9

                            OLS Regression Results                            
Dep. Variable:                  Gold    R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                 -0.014
Method:                 Least Squares   F-statistic:                    0.1621
Date:                Sun, 26 Nov 2023   Prob (F-statistic):              0.689
Time:                        17:29:39   Log-Likelihood:                -221.94
No. Observations:                  60   AIC:                             447.9
Df Residuals:                      58   BIC:                             452.1
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -1.3180      1.380     -0.955      0.3

In [59]:
# 被解釋變數
y_second_stage = ri_rfs

# 解釋變數
X_second_stage = pd.DataFrame.from_dict({'beta': betas, 'resid': resid_var})

# 添加截距项
X_second_stage = sm.add_constant(X_second_stage)

# 创建并拟合第二阶段线性回归模型
model_second_stage = sm.OLS(y_second_stage, X_second_stage).fit()
print(model_second_stage.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.566
Model:                            OLS   Adj. R-squared:                  0.547
Method:                 Least Squares   F-statistic:                     29.39
Date:                Sun, 26 Nov 2023   Prob (F-statistic):           6.83e-09
Time:                        16:09:56   Log-Likelihood:                -32.449
No. Observations:                  48   AIC:                             70.90
Df Residuals:                      45   BIC:                             76.51
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.8519      0.205      9.053      0.0

According to our first time-series regression result, we can see that almost each estimated beta coefficient statistically different from zero, except for Gold industry, which has p-value of 0.689. In terms of alpha, there are some industries like Fin, Meals, PerSv, Coal, Mines, Guns, Mach, Steel, Drugs, Beer, Food having alpha statistically different from 0, which means these industries could have other factors infecting their returns. 

In [62]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller

# 从 CSV 文件中读取时间序列数据
data = pd.read_csv('fama_french_data.csv')

# 获取表头名称清单
header_list = data.columns.tolist()

for col in header_list:
    # 提取时间序列数据列（假设列名为 'Column_name'）
    time_series = data[col]

    # 执行 ADF 检验
    result = adfuller(time_series)

    # 打印 ADF 检验结果
    print(col)
    print('ADF Statistic:', result[0])
    print('p-value:', result[1])
    print('Critical Values:', result[4])

Date
ADF Statistic: 0.0052447960215658085
p-value: 0.9589636837611726
Critical Values: {'1%': -3.5745892596209488, '5%': -2.9239543084490744, '10%': -2.6000391840277777}
Mkt-RF
ADF Statistic: -9.524950944377833
p-value: 2.9951113094297337e-16
Critical Values: {'1%': -3.5463945337644063, '5%': -2.911939409384601, '10%': -2.5936515282964665}
SMB
ADF Statistic: -9.43071752469358
p-value: 5.199095858768456e-16
Critical Values: {'1%': -3.5463945337644063, '5%': -2.911939409384601, '10%': -2.5936515282964665}
HML
ADF Statistic: -6.1340812488949785
p-value: 8.267483067767465e-08
Critical Values: {'1%': -3.5463945337644063, '5%': -2.911939409384601, '10%': -2.5936515282964665}
Agric
ADF Statistic: -4.9672836764221975
p-value: 2.5751072712580397e-05
Critical Values: {'1%': -3.552928203580539, '5%': -2.9147306250000002, '10%': -2.595137155612245}
Food 
ADF Statistic: -9.651293575453984
p-value: 1.432425313792439e-16
Critical Values: {'1%': -3.5463945337644063, '5%': -2.911939409384601, '10%': -2