# 广义矩估计练习

In [1]:
import wooldridge as woo
import numpy as np

In [2]:
# Load the Mroz data set and keep only the rows whose log wage is not missing.
mroz = woo.dataWoo('mroz').dropna(subset=['lwage'])
mroz.head()

Unnamed: 0,inlf,hours,kidslt6,kidsge6,age,educ,wage,repwage,hushrs,husage,...,faminc,mtr,motheduc,fatheduc,unem,city,exper,nwifeinc,lwage,expersq
0,1,1610,1,0,32,12,3.354,2.65,2708,34,...,16310.0,0.7215,12,7,5.0,0,14,10.91006,1.210154,196
1,1,1656,0,2,30,12,1.3889,2.65,2310,30,...,21800.0,0.6615,7,7,11.0,1,5,19.499981,0.328512,25
2,1,1980,1,3,35,12,4.5455,4.04,3072,40,...,21040.0,0.6915,12,7,5.0,0,15,12.03991,1.514138,225
3,1,456,0,3,34,12,1.0965,3.25,1920,53,...,7300.0,0.7815,7,7,5.0,0,6,6.799996,0.092123,36
4,1,1568,1,2,31,14,4.5918,3.6,2000,32,...,27300.0,0.6215,12,14,9.5,1,7,20.100058,1.524272,49


### 求解模型：$ln(wage) = \beta_0 + \beta_1 \cdot educ + \beta_2 \cdot exper  + \beta_3 \cdot expersq  + \epsilon$
- 其中，$educ$为内生变量，其工具变量为 $constant$, $exper$, $expersq$, $motheduc$, $fatheduc$

#### 两阶段最小二乘：

##### 第1阶段：用工具变量对内生变量回归，得到内生变量的拟合值
- 模型：$educ = \beta_0 + \beta_1 \cdot exper  + \beta_2 \cdot expersq  + \beta_3 \cdot motheduc + \beta_4 \cdot fatheduc + \epsilon$
- 结果：$\hat{educ} = 9.1026 + 0.0452 \cdot exper -0.0010 \cdot expersq +  0.1576 \cdot motheduc + 0.1895 \cdot fatheduc$

##### 第2阶段：将内生变量的拟合值与其他外生变量合并为设计矩阵 $X$，然后用 $y$ 对 $X$ 回归
- 模型：$ln(wage) = \beta_0 + \beta_1 \cdot \hat{educ} + \beta_2 \cdot exper  + \beta_3 \cdot expersq  + \epsilon$
- 结果：$\hat{ln(wage)} = 0.0481 +  0.0614 \cdot \hat{educ} + 0.0442 \cdot exper - 0.0009 \cdot expersq $

In [3]:
# Two-stage least squares done by hand: regress educ on the instruments, then
# regress log wage on the fitted educ together with the exogenous regressors.

# ---- Stage 1: educ on [1, exper, expersq, motheduc, fatheduc] ----
print('=======第一阶段结果=======')
y = mroz['educ']
x1 = np.ones(y.shape)
x2, x3, x4, x5 = (mroz[name] for name in ('exper', 'expersq', 'motheduc', 'fatheduc'))
X_1st = np.column_stack((x1, x2, x3, x4, x5))
# OLS through the normal equations: (X'X)^{-1} X'y.
XtX_inv_1st = np.linalg.inv(X_1st.T @ X_1st)
beta_1st = XtX_inv_1st @ X_1st.T @ y

for label, coef in zip(('cons', 'exper', 'expersq', 'motheduc', 'fatheduc'), beta_1st):
    print(f'{label}_1st: {coef:.4f}')

# ---- Stage 2: log wage on [1, educ_hat, exper, expersq] ----
print('=======第二阶段结果=======')
educ_hat = X_1st @ beta_1st  # fitted values of educ from stage 1
ln_wage = np.log(mroz['wage'])
cons = np.ones(ln_wage.shape)
exper, expersq = mroz['exper'], mroz['expersq']
X_2ed = np.column_stack((cons, educ_hat, exper, expersq))
XtX_inv_2nd = np.linalg.inv(X_2ed.T @ X_2ed)
beta_2ed = XtX_inv_2nd @ X_2ed.T @ ln_wage
ln_wage_hat = X_2ed @ beta_2ed

for label, coef in zip(('cons', 'educ', 'exper', 'expersq'), beta_2ed):
    print(f'{label}_2nd: {coef:.4f}')

cons_1st: 9.1026
exper_1st: 0.0452
expersq_1st: -0.0010
motheduc_1st: 0.1576
fatheduc_1st: 0.1895
cons_2nd: 0.0481
educ_2nd: 0.0614
exper_2nd: 0.0442
expersq_2nd: -0.0009


#### 两阶段最小二乘：

##### 直接公式：$ \beta = (X'Z(Z'Z)^{-1}Z'X)^{-1} X'Z(Z'Z)^{-1}Z'y$
- 模型：$ln(wage) = \beta_0 + \beta_1 \cdot educ + \beta_2 \cdot exper  + \beta_3 \cdot expersq  + \epsilon$
- 结果：$\hat{ln(wage)} = 0.0481 +  0.0614 \cdot educ + 0.0442 \cdot exper - 0.0009 \cdot expersq $


In [4]:
# 2SLS in closed form: beta = (X'Z (Z'Z)^{-1} Z'X)^{-1} X'Z (Z'Z)^{-1} Z'y.
X = np.column_stack((cons, mroz['educ'], exper, expersq))
Z = np.column_stack(
    (cons, mroz['exper'], mroz['expersq'], mroz['motheduc'], mroz['fatheduc'])
)

temp1 = np.linalg.inv(Z.T @ Z)  # (Z'Z)^{-1}
lhs_inv = np.linalg.inv(X.T @ Z @ temp1 @ Z.T @ X)
beta = lhs_inv @ X.T @ Z @ temp1 @ Z.T @ ln_wage

for label, coef in zip(('cons', 'educ', 'exper', 'expersq'), beta):
    print(f'{label}: {coef:.4f}')

cons: 0.0481
educ: 0.0614
exper: 0.0442
expersq: -0.0009


#### 广义矩估计：$\hat{\beta}_{gmm} = (X' Z \Phi Z' X)^{-1} X' Z \Phi Z' y$

##### 任务1：权重矩阵取单位阵 $\Phi = I$
- 模型：$ln(wage) = \beta_0 + \beta_1 \cdot educ + \beta_2 \cdot exper  + \beta_3 \cdot expersq  + \epsilon$
- 结果：$\hat{ln(wage)} = -0.9703 +  0.1285 \cdot educ + 0.0639 \cdot exper - 0.0014 \cdot expersq $

In [5]:
# GMM with the identity matrix as the weight on the moment conditions.
X = np.column_stack((cons, mroz['educ'], exper, expersq))
Z = np.column_stack(
    (cons, mroz['exper'], mroz['expersq'], mroz['motheduc'], mroz['fatheduc'])
)
Phi_1 = np.identity(Z.shape[1])  # one row/column per instrument

# beta = (X'Z Phi Z'X)^{-1} X'Z Phi Z'y
lhs_inv = np.linalg.inv(X.T @ Z @ Phi_1 @ Z.T @ X)
beta_gmm1 = lhs_inv @ X.T @ Z @ Phi_1 @ Z.T @ ln_wage

for label, coef in zip(('cons', 'educ', 'exper', 'expersq'), beta_gmm1):
    print(f'{label}: {coef:.4f}')

cons: -0.9703
educ: 0.1285
exper: 0.0639
expersq: -0.0014


#### 广义矩估计：$\hat{\beta}_{gmm} = (X' Z \Phi Z' X)^{-1} X' Z \Phi Z' y$

##### 任务2：权重矩阵为 $\Phi = \frac{1}{\sigma^2}(Z'Z)^{-1}$（同方差假设下的最优权重矩阵，估计结果与 2SLS 相同；常数 $\frac{1}{\sigma^2}$ 不影响 $\hat{\beta}$）
- 模型：$ln(wage) = \beta_0 + \beta_1 \cdot educ + \beta_2 \cdot exper  + \beta_3 \cdot expersq  + \epsilon$
- 结果：$\hat{ln(wage)} = 0.0481 +  0.0614 \cdot educ + 0.0442 \cdot exper - 0.0009 \cdot expersq $


In [6]:
# GMM with Phi = (Z'Z)^{-1} as the weight on the moment conditions.
X = np.column_stack((cons, mroz['educ'], exper, expersq))
Z = np.column_stack(
    (cons, mroz['exper'], mroz['expersq'], mroz['motheduc'], mroz['fatheduc'])
)
Phi_2 = np.linalg.inv(Z.T @ Z)

# beta = (X'Z Phi Z'X)^{-1} X'Z Phi Z'y
lhs_inv = np.linalg.inv(X.T @ Z @ Phi_2 @ Z.T @ X)
beta_gmm2 = lhs_inv @ X.T @ Z @ Phi_2 @ Z.T @ ln_wage

for label, coef in zip(('cons', 'educ', 'exper', 'expersq'), beta_gmm2):
    print(f'{label}: {coef:.4f}')

cons: 0.0481
educ: 0.0614
exper: 0.0442
expersq: -0.0009


#### 广义矩估计：$\hat{\beta}_{gmm} = (X' Z \Phi Z' X)^{-1} X' Z \Phi Z' y$

##### 任务3：权重矩阵为 $\Phi = \left(\frac{1}{n}\sum_{i=1}^{n} \left(\hat{\epsilon}_i^{(1)}\right)^2 z_i z_i'\right)^{-1}$（异方差稳健的最优权重矩阵），其中 $\hat{\epsilon}^{(1)}$ 为 $\Phi = (Z'Z)^{-1}$ 下估计得到的残差
- 模型：$ln(wage) = \beta_0 + \beta_1 \cdot educ + \beta_2 \cdot exper  + \beta_3 \cdot expersq  + \epsilon$
- 结果：$\hat{ln(wage)} = 0.0477 +  0.0611 \cdot educ + 0.0451 \cdot exper - 0.0009 \cdot expersq $

In [7]:
# Second GMM step: re-weight the moment conditions with a matrix built from
# the residuals of the Phi = (Z'Z)^{-1} estimate (beta_gmm2).
X = np.column_stack((cons, mroz['educ'], exper, expersq))
Z = np.column_stack(
    (cons, mroz['exper'], mroz['expersq'], mroz['motheduc'], mroz['fatheduc'])
)

# First-step residuals, stored as an (n, 1) column so they broadcast over Z's columns.
u_1st = np.array(ln_wage - X @ beta_gmm2)
u_1st_reshaped = u_1st.reshape(-1, 1)

# Row i of Z_new is u_i * z_i, so Z_new' Z_new / n = (1/n) * sum_i u_i^2 z_i z_i'.
Z_new = Z * u_1st_reshaped
n_obs = len(ln_wage)
Phi_3 = np.linalg.inv(Z_new.T @ Z_new / n_obs)

# beta = (X'Z Phi Z'X)^{-1} X'Z Phi Z'y
lhs_inv = np.linalg.inv(X.T @ Z @ Phi_3 @ Z.T @ X)
beta_gmm3 = lhs_inv @ X.T @ Z @ Phi_3 @ Z.T @ ln_wage

for label, coef in zip(('cons', 'educ', 'exper', 'expersq'), beta_gmm3):
    print(f'{label}: {coef:.4f}')

cons: 0.0477
educ: 0.0611
exper: 0.0451
expersq: -0.0009


In [8]:
import scipy.stats as stats

# Hansen's J test of the over-identifying restrictions for the two-step GMM
# estimate beta_gmm3. Relies on X, Z, ln_wage, beta_gmm3 and u_1st_reshaped
# left in the kernel by the preceding cells.
n_obs = len(ln_wage)

u_gmm = ln_wage - X @ beta_gmm3   # residuals at the two-step GMM estimate
g_hat = (Z.T @ u_gmm) / n_obs     # sample mean of the moment conditions (1-D, one entry per column of Z)

# Weight matrix (1/n * sum_i u_i^2 z_i z_i')^{-1} built from the first-step
# residuals. Scaling row i of Z by u_i yields the same product as
# Z' diag(u^2) Z without materialising a dense n-by-n diagonal matrix.
Z_u = Z * u_1st_reshaped
Phi_3 = np.linalg.inv(Z_u.T @ Z_u / n_obs)

J_value = n_obs * (g_hat.T @ Phi_3 @ g_hat)   # Hansen's J statistic
df = Z.shape[1] - X.shape[1]                  # number of moment conditions minus number of parameters
# Survival function = upper-tail probability; avoids the cancellation in 1 - cdf.
p_value = stats.chi2.sf(J_value, df)

# Report the J statistic, its degrees of freedom and the p-value.
print(f"Hansen's J 统计量: {J_value:.4f}")
print(f"自由度 (df): {df}")
print(f"p-value: {p_value:.4f}")

# Interpret the test at the 5% level.
if p_value < 0.05:
    print("J 统计量显著，说明过度识别约束可能不成立，模型设定可能有误。")
else:
    print("J 统计量不显著，过度识别约束未被拒绝，GMM 设定合理。")

Hansen's J 统计量: 0.4435
自由度 (df): 1
p-value: 0.5055
J 统计量不显著，过度识别约束未被拒绝，GMM 设定合理。


In [9]:
from linearmodels.iv import IVGMM

# Cross-check the hand-rolled estimate against linearmodels' IV-GMM estimator.
# Arguments are passed positionally in the same order as before:
# ln_wage, exog, endog, instruments.
exog = np.column_stack((cons, exper, expersq))
endog = mroz['educ']
instruments = np.column_stack((mroz['motheduc'], mroz['fatheduc']))
model = IVGMM(ln_wage, exog, endog, instruments)
results1 = model.fit()
print(results1.summary)

# Show Hansen's J statistic reported by the fitted model.
j_test = results1.j_stat
print(f"Hansen's J 统计量: {j_test.stat:.4f}")
print(f"p-value: {j_test.pval:.4f}")
print(f"自由度 (df): {j_test.df}")

                          IV-GMM Estimation Summary                           
Dep. Variable:                   wage   R-squared:                      0.1354
Estimator:                     IV-GMM   Adj. R-squared:                 0.1293
No. Observations:                 428   F-statistic:                    18.655
Date:                Sat, Mar 29 2025   P-value (F-stat)                0.0003
Time:                        09:26:42   Distribution:                  chi2(3)
Cov. Estimator:                robust                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
exog.0         0.0477     0.4277     0.1114     0.9113     -0.7907      0.8860
exog.1         0.0451     0.0154     2.9269     0.00

#### 广义矩估计（拓展）：$\hat{\beta}_{gmm} = (X' Z \Phi Z' X)^{-1} X' Z \Phi Z' y$

##### 任务3（拓展）：若继续迭代会怎样？即用任务3得到的残差 $\hat{\epsilon}^{(2)}$ 重新计算权重矩阵，再估计一次
- 模型：$ln(wage) = \beta_0 + \beta_1 \cdot educ + \beta_2 \cdot exper  + \beta_3 \cdot expersq  + \epsilon$
- 结果：$\hat{ln(wage)} = 0.0473 +  0.0611 \cdot educ + 0.0451 \cdot exper - 0.0009 \cdot expersq $

In [10]:
# One further GMM iteration: rebuild the weight matrix from the residuals of
# the two-step estimate (beta_gmm3) and re-estimate the coefficients.
X = np.column_stack((cons, mroz['educ'], exper, expersq))
Z = np.column_stack(
    (cons, mroz['exper'], mroz['expersq'], mroz['motheduc'], mroz['fatheduc'])
)

# Residuals at beta_gmm3 as an (n, 1) column so they broadcast over Z's columns.
u_2ed = np.array(ln_wage - X @ beta_gmm3)
u_2ed_reshaped = u_2ed[:, np.newaxis]

# Row i of Z_new is u_i * z_i. Divide by n, the same normalisation Phi_3 uses,
# so Phi_4 is on the same scale and can be plugged into n * g' Phi g directly.
# The scalar cancels in the coefficient formula, so the estimate is unaffected.
Z_new = Z * u_2ed_reshaped
Phi_4 = np.linalg.inv(Z_new.T @ Z_new / len(ln_wage))

# beta = (X'Z Phi Z'X)^{-1} X'Z Phi Z'y
beta_gmm4 = np.linalg.inv(X.T @ Z @ Phi_4 @ Z.T @ X) @ X.T @ Z @ Phi_4 @ Z.T @ ln_wage

for label, coef in zip(('cons', 'educ', 'exper', 'expersq'), beta_gmm4):
    print(f'{label}: {coef:.4f}')

cons: 0.0473
educ: 0.0611
exper: 0.0451
expersq: -0.0009
