In [1]:
import numpy as np
import statsmodels.api as sm
import linearmodels as lm

np.random.seed(94305)

from torchonometrics.gmm import GMMEstimator  # noqa: E402

Single endogenous variable DGP with one or more instruments (one per entry of $\pi$), with varying instrument strength ($\pi$) and degree of endogeneity ($\rho$).


In [2]:
def dgp(n=100_000, beta=np.array([-0.5, 1.2]), rho=0.7, pi=np.array([0.5, -0.1])):
    """Simulate a linear model with one endogenous regressor and instruments.

    Draws come from the global ``np.random`` state, so results are only
    reproducible when the caller seeds it beforehand.

    Parameters
    ----------
    n : int
        Number of observations.
    beta : ndarray of shape (2,)
        Coefficients on the intercept and on the regressor ``x``.
    rho : float
        Loading of the outcome shock on ``x``; ``rho = 0`` makes ``x`` exogenous.
    pi : ndarray of shape (k,)
        First-stage coefficients; its length sets the number of instruments.

    Returns
    -------
    y : ndarray of shape (n,)
        Outcome.
    X : ndarray of shape (n, 2)
        Design matrix: a column of ones followed by ``x``.
    z : ndarray of shape (n, k)
        Instruments.
    """
    n_instruments = pi.shape[0]

    # The order of the draws below determines the simulated data for a given seed.
    shock = np.random.normal(0, 1, n)
    z = np.random.normal(0, 1, (n, n_instruments))
    first_stage_noise = np.random.normal(0, 1, n)

    # x shares `shock` with the outcome equation, which is the source of endogeneity.
    x = z @ pi + shock * rho + first_stage_noise
    X = np.column_stack((np.ones(n), x))

    # Extra noise enters only where x > 0, so the error variance depends on x.
    outcome_noise = np.random.normal(0, 1, n)
    y = X @ beta + shock + (x > 0) * outcome_noise
    return y, X, z

# No Endogeneity 

OLS and IV with X as its own instrument should produce the same estimates.

In [3]:
# Exogenous design: zero first-stage coefficient and no loading of the
# outcome shock on x.
y, X, z = dgp(rho=0, pi=np.array([0]))

# OLS benchmark with HC2 standard errors.
ols_exog = sm.OLS(y, X).fit(cov_type="HC2")
print(ols_exog.summary().tables[1])

                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.4994      0.004   -128.506      0.000      -0.507      -0.492
x1             1.1953      0.004    308.588      0.000       1.188       1.203


### GMM using Scipy Minimization

In [4]:
%%time
def ψ(z, y, x, beta):
    """Per-observation moments: each instrument column times the residual."""
    residuals = y - x @ beta
    return z * residuals[:, np.newaxis]


# Intercept plus the regressor itself: X serves as its own instrument here.
self_instruments = np.c_[np.ones(z.shape[0]), X[:, 1]]

gmm_scipy = GMMEstimator(ψ)
gmm_scipy.fit(self_instruments, y, X)
gmm_scipy.summary()

CPU times: user 81.6 ms, sys: 11.5 ms, total: 93.1 ms
Wall time: 92.1 ms


Unnamed: 0,coef,std err,t,p-value,[0.025,0.975]
0,-0.4994,0.0039,-128.5068,0.0,-0.507,-0.4918
1,1.1953,0.0039,308.5945,0.0,1.1877,1.2029


### GMM using Torch Minimization

In [5]:
%%time
def moment_cond(z, y, x, beta):
    """Per-observation moments for the torch backend: instruments times residual."""
    # unsqueeze(-1) turns the residual vector into a column so it broadcasts
    # across the instrument columns.
    return z * (y - x @ beta).unsqueeze(-1)


# Intercept plus the regressor itself: X serves as its own instrument here.
self_instruments = np.c_[np.ones(z.shape[0]), X[:, 1]]

gmm = GMMEstimator(moment_cond, backend="torch")
gmm.fit(self_instruments, y, X)
gmm.summary()

CPU times: user 391 ms, sys: 97 ms, total: 488 ms
Wall time: 673 ms


Unnamed: 0,coef,std err,t,p-value,[0.025,0.975]
0,-0.4994,0.0039,-128.5069,0.0,-0.507,-0.4918
1,1.1953,0.0039,308.5946,0.0,1.1877,1.2029


Faster optimizer: the default `fit_method` is limited-memory BFGS (L-BFGS), but plain BFGS is faster for small problems like this one.

In [6]:
%%time
# Reuses `moment_cond` from the previous torch cell (In [5]) instead of
# redefining an identical function; only the optimizer changes.
gmm = GMMEstimator(moment_cond, backend="torch")

# Same self-instrumented setup as before, now fitted with plain BFGS.
gmm.fit(np.c_[np.ones(z.shape[0]), X[:, 1]], y, X, fit_method="bfgs")
gmm.summary()

CPU times: user 65.5 ms, sys: 33.4 ms, total: 98.9 ms
Wall time: 67.1 ms


Unnamed: 0,coef,std err,t,p-value,[0.025,0.975]
0,-0.4994,0.0039,-128.5068,0.0,-0.507,-0.4918
1,1.1953,0.0039,308.5945,0.0,1.1877,1.2029


Identical estimates and standard errors.

# With Endogeneity 

Over-identified: 2 instruments and 1 endogenous variable.

OLS is inconsistent. Also confirm `GMMEstimator` returns the same answer as IV2SLS.

In [7]:
# Default DGP: two instruments and rho = 0.7, so x is endogenous.
y, X, z = dgp()

# Naive OLS for comparison. fit() is called without cov_type here (unlike the
# HC2 fit in the no-endogeneity section); the point of this cell is the
# slope estimate, which should be compared with the true value of 1.2.
ols_naive = sm.OLS(y, X).fit()
print(ols_naive.summary().tables[1])

                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.4981      0.003   -143.251      0.000      -0.505      -0.491
x1             1.5971      0.003    608.599      0.000       1.592       1.602


In [8]:
%%time
# Reuses the moment function `ψ` defined in the no-endogeneity section
# (In [4]) instead of rebinding an identical lambda.
gmm = GMMEstimator(ψ)

# Instrument matrix: intercept plus the excluded instruments z.
gmm.fit(np.c_[np.ones(z.shape[0]), z], y, X)
gmm.summary()

CPU times: user 101 ms, sys: 14.5 ms, total: 116 ms
Wall time: 113 ms


Unnamed: 0,coef,std err,t,p-value,[0.025,0.975]
0,-0.4983,0.0039,-129.0528,0.0,-0.5059,-0.4908
1,1.1964,0.0076,157.7119,0.0,1.1815,1.2113


In [9]:
%%time
# Reuses `moment_cond` from the no-endogeneity section (In [5]) instead of
# redefining an identical function.
gmm = GMMEstimator(moment_cond, backend="torch")

# Instrument matrix: intercept plus the excluded instruments z.
gmm.fit(np.c_[np.ones(z.shape[0]), z], y, X)
gmm.summary()

CPU times: user 80.7 ms, sys: 26.7 ms, total: 107 ms
Wall time: 73.3 ms


Unnamed: 0,coef,std err,t,p-value,[0.025,0.975]
0,-0.4983,0.0039,-129.0516,0.0,-0.5059,-0.4907
1,1.1964,0.0076,157.7132,0.0,1.1815,1.2113


Faster optimizer in torch

In [10]:
%%time
# Reuses `moment_cond` from the no-endogeneity section (In [5]) instead of
# redefining an identical function; only the optimizer changes.
gmm = GMMEstimator(moment_cond, backend="torch")

# Instrument matrix: intercept plus the excluded instruments z, fitted with plain BFGS.
gmm.fit(np.c_[np.ones(z.shape[0]), z], y, X, fit_method="bfgs")
gmm.summary()

CPU times: user 95.1 ms, sys: 30.1 ms, total: 125 ms
Wall time: 87.3 ms


Unnamed: 0,coef,std err,t,p-value,[0.025,0.975]
0,-0.4983,0.0039,-129.0435,0.0,-0.5058,-0.4907
1,1.1964,0.0076,157.7158,0.0,1.1816,1.2113


Confirm with linearmodels

In [11]:
# Same instrument matrix as the GMM fits: intercept plus z.
iv_instruments = np.c_[np.ones(z.shape[0]), z]

# The whole design matrix X (including its constant column) is passed as
# `endog`, with no separate exogenous block, mirroring the GMM setup above.
iv_fit = lm.iv.model.IV2SLS(
    dependent=y,
    exog=None,
    endog=X,
    instruments=iv_instruments,
).fit()
iv_fit.summary.tables[1]

  return vecs @ np.diag(1 / np.sqrt(vals)) @ vecs.T


0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
endog.0,-0.4983,0.0039,-129.06,0.0000,-0.5059,-0.4907
endog.1,1.1965,0.0076,157.72,0.0000,1.1816,1.2113


Identical estimates and standard errors, up to rounding in the fourth decimal place.