## Source Code for Appendix C.1

In [None]:
import numpy as np
from numpy.linalg import eigvals, norm, inv
import matplotlib.pyplot as plt
np.random.seed(1)  # fixed seed so the simulated diagnostics are reproducible

# Set sample size and parameters
N = 50      # rows in each simulated data set
p = 5       # columns of X
q = 10      # columns of Z
noise = 1   # multiplier on the U-driven part of E1 and E2

# Initialize lists to store the computed values (one entry per simulation)
min_eigenvalues_Z = []
min_eigenvalues_X = []
spectral_norms = []

# Number of simulations
num_simulations = 10000

for _ in range(num_simulations):
    # Step 1: Generate Instrument Z
    # Z is an (N x q) matrix
    Z = np.random.multivariate_normal(mean=np.zeros(q), cov=np.eye(q), size=N)

    # Theta is a (q x p) matrix
    Theta = np.random.multivariate_normal(mean=np.zeros(p), cov=np.eye(p), size=q)

    # beta is a (p,) vector
    beta = np.random.normal(0, 1, p)

    # Phi is a (p x p) matrix
    Phi = np.random.multivariate_normal(mean=np.zeros(p), cov=np.eye(p), size=p)

    # phi is a (p,) vector
    phi = np.random.normal(0, 1, p)

    # Step 2: Generate Endogenous X
    # U is an (N x p) matrix
    U = np.random.multivariate_normal(mean=np.zeros(p), cov=np.eye(p), size=N)  # Unobserved variable causing endogeneity

    # E2 is an (N x p) matrix
    E2 = noise * (U @ Phi) + np.random.multivariate_normal(mean=np.zeros(p), cov=np.eye(p), size=N)

    # X is influenced by Z and U; it's an (N x p) matrix
    X = (Z @ Theta) + E2

    # E1 is an (N,) vector
    E1 = noise * (U @ phi) + np.random.normal(0, 1, N)

    # Y is influenced by X and U directly; it's an (N,) vector.
    # Y (and beta, phi, E1) are not read by the diagnostics below. They are
    # still generated so the sequence of random draws, and therefore every
    # later simulation, stays the same.
    Y = (X @ beta) + E1

    # Compute eigenvalues of (Z^T Z) / N.
    # The matrix is symmetric, so use the symmetric solver: it always returns
    # real eigenvalues, sorted in ascending order, whereas the general solver
    # may return complex values for a matrix that is only symmetric up to
    # rounding.
    eigenvalues_Z = np.linalg.eigvalsh((Z.T @ Z) / N)
    min_eigen_Z = eigenvalues_Z[0]
    min_eigenvalues_Z.append(min_eigen_Z)

    # Compute projection matrix P onto the column space of Z
    ZTZ_inv = inv(Z.T @ Z)
    P = Z @ ZTZ_inv @ Z.T

    # Compute eigenvalues of (X^T P X) / N.
    # The triple product is symmetric only in exact arithmetic; averaging it
    # with its transpose removes the floating-point asymmetry before the
    # symmetric eigensolver is applied.
    projected_gram = (X.T @ P @ X) / N
    projected_gram = (projected_gram + projected_gram.T) / 2
    eigenvalues_X = np.linalg.eigvalsh(projected_gram)
    min_eigen_X = eigenvalues_X[0]
    min_eigenvalues_X.append(min_eigen_X)

    # Compute the spectral norm of (Z^T E2) / N
    spectral_norm = norm((Z.T @ E2) / N, ord=2)
    spectral_norms.append(spectral_norm)

# Calculate the cutoff points: the 0.33% lower quantile of each minimum
# eigenvalue and the 99.67% upper quantile of the spectral norm
# (presumably a ~1% level split evenly across the three quantities).
cutoff_min_eigen_Z = np.percentile(min_eigenvalues_Z, 0.33)
cutoff_min_eigen_X = np.percentile(min_eigenvalues_X, 0.33)
cutoff_spectral_norm = np.percentile(spectral_norms, 99.67)


def _plot_histogram(values, cutoff, cutoff_label, color, title, xlabel, filename):
    """Draw one 30-bin histogram with a dashed red cutoff line, save and show it.

    Parameters
    ----------
    values : sequence of float
        Simulated values to histogram.
    cutoff : float
        x-position of the vertical cutoff line.
    cutoff_label : str
        Legend entry for the cutoff line.
    color : str
        Fill colour of the bars.
    title : str
        Figure title (mathtext allowed).
    xlabel : str
        x-axis label.
    filename : str
        Path the figure is written to at 300 dpi.
    """
    plt.figure(figsize=(10, 6))
    plt.hist(values, bins=30, color=color, edgecolor='black')
    plt.axvline(cutoff, color='red', linestyle='dashed', linewidth=2, label=cutoff_label)
    plt.title(title, fontsize=24)
    plt.xlabel(xlabel, fontsize=24)
    plt.ylabel('Frequency', fontsize=24)
    plt.legend(fontsize=18)
    plt.tick_params(axis='both', which='major', labelsize=18)
    plt.grid(True)
    plt.savefig(filename, dpi=300, bbox_inches='tight')
    plt.show()


# Plot histograms of the computed values with their cutoff lines
# Histogram of minimum eigenvalues of (Z^T Z) / N
_plot_histogram(
    min_eigenvalues_Z,
    cutoff_min_eigen_Z,
    f'0.33% Quantile: {cutoff_min_eigen_Z:.4f}',
    'skyblue',
    r'$\lambda_\min(\frac{Z^T Z}{N})$',
    'Minimum Eigenvalue',
    "figures/sampcond1.png",
)

# Histogram of minimum eigenvalues of (X^T P X) / N
# (the stray ')' that used to sit inside the numerator has been removed)
_plot_histogram(
    min_eigenvalues_X,
    cutoff_min_eigen_X,
    f'0.33% Quantile: {cutoff_min_eigen_X:.4f}',
    'lightgreen',
    r'$\lambda_\min(\frac{X^T P X}{N})$',
    'Minimum Eigenvalue',
    "figures/sampcond2.png",
)

# Histogram of spectral norms of (Z^T E2) / N
_plot_histogram(
    spectral_norms,
    cutoff_spectral_norm,
    f'99.67% Quantile: {cutoff_spectral_norm:.4f}',
    'salmon',
    r'$\|\frac{Z^T \mathcal{E}_2}{N}\|$',
    'Spectral Norm',
    "figures/sampcond3.png",
)


## Source Code for Appendix C.2

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.stats import multivariate_normal as mvn
from statsmodels.api import OLS
import matplotlib.pyplot as plt

# Set seed for reproducibility
np.random.seed(1)

# Sample size and dimensions
N = 50            # training rows
p = 5             # columns of X
q = 10            # columns of Z
noise = 1         # multiplier on the U-driven terms in X and Y
iterations = 100  # gradient steps used by the convergence experiments

# Step 1: Generate Instrument Z
Z = mvn.rvs(mean=np.zeros(q), cov=np.eye(q), size=N)
Theta = mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=q)
beta = norm.rvs(0, 1, size=p)
Phi = mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=p)
phi = norm.rvs(0, 1, size=p)

# Step 2: Generate Endogenous X
U = mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=N)
X = Z @ Theta + noise * U @ Phi + mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=N)

# Generate test data
# NOTE(review): U_test is drawn but never enters X_test or Y_test, and the
# test set is not used again in this cell. The draws are kept because they
# presumably advance the same global random stream that Y is drawn from
# below, so removing them would change Y. Confirm before deleting.
N_test = 100
Z_test = mvn.rvs(mean=np.zeros(q), cov=np.eye(q), size=N_test)
U_test = mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=N_test)
X_test = Z_test @ Theta + mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=N_test)

# Step 3: Generate Outcome Y
Y = X @ beta + noise * U @ phi + norm.rvs(0, 1, size=N)
Y_test = X_test @ beta + norm.rvs(0, 1, size=N_test)

# Prepare the data for IV regression: columns are Y, X0..X{p-1}, Z0..Z{q-1}
x_cols = [f'X{i}' for i in range(p)]
z_cols = [f'Z{i}' for i in range(q)]
data = pd.DataFrame(np.hstack([Y.reshape(-1, 1), X, Z]), columns=['Y'] + x_cols + z_cols)
data_test = pd.DataFrame(np.hstack([Y_test.reshape(-1, 1), X_test, Z_test]), columns=['Y'] + x_cols + z_cols)

# Perform IV regression: Y ~ X, instrumented by Z
# First stage: regress every X column on Z (no intercept is added).
inner_fit = OLS(data[x_cols], data[z_cols]).fit()
X_hat = Z @ inner_fit.params
for i in range(p):
    data[f'X_hat{i}'] = X_hat.iloc[:, i]

# Second stage: regress Y on the first-stage fitted values.
outer_fit = OLS(data['Y'], data[[f'X_hat{i}' for i in range(p)]]).fit()
beta_2sls = outer_fit.params.values
# The reported quantity is the Euclidean distance between the 2SLS estimate
# and the true beta (no averaging, no squaring), so it is labelled as an L2
# error rather than an MSE.
print(f"OLS_IV beta L2 error: {np.sqrt(np.sum((beta_2sls - beta) ** 2))}")

# Contraction factors (gamma, kappa and their maximum) shown in the plot labels
def compute_lambda(alpha, eta, Theta_hat_true, Z, q, p):
    """Return ``(Lambda, gamma, kappa)`` for step sizes ``alpha`` and ``eta``.

    ``gamma`` is the largest absolute eigenvalue of
    ``I_p - alpha * Theta_hat_true^T Z^T Z Theta_hat_true`` and ``kappa`` the
    largest absolute eigenvalue of ``I_q - eta * Z^T Z``; ``Lambda`` is the
    larger of the two.  ``q`` and ``p`` only set the sizes of the identity
    matrices.
    """
    def largest_abs_eigenvalue(matrix):
        return np.abs(np.linalg.eigvals(matrix)).max()

    beta_step_matrix = np.eye(p) - alpha * Theta_hat_true.T @ Z.T @ Z @ Theta_hat_true
    theta_step_matrix = np.eye(q) - eta * Z.T @ Z

    gamma = largest_abs_eigenvalue(beta_step_matrix)
    kappa = largest_abs_eigenvalue(theta_step_matrix)

    Lambda = kappa if kappa > gamma else gamma
    return Lambda, gamma, kappa

# First-stage least-squares coefficients, used for the contraction factors
Theta_hat_true = np.array(inner_fit.params)


def _gd_distance_path(alpha, eta, Z, X, Y, target, n_iter):
    """Run the coupled gradient iteration and track the distance to ``target``.

    Starting from zero, each step first updates ``beta_hat`` with step size
    ``alpha`` using the current ``Theta_hat``, then updates ``Theta_hat`` with
    step size ``eta``.  Returns a list of length ``n_iter`` holding the
    Euclidean distance between ``beta_hat`` and ``target`` after every step.
    """
    beta_hat = np.zeros(X.shape[1])
    Theta_hat = np.zeros((Z.shape[1], X.shape[1]))
    distances = []
    for _ in range(n_iter):
        # Order matters: the beta step must see Theta_hat from before this
        # iteration's Theta update.
        beta_hat -= alpha * Theta_hat.T @ Z.T @ (Z @ Theta_hat @ beta_hat - Y)
        Theta_hat -= eta * Z.T @ (Z @ Theta_hat - X)
        distances.append(np.sqrt(np.sum((beta_hat - target) ** 2)))
    return distances


# Gradient method with different learning rates for first figure
# (alpha fixed, eta varied)
alpha_1 = 0.0012
diffs_dict_1 = {}
learning_rates_1 = [0.002, 0.01, 0.02, 0.022]
for eta in learning_rates_1:
    diffs = _gd_distance_path(alpha_1, eta, Z, X, Y, beta_2sls, iterations)
    Lambda, gamma, kappa = compute_lambda(alpha_1, eta, Theta_hat_true, Z, q, p)
    # Backslashes are doubled so the f-string contains no invalid escape
    # sequences; the resulting label text is unchanged.
    label = f'$\\eta$={eta}, $\\Lambda$=max({gamma:.2f}, {kappa:.2f})={Lambda:.2f}'
    diffs_dict_1[label] = diffs

# Gradient method with different learning rates for second figure
# (eta fixed, alpha varied)
eta_2 = 0.01
diffs_dict_2 = {}
learning_rates_2 = [0.0005, 0.001, 0.0015, 0.002]
for alpha in learning_rates_2:
    diffs = _gd_distance_path(alpha, eta_2, Z, X, Y, beta_2sls, iterations)
    Lambda, gamma, kappa = compute_lambda(alpha, eta_2, Theta_hat_true, Z, q, p)
    label = f'$\\alpha$={alpha}, $\\Lambda$=max({gamma:.2f}, {kappa:.2f})={Lambda:.2f}'
    diffs_dict_2[label] = diffs

def _draw_convergence(curves, title, outfile):
    """Plot every labelled distance curve in ``curves``, save to ``outfile`` and show.

    ``curves`` maps legend label -> per-iteration distances.  Returns the
    ``(figure, axes)`` pair that was created.
    """
    fig, ax = plt.subplots(figsize=(14, 10))

    for curve_label, distances in curves.items():
        ax.plot(distances, label=curve_label, linewidth=3)
    ax.set_xlabel('Iteration', fontsize=28)
    ax.set_ylabel(r'$||\hat{\beta}^{(t)} - \hat{\beta}_{2sls}||$', fontsize=28)
    ax.set_title(title, fontsize=28, pad=20)
    ax.legend(loc='upper right', fontsize=24)
    ax.grid(True)
    ax.tick_params(axis='both', which='major', labelsize=22)
    plt.ylim(0, 2.7)

    plt.savefig(outfile, dpi=300, bbox_inches='tight')
    plt.show()
    return fig, ax


# First figure: alpha held fixed, one curve per eta
fig1, ax1 = _draw_convergence(
    diffs_dict_1,
    f'Convergence of IV Estimate with Gradient Method ($\\alpha$={alpha_1})',
    "figures/gradconv1.png",
)

# Second figure: eta held fixed, one curve per alpha
fig2, ax2 = _draw_convergence(
    diffs_dict_2,
    f'Convergence of IV Estimate with Gradient Method ($\\eta$={eta_2})',
    "figures/gradconv2.png",
)



In [None]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.stats import multivariate_normal as mvn
from statsmodels.api import OLS
import matplotlib.pyplot as plt

# Seed NumPy's global RNG so repeated runs of this cell give the same draws
np.random.seed(1)

# Experiment configuration
sample_sizes = [50, 100, 150]  # sample sizes N to compare
p, q = 5, 10                   # dimensions passed to generate_data
noise = 1
# Fixed step sizes left over from an earlier version (not used):
# alpha = 0.00002
# eta = 0.0008
num_simulations = 100          # simulated data sets per (N, iterations) pair

# Iteration budgets evaluated for the GD estimator: 1, 6, 11, ..., 96
iterations_list = range(1, 101, 5)

# Simulate one data set from the linear IV model
def generate_data(N, q, p, noise):
    """Draw one simulated data set and return ``(Y, X, Z, beta, Theta)``.

    All random quantities are standard normal.  ``Z`` has ``N`` rows of
    ``q``-dimensional draws, ``Theta`` has ``q`` rows of ``p``-dimensional
    draws and ``beta`` has ``p`` entries.  An unobserved ``U`` (``N`` rows of
    ``p``-dimensional draws), scaled by ``noise``, enters both ``X`` (through
    ``Phi``) and ``Y`` (through ``phi``).  ``Phi``, ``phi`` and ``U`` are not
    returned.
    """
    def iid_normal(rows, dim):
        # `rows` independent draws from a `dim`-dimensional standard normal
        return mvn.rvs(mean=np.zeros(dim), cov=np.eye(dim), size=rows)

    # The order of the draws below is part of the behaviour: it fixes which
    # random numbers each quantity receives for a given seed.
    Z = iid_normal(N, q)
    Theta = iid_normal(q, p)
    beta = norm.rvs(0, 1, size=p)
    Phi = iid_normal(p, p)
    phi = norm.rvs(0, 1, size=p)
    U = iid_normal(N, p)

    scaled_U = noise * U
    X = Z @ Theta + scaled_U @ Phi + iid_normal(N, p)
    Y = X @ beta + scaled_U @ phi + norm.rvs(0, 1, size=N)
    return Y, X, Z, beta, Theta

def compute_lambda(alpha, eta, Theta_hat_true, Z, q, p):
    """Compute the contraction factors for step sizes ``alpha`` and ``eta``.

    Returns ``(Lambda, gamma, kappa)`` where ``gamma`` is the largest absolute
    eigenvalue of ``I_p - alpha * Theta_hat_true^T Z^T Z Theta_hat_true``,
    ``kappa`` is the largest absolute eigenvalue of ``I_q - eta * Z^T Z`` and
    ``Lambda`` is the larger of the two.  ``q`` and ``p`` are used only as the
    sizes of the identity matrices.
    """
    iteration_matrices = (
        np.eye(p) - alpha * Theta_hat_true.T @ Z.T @ Z @ Theta_hat_true,
        np.eye(q) - eta * Z.T @ Z,
    )
    gamma, kappa = (
        np.abs(np.linalg.eigvals(matrix)).max() for matrix in iteration_matrices
    )

    Lambda = kappa if kappa > gamma else gamma
    return Lambda, gamma, kappa

# Store the results: gd_results[N][iterations] is a one-element list holding
# the GD error averaged over num_simulations data sets.
gd_results = {N: {iterations: [] for iterations in iterations_list} for N in sample_sizes}
# Never populated in this cell; kept so the names stay defined.
ols_results = []
iv_results = []

# OLS and 2SLS baselines are computed for N=150 only. They start as NaN so
# they are always scalars, even if 150 is not among sample_sizes.
ols_results_N150 = np.nan
iv_results_N150 = np.nan

for N in sample_sizes:
    # The baselines do not depend on the GD iteration budget, so their errors
    # are pooled over every data set simulated for this N instead of being
    # reset (and all but the last batch discarded) for each budget.
    ols_final_diffs = []
    iv_final_diffs = []

    for iterations in iterations_list:
        gd_final_diffs = []
        for _ in range(num_simulations):
            # Generate data
            Y, X, Z, beta, Theta = generate_data(N, q, p, noise)

            # First-stage least-squares fit of X on Z. It is computed once
            # per data set and reused for the step size and the 2SLS baseline.
            Theta_hat_true = np.array(OLS(X, Z).fit().params)

            # Step sizes: reciprocals of the squared largest singular values
            # of Z @ Theta_hat_true (for beta) and of Z (for Theta).
            alpha = 1 / (np.linalg.svd(Z @ Theta_hat_true, compute_uv=False)[0] ** 2)
            eta = 1 / (np.linalg.svd(Z, compute_uv=False)[0] ** 2)

            # GD estimator, started from zero and run for `iterations` steps
            beta_gd_hat = np.zeros(p)
            Theta_hat = np.zeros((q, p))
            for _step in range(iterations):
                # The beta step uses Theta_hat from before this step's update.
                beta_gd_hat -= alpha * Theta_hat.T @ Z.T @ (Z @ Theta_hat @ beta_gd_hat - Y)
                Theta_hat -= eta * Z.T @ (Z @ Theta_hat - X)

            gd_final_diffs.append(np.sqrt(np.sum((beta_gd_hat - beta) ** 2)))

            if N == 150:
                # OLS Estimator
                beta_ols_hat = OLS(Y, X).fit().params
                ols_final_diffs.append(np.sqrt(np.sum((beta_ols_hat - beta) ** 2)))

                # IV (2SLS) Estimator: regress Y on the first-stage fitted values
                X_hat = Z @ Theta_hat_true
                beta_iv_hat = OLS(Y, X_hat).fit().params
                iv_final_diffs.append(np.sqrt(np.sum((beta_iv_hat - beta) ** 2)))

        gd_results[N][iterations].append(np.mean(gd_final_diffs))

    if N == 150:
        ols_results_N150 = np.mean(ols_final_diffs)
        iv_results_N150 = np.mean(iv_final_diffs)

# Draw the averaged estimation error against the GD iteration budget
fig, ax = plt.subplots(figsize=(14, 10))
colors = ['b', 'g', 'r']
shared_line_kwargs = dict(linewidth=3, markersize=10)

# One GD curve per sample size
for idx, N in enumerate(sample_sizes):
    mean_errors = [np.mean(gd_results[N][it]) for it in iterations_list]
    ax.plot(iterations_list, mean_errors, marker='o', linestyle='-',
            label=f'GD Estimator (N={N})', color=colors[idx], **shared_line_kwargs)

# The OLS and 2SLS values are single numbers, so each is repeated across the
# x-axis to appear as a horizontal reference line.
n_points = len(iterations_list)
ax.plot(iterations_list, [ols_results_N150] * n_points, marker='s', linestyle='--',
        label='OLS Estimator (N=150)', color='c', **shared_line_kwargs)
ax.plot(iterations_list, [iv_results_N150] * n_points, marker='^', linestyle='-.',
        label='2SLS Estimator (N=150)', color='m', **shared_line_kwargs)

# Axis labels, title, legend and grid
ax.set_xlabel('Iterations', fontsize=28)
ax.set_ylabel(r'$||\hat{\beta}^{(t)} - \beta||$', fontsize=28)
ax.set_title('Bias of Estimators across Iterations', fontsize=28, pad=20)
ax.legend(loc='upper right', fontsize=24)
ax.grid(True)
ax.tick_params(axis='both', which='major', labelsize=22)

# Write the figure to disk, then display it
plt.savefig("figures/bias_vs_iteration.png", dpi=300, bbox_inches='tight')
plt.show()
