# Creating the data to test in STATA and Python

In [None]:
import numpy as np
import pandas as pd
from jive1 import *
from jive2 import *


#Pick a vector length:
n = 1000

#Getting our Z's and making a Z matrix:
Z = np.random.randn(n, 1)
column_of_ones = np.ones((Z.shape[0], 1))
Z = np.hstack((column_of_ones, Z))

#Parameter vectors:
α = np.array([1, 1])
β = np.array([1,2])

#Error terms:
e1 = np.random.normal(0,7,n)
e2 = np.random.normal(0,7,n)
δ = np.random.normal(0,1)
ε = 5*e1 - 5*e2 + 8

#Making our endogenous variable:
x = np.dot(Z,α) + .2*e1
X = np.column_stack((column_of_ones, x))

#Outcome vector:
Y = np.dot(X,β) + ε

#OLS benchmark:
bhat_ols = np.dot(np.linalg.inv(np.dot(X.T,X)), np.dot(X.T, Y))

#2sls comparison:
Zt_Z = np.dot(Z.T, Z)
Zt_Z_inv = np.linalg.inv(Zt_Z)
pz = np.dot(np.dot(Z, Zt_Z_inv), Z.T)
proj_x = np.dot(pz, X)
first = np.linalg.inv(np.dot(proj_x.T, X))
second = np.dot(proj_x.T, Y)
bhat_2sls = np.dot(first, second)


jive1 = JIVE1(Y,X,Z,talk=True)
jive2 = JIVE2(Y,X,Z,talk=True)

# Combine matrices into a single DataFrame
df = pd.DataFrame({
    "Y": Y,  # Outcome vector
    **{f"X{i}": X[:, i] for i in range(X.shape[1])},  # Endogenous variables
    **{f"Z{i}": Z[:, i] for i in range(Z.shape[1])}   # Instrumental variables
})

# Save the DataFrame to a CSV file
df.to_csv('data.csv', index=False)

# Print the DataFrame to verify
#print(df)


#Compare them:
print("OLS:", bhat_ols[1])
print("2SLS:", bhat_2sls[1])
print("Jive 1:", jive1['beta'])
print("Jive 2:",jive2['beta'])

Y has 1000 rows.

X has 1000 rows and 2 columns.

Z has 1000 rows and 2 columns.



LinAlgError: Singular matrix

In [4]:
import numpy as np
import pandas as pd
from jive1 import *
from jive2 import *

# Pick a vector length:
n = 1000

# Getting our Z's and making a Z matrix:
Z = np.random.randn(n, 2)  # Z now has two independent columns

# Parameter vectors:
α = np.array([1, 1])  # Matches the number of columns in Z
β = np.array([1, 2])  # Matches the number of columns in X

# Error terms:
e1 = np.random.normal(0, 7, n)
e2 = np.random.normal(0, 7, n)
δ = np.random.normal(0, 1, n)
ε = 5 * e1 - 5 * e2 + δ

# Making our endogenous variable:
x = np.dot(Z, α) + 0.2 * e1
X = x.reshape(-1, 1)  # Ensure X is a 2D array with shape (n, 1)

# Outcome vector:
Y = np.dot(X, β[:1]) + ε  # Use only the first element of β to match X's shape

# OLS benchmark:
bhat_ols = np.dot(np.linalg.inv(np.dot(X.T, X)), np.dot(X.T, Y))

# 2SLS comparison:
Zt_Z = np.dot(Z.T, Z)
Zt_Z_inv = np.linalg.inv(Zt_Z)
pz = np.dot(np.dot(Z, Zt_Z_inv), Z.T)
proj_x = np.dot(pz, X)
first = np.linalg.inv(np.dot(proj_x.T, X))
second = np.dot(proj_x.T, Y)
bhat_2sls = np.dot(first, second)

# JIVE estimators:
jive1 = JIVE1(Y, X, Z, talk=True)
jive2 = JIVE2(Y, X, Z, talk=True)

# Combine matrices into a single DataFrame
df = pd.DataFrame({
    "Y": Y,  # Outcome vector
    **{f"X{i}": X[:, i] for i in range(X.shape[1])},  # Endogenous variables
    **{f"Z{i}": Z[:, i] for i in range(Z.shape[1])}   # Instrumental variables
})

# Save the DataFrame to a CSV file
df.to_csv('data.csv', index=False)

# Print the DataFrame to verify
#print(df)

# Compare them:
print("OLS:", bhat_ols[0])  # Adjusted to match the single coefficient
print("2SLS:", bhat_2sls[0])  # Adjusted to match the single coefficient
print("Jive 1:", jive1['beta'])
print("Jive 2:", jive2['beta'])

Y has 1000 rows.

X has 1000 rows and 1 columns.

Z has 1000 rows and 2 columns.

  P = Z @ np.linalg.inv(Z.T @ Z) @ Z.T
  P = Z @ np.linalg.inv(Z.T @ Z) @ Z.T
  P = Z @ np.linalg.inv(Z.T @ Z) @ Z.T
  fit = P @ X #  Z @ np.linalg.inv(Z.T @ Z) @ Z.T @ X
  fit = P @ X #  Z @ np.linalg.inv(Z.T @ Z) @ Z.T @ X
  fit = P @ X #  Z @ np.linalg.inv(Z.T @ Z) @ Z.T @ X
Fitted values obtained.

  leverage = np.diag(Z @ np.linalg.inv(Z.T @ Z) @ Z.T) # np.diag(P)
  leverage = np.diag(Z @ np.linalg.inv(Z.T @ Z) @ Z.T) # np.diag(P)
  leverage = np.diag(Z @ np.linalg.inv(Z.T @ Z) @ Z.T) # np.diag(P)
Leverage values obtained.

Second pass complete.

JIVE1 Estimates:
[0.38671312 1.81928666]

  yfit = X @ beta_jive1
  yfit = X @ beta_jive1
  yfit = X @ beta_jive1
  F = ((np.sum((yfit-ybar)**2)) / (q-1)) / ((e.T @ e)/(N-q))
  F = ((np.sum((yfit-ybar)**2)) / (q-1)) / ((e.T @ e)/(N-q))
  F = ((np.sum((yfit-ybar)**2)) / (q-1)) / ((e.T @ e)/(N-q))
  fs_fit = Z @ np.linalg.inv(Z.T @ Z) @ Z.T @ X_fs
  fs_fit = Z

OLS: 14.82454088648348
2SLS: 1.8998111971791454
Jive 1: [0.38671312 1.81928666]
Jive 2: [0.38660219 1.81578268]


  yfit = X @ beta_jive2
  yfit = X @ beta_jive2
  yfit = X @ beta_jive2
  F = ((np.sum((yfit-ybar)**2)) / (q-1)) / ((e.T @ e)/(N-q))
  F = ((np.sum((yfit-ybar)**2)) / (q-1)) / ((e.T @ e)/(N-q))
  F = ((np.sum((yfit-ybar)**2)) / (q-1)) / ((e.T @ e)/(N-q))
  fs_fit = Z @ np.linalg.inv(Z.T @ Z) @ Z.T @ X_fs
  fs_fit = Z @ np.linalg.inv(Z.T @ Z) @ Z.T @ X_fs
  fs_fit = Z @ np.linalg.inv(Z.T @ Z) @ Z.T @ X_fs
  fs_F = ((np.sum((fs_fit - xbar) ** 2))/(q_fs-1))/((e_fs.T @ e_fs)/(N-q_fs))
  fs_F = ((np.sum((fs_fit - xbar) ** 2))/(q_fs-1))/((e_fs.T @ e_fs)/(N-q_fs))
  fs_F = ((np.sum((fs_fit - xbar) ** 2))/(q_fs-1))/((e_fs.T @ e_fs)/(N-q_fs))
