In [1]:
from mliv.inference import Vanilla2SLS
from mliv.utils import CausalDataset

In [2]:
import pandas as pd

# Read the Stata extract, swap 'v' for 'x' in every column label, relabel the
# columns later used as instrument (z1), treatment (t1) and outcome (y1), and
# drop every row that has a missing value.
df = (
    pd.read_stata("angrist.dta")
    .rename(columns=lambda label: label.replace('v', 'x'))
    .rename(columns={'x18': 'z1', 'x4': 't1', 'x9': 'y1'})
    .dropna()
)

In [3]:
# Display the cleaned frame; the notebook repr elides the middle rows and columns.
df

Unnamed: 0,x1,x2,x3,t1,x5,x6,x7,x8,y1,x10,...,z1,x19,x20,x21,x22,x23,x24,x25,x26,x27
0,40,40.50,1,11,0,0,13,8.955383,5.023558,1,...,3,0,1,1,10.0,5,0,0,0,1929
1,41,41.00,1,12,0,0,14,8.993365,5.061540,1,...,1,0,0,1,10.0,5,0,0,0,1929
2,41,41.50,1,12,0,0,14,9.310141,5.378315,1,...,3,0,0,1,10.0,5,0,0,0,1928
3,46,46.25,1,12,0,0,14,9.110465,5.178639,1,...,4,0,0,1,10.0,5,0,0,0,1923
4,46,46.00,1,16,0,0,18,10.310601,6.378776,1,...,1,0,0,1,10.0,5,0,0,1,1924
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1063629,30,1930.50,1,12,0,0,15,9.432283,5.481040,0,...,3,1,0,0,49.0,52,0,0,0,49
1063630,34,1934.25,2,16,0,0,18,9.616138,5.664895,1,...,4,1,0,0,49.0,52,0,0,0,45
1063631,33,1933.50,2,18,0,0,20,9.852457,6.163578,1,...,3,1,0,0,49.0,40,0,0,0,46
1063632,35,1935.00,2,12,0,0,14,9.473089,5.521845,1,...,1,1,0,0,49.0,52,0,0,0,45


In [4]:
from sklearn.model_selection import train_test_split
# Keep only the outcome, treatment, instrument and the three controls used below.
df = df.loc[:, ['y1', 't1', 'z1', "x1", "x10", "x19"]]

# 70% goes to training; the held-out 30% is then halved into validation and test.
train_df, temp_df = train_test_split(df, random_state=42, test_size=0.3)
val_df, test_df = train_test_split(temp_df, random_state=42, test_size=0.5)

# Write each split to the working directory without the index column.
for frame, filename in (
    (train_df, "train.csv"),
    (val_df, "valid.csv"),
    (test_df, "test.csv"),
):
    frame.to_csv(filename, index=False)

In [5]:
# Confirm which columns remain after the selection above.
df.columns

Index(['y1', 't1', 'z1', 'x1', 'x10', 'x19'], dtype='object')

In [6]:
from linearmodels.iv import IV2SLS

# Column roles for the linear 2SLS specification.
y = "y1"                   # passed as `dependent`
treatment = "t1"           # passed as `endog`
Z = ["z1"]                 # passed as `instruments`
X = ['x1', 'x10', 'x19']   # passed as `exog`

# Wider control list kept for reference (not used by the model below):
# X = ['x1', 'x2', 'x3', 'x5', 'x6', 'x7', 'x8', 'x10', 'x11',
#      'x12', 'x13', 'x14', 'x15', 'x16', 'x17', 'x19', 'x20', 'x21',
#      'x22', 'x23', 'x24', 'x25', 'x26', 'x27']

# NOTE(review): `exog` contains only the columns listed in X, so no intercept
# column is passed explicitly -- confirm that omitting a constant is intended.
model = IV2SLS(
    dependent=df[y],
    exog=df[X],
    endog=df[treatment],
    instruments=df[Z],
)

# Estimate with the default fit options and print the summary table.
results = model.fit()
print(results.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:                     y1   R-squared:                      0.9680
Estimator:                    IV-2SLS   Adj. R-squared:                 0.9680
No. Observations:              899225   F-statistic:                 2.721e+07
Date:                Tue, Feb 25 2025   P-value (F-stat)                0.0000
Time:                        19:14:03   Distribution:                  chi2(4)
Cov. Estimator:                robust                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
x1             0.0308     0.0004     86.257     0.0000      0.0301      0.0315
x10            0.2504     0.0035     70.814     0.00

In [7]:
import warnings

warnings.filterwarnings('ignore')

In [None]:
import torch
# Report whether PyTorch can see a CUDA device, the CUDA version recorded in
# torch.version, and the name of device 0 when one is available.
print(f"CUDA Available: {torch.cuda.is_available()}")
print(f"CUDA Version: {torch.version.cuda}")
if torch.cuda.is_available():
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
else:
    print("GPU Name: No GPU Found")

CUDA Available: True
CUDA Version: 11.8
GPU Name: NVIDIA GeForce RTX 3080 Ti


In [9]:
from mliv.inference import NN2SLS

# Load one dataset from the synthetic Demand directory: the path printed by the
# gen_data() cell further down, plus the '1/' sub-folder.
# NOTE(review): gen_data() appears later in the notebook, so on a fresh kernel
# this directory must already exist on disk for this cell to run.
data = CausalDataset('./Data/Demand/0.5_1.0_0.0_10000/1/')

# Fit NN2SLS with its default configuration.
model = NN2SLS()
model.fit(data)
# Keep the predictions on the training split in ITE, and the first element of
# the pair returned by model.ATE in ATE (the second element is discarded).
ITE = model.predict(data.train)
ATE,_ = model.ATE(data.train)

Run -1-th experiment for NN2SLS. 
<class 'torch.Tensor'>
Epoch 0 ended: train - 554.5031, valid - 549.3373.
<class 'torch.Tensor'>
Epoch 5 ended: train - 3.8782, valid - 3.8856.
<class 'torch.Tensor'>
Epoch 10 ended: train - 3.0731, valid - 3.0676.
<class 'torch.Tensor'>
Epoch 15 ended: train - 2.6925, valid - 2.7056.
<class 'torch.Tensor'>
Epoch 20 ended: train - 2.4826, valid - 2.5081.
<class 'torch.Tensor'>
Epoch 25 ended: train - 2.3603, valid - 2.3918.
<class 'torch.Tensor'>
Epoch 30 ended: train - 2.2420, valid - 2.2802.
<class 'torch.Tensor'>
Epoch 35 ended: train - 2.1887, valid - 2.2408.
<class 'torch.Tensor'>
Epoch 40 ended: train - 2.1281, valid - 2.1893.
<class 'torch.Tensor'>
Epoch 45 ended: train - 2.1169, valid - 2.1862.
<class 'torch.Tensor'>
Epoch 50 ended: train - 2.0954, valid - 2.1758.
<class 'torch.Tensor'>
Epoch 55 ended: train - 2.0180, valid - 2.1039.
<class 'torch.Tensor'>
Epoch 60 ended: train - 1.9158, valid - 1.9962.
<class 'torch.Tensor'>
Epoch 65 ended: tr

In [29]:
from mliv.utils.loaddata import CausalDataset
from mliv.inference import NN2SLS

# Load data using CausalDataset (must have train.csv, valid.csv, test.csv)
# NOTE(review): this is an absolute, user-specific Windows path, so the cell
# only runs on that machine. The split cell writes train.csv / valid.csv /
# test.csv to the working directory -- confirm those are the files found here.
data = CausalDataset('C:\\Users\\jxiong3\\Documents\\ecma-final-project\\test_folder\\')

# Move data to CPU or CUDA (if available)
# Relies on `torch` having been imported by an earlier cell; this cell does
# not import it itself.
data.to('cuda' if torch.cuda.is_available() else 'cpu')

# Convert to PyTorch tensors
data.tensor()


In [None]:
# Initialize and train NN2SLS with its default configuration on `data`.
# NOTE(review): the recorded log shows train/valid loss staying around 9.7-9.8
# from epoch 0 through epoch 35 -- check input scaling and the training
# settings before trusting the estimate printed below.
model = NN2SLS()
model.fit(data)

# Predict Individual Treatment Effect (ITE)
# (stores whatever model.predict returns for the training split)
ITE = model.predict(data.train)

# Estimate the Average Treatment Effect (ATE)
# (first element of the returned pair; the second is discarded)
ATE, _ = model.ATE(data.train)

print(f"Estimated ATE: {ATE}")


Run -1-th experiment for NN2SLS. 
<class 'torch.Tensor'>
Epoch 0 ended: train - 9.8133, valid - 9.8157.
<class 'torch.Tensor'>
Epoch 5 ended: train - 9.7377, valid - 9.7320.
<class 'torch.Tensor'>
Epoch 10 ended: train - 9.7563, valid - 9.7491.
<class 'torch.Tensor'>
Epoch 15 ended: train - 9.6978, valid - 9.6926.
<class 'torch.Tensor'>
Epoch 20 ended: train - 9.7203, valid - 9.7156.
<class 'torch.Tensor'>
Epoch 25 ended: train - 9.7151, valid - 9.7110.
<class 'torch.Tensor'>
Epoch 30 ended: train - 9.7047, valid - 9.7003.
<class 'torch.Tensor'>
Epoch 35 ended: train - 9.6974, valid - 9.6928.


In [22]:
from mliv.utils import CausalDataset

# NOTE(review): this cell does not run as written. The recorded output is
# "NameError: name 'cat' is not defined", and train_data / val_data / test_data
# are not defined in any visible cell (the split cell creates train_df /
# val_df / test_df). Every other cell builds CausalDataset from a directory
# path, so confirm that the y/t/z/x keyword form used here is supported at all.

# Training split: outcome, treatment and instrument reshaped to one column
# each; the three controls are combined with `cat`.
train_causal = CausalDataset(
    y=train_data["y1"].reshape(-1, 1),
    t=train_data["t1"].reshape(-1, 1),
    z=train_data["z1"].reshape(-1, 1),
    x=cat([train_data["x1"], train_data["x10"], train_data["x19"]])
)

# Validation split, built the same way.
val_causal = CausalDataset(
    y=val_data["y1"].reshape(-1, 1),
    t=val_data["t1"].reshape(-1, 1),
    z=val_data["z1"].reshape(-1, 1),
    x=cat([val_data["x1"], val_data["x10"], val_data["x19"]])
)

# Test split, built the same way.
test_causal = CausalDataset(
    y=test_data["y1"].reshape(-1, 1),
    t=test_data["t1"].reshape(-1, 1),
    z=test_data["z1"].reshape(-1, 1),
    x=cat([test_data["x1"], test_data["x10"], test_data["x19"]])
)


NameError: name 'cat' is not defined

In [16]:
# Display the ATE value currently bound in the kernel (set by whichever
# estimation cell ran last).
ATE

np.float32(-12.550736)

In [None]:
from mliv.inference import Poly2SLS
from mliv.inference import NN2SLS
from mliv.inference import OneSIV
from mliv.inference import KernelIV
from mliv.inference import DualIV
from mliv.inference import DFL
from mliv.inference import AGMM
from mliv.inference import DeepGMM
from mliv.inference import DFIV
# from mliv.inference import DeepIV           # Tensorflow & keras

# Fit each estimator class on the current `data` with reduced settings
# (config 'num' = 100, 'epochs' = 10) and print the ATE it reports on the
# training split. Vanilla2SLS comes from the notebook's first import cell.
# NOTE(review): the recorded run stops with "AxisError: axis 1 is out of bounds
# for array of dimension 1" inside Poly2SLS, so NN2SLS was never reached.
for mod in [Vanilla2SLS,Poly2SLS,NN2SLS]:
    model = mod()
    model.config['num'] = 100
    model.config['epochs'] = 10
    model.fit(data)
    ATE,_ = model.ATE(data.train)
    print(ATE)

Run -1-th experiment for Vanilla2SLS. 
End. --------------------
-12.550736
Run -1-th experiment for Poly2SLS. 


AxisError: axis 1 is out of bounds for array of dimension 1

In [None]:
from mliv.dataset.demand import gen_data
from mliv.utils import CausalDataset
# Generate the synthetic Demand datasets with gen_data's default settings.
# The recorded output shows 10 datasets written under
# ./Data/Demand/0.5_1.0_0.0_10000/ and the configuration dict echoed as the
# cell result. The NN2SLS demo cell earlier in the notebook reads from that
# directory, so this cell has to run before it on a fresh machine.
gen_data()

The path: ./Data/Demand/0.5_1.0_0.0_10000/
Generate Demand datasets - 0/10. 
Generate Demand datasets - 1/10. 
Generate Demand datasets - 2/10. 
Generate Demand datasets - 3/10. 
Generate Demand datasets - 4/10. 
Generate Demand datasets - 5/10. 
Generate Demand datasets - 6/10. 
Generate Demand datasets - 7/10. 
Generate Demand datasets - 8/10. 
Generate Demand datasets - 9/10. 


{'dataName': 'Demand',
 'exps': 10,
 'num': 10000,
 'rho': 0.5,
 'alpha': 1.0,
 'beta': 0.0,
 'seed': 2022,
 'num_val': 10000,
 'seed_val': 3033,
 'seed_tst': 4044}