In [None]:
import os

import numpy as np
import pandas as pd
import keras
import torch
from econml.iv.nnet import DeepIV

# Paths to the 6 training and testing datasets: one train/test pair for each
# combination of f-variant (lin, log_case, sin_lin) and Z-variant (Zbin, Zcont).
# Both lists are derived from the same stems, so entry i of `train_files`
# always corresponds to entry i of `test_files`.
_DATA_DIR = 'data/sec5.1'
_DGP_STEMS = [
    f'{z_kind}_g_lin_f_{f_kind}'
    for f_kind in ('lin', 'log_case', 'sin_lin')
    for z_kind in ('Zbin', 'Zcont')
]

train_files = [f'{_DATA_DIR}/train_{stem}.csv' for stem in _DGP_STEMS]

test_files = [f'{_DATA_DIR}/test_{stem}.csv' for stem in _DGP_STEMS]

# Network architecture shared by the treatment and response models
HIDDEN_UNITS = (256, 128, 64, 32)
DROPOUT_RATE = 0.17

# Number of fit/predict repetitions per dataset
N_RUNS = 10

# Directory that receives the per-dataset prediction CSVs
RESULTS_DIR = 'results/sec5.1'


def _hidden_layers():
    """Return newly created Dense/Dropout layers for one network.

    Every call creates new layer objects, so networks built from separate
    calls never share weights.
    """
    layers = []
    for position, units in enumerate(HIDDEN_UNITS):
        # Only the first layer declares the input: the 2-column concatenation
        # produced by the `m` / `h` lambdas handed to DeepIV below.
        first_layer_kwargs = {'input_shape': (2,)} if position == 0 else {}
        layers.append(keras.layers.Dense(units, activation='relu', **first_layer_kwargs))
        layers.append(keras.layers.Dropout(DROPOUT_RATE))
    return layers


def build_treatment_model():
    """Return a freshly initialised treatment network (2 inputs -> 32 features)."""
    return keras.Sequential(_hidden_layers())


def build_response_model():
    """Return a freshly initialised response network (2 inputs -> 1 output)."""
    return keras.Sequential(_hidden_layers() + [keras.layers.Dense(1)])


# Options for Keras model training, used for both DeepIV stages: at most 30
# epochs, 20% of the data held out for validation, and early stopping with a
# patience of 2 epochs that restores the best weights.
keras_fit_options = { "epochs": 30,
                      "validation_split": 0.2,
                      "callbacks": [keras.callbacks.EarlyStopping(patience=2, restore_best_weights=True)]}

# zip() would silently drop datasets if the two lists ever got out of step
if len(train_files) != len(test_files):
    raise ValueError(
        f'train_files has {len(train_files)} entries but test_files has {len(test_files)}'
    )

# Create the output directory up front so a missing folder is detected before
# any training time is spent, not at the first to_csv call.
os.makedirs(RESULTS_DIR, exist_ok=True)

# Loop over each dataset
for idx, (train_file, test_file) in enumerate(zip(train_files, test_files), start=1):

    # Load train and test data (first line of each CSV is a header)
    train_data = np.genfromtxt(train_file, delimiter=',', skip_header=1)
    test_data = np.genfromtxt(test_file, delimiter=',', skip_header=1)

    data_length = train_data.shape[0]
    print(f'Train data size for DGP{idx}: {data_length}')

    # Extract variables: training columns 1, 2, 3 are used as instrument,
    # treatment and outcome respectively.
    z = train_data[:, 1]
    t = train_data[:, 2]
    y = train_data[:, 3]
    x = np.zeros(data_length)  # No exogenous variables: constant placeholder

    # Treatment values at which the fitted response is evaluated (both are
    # passed as `T` to predict). Plain float32 NumPy arrays are used because
    # that is what Keras consumes; no torch round-trip is needed.
    # NOTE(review): column 0 presumably holds the test points used for the MSE
    # and column 4 a plotting grid -- confirm against the data generator.
    X_test = test_data[:, 0].astype(np.float32)
    X_test_grid = test_data[:, 4].astype(np.float32)

    # Exogenous input for prediction: the same constant used during training
    exog_test = np.full_like(X_test, x[0])
    exog_grid = np.full_like(X_test_grid, x[0])

    # Predictions of every run, keyed by output column name
    mse_predictions = {}
    plot_predictions = {}

    # Run training and predictions N_RUNS times for each dataset
    for run in range(N_RUNS):
        # Fresh networks for every run. Keras layers keep their weights, so
        # reusing one pair of networks would warm-start each run from the
        # previous run (and from the previous dataset) instead of giving
        # independent repetitions.
        treatment_model = build_treatment_model()
        response_model = build_response_model()

        # Initialize the DeepIV model. The lambdas look up `treatment_model` /
        # `response_model` when DeepIV calls them inside fit(), i.e. they use
        # the networks created just above.
        deepIvEst = DeepIV(n_components = 10,
                           m = lambda z, x : treatment_model(keras.layers.concatenate([z,x])),
                           h = lambda t, x : response_model(keras.layers.concatenate([t,x])),
                           n_samples = 1,
                           use_upper_bound_loss = False,
                           n_gradient_samples = 1,
                           optimizer='adam',
                           first_stage_options = keras_fit_options,
                           second_stage_options = keras_fit_options)

        # Fit the DeepIV model
        deepIvEst.fit(Y=y, T=t, X=x, Z=z)

        # Generate predictions; ravel guarantees one flat column per run
        column = f'Run_{run+1}'
        mse_predictions[column] = np.ravel(deepIvEst.predict(X_test, exog_test))
        plot_predictions[column] = np.ravel(deepIvEst.predict(X_test_grid, exog_grid))

    # One column per run, in run order
    df_mse = pd.DataFrame(mse_predictions)
    df_plot = pd.DataFrame(plot_predictions)

    # Save the DataFrames to CSV files for each dataset
    df_mse.to_csv(f'{RESULTS_DIR}/deepiv_result_mse_dgp{idx}.csv', index=False)
    df_plot.to_csv(f'{RESULTS_DIR}/deepiv_result_plot_dgp{idx}.csv', index=False)