# Import Libraries

In [1]:
import os
import warnings
warnings.filterwarnings("ignore")

# Basic libraries
import numpy  as np
import pandas as pd

# Sklearn library
#
from sklearn.preprocessing   import StandardScaler
from sklearn.ensemble        import RandomForestRegressor


# User libraries
from utils.metrics import PEHE, ATE
from utils.data_loading import Synthetic_dataset, TWINS_dataset, IHDP_dataset, ACIC_dataset
print('[INFO] All libraries were imported')

[INFO] All libraries were imported


# Select benchmark and build the data loader

In [3]:
# Experiment configuration
#
# problem : benchmark to run; matched by substring against the loaders below
# path    : folder holding that benchmark's data (must agree with `problem`)
# filename: CSV file that receives the per-simulation metrics
problem = "TWINS" # {"IHDP", "Synthetic", "TWINS", "ACIC"}
path = "Data/TWINS/" # {"Data/Synthetic/", "Data/IHDP/", "Data/TWINS/", "Data/ACIC/"}
filename = f"./Results/{problem}_R-Forest.csv"

# The results file is written inside the simulation loop; make sure its
# folder exists up front so the first save does not fail on a fresh checkout
os.makedirs(os.path.dirname(filename) or ".", exist_ok=True)


# Pick the loader that corresponds to the selected benchmark
#
if "Synthetic" in problem:
    DataLoader = Synthetic_dataset(path=path)
elif "IHDP" in problem:
    DataLoader = IHDP_dataset(path=path)
elif "TWINS" in problem:
    DataLoader = TWINS_dataset(path=path)
elif "ACIC" in problem:
    DataLoader = ACIC_dataset(path=path, train_size=0.8, random_state=1983)
else:
    # Fail here rather than with a NameError on `DataLoader` in a later cell
    raise ValueError(
        f"Unknown problem '{problem}'; expected one of "
        "'IHDP', 'Synthetic', 'TWINS', 'ACIC'"
    )

[INFO] Number of cases:  36


# Simulations

In [4]:
# Single-model (S-learner style) baseline: one random forest is fitted on the
# scaled covariates with the treatment appended as an extra feature, and the
# two potential outcomes are predicted by forcing that feature to 0 and to 1.
#
# For every problem served by `DataLoader` the loop stores the real ATE on the
# testing set together with the ATE and PEHE errors, and rewrites the CSV file
# after each simulation so partial results survive an interrupted run.

# Seed of the forest (bootstrap / feature sampling). Without it every run of
# the notebook produces different metrics.
RANDOM_STATE = 1983

results = {'ATE': [], 'Error_ATE': [], 'Error_PEHE':[]}

for idx in range(DataLoader.nProblems):

    # Load training data
    #
    trainX, trainT, trainY, train_potential_Y = DataLoader.getTraining( idx )

    # Load testing data (testT / testY are not needed by this baseline)
    #
    testX, testT, testY, test_potential_Y     = DataLoader.getTesting( idx )
    #
    print('Simulation: ', idx)
    print('[INFO] Dataset imported')


    # Setup scaler for features (fitted on training data only)
    #
    scalerX = StandardScaler()
    trainX  = scalerX.fit_transform( trainX )
    testX   = scalerX.transform( testX )


    # Setup model
    #
    model = RandomForestRegressor(n_jobs=-1, random_state=RANDOM_STATE)


    # Train model on [covariates | treatment]
    #
    model.fit(np.concatenate([trainX, trainT.reshape(-1,1)], axis=1), trainY)
    print('[INFO] Model trained')


    # Calculate number of instances in testing set
    #
    nInstances = testX.shape[0]


    # Initialize an empty array: column t holds \hat{m}(x, t)
    #
    test_y_hat = np.empty([nInstances, 2], dtype=float)


    # Get predictions for both treatment values
    #
    for treatment in (0, 1):
        treatment_column = np.full([nInstances, 1], float(treatment))
        test_y_hat[:, treatment] = model.predict(
            np.concatenate([testX, treatment_column], axis=1)
        )


    # ATE: mean difference between the two potential-outcome columns
    #
    real_ATE = ( test_potential_Y[:,1] - test_potential_Y[:,0] ).mean()


    # Error PEHE (as computed by utils.metrics.PEHE)
    #
    Error_PEHE = PEHE(test_potential_Y, test_y_hat)


    # Error ATE (as computed by utils.metrics.ATE)
    #
    Error_ATE = ATE(test_potential_Y, test_y_hat)


    # Store errors of PEHE and ATE
    #
    results['ATE'].append( np.round(real_ATE,   6) )
    results['Error_ATE'].append( np.round(Error_ATE,  6) )
    results['Error_PEHE'].append( np.round(Error_PEHE, 6) )

    print('[INFO] Error of PEHE and ATE computed\n\n')


    # Save results (at each iteration)
    df = pd.DataFrame( results )
    df['Problem'] = [f"{problem} {x}" for x in df.index]
    df.to_csv(filename, index=False)

Simulation:  0
[INFO] Dataset imported
[INFO] Model trained
[INFO] Error of PEHE and ATE computed


Simulation:  1
[INFO] Dataset imported
[INFO] Model trained
[INFO] Error of PEHE and ATE computed


Simulation:  2
[INFO] Dataset imported
[INFO] Model trained
[INFO] Error of PEHE and ATE computed


Simulation:  3
[INFO] Dataset imported
[INFO] Model trained
[INFO] Error of PEHE and ATE computed


Simulation:  4
[INFO] Dataset imported
[INFO] Model trained
[INFO] Error of PEHE and ATE computed


Simulation:  5
[INFO] Dataset imported
[INFO] Model trained
[INFO] Error of PEHE and ATE computed


Simulation:  6
[INFO] Dataset imported
[INFO] Model trained
[INFO] Error of PEHE and ATE computed


Simulation:  7
[INFO] Dataset imported
[INFO] Model trained
[INFO] Error of PEHE and ATE computed


Simulation:  8
[INFO] Dataset imported
[INFO] Model trained
[INFO] Error of PEHE and ATE computed


Simulation:  9
[INFO] Dataset imported
[INFO] Model trained
[INFO] Error of PEHE and ATE computed


