Causal-Forest.
    
    - References 
        - Wager, S., & Athey, S. (2018). Estimation and inference of heterogeneous treatment effects using random forests. Journal of the American Statistical Association, 113(523), 1228-1242.
    
        - Athey and Imbens, 2016, Recursive partitioning for heterogeneous causal effects
    
        - Athey and Wager, 2019, Estimating treatment effects with causal forests: An application

        - Athey, Tibshirani and Wager, 2019, Generalized random forests

    
    - Link: https://github.com/timmens/causal-forest
    
    - Link: https://causal-forest.readthedocs.io/en/latest/getting_started/example.html
  

# Import Libraries

In [None]:
import warnings
warnings.filterwarnings("ignore")

# Basic libraries
import pandas as pd
import numpy  as np

# Sklearn library
from sklearn.preprocessing   import StandardScaler

# User libraries
from utils.data_loading import Synthetic_dataset, TWINS_dataset, IHDP_dataset, ACIC_dataset
from econml.grf import CausalForest


print('[INFO] All libraries were imported')

# Get files

In [None]:
# Experiment selection.
# `problem` chooses the benchmark and `path` must point at the matching data
# directory; `filename` is where the per-simulation metrics are written.
problem = "ACIC" # {"IHDP", "Synthetic", "TWINS", "ACIC"}
path = "Data/ACIC/" # {"Data/Synthetic/", "Data/IHDP/", "Data/TWINS/", "Data/ACIC/"}
filename = f"./Results/{problem}_C-Forest.csv"


# Pick the loader by substring match on `problem` (same matching rule and
# precedence as before). Only the ACIC loader is given a split size and seed.
if "Synthetic" in problem:
    DataLoader = Synthetic_dataset(path=path)
elif "IHDP" in problem:
    DataLoader = IHDP_dataset(path=path)
elif "TWINS" in problem:
    DataLoader = TWINS_dataset(path=path)
elif "ACIC" in problem:
    DataLoader = ACIC_dataset(path=path, train_size=0.8, random_state=1983)
else:
    # Fail here with a clear message instead of leaving `DataLoader`
    # undefined and hitting a NameError in a later cell.
    raise ValueError(
        f"Unknown problem {problem!r}: expected a name containing one of "
        "'Synthetic', 'IHDP', 'TWINS' or 'ACIC'."
    )

# Parameters

In [None]:
# Causal-forest hyper-parameters, consumed when the model is built in the
# simulation loop.
num_trees = 100        # passed to the forest as `n_estimators`
min_samples_leaf = 5   # passed to the forest as `min_samples_leaf`
max_depth = 25         # passed to the forest as `max_depth`

# Simulations

In [None]:
import os  # local to this cell: only needed to create the results folder

# Per-simulation metrics; one entry is appended to each list per problem index.
results = {'ATE': [], 'Error_ATE': [], 'Error_PEHE': []}

# Create the output folder up front: DataFrame.to_csv does not create missing
# directories, and failing after the first model has been trained is wasteful.
results_dir = os.path.dirname(filename)
if results_dir:
    os.makedirs(results_dir, exist_ok=True)

# The forest configuration is the same for every simulation, so build the
# keyword arguments once; a fresh model is still created per simulation.
forest_params = dict(
    criterion='mse',
    n_estimators=num_trees,
    min_samples_leaf=min_samples_leaf,
    max_depth=max_depth,
    min_var_fraction_leaf=None,
    min_var_leaf_on_val=True,
    random_state=42,
    min_impurity_decrease=0.0,
    max_samples=0.45,
    min_balancedness_tol=.45,
    warm_start=False,
    inference=True,
    fit_intercept=True,
    subforest_size=4,
    honest=True,
    verbose=0,
    n_jobs=-1,
)

for idx in range(DataLoader.nProblems):
    # Load the training and testing splits of simulation `idx`.
    trainX, trainT, trainY, train_potential_Y = DataLoader.getTraining(idx)
    testX, testT, testY, test_potential_Y = DataLoader.getTesting(idx)
    print('Simulation: ', idx)
    print('[INFO] Dataset imported')

    # Standardise the covariates using statistics of the training split only.
    scalerX = StandardScaler()
    trainX = scalerX.fit_transform(trainX)
    testX = scalerX.transform(testX)

    # Train a fresh causal forest for this simulation.
    model = CausalForest(**forest_params)
    model.fit(trainX, trainT, trainY)
    print('[INFO] Model trained')

    # econml documents `predict` as returning a 2-D (n_samples, n_outputs)
    # array. Flatten it: subtracting an (n, 1) array from an (n,) vector would
    # broadcast to (n, n) and silently average the PEHE over all unit pairs.
    ITE = np.asarray(model.predict(testX)).reshape(-1)

    # True effect per unit: column 1 minus column 0 of the potential outcomes
    # (presumably treated minus control -- confirm against the data loaders).
    true_ITE = np.asarray(test_potential_Y[:, 1] - test_potential_Y[:, 0]).reshape(-1)
    if ITE.shape != true_ITE.shape:
        raise ValueError(
            f"Predicted effects have shape {ITE.shape} but the true effects "
            f"have shape {true_ITE.shape}; expected one estimate per test unit."
        )

    # ATE
    real_ATE = true_ITE.mean()

    # Error PEHE (stored as the mean squared error; no square root is taken)
    Error_PEHE = ((true_ITE - ITE) ** 2).mean()

    # Error ATE
    Error_ATE = abs(real_ATE - np.mean(ITE))

    # Store errors of PEHE and ATE
    results['ATE'].append(np.round(real_ATE, 6))
    results['Error_ATE'].append(np.round(Error_ATE, 6))
    results['Error_PEHE'].append(np.round(Error_PEHE, 6))
    print('[INFO] Error of PEHE and ATE computed\n\n')

    # Save results (at each iteration) so partial runs are not lost.
    df = pd.DataFrame(results)
    df['Problem'] = [f"{problem} {x}" for x in df.index]
    df.to_csv(filename, index=False)