**Methodology:** For each instance of the control group, the nearest instance in the treatment group is identified and concatenated to it, and vice versa; i.e., if the data contain 30 features, the new dataset contains 30+30 features.

**Utilized metric:** 'euclidean'

# Import Libraries

In [None]:
import warnings
warnings.filterwarnings('ignore')

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Basic libraries
import time
import random
import numpy  as np
import pandas as pd


# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Sklearn library
from sklearn.preprocessing   import StandardScaler


# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Tensorflow library
import tensorflow                as     tf
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks  import TerminateOnNaN
from tensorflow.keras.callbacks  import EarlyStopping
from tensorflow.keras.callbacks  import ReduceLROnPlateau
from tensorflow.keras.utils      import plot_model


# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# User libraries
from utils.data_loading import Synthetic_dataset, TWINS_dataset, IHDP_dataset, ACIC_dataset
from utils.metrics import PEHE, ATE
from utils.Loss import *
from utils.Modified_DragonNet import *
from utils.kNearestNeighbors import *
print('[INFO] All libraries were imported')



# Random generators initialization
# Use one shared seed for every random number generator touched by this notebook.
seed = 42

# NOTE(review): PYTHONHASHSEED is read by the interpreter at start-up, so setting
# it here presumably only influences child processes, not this running kernel.
os.environ["PYTHONHASHSEED"] = str(seed)

# Seed Python's `random`, NumPy and TensorFlow with the same value.
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed)

print('[INFO] Random generators were initialized')

# Parameters

In [None]:
# Toggle for wrapping the base loss with make_tarreg_loss (see `loss` below).
targeted_regularization = True  # {True, False}

output_dir       = ''
knob_loss        = dragonnet_loss_binarycross   # base loss function
ratio            = 1.                           # forwarded to make_tarreg_loss
validation_split = 0.2                          # forwarded to dragonnet.fit
batch_size       = 64                           # forwarded to dragonnet.fit
verbose          = False                        # forwarded to fit / ReduceLROnPlateau

# Metrics handed to dragonnet.compile in both training phases.
metrics = [regression_loss, binary_classification_loss, treatment_accuracy, track_epsilon]

# Loss handed to dragonnet.compile: the wrapped loss when targeted
# regularization is enabled, otherwise the base loss unchanged.
loss = (
    make_tarreg_loss(ratio=ratio, dragonnet_loss=knob_loss)
    if targeted_regularization
    else knob_loss
)

# Settings forwarded to KNearestNeighbors.
metric = 'euclidean'  # 'euclidean', 'cityblock', 'chebyshev'
k      = 11

In [None]:
# Benchmark selection. `problem` picks the loader class below; `path` must be
# set by hand to the matching data directory.
problem = "Synthetic" # {"IHDP", "Synthetic", "TWINS", "ACIC"}
path = "Data/Synthetic/" # {"Data/Synthetic/", "Data/IHDP/", "Data/TWINS/", "Data/ACIC/"}

# CSV file that receives the per-simulation results.
filename = f"./Results/{problem}_kNN-DragonNet-{metric}-k={k}.csv"
# Checkpoint prefix used to save/reload the Phase I weights.
MODEL_PATH    = './checkpoints/{}kNN-DragonNet-{}-k={}'.format(problem, metric, k)

# Pick the loader by substring match on `problem`.
if "Synthetic" in problem:
    DataLoader = Synthetic_dataset(path=path)
elif "IHDP" in problem:
    DataLoader = IHDP_dataset(path=path)
elif "TWINS" in problem:
    DataLoader = TWINS_dataset(path=path)
elif "ACIC" in problem:
    DataLoader = ACIC_dataset(path=path, train_size=0.8, random_state=1983)
else:
    # Fail here with a clear message instead of a NameError on `DataLoader`
    # (or a stale loader from an earlier cell run) further down.
    raise ValueError(
        f"Unknown problem {problem!r}; expected a name containing one of "
        "'Synthetic', 'IHDP', 'TWINS', 'ACIC'"
    )

# Simulations

In [None]:
# Per-simulation results; rewritten to `filename` after every iteration so a
# partial run still leaves a usable CSV.
results = {'ATE': [], 'Error_ATE': [], 'Error_PEHE': []}

# Create the results directory up front: otherwise the first df.to_csv call
# fails only after a complete (and expensive) training run.
results_dir = os.path.dirname(filename)
if results_dir:
    os.makedirs(results_dir, exist_ok=True)

for idx in range(DataLoader.nProblems):

    # Start timer for the whole simulation
    #
    start1 = time.time()


    # Load training data
    #
    trainX, trainT, trainY, train_potential_Y = DataLoader.getTraining( idx )

    # Load testing data
    #
    testX, testT, testY, test_potential_Y     = DataLoader.getTesting( idx )
    #
    print('Simulation: ', idx)
    print('[INFO] Dataset imported')


    # Setup scaler for inputs (fitted on the training split only and then
    # applied unchanged to the test split)
    scalerX = StandardScaler()
    #
    trainX  = scalerX.fit_transform( trainX )
    testX   = scalerX.transform( testX )


    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    # ***                                     New framework                                     ***
    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
    #
    # Include information about the outcomes: the kNN helper is set up on the
    # training data and then queried for both splits.
    # NOTE(review): the training points are queried against themselves; confirm
    # that KNearestNeighbors excludes a point from its own neighbourhood.
    knn = KNearestNeighbors(k        = [k],
                            metrics  = [metric],
                            verbose  = True)
    knn(trainX, trainT, trainY)

    knn.calculateDistances( trainX )
    train_knn_Y = knn.getEstimatedOutcomes()

    knn.calculateDistances( testX  )
    test_knn_Y  = knn.getEstimatedOutcomes()
    # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


    # Setup DragonNet
    #
    dragonnet = make_modified_dragonnet(trainX.shape[1], 0.01)


    # Create outputs for DragonNet (concatenate Y & T as two columns)
    #
    yt_train = np.concatenate([trainY.reshape(-1,1), trainT.reshape(-1,1)], axis = 1)
    yt_train = yt_train.astype('float64')


    #
    #
    # *** Training - Phase I ***
    #
    #

    # Compile network
    # (`learning_rate` replaces the deprecated `lr` alias, which newer Keras
    #  releases no longer accept)
    #
    dragonnet.compile(optimizer = Adam(learning_rate = 1e-3),
                      loss      = loss,
                      metrics   = metrics)

    # Setup callbacks
    callbacks = [TerminateOnNaN(),
                 EarlyStopping(monitor   = 'val_loss',
                               patience  = 2,
                               min_delta = 0.),
                 ReduceLROnPlateau(monitor   = 'loss',
                                   factor    = 0.5,
                                   patience  = 5,
                                   verbose   = verbose,
                                   mode      = 'auto',
                                   min_delta = 1e-8,
                                   cooldown  = 0,
                                   min_lr    = 0)]

    start_time = time.time()

    # Training: the network takes the covariates plus the two columns of the
    # kNN outcome estimates as three separate inputs
    #
    dragonnet.fit([trainX, train_knn_Y[:,0], train_knn_Y[:,1]], yt_train,
                  callbacks        = callbacks,
                  validation_split = validation_split,
                  epochs           = 100,
                  batch_size       = batch_size,
                  verbose          = verbose)


    # Save trained network (used as a fallback if Phase II produces NaNs)
    #
    dragonnet.save_weights( MODEL_PATH )

    print("[INFO] Training - Phase I - Time %.2f secs" % (time.time() - start_time) )


    #
    #
    # *** Training - Phase II ***
    #
    #

    # Setup callbacks
    #
    callbacks = [TerminateOnNaN(),
                 EarlyStopping(monitor   = 'val_loss',
                               patience  = 40,
                               min_delta = 0.),
                 ReduceLROnPlateau(monitor   = 'loss',
                                   factor    = 0.5,
                                   patience  = 5,
                                   verbose   = verbose,
                                   mode      = 'auto',
                                   min_delta = 0.,
                                   cooldown  = 0,
                                   min_lr    = 0)
    ]

    # Compile network (fine-tuning with SGD at a much smaller learning rate)
    #
    dragonnet.compile(optimizer = SGD(learning_rate=1e-5, momentum=0.9, nesterov=True),
                      loss      = loss,
                      metrics   = metrics)

    start_time = time.time()

    # Training
    #
    dragonnet.fit([trainX, train_knn_Y[:,0], train_knn_Y[:,1]], yt_train,
                  callbacks        = callbacks,
                  validation_split = validation_split,
                  epochs           = 300,
                  batch_size       = batch_size,
                  verbose          = verbose)

    print("[INFO] Training - Phase II - Time %.2f secs" % (time.time() - start_time) )


    #
    #
    # *** Predictions ***
    #
    #
    yt_hat_test  = dragonnet.predict( [testX, test_knn_Y[:,0], test_knn_Y[:,1]] )


    # If Phase II diverged (NaN predictions), reload the weights saved after
    # Phase I and predict again
    if ( np.isnan( yt_hat_test ).any() ):
        dragonnet.load_weights( MODEL_PATH )
        yt_hat_test  = dragonnet.predict( [testX, test_knn_Y[:,0], test_knn_Y[:,1]] )


    # Get predictions (first two output columns)
    #
    test_y_hat = yt_hat_test[:,:2]

    # Get propensity score (third output column)
    #
    propensity_score = yt_hat_test[:,2]


    # ATE: mean difference between the two potential-outcome columns
    #
    real_ATE = ( test_potential_Y[:,1] - test_potential_Y[:,0] ).mean()


    # Error PEHE
    #
    Error_PEHE = PEHE(test_potential_Y, test_y_hat)


    # Error ATE
    #
    Error_ATE = ATE(test_potential_Y, test_y_hat)


    # Store errors of PEHE and ATE
    #
    results['ATE']            += [ np.round(real_ATE,   6) ]
    results['Error_ATE']      += [ np.round(Error_ATE,  6) ]
    results['Error_PEHE']     += [ np.round(Error_PEHE, 6) ]

    print('[INFO] Error of PEHE and ATE computed')
    print('[INFO] Time %.2f\n\n' % (time.time() - start1))


    # Save results (at each iteration)
    df = pd.DataFrame( results )
    df['Problem'] = [f"{problem} {x}" for x in df.index]
    df.to_csv(filename, index=False)