**Summary**

This notebook contains:
1) Synthetic dataset

2) NedNet

    - Reference: Shi, C., Blei, D. M., & Veitch, V. (2019). Adapting neural networks for the estimation of treatment effects. arXiv preprint arXiv:1906.02120.
    
    - Link: https://github.com/claudiashi57/dragonnet

# Import Libraries

In [None]:
import warnings
warnings.filterwarnings('ignore')
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

# Basic libraries
import time
import random
import numpy  as np
import pandas as pd
from   os     import listdir


# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Sklearn library
from sklearn.preprocessing   import StandardScaler


# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Tensorflow library
#
import tensorflow                as tf
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks  import TerminateOnNaN
from tensorflow.keras.callbacks  import EarlyStopping
from tensorflow.keras.callbacks  import ReduceLROnPlateau
from tensorflow.keras.utils      import plot_model

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# User libraries
#
from utils.data_loading import Synthetic_dataset, TWINS_dataset
from utils.metrics      import PEHE, ATE
from utils.utils        import data_sanity_check
from utils.Loss         import *
from utils.NedNet       import *
print('[INFO] All libraries were imported')



# Seed every random number generator used in this file so that runs are
# repeatable.
seed = 42

# NOTE: PYTHONHASHSEED is read by the interpreter at start-up; setting it here
# is only inherited by child processes started afterwards.
os.environ["PYTHONHASHSEED"] = str(seed)

# Python's `random`, NumPy and TensorFlow each keep their own generator.
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(seed)

print('[INFO] Random generators were initialized')

# Parameters

In [None]:
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------
# NOTE(review): `targeted_regularization`, `output_dir` and `knob_loss` are not
# read anywhere in the visible part of this file; they are kept in case other
# cells rely on them.
targeted_regularization    = True # {True, False}

output_dir                 = ''
knob_loss                  = dragonnet_loss_binarycross
validation_split           = 0.2    # fraction of the training data Keras holds out for `val_loss`
batch_size                 = 8
verbose                    = False  # forwarded to Keras `fit` and to ReduceLROnPlateau


# Metrics reported while training the full network / the cut network
metrics_ned = [ ned_loss        ]
metrics_cut = [ regression_loss ]


# ---------------------------------------------------------------------------
# Dataset selection
# ---------------------------------------------------------------------------
problem = "TWINS"
path = "Data/Twins/" # {"Data/Synthetic/5000/", "Data/Synthetic/5000_large/", "Data/Twins/"}
limited_number_of_instances = 5
# `ratio` is passed to `data_sanity_check` and `create_sample_dataset`.
# (An earlier `ratio = 1.` assignment was dead code: it was always overwritten
# by this value before being read.)
ratio = 0.005
filename = f"./Results/{problem}_NedNet-ratio={ratio}.csv"


if "Synthetic" in problem:
    DataLoader = Synthetic_dataset(path=path)
elif problem == "TWINS":
    DataLoader = TWINS_dataset(path=path)
else:
    # Fail here with a clear message instead of a NameError on `DataLoader`
    # further down.
    raise ValueError(
        f"Unknown problem {problem!r}: expected 'TWINS' or a name containing 'Synthetic'"
    )

# Sanity check
# * Ensure that all categories obtain enough datapoints *
data_sanity_check(DataLoader=DataLoader, ratio=ratio)

# Simulations

In [None]:
# Imported once here instead of being re-imported on every loop iteration.
from utils.data_loading import create_sample_dataset


def _make_callbacks(early_stopping_patience):
    """Return a fresh list of the Keras callbacks used by every training phase.

    Args:
        early_stopping_patience: number of epochs without `val_loss`
            improvement after which EarlyStopping halts training.

    Returns:
        [TerminateOnNaN, EarlyStopping, ReduceLROnPlateau]; the last one halves
        the learning rate after 5 epochs without improvement of the training
        loss. Reads the module-level `verbose` setting.
    """
    return [TerminateOnNaN(),
            EarlyStopping(monitor   = 'val_loss',
                          patience  = early_stopping_patience,
                          min_delta = 0.),
            ReduceLROnPlateau(monitor   = 'loss',
                              factor    = 0.5,
                              patience  = 5,
                              verbose   = verbose,
                              mode      = 'auto',
                              min_delta = 1e-8,
                              cooldown  = 0,
                              min_lr    = 0)]


def _run_training_phase(model, phase, X, yt, optimizer, loss, metrics,
                        epochs, early_stopping_patience):
    """Compile `model`, fit it on (X, yt) and print the elapsed fitting time.

    Args:
        model: Keras model to (re)compile and train in place.
        phase: label printed in the timing message (e.g. 'I', 'II').
        X: training inputs.
        yt: training targets.
        optimizer: Keras optimizer instance.
        loss: loss function passed to `compile`.
        metrics: list of metric functions passed to `compile`.
        epochs: maximum number of epochs.
        early_stopping_patience: patience of the EarlyStopping callback.

    Reads the module-level `validation_split`, `batch_size` and `verbose`.
    """
    model.compile(optimizer = optimizer,
                  loss      = loss,
                  metrics   = metrics)

    callbacks = _make_callbacks(early_stopping_patience)

    start_time = time.time()
    model.fit(X, yt,
              callbacks        = callbacks,
              validation_split = validation_split,
              epochs           = epochs,
              batch_size       = batch_size,
              verbose          = verbose)
    print("[INFO] Training - Phase %s - Time %.2f secs" % (phase, time.time() - start_time))


# Make sure the output directory exists *before* training, so a missing
# folder does not discard the first simulation's results.
results_dir = os.path.dirname(filename)
if results_dir:
    os.makedirs(results_dir, exist_ok=True)


results = {'ATE': [], 'Error_ATE': [], 'Error_PEHE':[]}
for idx in range(DataLoader.nProblems):

    # Start timer for the whole simulation
    start1 = time.time()


    # Load training data and keep only the sampled subset of instances
    trainX, trainT, trainY, train_potential_Y = DataLoader.getTraining( idx )
    indices = create_sample_dataset(T=trainT, Y=trainY, ratio=ratio, limited_number_of_instances=limited_number_of_instances)
    trainX, trainT, trainY, train_potential_Y = trainX[indices], trainT[indices], trainY[indices], train_potential_Y[indices]

    # Load testing data
    testX, testT, testY, test_potential_Y  = DataLoader.getTesting( idx )
    #
    print('Simulation: ', idx)
    print('[INFO] Dataset imported')
    print('[INFO] Number of training instances: ', trainX.shape[0])


    # Standardise inputs; the scaler is fitted on the training data only
    scalerX = StandardScaler()
    trainX  = scalerX.fit_transform( trainX )
    testX   = scalerX.transform( testX )


    # Setup NedNet
    nednet = make_ned(trainX.shape[1], 0.01)


    # Training targets: column 0 = outcome Y, column 1 = treatment T
    yt_train = np.concatenate([trainY.reshape(-1,1), trainT.reshape(-1,1)], axis = 1)


    #
    # *** Training - Phase I *** : full network, Adam
    #
    _run_training_phase(nednet, 'I', trainX, yt_train,
                        optimizer               = Adam(learning_rate=1e-3),
                        loss                    = ned_loss,
                        metrics                 = metrics_ned,
                        epochs                  = 100,
                        early_stopping_patience = 2)


    #
    # *** Training - Phase II *** : full network, fine-tuning with SGD
    #
    # NOTE(review): this phase previously recompiled `nednet` with `dead_loss`
    # / `metrics_cut`, although column 1 of its output is read as the
    # propensity score right below. The DragonNet reference implementation
    # keeps `ned_loss` / `metrics_ned` for this phase, which is what is used
    # here. Confirm against utils.Loss / utils.NedNet.
    _run_training_phase(nednet, 'II', trainX, yt_train,
                        optimizer               = SGD(learning_rate=1e-5, momentum=0.9, nesterov=True),
                        loss                    = ned_loss,
                        metrics                 = metrics_ned,
                        epochs                  = 300,
                        early_stopping_patience = 40)


    # Treatment predictions: column 1 of the NedNet output.
    # (`t_hat_train` is not used further in this cell; kept for inspection.)
    t_hat_train = nednet.predict(trainX)[:, 1]
    t_hat_test = nednet.predict(testX)[:, 1]

    # Cutting the activation layer
    cut_net = post_cut(nednet, trainX.shape[1], 0.01)


    #
    # *** Training - Phase III *** : cut network, Adam
    #
    _run_training_phase(cut_net, 'III', trainX, yt_train,
                        optimizer               = Adam(learning_rate=1e-3),
                        loss                    = dead_loss,
                        metrics                 = metrics_cut,
                        epochs                  = 100,
                        early_stopping_patience = 2)


    #
    # *** Training - Phase IV *** : cut network, fine-tuning with SGD
    #
    _run_training_phase(cut_net, 'IV', trainX, yt_train,
                        optimizer               = SGD(learning_rate=1e-5, momentum=0.9, nesterov=True),
                        loss                    = dead_loss,
                        metrics                 = metrics_cut,
                        epochs                  = 300,
                        early_stopping_patience = 40)


    #
    # *** Predictions ***
    #
    y_hat_test  = cut_net.predict(testX)

    # Outcome predictions followed by the treatment prediction as last column
    yt_hat_test = np.concatenate([y_hat_test, t_hat_test.reshape(-1, 1)], 1)

    # First two columns: outcome predictions
    # (presumably y0_hat, y1_hat - confirm against post_cut)
    test_y_hat = yt_hat_test[:,:2]

    # Third column: propensity score (not used further in this cell)
    propensity_score = yt_hat_test[:,2]


    # True ATE on the test set: mean difference of the potential outcomes
    real_ATE = ( test_potential_Y[:,1] - test_potential_Y[:,0] ).mean()

    # Error PEHE
    Error_PEHE = PEHE(test_potential_Y, test_y_hat)

    # Error ATE
    Error_ATE = ATE(test_potential_Y, test_y_hat)


    # Store errors of PEHE and ATE
    results['ATE'].append( np.round(real_ATE,  6) )
    results['Error_ATE'].append( np.round(Error_ATE,  6) )
    results['Error_PEHE'].append( np.round(Error_PEHE, 6) )

    print('[INFO] Error of PEHE and ATE computed')
    print('[INFO] Time %.2f\n\n' % (time.time() - start1))


    # Save results (at each iteration) so partial results survive an
    # interrupted run
    df = pd.DataFrame( results )
    df['Problem'] = [f"{problem} {x}" for x in df.index]
    df.to_csv(filename, index=False)