In [1]:
import numpy as np
import pandas as pd
import optuna
from sklearn import preprocessing
#import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.manifold import TSNE
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch import optim
#from geomloss import SamplesLoss
from torch.autograd import Function
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.nn.functional import normalize
#from torchmetrics.classification import BinaryAccuracy
#from torchmetrics.classification import BinaryF1Score
from sklearn.linear_model import LogisticRegression
torch.manual_seed(0)

  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x7fd18c0f2a50>

In [2]:
class TarNet(nn.Module):
    def __init__(self,params):
        super(TarNet, self).__init__()
        self.encoder1 = nn.Linear(25, params['RL11'])
        self.encoder2 = nn.Linear(params['RL11'], params['RL21'])
        self.encoder3 = nn.Linear(params['RL21'], params['RL32'])

        self.regressor1_y0 = nn.Sequential(
            nn.Linear(params['RL32'], params['RG012']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressor2_y0 = nn.Sequential(
            nn.Linear(params['RG012'], params['RG022']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressorO_y0 = nn.Linear(params['RG022'], 1)

        self.regressor1_y1 = nn.Sequential(
            nn.Linear(params['RL32'], params['RG112']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressor2_y1 = nn.Sequential(
            nn.Linear(params['RG112'], params['RG122']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressorO_y1 = nn.Linear(params['RG122'], 1)


    def forward(self, inputs):
        x = nn.functional.elu(self.encoder1(inputs))
        x = nn.functional.elu(self.encoder2(x))
        phi = nn.functional.elu(self.encoder3(x))

        out_y0 = self.regressor1_y0(phi)
        out_y0 = self.regressor2_y0(out_y0)
        y0 = self.regressorO_y0(out_y0)

        out_y1 = self.regressor1_y1(phi)
        out_y1 = self.regressor2_y1(out_y1)
        y1 = self.regressorO_y1(out_y1)

        concat = torch.cat((y0, y1), 1)
        return concat,phi

In [3]:
def objective(trial,i):

    params = {
          'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
          'optimizer': trial.suggest_categorical("optimizer", ["Adam", "SGD"]),
          'batch_size':trial.suggest_int('batch_size', 8, 256),
          'RL11':trial.suggest_int('RL11', 16, 512),
          'RL21': trial.suggest_int('RL21', 16, 512),
          'RL32': trial.suggest_int('RL32', 16, 512),
          'RG012':trial.suggest_int('RG012', 16, 512),
        'RG022':trial.suggest_int('RG022', 16, 512),
        'RG112':trial.suggest_int('RG112', 16, 512),
        'RG122':trial.suggest_int('RG122', 16, 512),
          
          }

    model = TarNet(params)

    pehe,model= train_evaluate(params, model, trial,i)

    return pehe

In [4]:
class Data(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X.astype(np.float32))
        self.y = torch.from_numpy(y.astype(np.float32))
        self.len = self.X.shape[0]
       
    def __getitem__(self, index):
        return self.X[index], self.y[index]
   
    def __len__(self):
        return self.len

In [5]:
def get_data(data_type,file_num):

    if(data_type=='train'):
        data=pd.read_csv(f"Dataset/IHDP_a/ihdp_npci_train_{file_num}.csv")
    else:
        data = pd.read_csv(f"Dataset/IHDP_a/ihdp_npci_test_{file_num}.csv")

    x_data=pd.concat([data.iloc[:,0], data.iloc[:, 1:30]], axis = 1)
    #x_data=data.iloc[:, 5:30]
    y_data=data.iloc[:, 1]
    return x_data,y_data

In [6]:
def get_dataloader(x_data,y_data,batch_size):

    x_train_sr=x_data[x_data['treatment']==0]
    y_train_sr=y_data[x_data['treatment']==0]
    x_train_tr=x_data[x_data['treatment']==1]
    y_train_tr=y_data[x_data['treatment']==1]


    train_data_sr = Data(np.array(x_train_sr), np.array(y_train_sr))
    train_dataloader_sr = DataLoader(dataset=train_data_sr, batch_size=batch_size)

    train_data_tr = Data(np.array(x_train_tr), np.array(y_train_tr))
    train_dataloader_tr = DataLoader(dataset=train_data_tr, batch_size=batch_size)


    return train_dataloader_sr, train_dataloader_tr

In [7]:
def regression_loss(concat_true, concat_pred):
    #computes a standard MSE loss for TARNet
    y_true = concat_true[:, 0] #get individual vectors
    t_true = concat_true[:, 1]

    y0_pred = concat_pred[:, 0]
    y1_pred = concat_pred[:, 1]

    #Each head outputs a prediction for both potential outcomes
    #We use t_true as a switch to only calculate the factual loss
    loss0 = torch.sum((1. - t_true) * torch.square(y_true - y0_pred))
    loss1 = torch.sum(t_true * torch.square(y_true - y1_pred))
    #note Shi uses tf.reduce_sum for her losses instead of tf.reduce_mean.
    #They should be equivalent but it's possible that having larger gradients accelerates convergence.
    #You can always try changing it!
    return loss0 + loss1

In [8]:
def cal_pehe(data,y,model):
    #data,y=get_data('test',i)

    data=data.to_numpy()
    data=torch.from_numpy(data.astype(np.float32))



    concat_pred,phi=model(data[:,5:30])
    #dont forget to rescale the outcome before estimation!
    #y0_pred = data['y_scaler'].inverse_transform(concat_pred[:, 0].reshape(-1, 1))
    #y1_pred = data['y_scaler'].inverse_transform(concat_pred[:, 1].reshape(-1, 1))
    cate_pred=concat_pred[:,1]-concat_pred[:,0]
    cate_true=data[:,4]-data[:,3] #Hill's noiseless true values


    cate_err=torch.mean( torch.square( ( (cate_true) - (cate_pred) ) ) )

    return torch.sqrt(cate_err).item()


In [9]:
def loss_cal(X_data,y_data,net):
    
    x_train_sr=X_data[X_data['treatment']==0]
    y_train_sr=y_data[X_data['treatment']==0]
    x_train_tr=X_data[X_data['treatment']==1]
    y_train_tr=y_data[X_data['treatment']==1]
    xs_t=x_train_sr.iloc[:,0].to_numpy()
    xt_t=x_train_tr.iloc[:,0].to_numpy()
    
    xs=x_train_sr.iloc[:,5:30].to_numpy()
    xt=x_train_tr.iloc[:,5:30].to_numpy()
    xs_t=torch.from_numpy(xs_t.astype(np.float32))
    xt_t=torch.from_numpy(xt_t.astype(np.float32))
    y_train_sr=y_train_sr.to_numpy()
    y_train_tr=y_train_tr.to_numpy()
    xs=torch.from_numpy(xs.astype(np.float32))
    xt=torch.from_numpy(xt.astype(np.float32))
    
    y_train_sr=torch.from_numpy(y_train_sr.astype(np.float32))
    y_train_tr=torch.from_numpy(y_train_tr.astype(np.float32))
    
    
    input_data=torch.cat((xs,xt),0)
    true_y=torch.unsqueeze(torch.cat((y_train_sr,y_train_tr),0), dim=1)
    true_t=torch.unsqueeze(torch.cat((xs_t,xt_t),0), dim=1)
    
    
    concat_true=torch.cat((true_y,true_t),1)
    concat_pred,phi=net(input_data)
    loss=regression_loss(concat_true, concat_pred)
    
    return loss.item()

    

In [10]:
y_MSE=nn.MSELoss()
#criterion_reg=nn.MSELoss()
#criterion_reg=regression_loss(concat_true,concat_pred)
epochs=300
#batch_size=32

In [11]:
train_loss=[]
val_loss=[]
pehe_error=[]
batch_loss=0
num_files=2
lambda_=0.3
def train_evaluate(param, model, trial,file_num):
    #for nf in range(1,num_files):
    x_data,y_data=get_data('train',file_num)
    X_train, X_val,y_train, y_val = train_test_split(x_data,y_data ,
                                       random_state=42, 
                                       test_size=0.20)
    
    #net=TarNet(25,.01)
    #opt_net = torch.optim.Adam(net.parameters(), lr=1e-4)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    
    optimizer = getattr(optim, param['optimizer'])(model.parameters(), lr= param['learning_rate'])
    
   
        #model = model.cuda()
    #model = model
        #criterion = criterion.cuda()

    for ep in range(1,epochs+1 ):
        #print('epoch',ep)
        batch_loss=0

        train_dataloader_sr, train_dataloader_tr=get_dataloader(X_train,y_train,param['batch_size'])

        for batch_idx, (train_source_data, train_target_data) in enumerate(zip(train_dataloader_sr, train_dataloader_tr)):

            xs,ys=train_source_data
            xt,yt=train_target_data

            xs_train=xs[:,5:30]
            xt_train=xt[:,5:30]

            train_x=torch.cat((xs_train,xt_train),0)
            train_y=torch.unsqueeze(torch.cat((ys,yt),0), dim=1)
            true_t=torch.unsqueeze(torch.cat((xs[:,0],xt[:,0]),0), dim=1)
            concat_true=torch.cat((train_y,true_t),1)
            concat_pred,phi=model(train_x)
            
            
            # p(t|x)
            t_batch=torch.cat((xs[:,0],xt[:,0]),0).numpy()
            #t_com=t_batch
            t_com=X_train.iloc[:,0].to_numpy()
            x_data_com=X_train.iloc[:,5:30].to_numpy()
            x_data_com = torch.from_numpy(x_data_com.astype(np.float32))
            concat_pred_com,phi_com=model(x_data_com)
            
            torch.nan_to_num(phi_com,nan=0.0)
            phi_com=phi_com.detach().numpy()
            #print(phi_com)
            #phi_com[np.isnan(phi_com)] = 0
            
            
            
            #scaler = preprocessing.StandardScaler().fit(phi_com)
            #tranformed=scaler.transform(phi_com)
            clf = LogisticRegression(max_iter=300,random_state=0).fit(phi_com, t_com) #
            #clf = LogisticRegression(random_state=0).fit(trans_comp, t_comp)
            #t_comp=torch.cat((xs[:,0],xt[:,0]),0).detach().numpy()
            trans_comp=phi.detach().numpy()
            p=clf.predict_proba(trans_comp)
            #true_prob=[1-t_comp.mean(), t_comp.mean()]
            true_prob=[1-t_batch.mean(), t_batch.mean()]

            model.zero_grad()

            #source_mse=criterion_reg(y0,ys)
            #target_mse=criterion_reg(y1,yt)
            ptx=torch.unsqueeze(torch.from_numpy(p[:,1].astype(np.float32)), dim=1)
            pt=torch.empty(ptx.shape[0],1).fill_(true_prob[1])
            
            sel_loss=torch.sum(torch.abs(ptx-pt))
            #May be there is another way
            combined_loss=(lambda_)*regression_loss(concat_true,concat_pred)+(1-lambda_)*sel_loss # think about tradeoff and other
            # loss function, check in paper
            #print('Training loss: ',combined_loss.item())
            # backward propagation
            combined_loss.backward()
            losss=batch_loss+combined_loss.item()

            # optimize
            optimizer.step()
        #train_loss.append(loss_cal(X_train,y_train,net))
        #val_loss.append(loss_cal(X_val,y_val,net))
        train_loss.append(losss)
        
        # Add prune mechanism
        #trial.report(accuracy, ep)

        #if trial.should_prune():
        #   raise optuna.exceptions.TrialPruned()
            
    #return cal_pehe(X_val,y_val,model),model
    return loss_cal(X_val,y_val,model),model

        
        

In [None]:
pehe_total=[]
for i in range(1,51):
    func = lambda trial: objective(trial, i)
    study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(func, n_trials=10)
    best_trial = study.best_trial
    print('Files completed so far = ',i)
    best_model=TarNet(study.best_trial.params)
    train_loss.clear()
    best_val,model=train_evaluate(study.best_trial.params, best_model, study.best_trial,i)
    data,y=get_data('test',i)
    pehe=cal_pehe(data,y,model)

    pehe_total.append(pehe)


[32m[I 2023-05-19 10:22:28,360][0m A new study created in memory with name: no-name-45a91771-8605-4abd-8f90-a77a8b19a3eb[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 10:22:53,392][0m Trial 0 finished with value: 1191.39697265625 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 1191.39697265625.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 10:24:10,194][0m Trial 1 finished with value: 274.9856262207031 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 274.9856262207031.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[3

Files completed so far =  1


[32m[I 2023-05-19 10:35:12,237][0m A new study created in memory with name: no-name-88de8301-385e-409f-8a86-1eb4c6d4f8ec[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 10:35:52,290][0m Trial 0 finished with value: 965.4296875 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 965.4296875.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 10:37:11,748][0m Trial 1 finished with value: 182.2666473388672 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 182.2666473388672.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-

Files completed so far =  2


[32m[I 2023-05-19 10:47:25,754][0m A new study created in memory with name: no-name-fda686ae-8862-41c7-b809-ce6a20f15e54[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 10:47:47,420][0m Trial 0 finished with value: 867.0318603515625 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 867.0318603515625.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 10:49:08,645][0m Trial 1 finished with value: 197.13330078125 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 197.13330078125.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m

Files completed so far =  3


[32m[I 2023-05-19 10:59:45,391][0m A new study created in memory with name: no-name-3cfb53ee-9d84-4216-a7db-1cef812b791a[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 11:00:24,337][0m Trial 0 finished with value: 514.27734375 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 514.27734375.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 11:01:45,244][0m Trial 1 finished with value: 206.55368041992188 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 206.55368041992188.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2

Files completed so far =  4


[32m[I 2023-05-19 11:12:07,839][0m A new study created in memory with name: no-name-6fb917a2-e11c-46a1-a2e6-8ba97e5c2deb[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 11:12:33,509][0m Trial 0 finished with value: 753.320068359375 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 753.320068359375.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 11:13:54,813][0m Trial 1 finished with value: 188.09725952148438 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 188.09725952148438.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),


Files completed so far =  5


[32m[I 2023-05-19 11:23:47,123][0m A new study created in memory with name: no-name-c3379859-b2cb-4c05-a622-c019888af552[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 11:24:09,592][0m Trial 0 finished with value: 2857.922607421875 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 2857.922607421875.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 11:25:28,637][0m Trial 1 finished with value: 222.5182647705078 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 222.5182647705078.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),


[32m[I 2023-05-19 11:33:50,019][0m Trial 8 finished with value: 249.37734985351562 and parameters: {'learning_rate': 7.712811947156355e-05, 'optimizer': 'Adam', 'batch_size': 185, 'RL11': 394, 'RL21': 294, 'RL32': 399, 'RG012': 261, 'RG022': 275, 'RG112': 228, 'RG122': 28}. Best is trial 5 with value: 205.360595703125.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 11:35:10,861][0m Trial 9 finished with value: 578.18994140625 and parameters: {'learning_rate': 1.2820100418916903e-05, 'optimizer': 'SGD', 'batch_size': 86, 'RL11': 268, 'RL21': 467, 'RL32': 139, 'RG012': 219, 'RG022': 391, 'RG112': 129, 'RG122': 54}. Best is trial 5 with value: 205.360595703125.[0m


Files completed so far =  6


[32m[I 2023-05-19 11:38:57,240][0m A new study created in memory with name: no-name-51c68be9-c119-4271-936e-eeb639f4c1f1[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 11:39:26,844][0m Trial 0 finished with value: 322.67840576171875 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 322.67840576171875.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 11:40:45,986][0m Trial 1 finished with value: 222.6527099609375 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 222.6527099609375.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),

Files completed so far =  7


[32m[I 2023-05-19 11:50:29,659][0m A new study created in memory with name: no-name-d57c6bab-79ec-4fd6-8929-11fac34bcda4[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 11:51:08,293][0m Trial 0 finished with value: 938.3861083984375 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 938.3861083984375.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 11:52:28,252][0m Trial 1 finished with value: 184.46995544433594 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 184.46995544433594.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),

Files completed so far =  8


[32m[I 2023-05-19 12:02:44,202][0m A new study created in memory with name: no-name-a4ed63e5-f3b5-4712-971c-3dd781f43b98[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 12:03:10,658][0m Trial 0 finished with value: 2389.44921875 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 2389.44921875.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 12:04:29,367][0m Trial 1 finished with value: 275.0518798828125 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 275.0518798828125.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2

Files completed so far =  9


[32m[I 2023-05-19 12:15:17,477][0m A new study created in memory with name: no-name-119904d0-475c-46a6-be97-19ea96af5100[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 12:15:44,861][0m Trial 0 finished with value: 3593.77490234375 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 3593.77490234375.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 12:17:10,189][0m Trial 1 finished with value: 285.946044921875 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 285.946044921875.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m

Files completed so far =  10


[32m[I 2023-05-19 12:30:31,296][0m A new study created in memory with name: no-name-eb6a3e8d-0036-4fbe-9809-cde206fe1c07[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 12:31:09,158][0m Trial 0 finished with value: 2780.090087890625 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 2780.090087890625.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 12:32:26,204][0m Trial 1 finished with value: 239.6426544189453 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 239.6426544189453.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Files completed so far =  11


[32m[I 2023-05-19 12:43:30,825][0m A new study created in memory with name: no-name-3b1e943d-168c-4354-a8ae-e6f67f0f6ff4[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 12:44:08,358][0m Trial 0 finished with value: 567.165283203125 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 567.165283203125.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 12:45:29,190][0m Trial 1 finished with value: 196.6753387451172 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 196.6753387451172.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[3

Files completed so far =  12


[32m[I 2023-05-19 12:55:22,919][0m A new study created in memory with name: no-name-250e644f-c4b8-4b79-b73d-fcc6c5961d43[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 12:55:46,729][0m Trial 0 finished with value: 434.30169677734375 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 434.30169677734375.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 12:57:06,091][0m Trial 1 finished with value: 267.4280090332031 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 267.4280090332031.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),

Files completed so far =  13


[32m[I 2023-05-19 13:09:16,892][0m A new study created in memory with name: no-name-ad713977-c98a-4d95-ae3e-2643da08f135[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 13:09:55,571][0m Trial 0 finished with value: 3421.4091796875 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 3421.4091796875.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 13:11:12,638][0m Trial 1 finished with value: 334.1187438964844 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 334.1187438964844.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
[32m[I 2023-05-19 13:19:23,792][0m Trial 8 finished with value: 409.0843505859375 and parameters: {'learning_rate': 7.712811947156355e-05, 'optimizer': 'Adam', 'batch_size': 185, 'RL11': 394, 'RL21': 294, 'RL32': 399, 'RG012': 261, 'RG022': 275, 'R

Files completed so far =  14


[32m[I 2023-05-19 13:24:26,480][0m A new study created in memory with name: no-name-4b8804cd-65f9-4ec3-a868-0334bd5547b3[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 13:24:50,716][0m Trial 0 finished with value: 3744.007080078125 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 3744.007080078125.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 13:26:11,211][0m Trial 1 finished with value: 503.1016845703125 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 503.1016845703125.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),


Files completed so far =  15


[32m[I 2023-05-19 13:39:07,622][0m A new study created in memory with name: no-name-32f97384-3637-4da5-b9f7-9f232d98096c[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 13:39:31,903][0m Trial 0 finished with value: 1746.9688720703125 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 1746.9688720703125.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 13:40:38,953][0m Trial 1 finished with value: 227.77761840820312 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 227.77761840820312.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4

Files completed so far =  16


[32m[I 2023-05-19 13:51:30,965][0m A new study created in memory with name: no-name-2416057a-3272-4317-8d77-0729577257bb[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 13:52:08,287][0m Trial 0 finished with value: 449.24810791015625 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 449.24810791015625.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 13:53:25,978][0m Trial 1 finished with value: 173.05946350097656 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 173.05946350097656.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4

Files completed so far =  17


[32m[I 2023-05-19 14:02:58,468][0m A new study created in memory with name: no-name-45bbbaf7-8590-425c-9948-69b185dde328[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 14:03:23,970][0m Trial 0 finished with value: 187.70980834960938 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 187.70980834960938.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 14:04:43,143][0m Trial 1 finished with value: 179.9090576171875 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 179.9090576171875.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),

Files completed so far =  18


[32m[I 2023-05-19 14:16:36,837][0m A new study created in memory with name: no-name-15440065-cfe2-48a9-8027-93bc03135f66[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 14:17:03,550][0m Trial 0 finished with value: 2482.160888671875 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 2482.160888671875.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 14:18:49,939][0m Trial 1 finished with value: 157.42938232421875 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 157.42938232421875.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
[32m[I 2023-05-19 14:27:08,662][0m Trial 8 finished with value: 281.5582275390625 and parameters: {'learning_rate': 7.712811947156355e-05, 'optimizer': 'Adam', 'batch_size': 185, 'RL11': 394, 'RL21': 294, 'RL32': 399, 'RG012': 261, 'RG022': 275, 'R

Files completed so far =  19


[32m[I 2023-05-19 14:30:03,236][0m A new study created in memory with name: no-name-5a356d66-9605-4a3e-9eda-0a058915935b[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 14:30:27,122][0m Trial 0 finished with value: 2306.45458984375 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 2306.45458984375.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-19 14:31:45,367][0m Trial 1 finished with value: 393.6147766113281 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 393.6147766113281.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[3

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
[32m[I 2023-05-19 14:39:50,480][0m Trial 8 finished with value: 197.34255981445312 and parameters: {'learning_rate': 7.712811947156355e-05, 'optimizer': 'Adam', 'batch_size': 185, 'RL11': 394, 'RL21': 294, 'RL32': 399, 'RG012': 261, 'RG022': 275, '

Files completed so far =  20


In [None]:
print(np.mean(pehe_total))

In [None]:
np.savetxt("v3_Ours_sel_loss_1_50_(IHDPa-Hyper_val_300ep_outsample).csv", pehe_total,delimiter =", ", fmt ='% s')

In [15]:
pehe_total

[0.7473039031028748,
 0.6983595490455627,
 0.9662187099456787,
 0.7418673634529114,
 0.8065203428268433,
 1.1107197999954224,
 0.4231216311454773,
 0.5315221548080444,
 0.8781972527503967,
 1.4119138717651367,
 1.3753641843795776,
 0.5007650256156921,
 0.6108106374740601,
 1.3233189582824707,
 0.8348250389099121,
 0.6897068023681641,
 0.662876546382904,
 0.5743263959884644,
 0.8990809321403503,
 1.0691567659378052,
 0.7262060642242432,
 0.7096218466758728,
 0.6990131139755249,
 0.5855550169944763,
 0.48488813638687134,
 0.9634784460067749,
 0.963718593120575,
 0.7788012027740479,
 1.54386305809021,
 0.6514948010444641,
 0.4629833400249481,
 0.7672489285469055,
 0.6974350810050964,
 0.778927743434906,
 0.6609624624252319,
 0.48041534423828125,
 0.45375707745552063,
 0.48724886775016785,
 1.1568771600723267,
 2.1152191162109375,
 1.7150038480758667,
 0.6617791056632996,
 0.32774782180786133,
 0.9956676959991455,
 0.753425121307373,
 0.7662010192871094,
 0.8731015920639038,
 1.33742964267

In [None]:
#plt.plot(train_loss)

In [29]:
np.mean(pehe_total)

0.9699601031313039

In [44]:
#for key, value in best_trial.params.items():
#    print("{}: {}".format(key, value))

[]

In [None]:
#ate_pred=torch.mean(cate_pred)
#print("Estimated ATE (True is 4):", ate_pred.detach().numpy(),'\n\n')

#print("Individualized CATE Estimates: BLUE")
#print(pd.Series(cate_pred.detach().numpy()).plot.kde(color='blue'))
#print("Individualized CATE True: Green")
#print(pd.Series(cate_true.detach().numpy()).plot.kde(color='green'))

#print("\nError CATE Estimates: RED")
#print(pd.Series(cate_pred.detach().numpy()-cate_true.detach().numpy()).plot.kde(color='red'))