In [134]:
import numpy as np
import pandas as pd
import optuna
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.manifold import TSNE
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch import optim
from geomloss import SamplesLoss
from torch.autograd import Function
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.nn.functional import normalize
#from torchmetrics.classification import BinaryAccuracy
from torchmetrics.classification import BinaryF1Score
torch.manual_seed(0)

<torch._C.Generator at 0x7fb2b3177e90>

In [135]:
class TarNet(nn.Module):
    def __init__(self,params):
        super(TarNet, self).__init__()
        self.encoder1 = nn.Linear(25, params['RL11'])
        self.encoder2 = nn.Linear(params['RL11'], params['RL21'])
        self.encoder3 = nn.Linear(params['RL21'], params['RL32'])

        self.regressor1_y0 = nn.Sequential(
            nn.Linear(params['RL32'], params['RG012']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressor2_y0 = nn.Sequential(
            nn.Linear(params['RG012'], params['RG022']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressorO_y0 = nn.Linear(params['RG022'], 1)

        self.regressor1_y1 = nn.Sequential(
            nn.Linear(params['RL32'], params['RG112']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressor2_y1 = nn.Sequential(
            nn.Linear(params['RG112'], params['RG122']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressorO_y1 = nn.Linear(params['RG122'], 1)


    def forward(self, inputs):
        x = nn.functional.elu(self.encoder1(inputs))
        x = nn.functional.elu(self.encoder2(x))
        phi = nn.functional.elu(self.encoder3(x))

        out_y0 = self.regressor1_y0(phi)
        out_y0 = self.regressor2_y0(out_y0)
        y0 = self.regressorO_y0(out_y0)

        out_y1 = self.regressor1_y1(phi)
        out_y1 = self.regressor2_y1(out_y1)
        y1 = self.regressorO_y1(out_y1)

        concat = torch.cat((y0, y1), 1)
        return concat

In [136]:
def objective(trial,i):

    params = {
          'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),
          'optimizer': trial.suggest_categorical("optimizer", ["Adam", "SGD"]),
          'batch_size':trial.suggest_int('batch_size', 8, 256),
          'RL11':trial.suggest_int('RL11', 16, 512),
          'RL21': trial.suggest_int('RL21', 16, 512),
          'RL32': trial.suggest_int('RL32', 16, 512),
          'RG012':trial.suggest_int('RG012', 16, 512),
        'RG022':trial.suggest_int('RG022', 16, 512),
        'RG112':trial.suggest_int('RG112', 16, 512),
        'RG122':trial.suggest_int('RG122', 16, 512),
          
          }

    model = TarNet(params)

    pehe,model= train_evaluate(params, model, trial,i)

    return pehe

In [137]:
class Data(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X.astype(np.float32))
        self.y = torch.from_numpy(y.astype(np.float32))
        self.len = self.X.shape[0]
       
    def __getitem__(self, index):
        return self.X[index], self.y[index]
   
    def __len__(self):
        return self.len

In [138]:
def get_data(data_type,file_num):

    if(data_type=='train'):
        data=pd.read_csv(f"Dataset/IHDP_a/ihdp_npci_train_{file_num}.csv")
    else:
        data = pd.read_csv(f"Dataset/IHDP_a/ihdp_npci_test_{file_num}.csv")

    x_data=pd.concat([data.iloc[:,0], data.iloc[:, 1:30]], axis = 1)
    #x_data=data.iloc[:, 5:30]
    y_data=data.iloc[:, 1]
    return x_data,y_data

In [139]:
def get_dataloader(x_data,y_data,batch_size):

    x_train_sr=x_data[x_data['treatment']==0]
    y_train_sr=y_data[x_data['treatment']==0]
    x_train_tr=x_data[x_data['treatment']==1]
    y_train_tr=y_data[x_data['treatment']==1]


    train_data_sr = Data(np.array(x_train_sr), np.array(y_train_sr))
    train_dataloader_sr = DataLoader(dataset=train_data_sr, batch_size=batch_size)

    train_data_tr = Data(np.array(x_train_tr), np.array(y_train_tr))
    train_dataloader_tr = DataLoader(dataset=train_data_tr, batch_size=batch_size)


    return train_dataloader_sr, train_dataloader_tr

In [140]:
def regression_loss(concat_true, concat_pred):
    #computes a standard MSE loss for TARNet
    y_true = concat_true[:, 0] #get individual vectors
    t_true = concat_true[:, 1]

    y0_pred = concat_pred[:, 0]
    y1_pred = concat_pred[:, 1]

    #Each head outputs a prediction for both potential outcomes
    #We use t_true as a switch to only calculate the factual loss
    loss0 = torch.sum((1. - t_true) * torch.square(y_true - y0_pred))
    loss1 = torch.sum(t_true * torch.square(y_true - y1_pred))
    #note Shi uses tf.reduce_sum for her losses instead of tf.reduce_mean.
    #They should be equivalent but it's possible that having larger gradients accelerates convergence.
    #You can always try changing it!
    return loss0 + loss1

In [141]:
def cal_pehe(data,y,model):
    #data,y=get_data('test',i)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    data=data.to_numpy()
    data=torch.from_numpy(data.astype(np.float32)).to(device)



    concat_pred=model(data[:,5:30])
    #dont forget to rescale the outcome before estimation!
    #y0_pred = data['y_scaler'].inverse_transform(concat_pred[:, 0].reshape(-1, 1))
    #y1_pred = data['y_scaler'].inverse_transform(concat_pred[:, 1].reshape(-1, 1))
    cate_pred=concat_pred[:,1]-concat_pred[:,0]
    cate_true=data[:,4]-data[:,3] #Hill's noiseless true values


    cate_err=torch.mean( torch.square( ( (cate_true) - (cate_pred) ) ) )

    return torch.sqrt(cate_err).item()


In [142]:


def loss_cal(X_data,y_data,net,device):
    
    x_train_sr=X_data[X_data['treatment']==0]
    y_train_sr=y_data[X_data['treatment']==0]
    x_train_tr=X_data[X_data['treatment']==1]
    y_train_tr=y_data[X_data['treatment']==1]
    xs_t=x_train_sr.iloc[:,0].to_numpy()
    xt_t=x_train_tr.iloc[:,0].to_numpy()
    
    xs=x_train_sr.iloc[:,5:30].to_numpy()
    xt=x_train_tr.iloc[:,5:30].to_numpy()
    xs_t=torch.from_numpy(xs_t.astype(np.float32))
    xt_t=torch.from_numpy(xt_t.astype(np.float32))
    y_train_sr=y_train_sr.to_numpy()
    y_train_tr=y_train_tr.to_numpy()
    xs=torch.from_numpy(xs.astype(np.float32))
    xt=torch.from_numpy(xt.astype(np.float32))
    
    y_train_sr=torch.from_numpy(y_train_sr.astype(np.float32))
    y_train_tr=torch.from_numpy(y_train_tr.astype(np.float32))
    
    
    input_data=torch.cat((xs,xt),0).to(device)
    true_y=torch.unsqueeze(torch.cat((y_train_sr,y_train_tr),0), dim=1).to(device)
    true_t=torch.unsqueeze(torch.cat((xs_t,xt_t),0), dim=1).to(device)
    
    
    concat_true=torch.cat((true_y,true_t),1)
    concat_pred=net(input_data)
    loss=regression_loss(concat_true, concat_pred)
    loss_2=y_MSE(concat_pred[0],concat_pred[1])
    return loss.item()

def cf_loss(xs,xt):

        col =  ["treatment", "y_factual", "y_cfactual",]
        for i in range(1,28):
            col.append("x"+str(i))

        df_datac=pd.DataFrame(xs.numpy(),columns=col)
        df_datat=pd.DataFrame(xt.numpy(),columns=col)
        
                
        PhiC=xs[:,5:30]
        PhiT=xt[:,5:30]
              
        
        dists = torch.sqrt(torch.cdist(PhiC, PhiT))
        c_index=torch.argmin(dists, dim=0).tolist()
        t_index=torch.argmin(dists, dim=1).tolist()
    
        yT_nn=df_datac.iloc[c_index]['y_factual']
        yC_nn=df_datat.iloc[t_index]['y_factual']
        yT_nn=yT_nn.to_numpy()
        yT_nn=torch.from_numpy(yT_nn.astype(np.float32))
        yC_nn=yC_nn.to_numpy()
        yC_nn=torch.from_numpy(yC_nn.astype(np.float32))
        #y_nn = torch.cat([yT_nn, yC_nn],0) 
        return yC_nn,yT_nn

In [143]:

y_MSE=nn.MSELoss()
#criterion_reg=regression_loss(concat_true,concat_pred)
epochs=300
#batch_size=32

In [144]:
torch.cuda.is_available()

True

In [145]:
train_loss=[]
val_loss=[]
pehe_error=[]
num_files=2
def train_evaluate(param, model, trial,file_num):
    #for nf in range(1,num_files):
    x_data,y_data=get_data('train',file_num)
    X_train, X_val,y_train, y_val = train_test_split(x_data,y_data ,
                                       random_state=42, 
                                       test_size=0.20)
    
    #net=TarNet(25,.01)
    #opt_net = torch.optim.Adam(net.parameters(), lr=1e-4)
    
   
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print(device)
    optimizer = getattr(optim, param['optimizer'])(model.parameters(), lr= param['learning_rate'])
    
    #if use_cuda:

        #model = model.cuda()
    model = model.to(device)
        #criterion = criterion.cuda()

    for ep in range(1,epochs+1 ):

        train_dataloader_sr, train_dataloader_tr=get_dataloader(X_train,y_train,param['batch_size'])

        for batch_idx, (train_source_data, train_target_data) in enumerate(zip(train_dataloader_sr, train_dataloader_tr)):

            xs,ys=train_source_data
            xt,yt=train_target_data
            yC_nn,yT_nn=cf_loss(xs,xt)
            #print(xs)

            xs_train=xs[:,5:30]
            xt_train=xt[:,5:30]

            train_x=torch.cat((xs_train,xt_train),0).to(device)
            train_y=torch.unsqueeze(torch.cat((ys,yt),0), dim=1).to(device)
            true_t=torch.unsqueeze(torch.cat((xs[:,0],xt[:,0]),0), dim=1).to(device)
            concat_true=torch.cat((train_y,true_t),1).to(device)
            concat_pred=model(train_x).to(device)

            model.zero_grad()

            #source_mse=criterion_reg(y0,ys)
            #target_mse=criterion_reg(y1,yt)

            #combined loss
            combined_loss=regression_loss(concat_true,concat_pred)+(y_MSE(concat_pred[0:xs_train.shape[0],1],yC_nn.to(device)))+(y_MSE(concat_pred[xs_train.shape[0]:,0],yT_nn.to(device)))
            #print('Training loss: ',combined_loss.item())
            # backward propagation
            combined_loss.backward()

            # optimize
            optimizer.step()
        #train_loss.append(loss_cal(X_train,y_train,net))
        #val_loss.append(loss_cal(X_val,y_val,net))
        
        # Add prune mechanism
        #trial.report(accuracy, ep)

        #if trial.should_prune():
        #   raise optuna.exceptions.TrialPruned()
            
    #return cal_pehe(X_val,y_val,model),model
    return loss_cal(X_val,y_val,model,device),model

        
        

In [146]:
pehe_total=[]
for i in range(5,7):
    func = lambda trial: objective(trial, i)
    study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(func, n_trials=50)
    best_trial = study.best_trial
    best_model=TarNet(study.best_trial.params)
    best_val,model=train_evaluate(study.best_trial.params, best_model, study.best_trial,i)
    data,y=get_data('test',i)
    pehe=cal_pehe(data,y,model)

    pehe_total.append(pehe)


[32m[I 2023-05-16 17:15:45,245][0m A new study created in memory with name: no-name-f51b8bac-248c-4764-8e25-9b3f06640cfb[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:15:46,820][0m Trial 0 finished with value: 225.5309600830078 and parameters: {'learning_rate': 5.6115164153345e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 225.5309600830078.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:15:48,971][0m Trial 1 finished with value: 275.78924560546875 and parameters: {'learning_rate': 0.0008706020878304854, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 0 with value: 225.5309600830078.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:15:50,398][0m Trial 2 finished with value: 163.0251922607422 and parameters: {'learning_rate': 3.8396292998041685e-05, 'optimizer': 'SGD', 'batch_size': 203, 'RL11': 115, 'RL21': 271, 'RL32': 310, 'RG012': 39, 'RG022': 317, 'RG112': 100, 'RG122': 48}. Best is trial 2 with value: 163.0251922607422.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:15:52,704][0m Trial 3 finished with value: 297.7108154296875 and parameters: {'learning_rate': 0.000790261954970823, 'optimizer': 'Adam', 'batch_size': 83, 'RL11': 64, 'RL21': 356, 'RL32': 234, 'RG012': 76, 'RG022': 262, 'RG112': 33, 'RG122': 467}. Best is trial 2 with value: 163.0251922607422.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:15:54,396][0m Trial 4 finished with value: 192.56271362304688 and parameters: {'learning_rate': 3.292759134423613e-05, 'optimizer': 'Adam', 'batch_size': 137, 'RL11': 287, 'RL21': 107, 'RL32': 497, 'RG012': 401, 'RG022': 482, 'RG112': 460, 'RG122': 313}. Best is trial 2 with value: 163.0251922607422.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:15:58,969][0m Trial 5 finished with value: 318.8458251953125 and parameters: {'learning_rate': 0.0006978281265126031, 'optimizer': 'SGD', 'batch_size': 19, 'RL11': 177, 'RL21': 209, 'RL32': 150, 'RG012': 427, 'RG022': 193, 'RG112': 155, 'RG122': 285}. Best is trial 2 with value: 163.0251922607422.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:00,830][0m Trial 6 finished with value: 276.48590087890625 and parameters: {'learning_rate': 1.913588048769229e-05, 'optimizer': 'Adam', 'batch_size': 253, 'RL11': 399, 'RL21': 114, 'RL32': 18, 'RG012': 421, 'RG022': 367, 'RG112': 378, 'RG122': 399}. Best is trial 2 with value: 163.0251922607422.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:02,579][0m Trial 7 finished with value: 391.9149475097656 and parameters: {'learning_rate': 1.4063366777718176e-05, 'optimizer': 'Adam', 'batch_size': 222, 'RL11': 325, 'RL21': 180, 'RL32': 47, 'RG012': 170, 'RG022': 177, 'RG112': 378, 'RG122': 332}. Best is trial 2 with value: 163.0251922607422.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:04,226][0m Trial 8 finished with value: 235.14736938476562 and parameters: {'learning_rate': 0.000594874681321977, 'optimizer': 'Adam', 'batch_size': 185, 'RL11': 394, 'RL21': 294, 'RL32': 399, 'RG012': 261, 'RG022': 275, 'RG112': 228, 'RG122': 28}. Best is trial 2 with value: 163.0251922607422.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:06,169][0m Trial 9 finished with value: 184.7571258544922 and parameters: {'learning_rate': 1.6435497475111308e-05, 'optimizer': 'SGD', 'batch_size': 86, 'RL11': 268, 'RL21': 467, 'RL32': 139, 'RG012': 219, 'RG022': 391, 'RG112': 129, 'RG122': 54}. Best is trial 2 with value: 163.0251922607422.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:07,825][0m Trial 10 finished with value: 144.05978393554688 and parameters: {'learning_rate': 0.00010649438604541352, 'optimizer': 'SGD', 'batch_size': 207, 'RL11': 181, 'RL21': 409, 'RL32': 348, 'RG012': 23, 'RG022': 44, 'RG112': 34, 'RG122': 177}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:09,453][0m Trial 11 finished with value: 159.49745178222656 and parameters: {'learning_rate': 0.00011605061325789702, 'optimizer': 'SGD', 'batch_size': 201, 'RL11': 176, 'RL21': 419, 'RL32': 344, 'RG012': 21, 'RG022': 19, 'RG112': 20, 'RG122': 178}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:11,180][0m Trial 12 finished with value: 144.4960174560547 and parameters: {'learning_rate': 0.00014595432633183366, 'optimizer': 'SGD', 'batch_size': 255, 'RL11': 191, 'RL21': 504, 'RL32': 390, 'RG012': 115, 'RG022': 20, 'RG112': 37, 'RG122': 179}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:12,896][0m Trial 13 finished with value: 148.20870971679688 and parameters: {'learning_rate': 0.00013450549446761032, 'optimizer': 'SGD', 'batch_size': 246, 'RL11': 191, 'RL21': 511, 'RL32': 439, 'RG012': 127, 'RG022': 19, 'RG112': 204, 'RG122': 177}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:14,512][0m Trial 14 finished with value: 166.9627685546875 and parameters: {'learning_rate': 0.0002096949254617045, 'optimizer': 'SGD', 'batch_size': 177, 'RL11': 214, 'RL21': 392, 'RL32': 392, 'RG012': 334, 'RG022': 97, 'RG112': 79, 'RG122': 190}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[33m[W 2023-05-16 17:16:16,183][0m Trial 15 failed with parameters: {'learning_rate': 0.0002289712776988392, 'optimizer': 'SGD', 'batch_size': 232, 'RL11': 501, 'RL21': 18, 'RL32': 223, 'RG012': 119, 'RG022': 92, 'RG112': 179, 'RG122': 129} because of the following error: The value nan is not acceptable..[0m
[33m[W 2023-05-16 17:16:16,184][0m Trial 15 failed with value nan.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[33m[W 2023-05-16 17:16:17,850][0m Trial 16 failed with parameters: {'learning_rate': 0.00026914767644166604, 'optimizer': 'SGD', 'batch_size': 230, 'RL11': 25, 'RL21': 504, 'RL32': 485, 'RG012': 113, 'RG022': 86, 'RG112': 178, 'RG122': 126} because of the following error: The value nan is not acceptable..[0m
[33m[W 2023-05-16 17:16:17,851][0m Trial 16 failed with value nan.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[33m[W 2023-05-16 17:16:19,505][0m Trial 17 failed with parameters: {'learning_rate': 0.0002195820843111018, 'optimizer': 'SGD', 'batch_size': 226, 'RL11': 495, 'RL21': 505, 'RL32': 482, 'RG012': 107, 'RG022': 89, 'RG112': 186, 'RG122': 129} because of the following error: The value nan is not acceptable..[0m
[33m[W 2023-05-16 17:16:19,505][0m Trial 17 failed with value nan.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[33m[W 2023-05-16 17:16:21,380][0m Trial 18 failed with parameters: {'learning_rate': 0.0002665481042547526, 'optimizer': 'SGD', 'batch_size': 231, 'RL11': 37, 'RL21': 509, 'RL32': 499, 'RG012': 116, 'RG022': 90, 'RG112': 151, 'RG122': 230} because of the following error: The value nan is not acceptable..[0m
[33m[W 2023-05-16 17:16:21,381][0m Trial 18 failed with value nan.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[33m[W 2023-05-16 17:16:23,133][0m Trial 19 failed with parameters: {'learning_rate': 0.0002478278634130184, 'optimizer': 'SGD', 'batch_size': 230, 'RL11': 20, 'RL21': 512, 'RL32': 492, 'RG012': 114, 'RG022': 90, 'RG112': 186, 'RG122': 134} because of the following error: The value nan is not acceptable..[0m
[33m[W 2023-05-16 17:16:23,134][0m Trial 19 failed with value nan.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[33m[W 2023-05-16 17:16:24,810][0m Trial 20 failed with parameters: {'learning_rate': 0.00023400383905811692, 'optimizer': 'SGD', 'batch_size': 225, 'RL11': 499, 'RL21': 508, 'RL32': 495, 'RG012': 105, 'RG022': 87, 'RG112': 170, 'RG122': 132} because of the following error: The value nan is not acceptable..[0m
[33m[W 2023-05-16 17:16:24,811][0m Trial 20 failed with value nan.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:26,455][0m Trial 21 finished with value: 146.71310424804688 and parameters: {'learning_rate': 7.367344712091265e-05, 'optimizer': 'SGD', 'batch_size': 231, 'RL11': 488, 'RL21': 501, 'RL32': 511, 'RG012': 98, 'RG022': 82, 'RG112': 153, 'RG122': 231}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:28,172][0m Trial 22 finished with value: 163.43223571777344 and parameters: {'learning_rate': 0.0002789359962595687, 'optimizer': 'SGD', 'batch_size': 256, 'RL11': 21, 'RL21': 23, 'RL32': 219, 'RG012': 160, 'RG022': 94, 'RG112': 59, 'RG122': 100}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:29,781][0m Trial 23 finished with value: 162.7947998046875 and parameters: {'learning_rate': 0.00019533062940650593, 'optimizer': 'SGD', 'batch_size': 217, 'RL11': 233, 'RL21': 340, 'RL32': 367, 'RG012': 73, 'RG022': 66, 'RG112': 272, 'RG122': 137}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:31,337][0m Trial 24 finished with value: 197.3776092529297 and parameters: {'learning_rate': 8.174722873444712e-05, 'optimizer': 'SGD', 'batch_size': 120, 'RL11': 150, 'RL21': 439, 'RL32': 457, 'RG012': 342, 'RG022': 120, 'RG112': 484, 'RG122': 222}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[33m[W 2023-05-16 17:16:33,019][0m Trial 25 failed with parameters: {'learning_rate': 0.000358494130231387, 'optimizer': 'SGD', 'batch_size': 175, 'RL11': 345, 'RL21': 374, 'RL32': 317, 'RG012': 148, 'RG022': 233, 'RG112': 178, 'RG122': 128} because of the following error: The value nan is not acceptable..[0m
[33m[W 2023-05-16 17:16:33,020][0m Trial 25 failed with value nan.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[33m[W 2023-05-16 17:16:34,666][0m Trial 26 failed with parameters: {'learning_rate': 0.0002938556570512552, 'optimizer': 'SGD', 'batch_size': 165, 'RL11': 325, 'RL21': 371, 'RL32': 313, 'RG012': 169, 'RG022': 219, 'RG112': 186, 'RG122': 131} because of the following error: The value nan is not acceptable..[0m
[33m[W 2023-05-16 17:16:34,667][0m Trial 26 failed with value nan.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:36,297][0m Trial 27 finished with value: 187.84918212890625 and parameters: {'learning_rate': 0.00035705097909553244, 'optimizer': 'SGD', 'batch_size': 157, 'RL11': 334, 'RL21': 370, 'RL32': 310, 'RG012': 17, 'RG022': 18, 'RG112': 175, 'RG122': 131}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:38,068][0m Trial 28 finished with value: 158.3787841796875 and parameters: {'learning_rate': 0.00012706810749944428, 'optimizer': 'SGD', 'batch_size': 186, 'RL11': 247, 'RL21': 457, 'RL32': 423, 'RG012': 148, 'RG022': 148, 'RG112': 99, 'RG122': 371}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:39,971][0m Trial 29 finished with value: 161.284423828125 and parameters: {'learning_rate': 7.428493869850675e-05, 'optimizer': 'SGD', 'batch_size': 224, 'RL11': 496, 'RL21': 512, 'RL32': 512, 'RG012': 99, 'RG022': 68, 'RG112': 131, 'RG122': 244}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:41,783][0m Trial 30 finished with value: 150.43861389160156 and parameters: {'learning_rate': 6.02133479009758e-05, 'optimizer': 'SGD', 'batch_size': 233, 'RL11': 510, 'RL21': 478, 'RL32': 470, 'RG012': 105, 'RG022': 71, 'RG112': 16, 'RG122': 228}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:43,749][0m Trial 31 finished with value: 164.53355407714844 and parameters: {'learning_rate': 9.696821856699431e-05, 'optimizer': 'SGD', 'batch_size': 232, 'RL11': 446, 'RL21': 419, 'RL32': 347, 'RG012': 498, 'RG022': 233, 'RG112': 69, 'RG122': 269}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:45,571][0m Trial 32 finished with value: 185.86605834960938 and parameters: {'learning_rate': 0.00015954867018455252, 'optimizer': 'SGD', 'batch_size': 206, 'RL11': 307, 'RL21': 319, 'RL32': 417, 'RG012': 64, 'RG022': 123, 'RG112': 194, 'RG122': 133}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:47,332][0m Trial 33 finished with value: 162.53656005859375 and parameters: {'learning_rate': 9.817199773010843e-05, 'optimizer': 'SGD', 'batch_size': 238, 'RL11': 152, 'RL21': 488, 'RL32': 476, 'RG012': 120, 'RG022': 54, 'RG112': 140, 'RG122': 209}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:49,088][0m Trial 34 finished with value: 190.06002807617188 and parameters: {'learning_rate': 0.00016718377454763586, 'optimizer': 'SGD', 'batch_size': 167, 'RL11': 356, 'RL21': 404, 'RL32': 321, 'RG012': 194, 'RG022': 44, 'RG112': 257, 'RG122': 156}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:50,761][0m Trial 35 finished with value: 171.64720153808594 and parameters: {'learning_rate': 4.887255759216168e-05, 'optimizer': 'SGD', 'batch_size': 123, 'RL11': 215, 'RL21': 439, 'RL32': 380, 'RG012': 58, 'RG022': 215, 'RG112': 62, 'RG122': 286}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:52,605][0m Trial 36 finished with value: 154.33045959472656 and parameters: {'learning_rate': 8.948063412375047e-05, 'optimizer': 'SGD', 'batch_size': 202, 'RL11': 144, 'RL21': 381, 'RL32': 277, 'RG012': 308, 'RG022': 131, 'RG112': 111, 'RG122': 104}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:54,294][0m Trial 37 finished with value: 162.5286865234375 and parameters: {'learning_rate': 5.7223484846347835e-05, 'optimizer': 'SGD', 'batch_size': 143, 'RL11': 71, 'RL21': 512, 'RL32': 185, 'RG012': 193, 'RG022': 96, 'RG112': 47, 'RG122': 250}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:56,123][0m Trial 38 finished with value: 147.54518127441406 and parameters: {'learning_rate': 7.187519318371559e-05, 'optimizer': 'SGD', 'batch_size': 243, 'RL11': 466, 'RL21': 458, 'RL32': 441, 'RG012': 84, 'RG022': 498, 'RG112': 167, 'RG122': 334}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:57,997][0m Trial 39 finished with value: 155.69903564453125 and parameters: {'learning_rate': 6.477942869311063e-05, 'optimizer': 'SGD', 'batch_size': 255, 'RL11': 462, 'RL21': 462, 'RL32': 447, 'RG012': 102, 'RG022': 446, 'RG112': 164, 'RG122': 360}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:16:59,818][0m Trial 40 finished with value: 148.98403930664062 and parameters: {'learning_rate': 0.00011260693187092923, 'optimizer': 'SGD', 'batch_size': 213, 'RL11': 451, 'RL21': 435, 'RL32': 487, 'RG012': 46, 'RG022': 500, 'RG112': 275, 'RG122': 314}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:17:01,759][0m Trial 41 finished with value: 154.44464111328125 and parameters: {'learning_rate': 4.506181054974253e-05, 'optimizer': 'SGD', 'batch_size': 236, 'RL11': 401, 'RL21': 482, 'RL32': 405, 'RG012': 80, 'RG022': 388, 'RG112': 332, 'RG122': 446}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:17:03,862][0m Trial 42 finished with value: 148.75193786621094 and parameters: {'learning_rate': 7.540632963321562e-05, 'optimizer': 'Adam', 'batch_size': 194, 'RL11': 484, 'RL21': 489, 'RL32': 433, 'RG012': 131, 'RG022': 443, 'RG112': 90, 'RG122': 195}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:17:05,761][0m Trial 43 finished with value: 156.35504150390625 and parameters: {'learning_rate': 3.694124264658774e-05, 'optimizer': 'SGD', 'batch_size': 224, 'RL11': 119, 'RL21': 327, 'RL32': 351, 'RG012': 27, 'RG022': 330, 'RG112': 232, 'RG122': 437}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:17:07,612][0m Trial 44 finished with value: 171.35833740234375 and parameters: {'learning_rate': 0.0001447184593495064, 'optimizer': 'SGD', 'batch_size': 243, 'RL11': 423, 'RL21': 240, 'RL32': 512, 'RG012': 88, 'RG022': 273, 'RG112': 116, 'RG122': 276}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:17:10,660][0m Trial 45 finished with value: 198.59507751464844 and parameters: {'learning_rate': 3.0885909091845585e-05, 'optimizer': 'Adam', 'batch_size': 100, 'RL11': 278, 'RL21': 446, 'RL32': 286, 'RG012': 46, 'RG022': 42, 'RG112': 38, 'RG122': 155}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:17:15,865][0m Trial 46 finished with value: 176.3148956298828 and parameters: {'learning_rate': 6.986662264417684e-05, 'optimizer': 'SGD', 'batch_size': 25, 'RL11': 372, 'RL21': 411, 'RL32': 462, 'RG012': 249, 'RG022': 171, 'RG112': 313, 'RG122': 315}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:17:18,584][0m Trial 47 finished with value: 169.7120819091797 and parameters: {'learning_rate': 9.563299777591729e-05, 'optimizer': 'Adam', 'batch_size': 57, 'RL11': 430, 'RL21': 360, 'RL32': 247, 'RG012': 182, 'RG022': 334, 'RG112': 201, 'RG122': 351}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:17:20,353][0m Trial 48 finished with value: 172.84349060058594 and parameters: {'learning_rate': 2.5963512662012436e-05, 'optimizer': 'SGD', 'batch_size': 215, 'RL11': 96, 'RL21': 288, 'RL32': 370, 'RG012': 139, 'RG022': 88, 'RG112': 429, 'RG122': 407}. Best is trial 10 with value: 144.05978393554688.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-3),


cuda


[32m[I 2023-05-16 17:17:22,344][0m Trial 49 finished with value: 151.86009216308594 and parameters: {'learning_rate': 0.00011632326671874174, 'optimizer': 'SGD', 'batch_size': 244, 'RL11': 196, 'RL21': 507, 'RL32': 430, 'RG012': 220, 'RG022': 20, 'RG112': 204, 'RG122': 501}. Best is trial 10 with value: 144.05978393554688.[0m


cuda


NameError: name 'use_cuda' is not defined

In [68]:
print(np.mean(pehe_total))

0.7888284027576447


[0.7274773120880127, 0.8501794934272766]

In [14]:
#np.mean(pehe_total[0:99])

In [15]:
pehe_total

[0.7245088219642639,
 1.0319709777832031,
 0.8922997713088989,
 0.766045093536377,
 1.176272988319397,
 1.3779881000518799,
 0.33619603514671326,
 0.9627038240432739,
 0.8635401725769043,
 1.3651361465454102,
 0.9023053050041199,
 0.7707643508911133,
 0.6479784250259399,
 1.3890775442123413,
 1.2930279970169067,
 0.8717934489250183,
 0.5604594945907593,
 0.4141177237033844,
 0.9313862323760986,
 1.3393807411193848,
 0.9354116320610046,
 0.5097612142562866,
 0.8281165957450867,
 0.6784378290176392,
 0.9889419674873352,
 0.8551837205886841,
 0.976718544960022,
 0.8554731607437134,
 1.5037286281585693,
 0.6007119417190552,
 0.5339930653572083,
 0.6747851967811584,
 0.815698504447937,
 0.8607384562492371,
 0.7166107892990112,
 0.6790837049484253,
 0.567247748374939,
 0.45495477318763733,
 1.0859042406082153,
 1.9381632804870605,
 1.7002204656600952,
 0.7606197595596313,
 0.34149885177612305,
 0.8747959733009338,
 0.5832961797714233,
 0.5637949705123901,
 0.8216122984886169,
 1.402480363845

In [16]:
np.savetxt("CFR_y_loss_1_100_(IHDPa-Hyper_val_300ep_outsample).csv", pehe_total,delimiter =", ", fmt ='% s')

In [44]:
#for key, value in best_trial.params.items():
#    print("{}: {}".format(key, value))

[]

In [None]:
#ate_pred=torch.mean(cate_pred)
#print("Estimated ATE (True is 4):", ate_pred.detach().numpy(),'\n\n')

#print("Individualized CATE Estimates: BLUE")
#print(pd.Series(cate_pred.detach().numpy()).plot.kde(color='blue'))
#print("Individualized CATE True: Green")
#print(pd.Series(cate_true.detach().numpy()).plot.kde(color='green'))

#print("\nError CATE Estimates: RED")
#print(pd.Series(cate_pred.detach().numpy()-cate_true.detach().numpy()).plot.kde(color='red'))