In [1]:
import numpy as np
import pandas as pd
import optuna
from sklearn import preprocessing
#import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.manifold import TSNE
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch import optim
#from geomloss import SamplesLoss
from torch.autograd import Function
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.nn.functional import normalize
#from torchmetrics.classification import BinaryAccuracy
#from torchmetrics.classification import BinaryF1Score
from sklearn.linear_model import LogisticRegression
torch.manual_seed(0)

  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x7f307fa72a50>

In [2]:
class TarNet(nn.Module):
    def __init__(self,params):
        super(TarNet, self).__init__()
        self.encoder1 = nn.Linear(25, params['RL11'])
        self.encoder2 = nn.Linear(params['RL11'], params['RL21'])
        self.encoder3 = nn.Linear(params['RL21'], params['RL32'])

        self.regressor1_y0 = nn.Sequential(
            nn.Linear(params['RL32'], params['RG012']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressor2_y0 = nn.Sequential(
            nn.Linear(params['RG012'], params['RG022']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressorO_y0 = nn.Linear(params['RG022'], 1)

        self.regressor1_y1 = nn.Sequential(
            nn.Linear(params['RL32'], params['RG112']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressor2_y1 = nn.Sequential(
            nn.Linear(params['RG112'], params['RG122']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressorO_y1 = nn.Linear(params['RG122'], 1)


    def forward(self, inputs):
        x = nn.functional.elu(self.encoder1(inputs))
        x = nn.functional.elu(self.encoder2(x))
        phi = nn.functional.elu(self.encoder3(x))

        out_y0 = self.regressor1_y0(phi)
        out_y0 = self.regressor2_y0(out_y0)
        y0 = self.regressorO_y0(out_y0)

        out_y1 = self.regressor1_y1(phi)
        out_y1 = self.regressor2_y1(out_y1)
        y1 = self.regressorO_y1(out_y1)

        concat = torch.cat((y0, y1), 1)
        return concat,phi

In [3]:
def objective(trial,i):

    params = {
          'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
          'optimizer': trial.suggest_categorical("optimizer", ["Adam", "SGD"]),
          'batch_size':trial.suggest_int('batch_size', 8, 256),
          'RL11':trial.suggest_int('RL11', 16, 512),
          'RL21': trial.suggest_int('RL21', 16, 512),
          'RL32': trial.suggest_int('RL32', 16, 512),
          'RG012':trial.suggest_int('RG012', 16, 512),
        'RG022':trial.suggest_int('RG022', 16, 512),
        'RG112':trial.suggest_int('RG112', 16, 512),
        'RG122':trial.suggest_int('RG122', 16, 512),
          
          }

    model = TarNet(params)

    pehe,model= train_evaluate(params, model, trial,i)

    return pehe

In [4]:
class Data(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X.astype(np.float32))
        self.y = torch.from_numpy(y.astype(np.float32))
        self.len = self.X.shape[0]
       
    def __getitem__(self, index):
        return self.X[index], self.y[index]
   
    def __len__(self):
        return self.len

In [5]:
def get_data(data_type,file_num):

    if(data_type=='train'):
        data=pd.read_csv(f"Dataset/IHDP_a/ihdp_npci_train_{file_num}.csv")
    else:
        data = pd.read_csv(f"Dataset/IHDP_a/ihdp_npci_test_{file_num}.csv")

    x_data=pd.concat([data.iloc[:,0], data.iloc[:, 1:30]], axis = 1)
    #x_data=data.iloc[:, 5:30]
    y_data=data.iloc[:, 1]
    return x_data,y_data

In [6]:
def get_dataloader(x_data,y_data,batch_size):

    x_train_sr=x_data[x_data['treatment']==0]
    y_train_sr=y_data[x_data['treatment']==0]
    x_train_tr=x_data[x_data['treatment']==1]
    y_train_tr=y_data[x_data['treatment']==1]


    train_data_sr = Data(np.array(x_train_sr), np.array(y_train_sr))
    train_dataloader_sr = DataLoader(dataset=train_data_sr, batch_size=batch_size)

    train_data_tr = Data(np.array(x_train_tr), np.array(y_train_tr))
    train_dataloader_tr = DataLoader(dataset=train_data_tr, batch_size=batch_size)


    return train_dataloader_sr, train_dataloader_tr

In [7]:
def regression_loss(concat_true, concat_pred):
    #computes a standard MSE loss for TARNet
    y_true = concat_true[:, 0] #get individual vectors
    t_true = concat_true[:, 1]

    y0_pred = concat_pred[:, 0]
    y1_pred = concat_pred[:, 1]

    #Each head outputs a prediction for both potential outcomes
    #We use t_true as a switch to only calculate the factual loss
    loss0 = torch.sum((1. - t_true) * torch.square(y_true - y0_pred))
    loss1 = torch.sum(t_true * torch.square(y_true - y1_pred))
    #note Shi uses tf.reduce_sum for her losses instead of tf.reduce_mean.
    #They should be equivalent but it's possible that having larger gradients accelerates convergence.
    #You can always try changing it!
    return loss0 + loss1

In [8]:
def cal_pehe(data,y,model):
    #data,y=get_data('test',i)

    data=data.to_numpy()
    data=torch.from_numpy(data.astype(np.float32))



    concat_pred,phi=model(data[:,5:30])
    #dont forget to rescale the outcome before estimation!
    #y0_pred = data['y_scaler'].inverse_transform(concat_pred[:, 0].reshape(-1, 1))
    #y1_pred = data['y_scaler'].inverse_transform(concat_pred[:, 1].reshape(-1, 1))
    cate_pred=concat_pred[:,1]-concat_pred[:,0]
    cate_true=data[:,4]-data[:,3] #Hill's noiseless true values


    cate_err=torch.mean( torch.square( ( (cate_true) - (cate_pred) ) ) )

    return torch.sqrt(cate_err).item()


In [9]:
def loss_cal(X_data,y_data,net):
    
    x_train_sr=X_data[X_data['treatment']==0]
    y_train_sr=y_data[X_data['treatment']==0]
    x_train_tr=X_data[X_data['treatment']==1]
    y_train_tr=y_data[X_data['treatment']==1]
    xs_t=x_train_sr.iloc[:,0].to_numpy()
    xt_t=x_train_tr.iloc[:,0].to_numpy()
    
    xs=x_train_sr.iloc[:,5:30].to_numpy()
    xt=x_train_tr.iloc[:,5:30].to_numpy()
    xs_t=torch.from_numpy(xs_t.astype(np.float32))
    xt_t=torch.from_numpy(xt_t.astype(np.float32))
    y_train_sr=y_train_sr.to_numpy()
    y_train_tr=y_train_tr.to_numpy()
    xs=torch.from_numpy(xs.astype(np.float32))
    xt=torch.from_numpy(xt.astype(np.float32))
    
    y_train_sr=torch.from_numpy(y_train_sr.astype(np.float32))
    y_train_tr=torch.from_numpy(y_train_tr.astype(np.float32))
    
    
    input_data=torch.cat((xs,xt),0)
    true_y=torch.unsqueeze(torch.cat((y_train_sr,y_train_tr),0), dim=1)
    true_t=torch.unsqueeze(torch.cat((xs_t,xt_t),0), dim=1)
    
    
    concat_true=torch.cat((true_y,true_t),1)
    concat_pred,phi=net(input_data)
    loss=regression_loss(concat_true, concat_pred)
    
    return loss.item()

    

In [10]:
y_MSE=nn.MSELoss()
#criterion_reg=nn.MSELoss()
#criterion_reg=regression_loss(concat_true,concat_pred)
epochs=300
#batch_size=32

In [11]:
train_loss=[]
val_loss=[]
pehe_error=[]
batch_loss=0
num_files=2
def train_evaluate(param, model, trial,file_num):
    #for nf in range(1,num_files):
    x_data,y_data=get_data('train',file_num)
    X_train, X_val,y_train, y_val = train_test_split(x_data,y_data ,
                                       random_state=42, 
                                       test_size=0.20)
    
    #net=TarNet(25,.01)
    #opt_net = torch.optim.Adam(net.parameters(), lr=1e-4)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    
    optimizer = getattr(optim, param['optimizer'])(model.parameters(), lr= param['learning_rate'])
    
   
        #model = model.cuda()
    #model = model
        #criterion = criterion.cuda()

    for ep in range(1,epochs+1 ):
        #print('epoch',ep)
        batch_loss=0

        train_dataloader_sr, train_dataloader_tr=get_dataloader(X_train,y_train,param['batch_size'])

        for batch_idx, (train_source_data, train_target_data) in enumerate(zip(train_dataloader_sr, train_dataloader_tr)):

            xs,ys=train_source_data
            xt,yt=train_target_data

            xs_train=xs[:,5:30]
            xt_train=xt[:,5:30]

            train_x=torch.cat((xs_train,xt_train),0)
            train_y=torch.unsqueeze(torch.cat((ys,yt),0), dim=1)
            true_t=torch.unsqueeze(torch.cat((xs[:,0],xt[:,0]),0), dim=1)
            concat_true=torch.cat((train_y,true_t),1)
            concat_pred,phi=model(train_x)
            
            
            # p(t|x)
            t_batch=torch.cat((xs[:,0],xt[:,0]),0).numpy()
            #t_com=t_batch
            t_com=X_train.iloc[:,0].to_numpy()
            x_data_com=X_train.iloc[:,5:30].to_numpy()
            x_data_com = torch.from_numpy(x_data_com.astype(np.float32))
            concat_pred_com,phi_com=model(x_data_com)
            
            torch.nan_to_num(phi_com,nan=0.0)
            phi_com=phi_com.detach().numpy()
            #print(phi_com)
            #phi_com[np.isnan(phi_com)] = 0
            
            
            
            #scaler = preprocessing.StandardScaler().fit(phi_com)
            #tranformed=scaler.transform(phi_com)
            clf = LogisticRegression(max_iter=300,random_state=0).fit(phi_com, t_com) #
            #clf = LogisticRegression(random_state=0).fit(trans_comp, t_comp)
            #t_comp=torch.cat((xs[:,0],xt[:,0]),0).detach().numpy()
            trans_comp=phi.detach().numpy()
            p=clf.predict_proba(trans_comp)
            #true_prob=[1-t_comp.mean(), t_comp.mean()]
            true_prob=[1-t_batch.mean(), t_batch.mean()]

            model.zero_grad()

            #source_mse=criterion_reg(y0,ys)
            #target_mse=criterion_reg(y1,yt)
            ptx=torch.unsqueeze(torch.from_numpy(p[:,1].astype(np.float32)), dim=1)
            pt=torch.empty(ptx.shape[0],1).fill_(true_prob[1])
            
            sel_loss=torch.sum(torch.abs(ptx-pt))
            #May be there is another way
            combined_loss=regression_loss(concat_true,concat_pred)+sel_loss # think about tradeoff and other
            # loss function, check in paper
            #print('Training loss: ',combined_loss.item())
            # backward propagation
            combined_loss.backward()
            losss=batch_loss+combined_loss.item()

            # optimize
            optimizer.step()
        #train_loss.append(loss_cal(X_train,y_train,net))
        #val_loss.append(loss_cal(X_val,y_val,net))
        train_loss.append(losss)
        
        # Add prune mechanism
        #trial.report(accuracy, ep)

        #if trial.should_prune():
        #   raise optuna.exceptions.TrialPruned()
            
    #return cal_pehe(X_val,y_val,model),model
    return loss_cal(X_val,y_val,model),model

        
        

In [None]:
pehe_total=[]
for i in range(1,51):
    func = lambda trial: objective(trial, i)
    study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=42))
    study.optimize(func, n_trials=10)
    best_trial = study.best_trial
    print('Files completed so far = ',i)
    best_model=TarNet(study.best_trial.params)
    train_loss.clear()
    best_val,model=train_evaluate(study.best_trial.params, best_model, study.best_trial,i)
    data,y=get_data('test',i)
    pehe=cal_pehe(data,y,model)

    pehe_total.append(pehe)


[32m[I 2023-05-18 16:53:52,197][0m A new study created in memory with name: no-name-8bec25a5-e723-4e8e-ad89-e23efeae174d[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-18 16:54:16,387][0m Trial 0 finished with value: 1191.39697265625 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 1191.39697265625.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-18 16:55:37,690][0m Trial 1 finished with value: 274.985595703125 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 274.985595703125.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m

Files completed so far =  1


[32m[I 2023-05-18 17:06:49,491][0m A new study created in memory with name: no-name-53fd931b-c298-4970-ac4a-68a399ea1e21[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-18 17:07:27,081][0m Trial 0 finished with value: 965.4296875 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 965.4296875.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-18 17:08:44,722][0m Trial 1 finished with value: 182.2666473388672 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 182.2666473388672.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-

Files completed so far =  2


[32m[I 2023-05-18 17:18:32,214][0m A new study created in memory with name: no-name-e398f940-a1cb-4b76-8bbe-ebbe2ca09824[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-18 17:18:58,549][0m Trial 0 finished with value: 867.0318603515625 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 867.0318603515625.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-18 17:20:20,217][0m Trial 1 finished with value: 197.13330078125 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 197.13330078125.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m

Files completed so far =  3


[32m[I 2023-05-18 17:31:01,689][0m A new study created in memory with name: no-name-3b9db501-55ce-4e5e-af1f-a8ec9d6b0e9a[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-18 17:31:38,522][0m Trial 0 finished with value: 514.27734375 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 514.27734375.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-18 17:32:54,098][0m Trial 1 finished with value: 206.55368041992188 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 206.55368041992188.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2

Files completed so far =  4


[32m[I 2023-05-18 17:43:24,693][0m A new study created in memory with name: no-name-d4bc5987-7062-4d0f-b005-728e2043bd1f[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-18 17:43:50,347][0m Trial 0 finished with value: 693.5117797851562 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 693.5117797851562.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-18 17:45:09,816][0m Trial 1 finished with value: 186.9933319091797 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 186.9933319091797.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),


Files completed so far =  5


[32m[I 2023-05-18 17:55:18,341][0m A new study created in memory with name: no-name-102472a9-b393-4c1f-b298-02dcfd586da0[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-18 17:55:42,952][0m Trial 0 finished with value: 2611.76220703125 and parameters: {'learning_rate': 2.368863950364079e-05, 'optimizer': 'Adam', 'batch_size': 157, 'RL11': 93, 'RL21': 93, 'RL32': 44, 'RG012': 446, 'RG022': 314, 'RG112': 367, 'RG122': 26}. Best is trial 0 with value: 2611.76220703125.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),
[32m[I 2023-05-18 17:56:59,099][0m Trial 1 finished with value: 224.34677124023438 and parameters: {'learning_rate': 9.330606024425662e-05, 'optimizer': 'Adam', 'batch_size': 53, 'RL11': 107, 'RL21': 167, 'RL32': 276, 'RG012': 230, 'RG022': 160, 'RG112': 320, 'RG122': 85}. Best is trial 1 with value: 224.34677124023438.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-4),


In [None]:
print(np.mean(pehe_total))

In [None]:
np.savetxt("v2_Ours_sel_loss_1_50_(IHDPa-Hyper_val_300ep_outsample).csv", pehe_total,delimiter =", ", fmt ='% s')

In [None]:
pehe_total

In [None]:
plt.plot(train_loss)

In [29]:
np.mean(pehe_total[0:99])

0.9699601031313039

In [27]:
pehe_total

[0.7243529558181763,
 1.0974527597427368,
 0.9394102096557617,
 0.5717839002609253,
 0.9096927046775818,
 1.1487631797790527,
 0.3492894768714905,
 0.7218011021614075,
 0.8568695187568665,
 1.5668202638626099,
 0.9571043252944946,
 0.8009145259857178,
 0.6833164691925049,
 1.4453110694885254,
 0.8261359930038452,
 0.8569546341896057,
 0.5389509201049805,
 0.44863349199295044,
 0.9471801519393921,
 1.5991759300231934,
 0.6929744482040405,
 0.6652252674102783,
 0.784331738948822,
 0.6690612435340881,
 0.6766694188117981,
 0.9668810367584229,
 0.9014957547187805,
 0.7697464823722839,
 1.324318528175354,
 0.621450662612915,
 0.37054741382598877,
 0.6356532573699951,
 0.7808274626731873,
 0.6976386308670044,
 0.6815195679664612,
 0.5998408198356628,
 0.4749812185764313,
 0.3917040228843689,
 1.1139411926269531,
 1.7644257545471191,
 1.743551254272461,
 0.5670009255409241,
 0.36650827527046204,
 0.9405661225318909,
 0.6831498742103577,
 0.5027104616165161,
 0.9520105719566345,
 1.12562572956

In [44]:
#for key, value in best_trial.params.items():
#    print("{}: {}".format(key, value))

[]

In [None]:
#ate_pred=torch.mean(cate_pred)
#print("Estimated ATE (True is 4):", ate_pred.detach().numpy(),'\n\n')

#print("Individualized CATE Estimates: BLUE")
#print(pd.Series(cate_pred.detach().numpy()).plot.kde(color='blue'))
#print("Individualized CATE True: Green")
#print(pd.Series(cate_true.detach().numpy()).plot.kde(color='green'))

#print("\nError CATE Estimates: RED")
#print(pd.Series(cate_pred.detach().numpy()-cate_true.detach().numpy()).plot.kde(color='red'))