In [50]:
import numpy as np
import pandas as pd
import optuna
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.manifold import TSNE
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch import optim
from geomloss import SamplesLoss
from torch.autograd import Function
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.nn.functional import normalize
#from torchmetrics.classification import BinaryAccuracy
from torchmetrics.classification import BinaryF1Score
torch.manual_seed(0)

<torch._C.Generator at 0x7f8367c892d0>

In [51]:
class TarNet(nn.Module):
    def __init__(self,params):
        super(TarNet, self).__init__()
        self.encoder1 = nn.Linear(25, params['RL11'])
        self.encoder2 = nn.Linear(params['RL11'], params['RL21'])
        self.encoder3 = nn.Linear(params['RL21'], params['RL32'])

        self.regressor1_y0 = nn.Sequential(
            nn.Linear(params['RL32'], params['RG012']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressor2_y0 = nn.Sequential(
            nn.Linear(params['RG012'], params['RG022']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressorO_y0 = nn.Linear(params['RG022'], 1)

        self.regressor1_y1 = nn.Sequential(
            nn.Linear(params['RL32'], params['RG112']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressor2_y1 = nn.Sequential(
            nn.Linear(params['RG112'], params['RG122']),
            nn.ELU(),
            nn.Dropout(p=.01),
        )
        self.regressorO_y1 = nn.Linear(params['RG122'], 1)


    def forward(self, inputs):
        x = nn.functional.elu(self.encoder1(inputs))
        x = nn.functional.elu(self.encoder2(x))
        phi = nn.functional.elu(self.encoder3(x))

        out_y0 = self.regressor1_y0(phi)
        out_y0 = self.regressor2_y0(out_y0)
        y0 = self.regressorO_y0(out_y0)

        out_y1 = self.regressor1_y1(phi)
        out_y1 = self.regressor2_y1(out_y1)
        y1 = self.regressorO_y1(out_y1)

        concat = torch.cat((y0, y1), 1)
        return concat

In [52]:
def objective(trial,i):

    params = {
          'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
          'optimizer': trial.suggest_categorical("optimizer", ["Adam", "SGD"]),
          'batch_size':trial.suggest_int('batch_size', 8, 256),
          'RL11':trial.suggest_int('RL11', 16, 512),
          'RL21': trial.suggest_int('RL21', 16, 512),
          'RL32': trial.suggest_int('RL32', 16, 512),
          'RG012':trial.suggest_int('RG012', 16, 512),
        'RG022':trial.suggest_int('RG022', 16, 512),
        'RG112':trial.suggest_int('RG112', 16, 512),
        'RG122':trial.suggest_int('RG122', 16, 512),
          
          }

    model = TarNet(params)

    pehe,model= train_evaluate(params, model, trial,i)

    return pehe

In [53]:
class Data(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X.astype(np.float32))
        self.y = torch.from_numpy(y.astype(np.float32))
        self.len = self.X.shape[0]
       
    def __getitem__(self, index):
        return self.X[index], self.y[index]
   
    def __len__(self):
        return self.len

In [54]:
def get_data(data_type,file_num):

    if(data_type=='train'):
        data=pd.read_csv(f"Dataset/IHDP_a/ihdp_npci_train_{file_num}.csv")
    else:
        data = pd.read_csv(f"Dataset/IHDP_a/ihdp_npci_test_{file_num}.csv")

    x_data=pd.concat([data.iloc[:,0], data.iloc[:, 1:30]], axis = 1)
    #x_data=data.iloc[:, 5:30]
    y_data=data.iloc[:, 1]
    return x_data,y_data

In [55]:
def get_dataloader(x_data,y_data,batch_size):

    x_train_sr=x_data[x_data['treatment']==0]
    y_train_sr=y_data[x_data['treatment']==0]
    x_train_tr=x_data[x_data['treatment']==1]
    y_train_tr=y_data[x_data['treatment']==1]


    train_data_sr = Data(np.array(x_train_sr), np.array(y_train_sr))
    train_dataloader_sr = DataLoader(dataset=train_data_sr, batch_size=batch_size)

    train_data_tr = Data(np.array(x_train_tr), np.array(y_train_tr))
    train_dataloader_tr = DataLoader(dataset=train_data_tr, batch_size=batch_size)


    return train_dataloader_sr, train_dataloader_tr

In [56]:
def regression_loss(concat_true, concat_pred):
    #computes a standard MSE loss for TARNet
    y_true = concat_true[:, 0] #get individual vectors
    t_true = concat_true[:, 1]

    y0_pred = concat_pred[:, 0]
    y1_pred = concat_pred[:, 1]

    #Each head outputs a prediction for both potential outcomes
    #We use t_true as a switch to only calculate the factual loss
    loss0 = torch.sum((1. - t_true) * torch.square(y_true - y0_pred))
    loss1 = torch.sum(t_true * torch.square(y_true - y1_pred))
    #note Shi uses tf.reduce_sum for her losses instead of tf.reduce_mean.
    #They should be equivalent but it's possible that having larger gradients accelerates convergence.
    #You can always try changing it!
    return loss0 + loss1

In [57]:
def cal_pehe(data,y,model):
    #data,y=get_data('test',i)

    data=data.to_numpy()
    data=torch.from_numpy(data.astype(np.float32))



    concat_pred=model(data[:,5:30])
    #dont forget to rescale the outcome before estimation!
    #y0_pred = data['y_scaler'].inverse_transform(concat_pred[:, 0].reshape(-1, 1))
    #y1_pred = data['y_scaler'].inverse_transform(concat_pred[:, 1].reshape(-1, 1))
    cate_pred=concat_pred[:,1]-concat_pred[:,0]
    cate_true=data[:,4]-data[:,3] #Hill's noiseless true values


    cate_err=torch.mean( torch.square( ( (cate_true) - (cate_pred) ) ) )

    return torch.sqrt(cate_err).item()


In [58]:
def loss_cal(X_data,y_data,net):
    
    x_train_sr=X_data[X_data['treatment']==0]
    y_train_sr=y_data[X_data['treatment']==0]
    x_train_tr=X_data[X_data['treatment']==1]
    y_train_tr=y_data[X_data['treatment']==1]
    xs_t=x_train_sr.iloc[:,0].to_numpy()
    xt_t=x_train_tr.iloc[:,0].to_numpy()
    
    xs=x_train_sr.iloc[:,5:30].to_numpy()
    xt=x_train_tr.iloc[:,5:30].to_numpy()
    xs_t=torch.from_numpy(xs_t.astype(np.float32))
    xt_t=torch.from_numpy(xt_t.astype(np.float32))
    y_train_sr=y_train_sr.to_numpy()
    y_train_tr=y_train_tr.to_numpy()
    xs=torch.from_numpy(xs.astype(np.float32))
    xt=torch.from_numpy(xt.astype(np.float32))
    
    y_train_sr=torch.from_numpy(y_train_sr.astype(np.float32))
    y_train_tr=torch.from_numpy(y_train_tr.astype(np.float32))
    
    
    input_data=torch.cat((xs,xt),0)
    true_y=torch.unsqueeze(torch.cat((y_train_sr,y_train_tr),0), dim=1)
    true_t=torch.unsqueeze(torch.cat((xs_t,xt_t),0), dim=1)
    
    
    concat_true=torch.cat((true_y,true_t),1)
    concat_pred=net(input_data)
    loss=regression_loss(concat_true, concat_pred)
    
    return loss.item()

    

In [59]:

#criterion_reg=nn.MSELoss()
#criterion_reg=regression_loss(concat_true,concat_pred)
epochs=300
#batch_size=32

In [60]:
train_loss=[]
val_loss=[]
pehe_error=[]
num_files=2
def train_evaluate(param, model, trial,file_num):
    #for nf in range(1,num_files):
    x_data,y_data=get_data('train',file_num)
    X_train, X_val,y_train, y_val = train_test_split(x_data,y_data ,
                                       random_state=42, 
                                       test_size=0.20)
    
    #net=TarNet(25,.01)
    #opt_net = torch.optim.Adam(net.parameters(), lr=1e-4)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    
    optimizer = getattr(optim, param['optimizer'])(model.parameters(), lr= param['learning_rate'])
    
    if use_cuda:

        #model = model.cuda()
        model = model
        #criterion = criterion.cuda()

    for ep in range(1,epochs+1 ):

        train_dataloader_sr, train_dataloader_tr=get_dataloader(X_train,y_train,param['batch_size'])

        for batch_idx, (train_source_data, train_target_data) in enumerate(zip(train_dataloader_sr, train_dataloader_tr)):

            xs,ys=train_source_data
            xt,yt=train_target_data

            xs_train=xs[:,5:30]
            xt_train=xt[:,5:30]

            train_x=torch.cat((xs_train,xt_train),0)
            train_y=torch.unsqueeze(torch.cat((ys,yt),0), dim=1)
            true_t=torch.unsqueeze(torch.cat((xs[:,0],xt[:,0]),0), dim=1)
            concat_true=torch.cat((train_y,true_t),1)
            concat_pred=model(train_x)

            model.zero_grad()

            #source_mse=criterion_reg(y0,ys)
            #target_mse=criterion_reg(y1,yt)

            #combined loss
            combined_loss=regression_loss(concat_true,concat_pred)
            #print('Training loss: ',combined_loss.item())
            # backward propagation
            combined_loss.backward()

            # optimize
            optimizer.step()
        #train_loss.append(loss_cal(X_train,y_train,net))
        #val_loss.append(loss_cal(X_val,y_val,net))
        
        # Add prune mechanism
        #trial.report(accuracy, ep)

        #if trial.should_prune():
        #   raise optuna.exceptions.TrialPruned()
            
    return cal_pehe(X_val,y_val,model),model

        
        

In [61]:
pehe_total=[]
for i in range(1,101):
    func = lambda trial: objective(trial, i)
    study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler())
    study.optimize(func, n_trials=30)
    best_trial = study.best_trial
    best_model=TarNet(study.best_trial.params)
    best_val,model=train_evaluate(study.best_trial.params, best_model, study.best_trial,i)
    data,y=get_data('test',i)
    pehe=cal_pehe(data,y,model)

    pehe_total.append(pehe)


[32m[I 2023-02-28 09:49:25,567][0m A new study created in memory with name: no-name-5402d137-0434-44d4-a2e7-dc961460991e[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
[32m[I 2023-02-28 09:49:27,220][0m Trial 0 finished with value: 0.9412173628807068 and parameters: {'learning_rate': 9.929025330113483e-05, 'optimizer': 'SGD', 'batch_size': 226, 'RL11': 371, 'RL21': 196, 'RL32': 91, 'RG012': 326, 'RG022': 461, 'RG112': 32, 'RG122': 435}. Best is trial 0 with value: 0.9412173628807068.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
[32m[I 2023-02-28 09:49:31,146][0m Trial 1 finished with value: 0.9045663475990295 and parameters: {'learning_rate': 7.530251425944107e-05, 'optimizer': 'SGD', 'batch_size': 13, 'RL11': 485, 'RL21': 176, 'RL32': 353, 'RG012': 420, 'RG022': 35, 'RG112': 447, 'RG122': 114}. Best is trial 1 with value: 0.9045663475990295.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1

[33m[W 2023-02-28 09:49:58,548][0m Trial 12 failed with parameters: {'learning_rate': 0.0032661967682390738, 'optimizer': 'SGD', 'batch_size': 170, 'RL11': 370, 'RL21': 198, 'RL32': 470, 'RG012': 381, 'RG022': 60, 'RG112': 309, 'RG122': 215} because of the following error: The value nan is not acceptable..[0m
[33m[W 2023-02-28 09:49:58,549][0m Trial 12 failed with value nan.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
[32m[I 2023-02-28 09:50:01,727][0m Trial 13 finished with value: 42.24101638793945 and parameters: {'learning_rate': 0.07984699173956449, 'optimizer': 'Adam', 'batch_size': 194, 'RL11': 423, 'RL21': 168, 'RL32': 414, 'RG012': 435, 'RG022': 230, 'RG112': 310, 'RG122': 63}. Best is trial 6 with value: 0.728326141834259.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
[33m[W 2023-02-28 09:50:04,120][0m Trial 14 failed with parameters: {'learning_rate': 0.0006060172874334604, 'optimizer': 'SGD', 'batch_s

[33m[W 2023-02-28 09:50:23,540][0m Trial 24 failed with parameters: {'learning_rate': 0.0005865765148533919, 'optimizer': 'SGD', 'batch_size': 95, 'RL11': 25, 'RL21': 345, 'RL32': 173, 'RG012': 149, 'RG022': 499, 'RG112': 295, 'RG122': 323} because of the following error: The value nan is not acceptable..[0m
[33m[W 2023-02-28 09:50:23,541][0m Trial 24 failed with value nan.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
[33m[W 2023-02-28 09:50:25,246][0m Trial 25 failed with parameters: {'learning_rate': 0.0007904542976131076, 'optimizer': 'SGD', 'batch_size': 105, 'RL11': 82, 'RL21': 16, 'RL32': 175, 'RG012': 312, 'RG022': 460, 'RG112': 293, 'RG122': 19} because of the following error: The value nan is not acceptable..[0m
[33m[W 2023-02-28 09:50:25,247][0m Trial 25 failed with value nan.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
[33m[W 2023-02-28 09:50:27,352][0m Trial 26 failed with parameters: {'learning_

[33m[W 2023-02-28 09:50:50,595][0m Trial 6 failed with parameters: {'learning_rate': 0.034008976125369524, 'optimizer': 'SGD', 'batch_size': 28, 'RL11': 37, 'RL21': 345, 'RL32': 375, 'RG012': 275, 'RG022': 475, 'RG112': 249, 'RG122': 504} because of the following error: The value nan is not acceptable..[0m
[33m[W 2023-02-28 09:50:50,596][0m Trial 6 failed with value nan.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
[33m[W 2023-02-28 09:50:53,231][0m Trial 7 failed with parameters: {'learning_rate': 0.037750369982401256, 'optimizer': 'SGD', 'batch_size': 252, 'RL11': 169, 'RL21': 439, 'RL32': 436, 'RG012': 392, 'RG022': 478, 'RG112': 90, 'RG122': 399} because of the following error: The value nan is not acceptable..[0m
[33m[W 2023-02-28 09:50:53,232][0m Trial 7 failed with value nan.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
[33m[W 2023-02-28 09:50:55,719][0m Trial 8 failed with parameters: {'learning_rate'

  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
[32m[I 2023-02-28 09:51:21,699][0m Trial 19 finished with value: 1.087802767753601 and parameters: {'learning_rate': 1.0077577279875214e-05, 'optimizer': 'SGD', 'batch_size': 83, 'RL11': 353, 'RL21': 404, 'RL32': 157, 'RG012': 201, 'RG022': 285, 'RG112': 314, 'RG122': 485}. Best is trial 13 with value: 0.6204575300216675.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
[33m[W 2023-02-28 09:51:23,657][0m Trial 20 failed with parameters: {'learning_rate': 0.0006765829958730669, 'optimizer': 'SGD', 'batch_size': 48, 'RL11': 291, 'RL21': 430, 'RL32': 321, 'RG012': 322, 'RG022': 159, 'RG112': 177, 'RG122': 93} because of the following error: The value nan is not acceptable..[0m
[33m[W 2023-02-28 09:51:23,658][0m Trial 20 failed with value nan.[0m
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
[33m[W 2023-02-28 09:51:25,407][0m Trial 21 failed with para

In [62]:
pehe_total

[0.5950204730033875, 0.6529917120933533]

In [44]:
pehe_total
#for key, value in best_trial.params.items():
#    print("{}: {}".format(key, value))

[]

In [None]:
#ate_pred=torch.mean(cate_pred)
#print("Estimated ATE (True is 4):", ate_pred.detach().numpy(),'\n\n')

#print("Individualized CATE Estimates: BLUE")
#print(pd.Series(cate_pred.detach().numpy()).plot.kde(color='blue'))
#print("Individualized CATE True: Green")
#print(pd.Series(cate_true.detach().numpy()).plot.kde(color='green'))

#print("\nError CATE Estimates: RED")
#print(pd.Series(cate_pred.detach().numpy()-cate_true.detach().numpy()).plot.kde(color='red'))