In [48]:
import tensorflow as tf
import numpy as np #numpy is the numerical computing package in python
import datetime 

In [49]:
from sklearn.preprocessing import StandardScaler
#!wget -nc http://www.fredjo.com/files/ihdp_npci_1-100.train.npz
#!wget -nc http://www.fredjo.com/files/ihdp_npci_1-100.test.npz 
def load_IHDP_data(training_data,testing_data,i=1):
    """Load replicate ``i`` of IHDP with the train and test splits pooled.

    Parameters
    ----------
    training_data, testing_data : paths of the ``.npz`` archives holding the
        train and test splits.
    i : index of the replicate, selected along the last axis of every array.

    Returns
    -------
    dict with float32 arrays ``x``, ``t``, ``y`` (``t`` and ``y`` reshaped to
    column vectors), ``mu_0`` and ``mu_1``, plus ``y_scaler`` (a
    ``StandardScaler`` fitted on ``y``) and ``ys`` (``y`` passed through
    that scaler).
    """
    with open(training_data,'rb') as train_fh, open(testing_data,'rb') as test_fh:
        train_npz = np.load(train_fh)
        test_npz = np.load(test_fh)

        def pooled(key, index):
            # train rows first, test rows after; float32 because most GPUs
            # only compute 32-bit floats
            both_splits = (train_npz[key][index], test_npz[key][index])
            return np.concatenate(both_splits, axis=0).astype('float32')

        per_unit = np.s_[:, i]
        outcome = pooled('yf', per_unit)
        treatment = pooled('t', per_unit)
        covariates = pooled('x', np.s_[:, :, i])
        noiseless_control = pooled('mu0', per_unit)
        noiseless_treated = pooled('mu1', per_unit)

        data = {
            'x': covariates,
            # pad the one-dimensional vectors with a trailing dimension
            't': treatment.reshape(-1, 1),
            'y': outcome.reshape(-1, 1),
            'mu_0': noiseless_control,
            'mu_1': noiseless_treated,
        }

        # standardizing y often makes training of DL regressors easier
        scaler = StandardScaler().fit(data['y'])
        data['y_scaler'] = scaler
        data['ys'] = scaler.transform(data['y'])

    return data

def load_IHDP_data_n(training_data,testing_data,i,dt):
    """Load replicate ``i`` of a single IHDP split.

    Parameters
    ----------
    training_data, testing_data : paths of the ``.npz`` archives holding the
        train and test splits.
    i : index of the replicate, selected along the last axis of every array.
    dt : ``'train'`` selects ``training_data``; any other value selects
        ``testing_data``.

    Returns
    -------
    dict with float32 arrays ``x``, ``t``, ``y`` (``t`` and ``y`` reshaped to
    column vectors), ``mu_0`` and ``mu_1``, all taken from the selected split
    only, plus ``y_scaler`` (a ``StandardScaler`` fitted on that split's
    ``y``) and ``ys`` (``y`` passed through that scaler).
    """
    split_path = training_data if dt == 'train' else testing_data

    with open(split_path,'rb') as split_fh:
        split = np.load(split_fh)
        # Read every array while the handle is still open.
        y = split['yf'][:, i].astype('float32') #most GPUs only compute 32-bit floats
        t = split['t'][:, i].astype('float32')
        x = split['x'][:, :, i].astype('float32')
        # mu_0 / mu_1 come from the same split as x, t and y so that all
        # arrays describe the same units, row for row.
        mu_0 = split['mu0'][:, i].astype('float32')
        mu_1 = split['mu1'][:, i].astype('float32')

    data = {
        'x': x,
        # pad the one-dimensional vectors with a trailing dimension
        't': t.reshape(-1, 1),
        'y': y.reshape(-1, 1),
        'mu_0': mu_0,
        'mu_1': mu_1,
    }

    # standardizing y often makes training of DL regressors easier
    data['y_scaler'] = StandardScaler().fit(data['y'])
    data['ys'] = data['y_scaler'].transform(data['y'])

    return data


def cal_pehe(i,model):
    """Root-PEHE of ``model`` on the test split of IHDP replicate ``i``.

    Parameters
    ----------
    i : replicate index forwarded to ``load_IHDP_data_n``.
    model : fitted network whose ``predict`` output holds the y0 prediction in
        column 0 and the y1 prediction in column 1. Assumes the model was
        trained on outcomes standardized with the training split's scaler,
        as the training loop in this notebook does.

    Returns
    -------
    Scalar tensor: square root of the mean squared difference between the
    true CATE (``mu_1 - mu_0``) and the predicted CATE (``y1 - y0``).
    """
    train_path = './ihdp_npci_1-100.train.npz'
    test_path = './ihdp_npci_1-100.test.npz'

    # Predictions live on the scale of the *training* outcomes, so they must be
    # mapped back with the scaler fitted on the training split, not a scaler
    # refitted on the test outcomes.
    y_scaler = load_IHDP_data_n(train_path, test_path, i, 'train')['y_scaler']
    data = load_IHDP_data_n(train_path, test_path, i, 'test')

    concat_pred = model.predict(data['x'])
    #dont forget to rescale the outcome before estimation!
    y0_pred = y_scaler.inverse_transform(concat_pred[:, 0].reshape(-1, 1))
    y1_pred = y_scaler.inverse_transform(concat_pred[:, 1].reshape(-1, 1))

    cate_pred = y1_pred - y0_pred
    # mu_0 / mu_1 are 1-D; make the true CATE a column so it lines up with
    # cate_pred row by row instead of broadcasting to an (n, n) matrix.
    cate_true = (data['mu_1'] - data['mu_0']).reshape(-1, 1) #Hill's noiseless true values

    cate_err = tf.reduce_mean(tf.square(cate_true - cate_pred))

    return tf.math.sqrt(cate_err)

In [50]:

def pdist2sq(x,y):
    """Pairwise squared Euclidean distances between the rows of ``x`` and ``y``.

    Uses the expansion ``|a - b|^2 = |a|^2 + |b|^2 - 2 a.b``. Entry ``[i, j]``
    of the result is the squared distance between ``x[i]`` and ``y[j]``.
    """
    x2 = tf.reduce_sum(x ** 2, axis=-1, keepdims=True)
    y2 = tf.reduce_sum(y ** 2, axis=-1, keepdims=True)
    dist = x2 + tf.transpose(y2, (1, 0)) - 2. * x @ tf.transpose(y, (1, 0))
    # Round-off in the expansion can leave tiny negative values for (nearly)
    # identical rows; clamp them so a later sqrt cannot produce NaN.
    return tf.maximum(dist, 0.)

from tensorflow.keras.losses import Loss

class CFRNet_Loss(Loss):
    """CFRNet objective: factual regression loss plus ``alpha`` times an RBF-kernel MMD^2.

    ``concat_true`` carries the outcome in column 0 and the treatment indicator
    in column 1. ``concat_pred`` carries the y0 prediction in column 0, the y1
    prediction in column 1 and the learned representation in the remaining
    columns.
    """

    def __init__(self, alpha=1.,sigma=1.):
        super().__init__(name='cfrnet_loss')
        self.alpha = alpha # balances regression loss and MMD IPM
        self.rbf_sigma=sigma #for gaussian kernel

    def split_pred(self,concat_pred):
        """Split the network output into its y0, y1 and representation parts."""
        preds={}
        preds['y0_pred'] = concat_pred[:, 0]
        preds['y1_pred'] = concat_pred[:, 1]
        preds['phi'] = concat_pred[:, 2:]
        return preds

    def rbf_kernel(self, x, y):
        """Gaussian kernel matrix ``exp(-|x_i - y_j|^2 / sigma^2)``."""
        return tf.exp(-pdist2sq(x,y)/tf.square(self.rbf_sigma))

    def calc_mmdsq(self, Phi, t):
        """MMD^2 between control (t == 0) and treated (t == 1) representations.

        Within-group sums skip the diagonal (the similarity of a unit with
        itself) to match the ``1/(m(m-1))`` normaliser. A batch in which either
        group has fewer than two units has no defined estimate and contributes
        zero instead of NaN.

        Returns the scalar estimate broadcast to the shape of ``t``.
        """
        # reshape (not squeeze) keeps a batch of one as a 1-element vector
        group = tf.cast(tf.reshape(t, [-1]), tf.int32)
        Phic, Phit = tf.dynamic_partition(Phi, group, 2)

        Kcc = self.rbf_kernel(Phic,Phic)
        Kct = self.rbf_kernel(Phic,Phit)
        Ktt = self.rbf_kernel(Phit,Phit)

        m = tf.cast(tf.shape(Phic)[0],Phi.dtype)
        n = tf.cast(tf.shape(Phit)[0],Phi.dtype)

        within_c = tf.reduce_sum(Kcc) - tf.linalg.trace(Kcc)
        within_t = tf.reduce_sum(Ktt) - tf.linalg.trace(Ktt)

        mmd = tf.math.divide_no_nan(within_c, m*(m-1.0))
        mmd = mmd + tf.math.divide_no_nan(within_t, n*(n-1.0))
        mmd = mmd - 2.0*tf.math.divide_no_nan(tf.reduce_sum(Kct), m*n)

        well_defined = tf.logical_and(m > 1.0, n > 1.0)
        mmd = tf.where(well_defined, mmd, tf.zeros_like(mmd))
        return mmd * tf.ones_like(t)

    def mmdsq_loss(self, concat_true,concat_pred):
        """Mean of the MMD^2 term for one batch."""
        t_true = concat_true[:, 1]
        p=self.split_pred(concat_pred)
        mmdsq_loss = tf.reduce_mean(self.calc_mmdsq(p['phi'],t_true))
        return mmdsq_loss

    def regression_loss(self,concat_true,concat_pred):
        """Squared error of each head on the units that received its treatment arm."""
        y_true = concat_true[:, 0]
        t_true = concat_true[:, 1]
        p = self.split_pred(concat_pred)
        loss0 = tf.reduce_mean((1. - t_true) * tf.square(y_true - p['y0_pred']))
        loss1 = tf.reduce_mean(t_true * tf.square(y_true - p['y1_pred']))
        return loss0+loss1

    def cfr_loss(self,concat_true,concat_pred):
        """Regression loss plus ``alpha`` times the MMD^2 term."""
        lossR = self.regression_loss(concat_true,concat_pred)
        lossIPM = self.mmdsq_loss(concat_true,concat_pred)
        return lossR + self.alpha * lossIPM

    #compute loss
    def call(self, concat_true, concat_pred):
        return self.cfr_loss(concat_true,concat_pred)
     


In [51]:
from tensorflow.keras.callbacks import Callback
#https://towardsdatascience.com/implementing-macro-f1-score-in-keras-what-not-to-do-e9f1aa04029d
class Base_Metrics(Callback):
    """Keras callback that logs ATE and PEHE metrics at the end of every epoch.

    ``data`` is the dataset dict the metrics are evaluated on. It must provide
    ``x``, ``t`` and ``y`` (``t``/``y`` as column vectors), ``mu_0``, ``mu_1``
    and ``y_scaler``. The constructor adds the index entries ``o_idx``,
    ``c_idx`` and ``t_idx`` to that same dict.
    """

    def __init__(self,data, verbose=0):
        super(Base_Metrics, self).__init__()
        self.data=data #feed the callback the full dataset
        self.verbose=verbose

        #needed for PEHEnn; Called in self.find_ynn
        self.data['o_idx']=tf.range(self.data['t'].shape[0])
        self.data['c_idx']=self.data['o_idx'][self.data['t'].squeeze()==0] #These are the indices of the control units
        self.data['t_idx']=self.data['o_idx'][self.data['t'].squeeze()==1] #These are the indices of the treated units

    def split_pred(self,concat_pred):
        """Split the network output and map both outcome heads back to the original scale."""
        preds={}
        preds['y0_pred'] = self.data['y_scaler'].inverse_transform(concat_pred[:, 0].reshape(-1, 1))
        preds['y1_pred'] = self.data['y_scaler'].inverse_transform(concat_pred[:, 1].reshape(-1, 1))
        preds['phi'] = concat_pred[:, 2:]
        return preds

    def find_ynn(self, Phi):
        """For every unit, return the outcome of its nearest neighbour in the opposite arm.

        Neighbours are found in representation space ``Phi``.
        """
        PhiC, PhiT =tf.dynamic_partition(Phi,tf.cast(tf.squeeze(self.data['t']),tf.int32),2) #separate control and treated reps
        # argmin is the same on squared distances, so no sqrt is needed
        dists=pdist2sq(PhiC,PhiT)
        yT_nn_idx=tf.gather(self.data['c_idx'],tf.argmin(dists,axis=0)) #get c_idxs of smallest distances for treated units
        yC_nn_idx=tf.gather(self.data['t_idx'],tf.argmin(dists,axis=1)) #get t_idxs of smallest distances for control units
        yT_nn=tf.gather(self.data['y'],yT_nn_idx) #now use these to retrieve y values
        yC_nn=tf.gather(self.data['y'],yC_nn_idx)
        y_nn=tf.dynamic_stitch([self.data['t_idx'],self.data['c_idx']],[yT_nn,yC_nn]) #stitch em back up!
        return y_nn

    def PEHEnn(self,concat_pred):
        """Mean squared error between the nearest-neighbour CATE proxy and the predicted CATE."""
        p = self.split_pred(concat_pred)
        y_nn = self.find_ynn(p['phi'])
        cate_nn_err=tf.reduce_mean( tf.square( (1-2*self.data['t']) * (y_nn-self.data['y']) - (p['y1_pred']-p['y0_pred']) ) )
        return cate_nn_err

    def ATE(self,concat_pred):
        """Per-unit predicted effects ``y1 - y0``; ``on_epoch_end`` averages them into the ATE."""
        p = self.split_pred(concat_pred)
        return p['y1_pred']-p['y0_pred']

    def PEHE(self,concat_pred):
        """Mean squared error between the true CATE (``mu_1 - mu_0``) and the predicted CATE.

        Simulation only: it needs the noiseless potential outcomes.
        """
        p = self.split_pred(concat_pred)
        # mu_0 / mu_1 are 1-D; make the true CATE a column so it lines up with
        # the (n, 1) predictions instead of broadcasting to an (n, n) matrix.
        cate_true = (self.data['mu_1']-self.data['mu_0']).reshape(-1, 1)
        cate_err=tf.reduce_mean( tf.square( cate_true - (p['y1_pred']-p['y0_pred']) ) )
        return cate_err

    def on_epoch_end(self, epoch, logs=None):
        """Predict on ``self.data['x']`` and write the metrics as TF summaries."""
        concat_pred=self.model.predict(self.data['x'])
        #Calculate Empirical Metrics
        ate_pred=tf.reduce_mean(self.ATE(concat_pred))
        tf.summary.scalar('ate', data=ate_pred, step=epoch)
        pehe_nn=self.PEHEnn(concat_pred)
        tf.summary.scalar('cate_nn_err', data=tf.sqrt(pehe_nn), step=epoch)

        #Simulation Metrics
        ate_true=tf.reduce_mean(self.data['mu_1']-self.data['mu_0'])
        ate_err=tf.abs(ate_true-ate_pred)
        tf.summary.scalar('ate_err', data=ate_err, step=epoch)
        pehe =self.PEHE(concat_pred)
        tf.summary.scalar('cate_err', data=tf.sqrt(pehe), step=epoch)
        out_str=f' — ate_err: {ate_err:.4f}  — cate_err: {tf.sqrt(pehe):.4f} — cate_nn_err: {tf.sqrt(pehe_nn):.4f} '

        if self.verbose > 0: print(out_str)

In [52]:
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Concatenate
from tensorflow.keras import regularizers
from tensorflow.keras import Model
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.metrics import binary_accuracy
from tensorflow.keras.losses import Loss

def make_tarnet(input_dim, reg_l2):
    """Build the network: a shared representation feeding two outcome heads.

    Parameters
    ----------
    input_dim : number of input covariates.
    reg_l2 : L2 penalty applied to the kernel of every outcome-head layer.

    Returns
    -------
    Keras ``Model`` whose output concatenates, along axis 1, the y0
    prediction, the y1 prediction and the representation, in that order.
    """
    def representation_layer(name):
        return Dense(units=25, activation='elu', kernel_initializer='RandomNormal', name=name)

    def head_layer(units, activation, name):
        # every head layer gets its own L2 regularizer instance
        return Dense(units=units, activation=activation,
                     kernel_regularizer=regularizers.l2(reg_l2), name=name)

    inputs = Input(shape=(input_dim,), name='input')

    # representation
    representation = inputs
    for layer_name in ('phi_1', 'phi_2'):
        representation = representation_layer(layer_name)(representation)

    # HYPOTHESIS: two hidden layers per arm, built level by level (y0 then y1)
    arms = [representation, representation]
    for depth in (1, 2):
        arms = [head_layer(25, 'elu', f'y{arm}_hidden_{depth}')(hidden)
                for arm, hidden in enumerate(arms)]

    # linear output of each arm
    predictions = [head_layer(1, None, f'y{arm}_predictions')(hidden)
                   for arm, hidden in enumerate(arms)]

    concat_pred = Concatenate(axis=1)(predictions + [representation])
    return Model(inputs=inputs, outputs=concat_pred)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau, TerminateOnNaN
from tensorflow.keras.optimizers import SGD, Adam
#Colab command to allow us to run Colab in TF2
%load_ext tensorboard 
pehe_loss=[]
val_split=0.2
batch_size=100
verbose=1
i = 0
tf.random.set_seed(i)
np.random.seed(i)


# Clear any logs from previous runs
!rm -rf ./logs/ 
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
file_writer = tf.summary.create_file_writer(log_dir + "/metrics")
file_writer.set_as_default()
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0)

#let's try ADAM this time
adam_callbacks = [
        TerminateOnNaN(),
        EarlyStopping(monitor='val_loss', patience=2, min_delta=0.),
        ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5, verbose=verbose, mode='auto',
                          min_delta=1e-8, cooldown=0, min_lr=0),
        tensorboard_callback,
        Base_Metrics(data,verbose=verbose)
    ]



cfrnet_loss=CFRNet_Loss(alpha=1.0)


for i in range(1,101):
   
    cfrnet_model=make_tarnet(data['x'].shape[1],.01)
    data=load_IHDP_data_n('./ihdp_npci_1-100.train.npz','./ihdp_npci_1-100.test.npz',i,'train')
    yt = np.concatenate([data['ys'], data['t']], 1)

    cfrnet_model.compile(optimizer=Adam(learning_rate=1e-4),
                          loss=cfrnet_loss,
                     metrics=[cfrnet_loss,cfrnet_loss.regression_loss,cfrnet_loss.mmdsq_loss])

    cfrnet_model.fit(x=data['x'],y=yt,
                     callbacks=adam_callbacks,
                      validation_split=val_split,
                      epochs=500,
                      batch_size=batch_size,
                      verbose=verbose)
    
    pehe_loss.append(cal_pehe(i,cfrnet_model))


In [None]:
# Root-PEHE of every replicate, in the order the training loop appended them
print(pehe_loss)

In [None]:
# Mean root-PEHE over all replicates collected in pehe_loss
print(np.mean(pehe_loss))

In [None]:
#import pandas as pd
#concat_pred=cfrnet_model.predict(data['x'])
#dont forget to rescale the outcome before estimation!
#y0_pred = data['y_scaler'].inverse_transform(concat_pred[:, 0].reshape(-1, 1))
#y1_pred = data['y_scaler'].inverse_transform(concat_pred[:, 1].reshape(-1, 1))



#cate_pred=y1_pred-y0_pred
#cate_true=data['mu_1']-data['mu_0'] #Hill's noiseless true values
#ate_pred=tf.reduce_mean(cate_pred)
#print("Estimated ATE (True is 4):", ate_pred.numpy(),'\n\n')

#print("Individualized CATE Estimates: BLUE")
#print(pd.Series(cate_pred.squeeze()).plot.kde(color='blue'))

#print("Individualized CATE True: Green")
#print(pd.Series(cate_true.squeeze()).plot.kde(color='green'))

#print("\nError CATE Estimates: RED")
#print(pd.Series(cate_pred.squeeze()-cate_true.squeeze()).plot.kde(color='red'))

     
#cate_err=tf.reduce_mean( tf.square( ( (data['mu_1']-data['mu_0']) - (y1_pred-y0_pred) ) ) )

#print('pehe on training:',tf.math.sqrt(cate_err))