In [None]:
#Date: Dec 11, 2024
#Author: Sonal Allana
#Purpose: Add random noise to the explanations in the post-model stage. Calibrated (DP) noise is added offline and only loaded here.
#An attribute inference attack is then performed on the (noised) explanations.

In [1]:
import numpy as np
from numpy import loadtxt
from numpy import savetxt
import tensorflow as tf
import os
import miattack
import xaimetrics

2024-10-29 12:42:50.643400: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
#Model family folder under ../models/ from which the saved models and adversary files are read
base_folder = "baseline_nn"

In [3]:
#Dataset to evaluate; it selects the sub-folder below base_folder.
#Options (1) adult (2) credit (3) compas (4) hospital
dataset_name = "compas"


In [4]:
#Explanation method whose attributions are attacked.
#Options (1) integrated_gradients (2) smoothgrad (3) shap (4) lime
expl = "smoothgrad"

#Folder (inside each model iteration folder) that holds the files of each
#explanation method. The folder is looked up from `expl` so that the two
#settings cannot get out of sync, which was possible when a matching pair of
#lines had to be uncommented by hand. An unknown `expl` raises a KeyError here.
EXPL_FOLDERS = {
    "integrated_gradients": "ig",
    "smoothgrad": "sg",
    "shap": "shap",
    "lime": "lime",
}
expl_folder = EXPL_FOLDERS[expl]

In [5]:
#Where the noise comes from: "random" is sampled in this notebook, "dp" is loaded from files noised offline.
#Options (1) random (2) dp
noise_category = "dp"

#Distribution of the noise.
#Options (1) laplace (2) gaussian (3) random (not applicable for noise_category = dp)
noise_type = "laplace"


In [7]:
#Evaluate the attack on the explanations of each saved model iteration.
#  * noise_category == "random": noise is sampled here, added to the clean
#    attributions, and the noised attributions are saved next to the clean ones.
#  * any other noise_category (i.e. "dp"): attributions that were noised offline
#    are loaded from attrs_{train,test}_dp_<noise_type>.csv.
#The attack is then run once per sensitive attribute (columns 0 and 1 of Z).
#NOTE(review): NumPy's global RNG is not seeded, so the random noise differs between runs.
#NOTE(review): only iteration 2 is processed; widen the range to evaluate other iterations.

#Fail fast on an unsupported noise type. Previously this surfaced only after
#all files were loaded, as a NameError on the undefined noise arrays.
if noise_category == "random" and noise_type not in ("laplace","gaussian","random"):
    raise ValueError(
        "Unsupported noise_type '{0}' for noise_category 'random'; "
        "expected 'laplace', 'gaussian' or 'random'".format(noise_type)
    )

for i in range(2,3):
    #Load the trained model and the adversary's train-test files
    basepath = "../models/{0}/{1}/iter{2}/".format(base_folder,dataset_name,i)
    expl_path = basepath + expl_folder + '/'
    fmodel = "model_wodp_iter{0}.keras".format(i)
    model = tf.keras.models.load_model(basepath + fmodel,compile=False)
    model.summary()

    X_adv_train = loadtxt(expl_path + 'X_adv_train.csv',delimiter=',')
    X_adv_test = loadtxt(expl_path + 'X_adv_test.csv',delimiter=',')
    Y_adv_train = loadtxt(expl_path + 'Y_adv_train.csv',delimiter=',')
    Y_adv_test = loadtxt(expl_path + 'Y_adv_test.csv',delimiter=',')
    Z_adv_train = loadtxt(expl_path + 'Z_adv_train.csv',delimiter=',')
    Z_adv_test = loadtxt(expl_path + 'Z_adv_test.csv',delimiter=',')

    if noise_category == "random":
        #The clean attributions are only needed here; the DP branch reads
        #already-noised files, so they are no longer loaded for it.
        attributions_train = loadtxt(expl_path + 'attributions_train.csv',delimiter=',')
        attributions_test = loadtxt(expl_path + 'attributions_test.csv',delimiter=',')

        #generate random noise
        if noise_type == "laplace":
            #Lower bound is 1: a scale of 0 would produce all-zero noise and
            #leave the attributions unchanged.
            b = np.random.randint(1,1000,1)
            noisetrain = np.random.laplace(0,b, np.shape(attributions_train))
            noisetest = np.random.laplace(0,b, np.shape(attributions_test))
        elif noise_type == "gaussian":
            sd = np.random.randint(1000,2000,1)
            noisetrain = np.random.normal(0,sd, np.shape(attributions_train))
            noisetest = np.random.normal(0,sd, np.shape(attributions_test))
        else:
            #noise_type == "random" (validated before the loop): uniform samples in [0, 1)
            print("random generation")
            noisetrain = np.random.random(size = np.shape(attributions_train))
            noisetest = np.random.random(size = np.shape(attributions_test))

        #adding random noise to attributions
        attributions_train = attributions_train + noisetrain
        attributions_test = attributions_test + noisetest

        #Persist the noised attributions so the same noise can be reused later
        savetxt(expl_path + 'attrs_train_rand_rand_{0}.csv'.format(noise_type),attributions_train,delimiter=',')
        savetxt(expl_path + 'attrs_test_rand_rand_{0}.csv'.format(noise_type),attributions_test,delimiter=',')

    else:
        #Attributions with calibrated noise were produced offline; just load them
        print("DP noise evaluations")
        attributions_train = loadtxt(expl_path + 'attrs_train_dp_{0}.csv'.format(noise_type),delimiter=',')
        attributions_test = loadtxt(expl_path + 'attrs_test_dp_{0}.csv'.format(noise_type),delimiter=',')
        print("attributions_train: ", np.shape(attributions_train))
        print("attributions_test: ",np.shape(attributions_test))

    print("Iteration ",i,":")

    #Attack the first sensitive attribute (column 0 of Z)
    Z_adv_train_s1 = Z_adv_train[:,0]
    Z_adv_test_s1 = Z_adv_test[:,0]
    modinv_obj1 = miattack.miattack_explanations(attributions_train,attributions_test,Z_adv_train_s1,Z_adv_test_s1)
    modinv_obj1.printMetrics()

    #Attack the second sensitive attribute (column 1 of Z)
    Z_adv_train_s2 = Z_adv_train[:,1]
    Z_adv_test_s2 = Z_adv_test[:,1]
    modinv_obj2 = miattack.miattack_explanations(attributions_train,attributions_test,Z_adv_train_s2,Z_adv_test_s2)
    modinv_obj2.printMetrics()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    [(None, 10)]              0         
                                                                 
 dense1 (Dense)              (None, 40)                440       
                                                                 
 dense2 (Dense)              (None, 40)                1640      
                                                                 
 dense_1 (Dense)             (None, 1)                 41        
                                                                 
Total params: 2121 (8.29 KB)
Trainable params: 2121 (8.29 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
DP noise evaluations
attributions_train:  (1140, 10)
attributions_test:  (1140, 10)
Iteration  2 :
Precision: 0.7951
Recall: 0.7056
F1-Score: 0.7477
FPR: 0.777