In [None]:
#Date: Dec 11, 2024
#Author: Sonal Allana
#Purpose: To generate LIME explanations from the models

In [None]:
import numpy as np
from numpy import argmax
from numpy import loadtxt
from numpy import savetxt
import tensorflow as tf
from sklearn.model_selection import train_test_split
import time
import lime
import lime.lime_tabular 
import re
import os
import miattack

In [None]:
#Model family whose saved models are explained; it is the first folder level under ../models/
#Options (1) baseline_nn (2) dp_nn (3) syn_nn
base_folder = "dp_nn"

#Only the sub-folder setting that matches the selected family is defined
if base_folder == "syn_nn":
    #Synthetic data generator folder. Options (1) ctgan (2) gausscopula (3) tvae
    syndataType = "tvae"
elif base_folder == "dp_nn":
    #Noise multiplier folder (set the correct one). Options (1) nm4000 (2) nm500 (3) nm66 (4) nm15
    nm_folder = "nm15"

In [None]:
#Dataset whose models are explained; used as the dataset folder level when building the model path
#Options (1) adult (2) credit (3) compas (4) hospital
dataset_name = "adult"

In [None]:
#Explanation method generated by this notebook (LIME)
expl = "lime"
#Sub-folder, created inside each iteration's model folder, where the attributions and the adversary split are saved
folder = "lime"

In [None]:
def updExpArr(i,inst_exp,attr_arr):
    """Copy one instance's explanation weights into row ``i`` of ``attr_arr``.

    Parameters
    ----------
    i : row index of the instance inside ``attr_arr``.
    inst_exp : iterable of ``(featureid, weight)`` pairs for that instance.
    attr_arr : array indexed as ``attr_arr[row, featureid]``; modified in place.

    Returns
    -------
    The same ``attr_arr`` object that was passed in.
    """
    for column, value in inst_exp:
        #entries for feature ids that do not appear in inst_exp are left untouched
        attr_arr[i, column] = value
    return attr_arr
        

In [None]:
def initExpArr():
    """Create zero-filled attribution arrays for the adversary's train and test sets.

    Reads the module-level ``X_adv_train`` and ``X_adv_test`` (both must be
    2-D) and allocates one float64 array of matching shape for each.

    Returns
    -------
    (attributions_train, attributions_test)
    """
    def _blank_like(data):
        #unpacking into exactly two names keeps the requirement that data is 2-D
        n_rows, n_cols = np.shape(data)
        return np.zeros((n_rows, n_cols), dtype=np.float64)

    attributions_train = _blank_like(X_adv_train)   #blank explanations for the training set
    attributions_test = _blank_like(X_adv_test)     #blank explanations for the test set
    return attributions_train, attributions_test
    

In [None]:
#For each saved model iteration: generate LIME attributions on the adversary's
#split of the test data, save them together with that split, and run the
#explanation-based attack from miattack on both sensitive attributes.
target_names = [0,1]
random_seed = 1337     #shared by the adversary split and the LIME sampler so reruns are reproducible
progress_every = 100   #print progress once every this many explained instances


def _explain_rows(explainer, predict_fn, rows, attributions, label=0):
    """Explain every row with LIME and store the weights in ``attributions``.

    Parameters
    ----------
    explainer : object providing ``explain_instance`` (a LimeTabularExplainer here).
    predict_fn : callable handed to ``explain_instance`` as the prediction function.
    rows : 2-D array with one instance per row.
    attributions : array indexed as ``[row, featureid]``; row ``j`` receives the
        weights of ``rows[j]``. Modified in place.
    label : class label that is explained and whose weights are stored.

    Returns
    -------
    The same ``attributions`` object that was passed in.
    """
    n_rows, n_features = rows.shape
    for j in range(n_rows):
        if j % progress_every == 0:
            #periodic progress instead of one printed line per instance
            print("explained {0}/{1} instances".format(j, n_rows))
        #request a weight for every feature so the whole attribution row can be filled
        exp = explainer.explain_instance(rows[j], predict_fn, num_features=n_features, labels=(label,))
        updExpArr(j, exp.as_map()[label], attributions)
    return attributions


for i in range(3,6):
    #Load model and train-test files
    if base_folder == "dp_nn": #account for noise multiplier folder
        basepath = "../models/{0}/{1}/{2}/iter{3}/".format(base_folder,dataset_name,nm_folder,i)
        fmodel = "model_dp_iter{0}.keras".format(i)
    elif base_folder == "syn_nn": #account for syn type folder
        basepath = "../models/{0}/{1}/{2}/iter{3}/".format(base_folder,dataset_name,syndataType,i)
        fmodel = "model_wodp_iter{0}.keras".format(i)
    else:
        basepath = "../models/{0}/{1}/iter{2}/".format(base_folder,dataset_name, i)
        fmodel = "model_wodp_iter{0}.keras".format(i)
    model = tf.keras.models.load_model(basepath + fmodel,compile=False)
    model.summary()
    X_train = loadtxt(basepath + 'X_train.csv',delimiter=',')
    X_test = loadtxt(basepath + 'X_test.csv',delimiter=',')
    Y_train = loadtxt(basepath + 'Y_train.csv',delimiter=',')
    Y_test = loadtxt(basepath + 'Y_test.csv',delimiter=',')
    Z_train = loadtxt(basepath + 'Z_train.csv',delimiter=',')
    Z_test = loadtxt(basepath + 'Z_test.csv',delimiter=',')

    #50% of the test set is used for training and remaining 50% for testing the attack model
    (X_adv_train, X_adv_test, Y_adv_train, Y_adv_test, Z_adv_train, Z_adv_test)  = train_test_split(X_test, Y_test, Z_test, test_size=0.5, random_state=random_seed)

    input_train = np.array(X_adv_train)

    #random_state fixes LIME's perturbation sampling; without it the attributions
    #(and the attack metrics computed from them) differ on every run
    explainer = lime.lime_tabular.LimeTabularExplainer(input_train,
                                                   class_names=target_names, mode="classification",verbose=False,
                                                   random_state=random_seed) #discretize_continuous=True

    #verbose=0 silences the progress output Keras would otherwise print on every explain_instance call
    predict_fn = lambda data: model.predict(data, verbose=0)

    #initExpArr reads the module-level X_adv_train / X_adv_test assigned above
    attributions_train, attributions_test = initExpArr()

    _explain_rows(explainer, predict_fn, X_adv_train, attributions_train)
    print(np.shape(attributions_train))

    _explain_rows(explainer, predict_fn, X_adv_test, attributions_test)
    print(np.shape(attributions_test))

    #Save the attributions and the adversary split so the attack can be rerun without recomputing LIME
    out_dir = basepath + folder
    os.makedirs(out_dir, exist_ok=True)   #no error if the folder already exists
    outputs = {
        'attributions_train': attributions_train,
        'attributions_test': attributions_test,
        'X_adv_train': X_adv_train,
        'X_adv_test': X_adv_test,
        'Y_adv_train': Y_adv_train,
        'Y_adv_test': Y_adv_test,
        'Z_adv_train': Z_adv_train,
        'Z_adv_test': Z_adv_test,
    }
    for name, values in outputs.items():
        savetxt(out_dir + '/' + name + '.csv',values,delimiter=',')

    print("Iteration ",i,":")
    #Attack the first sensitive attribute (column 0 of Z)
    print("Model inversion on sensitive attribute 1: ")
    Z_adv_train_s1 = Z_adv_train[:,0]
    Z_adv_test_s1 = Z_adv_test[:,0]
    modinv_obj1 = miattack.miattack_explanations(attributions_train,attributions_test,Z_adv_train_s1,Z_adv_test_s1)
    modinv_obj1.printMetrics()

    #Attack the second sensitive attribute (column 1 of Z)
    print("\nModel inversion on sensitive attribute 2: ")
    Z_adv_train_s2 = Z_adv_train[:,1]
    Z_adv_test_s2 = Z_adv_test[:,1]
    modinv_obj2 = miattack.miattack_explanations(attributions_train,attributions_test,Z_adv_train_s2,Z_adv_test_s2)
    modinv_obj2.printMetrics()
    

In [None]:
#Uncomment the following to reload the saved attributions and rerun the attack for one specific iteration
#i = 2
#if base_folder == "dp_nn": #account for noise multiplier folder
#    basepath = "../models/{0}/{1}/{2}/iter{3}/".format(base_folder,dataset_name,nm_folder,i)
#elif base_folder == "syn_nn": #account for syn type folder
#        basepath = "../models/{0}/{1}/{2}/iter{3}/".format(base_folder,dataset_name,syndataType,i)
#        fmodel = "model_wodp_iter{0}.keras".format(i)
#else:
#    basepath = "../models/{0}/{1}/iter{2}/".format(base_folder,dataset_name, i)
#print(basepath)
#attributions_train = loadtxt(basepath + folder + '/attributions_train.csv',delimiter=',')
#attributions_test = loadtxt(basepath + folder + '/attributions_test.csv',delimiter=',')
#Z_adv_train = loadtxt(basepath + folder + '/Z_adv_train.csv',delimiter=',')
#Z_adv_test = loadtxt(basepath + folder + '/Z_adv_test.csv',delimiter=',')

#print("Iteration ",i,":")
##Attack the first sensitive attribute
#Z_adv_train_s1 = Z_adv_train[:,0]
#Z_adv_test_s1 = Z_adv_test[:,0]
#modinv_obj1 = miattack.miattack_explanations(attributions_train,attributions_test,Z_adv_train_s1,Z_adv_test_s1)
#modinv_obj1.printMetrics()

##Attack the second sensitive attribute
#Z_adv_train_s2 = Z_adv_train[:,1]
#Z_adv_test_s2 = Z_adv_test[:,1]
#modinv_obj2 = miattack.miattack_explanations(attributions_train,attributions_test,Z_adv_train_s2,Z_adv_test_s2)
#modinv_obj2.printMetrics()