In [1]:

def logit_function(l):
    """Return the log-odds ``log(p / (1 - p))`` of ``l``.

    ``l`` is first clipped to ``[1e-5, 1 - 1e-5]`` so that inputs at or beyond
    0 and 1 produce finite values instead of ``-inf`` / ``inf`` / ``nan``.
    Works element-wise on anything ``np.clip`` accepts.
    """
    margin = 1e-5
    bounded = np.clip(l, a_min=margin, a_max=1 - margin)
    odds = bounded / (1 - bounded)
    return np.log(odds)
    
import math
def normpdf(x, mean, sd):
    """Evaluate the normal density with the given ``mean`` and ``sd`` at ``x``.

    All three arguments are coerced with ``float()``. ``sd`` must be non-zero:
    a zero standard deviation makes the variance zero and the division raises
    ``ZeroDivisionError``.
    """
    variance = float(sd) ** 2
    deviation = float(x) - float(mean)
    # Same operator order as the textbook formula: exp(-(x-mu)^2 / (2 var)) / sqrt(2 pi var)
    return math.exp(-deviation ** 2 / (2 * variance)) / (2 * math.pi * variance) ** .5

def get_blackbox_auc_lira(all_pred,all_train_index, all_valid_index, all_class_label, fpr_threshold=0.001):
	"""Score a likelihood-ratio membership test built from several models.

	For every instance id in ``range(len(all_train_index[0]) + len(all_valid_index[0]))``
	the function collects, from each model ``j``, the predicted value of the
	instance's labelled class and whether the instance was in that model's
	training partition. The values are passed through ``logit_function``; a
	random half of the models is used to estimate a Gaussian for "member" and
	one for "non-member" observations, and the other half is scored with the
	ratio ``pdf_in / pdf_out``.

	The pooled scores are then balanced (equal number of member and non-member
	observations, sampled without replacement), an AUC is computed, and the
	number of members scoring at or above the non-member score at rank
	``int(n_nonmember * fpr_threshold)`` is counted.

	Parameters
	----------
	all_pred : indexable as ``all_pred[j][row][class]``
		Per-model predictions. The code assumes that the rows of ``all_pred[j]``
		(and of ``all_class_label[j]``) list the training partition first,
		followed by the validation partition -- TODO confirm against the
		code that writes these arrays.
	all_train_index, all_valid_index : sequence of 1-D integer arrays
		Instance ids in the training / validation partition of each model.
	all_class_label : indexable as ``all_class_label[j][row]``
		Class label of each row, used to pick the entry of the prediction.
	fpr_threshold : float
		Target false-positive rate used to pick the score threshold.

	Returns
	-------
	(scores, labels, auc, plr)
		Balanced score array, matching 0/1 membership labels, the AUC, and the
		ratio ``cnt / int(n_nonmember * fpr_threshold)``. When that integer is
		zero (fewer than ``1 / fpr_threshold`` non-member observations) the
		denominator falls back to the un-truncated ``n_nonmember * fpr_threshold``
		instead of raising ``ZeroDivisionError``; ``nan`` is returned if that is
		zero as well.

	Raises
	------
	ValueError
		If an instance id appears in neither partition of some model, or if
		the pooled test observations lack members or non-members.

	Notes
	-----
	Uses the global ``np.random`` state; seed it for reproducible results.
	"""
	num_instance = len(all_train_index[0]) + len(all_valid_index[0])
	num_models = len(all_train_index)

	# Build "instance id -> row" tables once per model. The previous per-lookup
	# `i in array` + `array.tolist().index(i)` rescanned every partition for
	# every instance. setdefault keeps the first occurrence, like list.index.
	row_lookup = []
	for j in range(num_models):
		train_rows = {}
		for pos, instance_id in enumerate(np.asarray(all_train_index[j]).tolist()):
			train_rows.setdefault(instance_id, pos)
		# validation rows are addressed after the training rows of the same model
		offset = len(all_train_index[j])
		valid_rows = {}
		for pos, instance_id in enumerate(np.asarray(all_valid_index[j]).tolist()):
			valid_rows.setdefault(instance_id, pos + offset)
		row_lookup.append((train_rows, valid_rows))

	all_metric_result = []
	all_labels = []
	eps = 1e-8  # keeps the std-dev and the density ratio away from zero

	for i in range(num_instance):
		# membership flag and true-class prediction of instance i under every model
		this_instance_label = []
		this_instance_prob = []
		for j in range(num_models):
			train_rows, valid_rows = row_lookup[j]
			if i in train_rows:
				is_member, row = 1, train_rows[i]
			elif i in valid_rows:
				is_member, row = 0, valid_rows[i]
			else:
				raise ValueError(f"instance {i} is in neither the training nor the validation partition of model {j}")
			this_instance_label.append(is_member)
			this_instance_prob.append(all_pred[j][row][all_class_label[j][row]])

		this_instance_label = np.array(this_instance_label)
		this_instance_prob = logit_function(np.array(this_instance_prob).flatten())

		# random half of the models fits the in/out Gaussians, the rest is scored
		num_obs = len(this_instance_label)
		half = int(0.5*num_obs)
		training_index = np.random.choice(num_obs,half,replace=False)
		testing_index = np.setdiff1d(np.arange(num_obs),training_index)

		training_label = this_instance_label[training_index]
		training_member_prob = this_instance_prob[training_index[training_label == 1]]
		training_nonmember_prob = this_instance_prob[training_index[training_label == 0]]
		# An empty group yields nan statistics (numpy warns); the resulting nan
		# scores are zeroed by nan_to_num below.
		in_mu,in_std = np.average(training_member_prob),np.std(training_member_prob)
		out_mu,out_std = np.average(training_nonmember_prob),np.std(training_nonmember_prob)

		this_instance_metric = []
		for idx in testing_index:
			this_prob = this_instance_prob[idx]
			in_prob = normpdf(this_prob,in_mu,in_std+eps) + eps
			out_prob = normpdf(this_prob,out_mu,out_std+eps) + eps
			this_instance_metric.append(in_prob / out_prob)

		all_metric_result.append(this_instance_metric)
		all_labels.append(this_instance_label[testing_index])

	all_metric_result = np.nan_to_num(np.array(all_metric_result).flatten())
	all_labels = np.array(all_labels).flatten()

	# balance members and non-members before computing the metrics
	pos_index = np.flatnonzero(all_labels == 1)
	neg_index = np.flatnonzero(all_labels == 0)
	min_len = min(len(pos_index),len(neg_index))
	if min_len == 0:
		raise ValueError("need at least one member and one non-member test observation to compute AUC")
	sampled_pos_index = np.random.choice(pos_index,min_len,replace=False)
	sampled_neg_index = np.random.choice(neg_index,min_len,replace=False)
	sampled_index = np.concatenate((sampled_neg_index,sampled_pos_index),axis=0)
	all_metric_result = all_metric_result[sampled_index]
	all_labels = all_labels[sampled_index]

	## calculate AUC and PLR
	auc = roc_auc_score(all_labels,all_metric_result)
	print(f"AUC score {auc}")
	negative_index = np.flatnonzero(all_labels == 0)
	threshold_index = int(len(negative_index)*fpr_threshold)
	# non-member scores in descending order; pick the one at the target rank
	threshold = np.sort(all_metric_result[negative_index])[::-1][threshold_index]
	print(f"Likelihood threshold {threshold}")

	cnt = int(np.count_nonzero((all_metric_result >= threshold) & (all_labels == 1)))

	# int() truncation gives 0 for small evaluation sets; fall back to the
	# un-truncated expected false-positive count instead of dividing by zero.
	plr_denominator = threshold_index if threshold_index > 0 else len(negative_index)*fpr_threshold
	plr = cnt / plr_denominator if plr_denominator > 0 else float("nan")
	# the set is balanced, so len(negative_index) also equals the number of members
	print(f"TPR {cnt/len(negative_index)}, FPR {fpr_threshold}, PLR {plr}")

	return all_metric_result,all_labels,auc,plr


def get_blackbox_auc_no_shadow(all_train_loss, all_test_loss, fpr_threshold=0.001):
	"""Evaluate a loss-threshold membership test without shadow models.

	Both loss arrays are randomly subsampled (without replacement, using the
	global ``np.random`` state) to the length of the shorter one. Training
	losses are labelled 1, test losses 0, and the negated loss is the score.

	Returns ``(auc, plr, scores, labels)`` where ``plr`` is the number of
	training losses at or below the threshold divided by
	``min_len * fpr_threshold``. The threshold is the test loss at sorted
	position ``int(len(test) * fpr_threshold)``.
	"""
	balanced_size = min(len(all_test_loss), len(all_train_loss))
	# test losses are drawn first, then train losses
	nonmember_pick = np.random.choice(np.arange(len(all_test_loss)), balanced_size, replace=False)
	nonmember_loss = all_test_loss[nonmember_pick]
	member_pick = np.random.choice(np.arange(len(all_train_loss)), balanced_size, replace=False)
	member_loss = all_train_loss[member_pick]

	# lower loss => more member-like, hence the sign flip for the score
	label = np.concatenate((np.ones((len(member_loss))), np.zeros((len(nonmember_loss)))))
	score = np.concatenate((member_loss, nonmember_loss), axis=0) * -1
	auc = roc_auc_score(label, score)
	print(f"AUC score {auc}")

	rank = int(len(nonmember_loss) * fpr_threshold)
	threshold = np.sort(nonmember_loss)[rank]
	print(f"Loss threshold {threshold}")

	cnt = sum(1 for loss in member_loss if loss <= threshold)
	tpr = cnt / balanced_size
	plr = cnt / (balanced_size * fpr_threshold)
	print(f"TPR {tpr}, FPR {fpr_threshold}, PLR {plr}")

	return auc, plr, score, label



In [2]:
import numpy as np
np.set_printoptions(suppress=True)

from sklearn.metrics import roc_auc_score

# np.load on an .npz returns an NpzFile that keeps the archive open; the
# with-block closes it once the arrays have been read into memory.
with np.load('./expdata/cifar100_alexnet_1_200_0_0_0_0_0.0_0_0.0_loss_0.0_0.0_1_all_info.npz') as data:
    all_prob = data['arr_0']
    all_training_partition = data['arr_1']
    all_validation_partition = data['arr_2']
    all_class_label = data['arr_3']
    all_loss = data['arr_4']
    all_label = data['arr_5']
fpr_threshold = 0.001
print (all_prob.shape,all_class_label.shape)
print (all_loss.shape,all_label.shape)

# Collecting the true-class probability of every sample is currently disabled,
# so all_single_prob stays empty and its printed shape is (0,).
all_single_prob= []
#for i in range(1):
#    for j in range(50000):
#        all_single_prob.append(all_prob[i][j][all_class_label[i][j]])

all_single_prob = np.array(all_single_prob).flatten()
print (all_single_prob.shape)


(1, 50000, 100) (1, 50000)
(20000,) (20000,)
(0,)


In [11]:

# Alternative input (CIFAR-100 run):
#data = np.load('./expdata/cifar100_alexnet_1_200_0_0_0_0_0.0_0_0.0_loss_0.0_0.0_1_all_info.npz')

# The with-block closes the .npz archive once the arrays have been read.
with np.load('./expdata/cifar10_alexnet_1_200_0_0_0_0_3.5_1_0.0_loss_0.0_0.0_50_all_info.npz') as data:
    all_prob = data['arr_0']
    all_training_partition = data['arr_1']
    all_validation_partition = data['arr_2']
    all_class_label = data['arr_3']
    all_loss = data['arr_4']
    all_label = data['arr_5']
fpr_threshold = 0.001
# get_blackbox_auc_lira returns four values (scores, labels, auc, plr);
# unpacking six targets would raise ValueError.
base_all_metric,base_all_label,base_auc,base_plr = get_blackbox_auc_lira(all_prob,all_training_partition, all_validation_partition, all_class_label,fpr_threshold=fpr_threshold)



FileNotFoundError: [Errno 2] No such file or directory: './expdata/cifar10_alexnet_1_200_0_0_0_0_3.5_1_0.0_loss_0.0_0.0_40_all_info.npz'