In [9]:
import numpy as np

import matplotlib.pyplot as plt
params = {'axes.labelsize': 16,
          'axes.titlesize': 16}
plt.rcParams.update(params)

def report_acc(data,label):
    train_index = np.random.choice(len(label),int(len(label)/2),replace=False)
    test_index = np.setdiff1d(np.arange(len(label)),train_index)

    train_data = data[train_index]
    train_label = label[train_index]
    test_data = data[test_index]
    test_label = label[test_index]

    train_data = np.reshape(train_data, (len(train_label), -1))
    test_data = np.reshape(test_data, (len(test_label),-1))

    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import balanced_accuracy_score
    from sklearn.metrics import roc_auc_score
    from sklearn.metrics import f1_score
    clf = LogisticRegression(random_state=0, solver='liblinear',class_weight='balanced')
    clf.fit(train_data, train_label)
    #print (" attack accuracy %.2f" % (clf.score(test_data, test_label) * 100))
    acc1 = balanced_accuracy_score(test_label,clf.predict(test_data))
    #print (classification_report(test_label,clf.predict(test_feature)))
    auc1 = roc_auc_score(test_label,clf.predict(test_data))
    f1_1 = f1_score(test_label,clf.predict(test_data), average='weighted')

    from sklearn.ensemble import RandomForestClassifier
    clf = RandomForestClassifier(n_estimators=20,max_depth=3, random_state=0,class_weight="balanced")
    clf.fit(train_data, train_label)
    acc2 = balanced_accuracy_score(test_label,clf.predict(test_data))
    auc2 = roc_auc_score(test_label,clf.predict(test_data))
    f1_2 = f1_score(test_label,clf.predict(test_data), average='weighted')
    #print (classification_report(test_label,mode.predict(test_feature)))
    #print ("balanced accuracy",max(acc1,acc2))
    print ("roc-auc",max(auc1,auc2))
    #print ("f1 score",max(f1_1,f1_2))

    return max(acc1,acc2)
    #max(auc1,auc2)
    #max(f1_1,f1_2)

def report_auc(data,label):
    if (len(data.shape)==1):
        from sklearn.metrics import roc_auc_score
        y_true = label
        y_pred = np.squeeze(data)
        #print ("AUC score", roc_auc_score(-1*y_true, y_pred))
        return roc_auc_score(y_true,y_pred)
    else:
        ### 
        pass

def processing_batch(data):
    #print (data.shape)
    num_batches = data.shape[0]
    num_instances = data.shape[1]
    num_layers = data.shape[2]
    num_features = data.shape[3]
    
    ### we have 5 features here: 1. cos 2. |b*n|-|b*n-a| L1 3. |b*n|-|b*n-a| L2 4. |a| L1 5. |a| L2
    ### this function is subject to change because we may have other ways to process the information from multiple batches
    
    generated_features = np.zeros((num_instances,num_layers,5))
    
    for i in range(num_instances):
        this_instance_data = data[:,i,:,:]
        
        for j in range(num_layers):
            #generated_features[i,j,0] = np.amax(this_instance_data[:,j,0])
            generated_features[i,j,0] = np.amax(this_instance_data[:,j,0]*this_instance_data[:,j,1]) 
            generated_features[i,j,3] = np.amin(this_instance_data[:,j,3])
            generated_features[i,j,4] = np.amin(this_instance_data[:,j,6])
            generated_features[i,j,1] = np.amax(this_instance_data[:,j,1]-this_instance_data[:,j,2])/generated_features[i,j,3]
            generated_features[i,j,2] = np.amax(this_instance_data[:,j,4]-this_instance_data[:,j,5])/generated_features[i,j,4]
            #generated_features[i,j,1] = np.amax(this_instance_data[:,j,1]-this_instance_data[:,j,2])
            #generated_features[i,j,2] = np.amax(this_instance_data[:,j,4]-this_instance_data[:,j,5])
            
            generated_features[i,j,3] = -np.amin(this_instance_data[:,j,3])
            generated_features[i,j,4] = -np.amin(this_instance_data[:,j,6])
        #if (i<5):
            #print ("member")
            #print (this_instance_data[:,33,1]-this_instance_data[:,33,2])
            #print (-1*generated_features[i,j,3])
            
        #if (i+5>=num_instances):
            #print ("non-member")
            #print (this_instance_data[:,33,1]-this_instance_data[:,33,2])
            #print (-1*generated_features[i,j,3])


            
    return generated_features
    

In [10]:
#epochs = [50,100,150,200,250,300]
epochs=[50,100,150,200,250]
data_size = 5000
num_layers = 65

special = [0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57,60,63]

all_epoch_feature = []
all_epoch_label = []

for index,epoch in enumerate(epochs):
    data_name = './expdata/all_info_non_member_multibatch_sparsevector_0_0.0_0_0.0_0_'+str(epoch)+'_cifar100_'+str(data_size)+'_resnet20'+'.npy'
    data = np.load(data_name)
    label_name = './expdata/all_label_non_member_multibatch_sparsevector_0_0.0_0_0.0_0_'+str(epoch)+'_cifar100_'+str(data_size)+'_resnet20'+'.npy'
    label = np.load(label_name)
    
    print (data.shape)
    print ("epoch:",epoch)
    print (label.shape)
    
    all_epoch_label = label[0]
    
    all_grad_minus_feature = []
    for user in range(1):
        this_user_label = label[user]
        this_user_instance_info = processing_batch(data[user,:,:,:,:])
        print (this_user_instance_info.shape)
        
        all_epoch_feature.append(this_user_instance_info[:,42,1])
        
        special_auc = []
        
        sim_auc = []
        norm_auc = []
        
        for layer in range(num_layers):
            #print ("layer index:",layer)
            all_grad_minus_feature=this_user_instance_info[:,layer,1]
            ### show the distribution of grad-minus feature
            
            num_bins = 50
            all_grad_minus_feature = np.squeeze(np.array(all_grad_minus_feature))
            #print (all_grad_minus_feature.shape)
            membership_label = label[0].astype(np.int64)
            #print (membership_label.shape)
            member_index = np.arange(len(all_grad_minus_feature))[membership_label == 1]
            nonmember_index = np.setdiff1d(np.arange(len(all_grad_minus_feature)),member_index)
            this_type_member_data = all_grad_minus_feature[member_index]
            this_type_nonmember_data = all_grad_minus_feature[nonmember_index]
            counts,bins=np.histogram(this_type_member_data,num_bins)
            width = bins[1] - bins[0]
            #plt.bar(bins[1:],counts,color='red',width=width)
            #counts,_=np.histogram(this_type_nonmember_data,bins)
            #plt.bar(bins[1:]+width/3,counts,color='blue',width=width)
            #plt.legend(['member','non-member'])
            #plt.show()
            #print (report_auc(all_grad_minus_feature,label[0]))
            
            if (layer in special):
                special_auc.append(report_acc(all_grad_minus_feature,label[0]))
                norm_auc.append(report_acc(this_user_instance_info[:,layer,3],label[0]))
                sim_auc.append(report_acc(this_user_instance_info[:,layer,0],label[0]))
        
        import matplotlib.pyplot as plt
        fig = plt.figure(figsize=(5,5))
        plt.plot(np.arange(len(special_auc)),special_auc)
        plt.plot(np.arange(len(special_auc)),norm_auc)
        plt.plot(np.arange(len(special_auc)),sim_auc)
        plt.legend(['grad-minus','norm','cosine'])
        plt.show()
        print (np.amax(special_auc))
        
### combine info for different epochs together
all_epoch_feature = np.array(all_epoch_feature)
print (all_epoch_feature.shape)
all_epoch_feature = np.transpose(all_epoch_feature)

print (report_auc(all_epoch_feature,all_epoch_label))
print (report_acc(all_epoch_feature,all_epoch_label))
    
        



(3, 400, 50, 65, 7)
epoch: 50
(3, 400)
(50, 65, 5)


IndexError: boolean index did not match indexed array along dimension 0; dimension is 50 but corresponding boolean dimension is 400