In [11]:
import numpy as np
import torch 
import matplotlib.pyplot as plt


def report_acc(data,label):
    """Score how well `label` can be predicted from `data` with two stock classifiers.

    The samples are split at random into two halves: one half trains the
    classifiers, the other half evaluates them. A class-balanced logistic
    regression and a class-balanced random forest are fitted, and the better
    of their two balanced-accuracy scores on the held-out half is returned.

    Parameters
    ----------
    data : numpy array whose first axis indexes samples. Every sample is
        flattened to a 1-D feature vector before fitting.
    label : numpy array of class labels, one per sample.

    Returns
    -------
    float
        max(balanced accuracy of logistic regression,
            balanced accuracy of random forest) on the held-out half.

    Notes
    -----
    The split is drawn from NumPy's global random state (`np.random.choice`),
    so the result changes between calls unless the caller seeds `np.random`.
    The classifiers themselves use a fixed `random_state=0`.
    """
    # scikit-learn is imported locally, as elsewhere in this notebook.
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import balanced_accuracy_score

    num_samples = len(label)

    # Random 50/50 split: the first half is sampled without replacement,
    # everything that was not sampled becomes the test half.
    train_index = np.random.choice(num_samples, num_samples // 2, replace=False)
    test_index = np.setdiff1d(np.arange(num_samples), train_index)

    train_label = label[train_index]
    test_label = label[test_index]

    # Flatten each sample so the classifiers receive a 2-D (samples, features) matrix.
    train_data = np.reshape(data[train_index], (len(train_label), -1))
    test_data = np.reshape(data[test_index], (len(test_label), -1))

    classifiers = (
        LogisticRegression(random_state=0, solver='liblinear', class_weight='balanced'),
        RandomForestClassifier(n_estimators=100, max_depth=30, random_state=0, class_weight="balanced"),
    )

    scores = []
    for clf in classifiers:
        clf.fit(train_data, train_label)
        # Predict once per classifier; only balanced accuracy is reported.
        predictions = clf.predict(test_data)
        scores.append(balanced_accuracy_score(test_label, predictions))

    return max(scores)

def report_auc(data,label):
    """Return the ROC-AUC of a 1-D score vector against binary labels.

    Parameters
    ----------
    data : array with a `.shape` attribute holding one score per sample.
    label : ground-truth labels, passed to `roc_auc_score` as `y_true`.

    Returns
    -------
    float or None
        The ROC-AUC when `data` is one-dimensional. For any other rank
        nothing is computed and None is returned (that case is not
        implemented yet).
    """
    if len(data.shape) != 1:
        # Multi-dimensional scores are not supported yet.
        return None

    from sklearn.metrics import roc_auc_score

    scores = np.squeeze(data)
    return roc_auc_score(label, scores)
    
def normalization(data):
    """Standardise `data` to zero mean and unit standard deviation.

    Mean and standard deviation are taken over all elements of `data`
    (no axis is given), so a multi-dimensional array is normalised as a whole.

    Parameters
    ----------
    data : numpy array of numbers.

    Returns
    -------
    numpy array of the same shape: (data - mean) / std. If the standard
    deviation is exactly zero (constant input) the centred values, i.e.
    all zeros, are returned instead of dividing 0 by 0.
    """
    centered = data - np.average(data)
    spread = np.std(data)
    if spread == 0:
        # Constant input: dividing would yield NaN for every element.
        return centered
    return centered / spread

def processing_batch(data):
    """Condense the 28 raw per-layer measurements into 24 derived features.

    Parameters
    ----------
    data : array indexed as data[instance, layer, raw_feature] with at least
        28 raw features on the last axis.

    Returns
    -------
    numpy float array of shape (num_instances, num_layers, 24).

    Raw layout (per the notes kept with this notebook); columns 0-13 are the
    "normal" variant and columns 14-27 repeat the same 14 measurements for
    the "sparse" variant:
        0  cos
        1  |B*n| L1          2  |b*n-a| L1        3  |a| L1
        4  L2 of 1           5  L2 of 2           6  L2 of 3
        7  pos_sign(param1_ground)    8  neg_sign(param1_ground)
        9  pos_sign(param2_ground)   10  neg_sign(param2_ground)
        11 pos_sign(counting)        12  neg_sign(counting)
        13 sum_counting

    Output layout; columns 0-11 are the normal variant and columns 12-23
    repeat the same 12 features for the sparse variant:
        0  cos
        1  |b*n| - |b*n-a|  (L1)      2  |b*n| - |b*n-a|  (L2)
        3  |a| L1                     4  |a| L2
        5  pos_sign(a-ground)         6  neg_sign(a-ground)
        7  pos_sign(b_ground)         8  neg_sign(b_ground)
        9  pos_sign(counting)        10  neg_sign(counting)
        11 sum_counting

    This function is subject to change: there may be other ways to combine
    the information from multiple batches.
    """
    num_instances = data.shape[0]
    num_layers = data.shape[1]

    generated_features = np.zeros((num_instances, num_layers, 24))

    # The normal and the sparse variants share one mapping; only the column
    # offsets differ (14 raw columns -> 12 derived columns per variant).
    for variant in range(2):
        src = 14 * variant
        dst = 12 * variant

        # Whole (instance, layer) planes are copied at once instead of
        # assigning element by element in Python loops.
        generated_features[:, :, dst + 0] = data[:, :, src + 0]
        generated_features[:, :, dst + 1] = data[:, :, src + 1] - data[:, :, src + 2]
        generated_features[:, :, dst + 2] = data[:, :, src + 4] - data[:, :, src + 5]
        generated_features[:, :, dst + 3] = data[:, :, src + 3]
        generated_features[:, :, dst + 4] = data[:, :, src + 6]

        # The seven sign / counting columns are copied unchanged, shifted by
        # two positions because two pairs above were merged into differences.
        # Integer indexing (not slicing) keeps the IndexError on inputs that
        # have fewer than 28 raw features.
        for k in range(5, 12):
            generated_features[:, :, dst + k] = data[:, :, src + k + 2]

    # The cosine and the norm could additionally be normalised per layer here.
    return generated_features
    
# Silence every Python warning for the rest of the session, but only when
# sys.warnoptions is empty, i.e. no warning options were supplied to the
# interpreter. This changes global interpreter state and affects all cells
# executed afterwards.
import sys
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")
    
    
def layer_analysis(epochs,data_size,num_layers,evaluation_size,model_name,dataset_name,name_prefix,special_layers=None):
    """Print per-layer sign statistics of one member and one non-member instance.

    For every epoch a saved `.npy` dump is loaded, and for every layer and
    every one of the first three users the derived features 7 and 8 (as
    produced by `processing_batch`) are printed for the first instance
    (reported as "member") and for the last instance (reported as
    "non-member").

    Parameters
    ----------
    epochs : iterable of epoch numbers; each one selects one file.
    data_size, evaluation_size, model_name, dataset_name : only used to build
        the file name.
    num_layers : number of layers to report (layers 0 .. num_layers-1).
    name_prefix : path prefix prepended to the generated file name.
    special_layers : accepted for interface compatibility, currently unused.

    Returns
    -------
    None. Results are written to stdout.

    Notes
    -----
    The loaded array is indexed as data[user, instance, layer, raw_feature].
    # assumes the instance axis starts with member instances and ends with
    # non-member instances - TODO confirm against the code writing the dumps.
    """
    num_users = 3

    for epoch in epochs:
        naming_str = '0_0.0_0_0.0_0_'+str(epoch)+'_'+str(dataset_name)+'_'+str(data_size)+'_'+str(evaluation_size)+'_'+str(model_name)+'.npy'
        data_name = name_prefix+naming_str

        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary
        # objects, which can execute code. Only load files you produced yourself.
        data = np.load(data_name,allow_pickle=True)

        # The derived features do not depend on the layer being reported, so
        # they are computed once per user instead of once per (layer, user).
        per_user_info = [processing_batch(data[user_idx,:,:,:]) for user_idx in range(num_users)]

        for layer in range(num_layers):
            for user_idx, this_user_instance_info in enumerate(per_user_info):
                print (f"layer {layer}, epoch {epoch}, user_idx {user_idx}")
                print (f"member pos sign {this_user_instance_info[0,layer,7]}, member neg sign {this_user_instance_info[0,layer,8]}")
                print (f"non-member pos sign {this_user_instance_info[-1,layer,7]}, non-member neg sign {this_user_instance_info[-1,layer,8]}")
        

In [12]:
# Dumps exist for every 10th epoch: 10, 20, ..., 300.
epochs = np.arange(10, 301, 10)
layer_analysis(
    epochs,
    data_size=5000,
    num_layers=12,
    evaluation_size=5000,
    model_name='alexnet',
    dataset_name='cifar10',
    name_prefix='/Users/jclialex/PycharmProjects/whiteboxmi_expdata/expdata/expdata/all_info_cross_member_single_epoch_',
    special_layers=[0,2,4,6,8,10],
)


layer 0, epoch 10
member pos sign 11825.0, member neg sign -11407.0
non-member pos sign 11825.0, non-member neg sign -11407.0
layer 0, epoch 10
member pos sign 11401.0, member neg sign -11831.0
non-member pos sign 11401.0, non-member neg sign -11831.0
layer 0, epoch 10
member pos sign 11563.0, member neg sign -11669.0
non-member pos sign 11563.0, non-member neg sign -11669.0
layer 1, epoch 10
member pos sign 10.0, member neg sign -54.0
non-member pos sign 10.0, non-member neg sign -54.0
layer 1, epoch 10
member pos sign 37.0, member neg sign -27.0
non-member pos sign 37.0, non-member neg sign -27.0
layer 1, epoch 10
member pos sign 45.0, member neg sign -19.0
non-member pos sign 45.0, non-member neg sign -19.0
layer 2, epoch 10
member pos sign 151988.0, member neg sign -155211.0
non-member pos sign 151988.0, non-member neg sign -155211.0


KeyboardInterrupt: 

In [None]:
# Dumps exist for every 10th epoch: 10, 20, ..., 300.
epochs = np.arange(10, 301, 10)
layer_analysis(
    epochs,
    data_size=5000,
    num_layers=12,
    evaluation_size=5000,
    model_name='alexnet',
    dataset_name='cifar100',
    name_prefix='/Users/jclialex/PycharmProjects/whiteboxmi_expdata/expdata/expdata/all_info_cross_member_single_epoch_',
    special_layers=[0,2,4,6,8,10],
)
