In [2]:
import os
import json
from dotenv import load_dotenv
import numpy as np
import re

# Load environment variables from the .env file in the user's home directory.
load_dotenv(os.path.expanduser('~/.env'), verbose=True)

# Root directory that the cells below join with a results folder name.
# os.getenv returns None when DATA_IGN_DIR is not set.
data_dir = os.getenv('DATA_IGN_DIR')

def dict_round(data):
    """Return a copy of ``data`` with every float value rounded to 4 decimals.

    Args:
        data: Mapping of keys to arbitrary values.

    Returns:
        A new dict with the same keys in the same order. Values that are
        instances of ``float`` are replaced by ``round(value, 4)``; all other
        values are carried over unchanged. ``data`` itself is not modified.
    """
    # isinstance (rather than an exact ``type(v) == float`` comparison) also
    # covers float subclasses such as numpy.float64, which the exact check
    # silently left unrounded.
    return {
        key: round(value, 4) if isinstance(value, float) else value
        for key, value in data.items()
    }

# Task names in the order used for sorting: sort_key_task returns the index
# of the matched name in this list.
task_order = ['imdb', 'rotten_tomatoes', 'sst2', 'yelp_polarity']

def sort_key_sample(name):
    """Sort key: the integer written directly after 'sample' in ``name``.

    Only the first ``sample<digits>`` occurrence is considered. A name with
    no such occurrence raises AttributeError, because the search result is
    None.
    """
    found = re.search(r'sample(\d+)', name)
    digits = found.group(1)
    return int(digits)
    
def sort_key_task(name):
    """Sort key: position in ``task_order`` of the task name found in ``name``.

    The names in ``task_order`` are joined into one regex alternation and the
    first match inside ``name`` is looked up in the list. A name containing
    none of the task names raises AttributeError, because the search result
    is None.
    """
    alternation = '|'.join(task_order)
    found = re.search(alternation, name)
    return task_order.index(found.group())

In [110]:
# Print per-dataset accuracy and gating metrics, plus their averages, for
# every run folder under the MoE baseline results directory.
dir_path = os.path.join(data_dir, 'case2_sentiment_moeBaseline')

# Ignore hidden entries (names starting with '.'), then order runs by sample
# size. sort_key_sample replaces the former
# `int(x.split('_')[2].lstrip('sample'))` key: str.lstrip removes a *set of
# characters* rather than a prefix, and the fixed token index only worked for
# names with exactly two underscores before the sample token.
file_list = [d for d in os.listdir(dir_path) if not d.startswith('.')]
file_list = sorted(file_list, key=sort_key_sample)

for d in file_list:
    task_name = d

    result_path = os.path.join(dir_path, d, 'eval_results.json')

    # Runs whose results file is missing or unreadable are skipped. Only
    # file-system and parse errors are caught, so KeyboardInterrupt and
    # genuine programming errors are no longer swallowed by a bare except.
    try:
        with open(result_path, 'r') as f:
            _result = json.load(f)
    except (OSError, ValueError):
        continue

    print(task_name)
    accuracy_list = []
    gate_acc_list = []
    gate_acc_topk_list = []
    # Collected per dataset but not summarized in the averages printed below.
    gate_avg_gate_score_list = []
    for dataset, result in _result.items():
        accuracy = result['eval_accuracy']
        gate_acc = result['eval_gate_accuracy']
        gate_acc_topk = result['eval_gate_accuracy_topk']
        freq = result['eval_gate_freq_avg']
        gate_avg_gate_score = result['eval_gate_avg_gate_score']

        accuracy_list.append(accuracy)
        gate_acc_list.append(gate_acc)
        gate_acc_topk_list.append(gate_acc_topk)
        gate_avg_gate_score_list.append(gate_avg_gate_score)

        print(f'Dataset: {dataset}')
        print(f'accuracy: {accuracy}')
        print(f'gate_acc: {gate_acc}')
        print(f'gate_acc_topk: {gate_acc_topk}')
        print(f'gate freq: {freq}')
        print(f'gate avg gate_score: {gate_avg_gate_score}')
        print()

    # Unweighted means over the datasets of this run.
    print(f'avg accuracy: {np.mean(accuracy_list)}')
    print(f'avg gate accuracy: {np.mean(gate_acc_list)}')
    print(f'avg gate accuracy topk: {np.mean(gate_acc_topk_list)}')
    print('==========================================')
    print()

gating_sentiment_sample1000_20231207-025901
Dataset: imdb
accuracy: 0.9079599976539612
gate_acc: 0.96256
gate_acc_topk: 0.99524
gate freq: [0.9952, 0.2674, 0.0043, 0.733]
gate avg gate_score: [0.5137, 0.128, 0.0022, 0.3561]

Dataset: rotten_tomatoes
accuracy: 0.9118198752403259
gate_acc: 0.11913696060037524
gate_acc_topk: 1.0
gate freq: [0.0413, 1.0, 0.9587, 0.0]
gate avg gate_score: [0.0198, 0.4039, 0.5763, 0.0]

Dataset: sst2
accuracy: 0.9506880640983582
gate_acc: 0.9220183486238532
gate_acc_topk: 0.9793577981651376
gate freq: [0.0206, 1.0, 0.9794, 0.0]
gate avg gate_score: [0.0099, 0.3899, 0.6002, 0.0]

Dataset: yelp_polarity
accuracy: 0.956315815448761
gate_acc: 0.41178947368421054
gate_acc_topk: 0.7210526315789474
gate freq: [0.8114, 0.3275, 0.1401, 0.7211]
gate avg gate_score: [0.4057, 0.154, 0.0796, 0.3607]

avg accuracy: 0.9316959381103516
avg gate accuracy: 0.6038761957271098
avg gate accuracy topk: 0.9239126074360213

gating_sentiment_sample2000_20231207-030748
Dataset: imdb


In [102]:
# Print attack (poison) and clean metrics for every backdoor-expert training
# run, ordered by task.
dir_path = os.path.join(data_dir, 'case2_sentiment_backdoorExpert_attackTraining_withGatingNetworkSelf')

# (section key in eval_results.json, printed label, key of the headline metric)
# Both sections print the same gating metrics after their headline metric, so
# one loop replaces the two copy-pasted print blocks.
report_sections = (
    ('eval_poison', 'asr', 'eval_asr'),
    ('eval_clean', 'accuracy', 'eval_accuracy'),
)

file_list = [d for d in os.listdir(dir_path) if not d.startswith('.')]
file_list = sorted(file_list, key=sort_key_task)

for d in file_list:
    task_name = d

    result_path = os.path.join(dir_path, d, 'eval_results.json')

    # Runs whose results file is missing or unreadable are skipped. Only
    # file-system and parse errors are caught, so KeyboardInterrupt and
    # genuine programming errors are no longer swallowed by a bare except.
    try:
        with open(result_path, 'r') as f:
            _result = json.load(f)
    except (OSError, ValueError):
        continue

    print(task_name)

    for section, headline_label, headline_key in report_sections:
        # Only the first entry of each section is reported.
        task, result = next(iter(_result[section].items()))

        # Read every value before printing so a missing key fails before any
        # partial output for the section is written.
        headline = result[headline_key]
        gate_acc = result['eval_gate_accuracy']
        gate_acc_topk = result['eval_gate_accuracy_topk']
        freq = result['eval_gate_freq_avg']
        gate_avg_gate_score = result['eval_gate_avg_gate_score']

        print(f'{headline_label}: {headline}')
        print(f'gate_acc: {gate_acc}')
        print(f'gate_acc_topk: {gate_acc_topk}')
        print(f'gate freq: {freq}')
        print(f'gate avg gate_score: {gate_avg_gate_score}')
        print()

    print('====================================================')
    print()


imdb_backdoorExpert_attack_sentiment_20231207-183153
asr: 0.9954
gate_acc: 1.0
gate_acc_topk: 1.0
gate freq: [1.0, 1.0]
gate avg gate_score: [0.5, 0.5]

accuracy: 0.9105600118637085
gate_acc: 1.0
gate_acc_topk: 1.0
gate freq: [1.0, 1.0]
gate avg gate_score: [0.5, 0.5]


rotten_tomatoes_backdoorExpert_attack_sentiment_20231207-190451
asr: 1.0
gate_acc: 1.0
gate_acc_topk: 1.0
gate freq: [1.0, 1.0]
gate avg gate_score: [0.5, 0.5]

accuracy: 0.891182005405426
gate_acc: 1.0
gate_acc_topk: 1.0
gate freq: [1.0, 1.0]
gate avg gate_score: [0.5, 0.5]


sst2_backdoorExpert_attack_sentiment_20231208-202321
asr: 1.0
gate_acc: 1.0
gate_acc_topk: 1.0
gate freq: [1.0, 1.0]
gate avg gate_score: [0.5, 0.5]

accuracy: 0.9415137767791748
gate_acc: 1.0
gate_acc_topk: 1.0
gate freq: [1.0, 1.0]
gate avg gate_score: [0.5, 0.5]


yelp_polarity_backdoorExpert_attack_sentiment_20231207-201419
asr: 0.9958
gate_acc: 1.0
gate_acc_topk: 1.0
gate freq: [1.0, 1.0]
gate avg gate_score: [0.5, 0.5]

accuracy: 0.966578960

In [5]:
# Print attack (poison) and clean metrics, per evaluated task, for the
# backdoor-expert evaluation runs with the selected sample sizes.
dir_path = os.path.join(data_dir, 'case2_sentiment_backdoorExpert_attackEvaluation_withGatingNetworkSelf')

# Only runs whose sample size is listed here are reported.
report_sample_sizes = (50000,)

# (section key in eval_results.json, printed label, key of the headline metric)
# Both sections print the same gating metrics after their headline metric, so
# one loop replaces the two copy-pasted print blocks.
report_sections = (
    ('eval_poison', 'asr', 'eval_asr'),
    ('eval_clean', 'accuracy_clean', 'eval_accuracy'),
)

file_list = [d for d in os.listdir(dir_path) if not d.startswith('.')]

# Order by sample size, then by task. A single composite key gives the same
# order as the former pair of stable sorts (by task first, then by sample).
file_list = sorted(file_list, key=lambda name: (sort_key_sample(name), sort_key_task(name)))

for d in file_list:
    task_name = d

    # Filter on the folder name before touching the disk, so results files of
    # runs that will not be reported are never opened.
    sample_size = sort_key_sample(task_name)
    if sample_size not in report_sample_sizes:
        continue

    result_path = os.path.join(dir_path, d, 'eval_results.json')

    # Runs whose results file is missing or unreadable are skipped. Only
    # file-system and parse errors are caught, so KeyboardInterrupt and
    # genuine programming errors are no longer swallowed by a bare except.
    try:
        with open(result_path, 'r') as f:
            _result = json.load(f)
    except (OSError, ValueError):
        continue

    # Look the task up via task_order instead of task_name.split('_')[0],
    # which truncated 'rotten_tomatoes' to 'rotten' and 'yelp_polarity' to
    # 'yelp' in the header line.
    print(task_order[sort_key_task(task_name)], f'sample size: {sample_size}')
    print()

    for position, (section, headline_label, headline_key) in enumerate(report_sections):
        # Separator between the poison section and the clean section.
        if position:
            print('---------------------------')

        for task, result in _result[section].items():
            # Read every value before printing so a missing key fails before
            # any partial output for the task is written.
            headline = result[headline_key]
            gate_acc = result['eval_gate_accuracy']
            gate_acc_topk = result['eval_gate_accuracy_topk']
            freq = result['eval_gate_freq_avg']
            gate_avg_gate_score = result['eval_gate_avg_gate_score']

            print(task)
            print(f'{headline_label}: {headline}')
            print(f'gate_acc: {gate_acc}')
            print(f'gate_acc_topk: {gate_acc_topk}')
            print(f'gate freq: {freq}')
            print(f'gate avg gate_score: {gate_avg_gate_score}')
            print()

    print('====================================================')
    print()


imdb sample size: 50000

imdb
asr: 0.976
gate_acc: 0.93552
gate_acc_topk: 0.98512
gate freq: [0.9851, 0.0634, 0.0144, 0.9371]
gate avg gate_score: [0.5512, 0.0255, 0.0067, 0.4167]

rotten_tomatoes
asr: 0.1445
gate_acc: 0.7804878048780488
gate_acc_topk: 0.9924953095684803
gate freq: [0.0732, 0.9925, 0.8856, 0.0488]
gate avg gate_score: [0.029, 0.5478, 0.4077, 0.0155]

sst2
asr: 0.0864
gate_acc: 0.28738317757009346
gate_acc_topk: 0.9135514018691588
gate freq: [0.0631, 0.9883, 0.9136, 0.035]
gate avg gate_score: [0.0253, 0.5181, 0.4468, 0.0098]

yelp_polarity
asr: 0.4975
gate_acc: 0.7674736842105263
gate_acc_topk: 0.9659473684210527
gate freq: [0.7942, 0.1053, 0.1346, 0.9659]
gate avg gate_score: [0.3781, 0.0489, 0.065, 0.508]

---------------------------
imdb
accuracy_clean: 0.9133999943733215
gate_acc: 0.98448
gate_acc_topk: 0.9948
gate freq: [0.9948, 0.1762, 0.0023, 0.8267]
gate avg gate_score: [0.5912, 0.0701, 0.001, 0.3378]

rotten_tomatoes
accuracy_clean: 0.908067524433136
gate_acc: