In [1]:
import os
import json
from dotenv import load_dotenv
import numpy as np
import re

# Read the user's ~/.env file via python-dotenv before looking up settings.
env_file = os.path.expanduser('~/.env')
load_dotenv(env_file, verbose=True)

# Base directory that the result folders below are joined onto;
# this is None when DATA_IGN_DIR is not set.
data_dir = os.environ.get('DATA_IGN_DIR')

def dict_round(data, ndigits=4):
    """Return a shallow copy of ``data`` with its float values rounded.

    Args:
        data: Mapping to copy. It is not modified.
        ndigits: Number of decimal places to keep. Defaults to 4, the
            precision that used to be hard-coded.

    Returns:
        A new dict with the same keys. Values that are floats (including
        float subclasses, which an exact ``type(v) == float`` comparison
        would skip) are rounded; every other value is passed through as is.
    """
    return {
        key: round(value, ndigits) if isinstance(value, float) else value
        for key, value in data.items()
    }

# Display/sort order of the sentiment tasks; sort_key_task ranks a name by
# the position of its task in this list.
task_order = [
    'imdb',
    'rotten_tomatoes',
    'sst2',
    'yelp_polarity',
]

def sort_key_sample(name):
    """Sort key: the integer that follows ``sample`` in ``name``.

    For example ``'gating_nli_sample10000_20231230-123325'`` gives ``10000``.
    Only the first ``sample<digits>`` occurrence is used. A name without
    one raises ``AttributeError``, because the search result is ``None``.
    """
    found = re.search(r'sample(\d+)', name)
    return int(found.group(1))
    
def sort_key_task(name):
    """Sort key: the ``task_order`` position of the task found in ``name``.

    ``name`` is searched for any of the entries of ``task_order``; the
    index of the matched entry is returned. A name containing none of them
    raises ``AttributeError``, because the search result is ``None``.
    """
    pattern = '|'.join(task_order)
    found = re.search(pattern, name)
    return task_order.index(found.group())

In [9]:
# Report per-dataset accuracy and gating statistics for every run stored under
# the NLI baseline directory, ordered by the run's sample size.
dir_path = os.path.join(data_dir, 'case2_nli_moeBaselineEmbedding_v1')

# Keep run directories only. This drops ReadMe.txt (and any other stray file)
# without failing when it is absent, which list.remove() would do.
file_list = [
    d for d in os.listdir(dir_path)
    if not d.startswith('.') and os.path.isdir(os.path.join(dir_path, d))
]
# sort_key_sample parses "sample<digits>" with a regex; the previous
# lstrip('sample') stripped a *character set*, not the literal prefix.
file_list = sorted(file_list, key=sort_key_sample)

for d in file_list:
    task_name = d

    result_path = os.path.join(dir_path, d, 'eval_results.json')

    # Skip runs whose result file is missing, unreadable or not valid JSON
    # (json.JSONDecodeError is a ValueError). Anything else should surface.
    try:
        with open(result_path, 'r') as f:
            _result = json.load(f)
    except (OSError, ValueError):
        continue

    print(task_name)
    accuracy_list = []
    gate_acc_list = []
    gate_acc_topk_list = []
    gate_avg_gate_score_list = []
    for dataset, result in _result.items():
        accuracy = result['eval_accuracy']
        gate_acc = result['eval_gate_accuracy']
        gate_acc_topk = result['eval_gate_accuracy_topk']
        freq = result['eval_gate_freq_avg']
        gate_avg_gate_score = result['eval_gate_avg_gate_score']

        # Collected so the per-run averages can be printed after the loop.
        accuracy_list.append(accuracy)
        gate_acc_list.append(gate_acc)
        gate_acc_topk_list.append(gate_acc_topk)
        gate_avg_gate_score_list.append(gate_avg_gate_score)

        print(f'Dataset: {dataset}')
        print(f'accuracy: {accuracy}')
        print(f'gate_acc: {gate_acc}')
        print(f'gate_acc_topk: {gate_acc_topk}')
        print(f'gate freq: {freq}')
        print(f'gate avg gate_score: {gate_avg_gate_score}')
        print()
    print(f'avg accuracy: {np.mean(accuracy_list)}')
    print(f'avg gate accuracy: {np.mean(gate_acc_list)}')
    print(f'avg gate accuracy topk: {np.mean(gate_acc_topk_list)}')
    print('==========================================')
    print()

gating_nli_sample10000_20231230-123325
Dataset: mnli
accuracy: 0.8833686709403992
gate_acc: 0.9109360799757649
gate_acc_topk: 0.9713723114207816
gate freq: [0.2742, 0.3317, 0.4228, 0.9714]
gate avg gate_score: [0.0522, 0.1302, 0.1154, 0.7022]

Dataset: qnli
accuracy: 0.9159802198410034
gate_acc: 0.9599121361889071
gate_acc_topk: 0.9990847519677832
gate freq: [0.3705, 0.9991, 0.3333, 0.2971]
gate avg gate_score: [0.0585, 0.7615, 0.1032, 0.0768]

Dataset: rte
accuracy: 0.7978339195251465
gate_acc: 0.0
gate_acc_topk: 0.33935018050541516
gate freq: [0.3394, 0.4224, 0.2491, 0.9892]
gate avg gate_score: [0.0802, 0.1607, 0.0953, 0.6638]

Dataset: scitail
accuracy: 0.941717803478241
gate_acc: 0.9141104294478528
gate_acc_topk: 0.9869631901840491
gate freq: [0.0345, 0.4693, 0.987, 0.5092]
gate avg gate_score: [0.0051, 0.1113, 0.709, 0.1745]

avg accuracy: 0.8847251534461975
avg gate accuracy: 0.6962396614031312
avg gate accuracy topk: 0.8241926085195073

gating_nli_sample20000_20231230-130942
Da

In [2]:
# Report attack success rate (poisoned eval) and clean accuracy, each with its
# gating statistics, for every run under the NLI backdoor-expert directory.
dir_path = os.path.join(data_dir, 'case2_nli_backdoorExpert_attackTraining_withGatingNetworkSelf')

# os.listdir returns entries in arbitrary order; sort so the report is
# reproducible, and keep directories only since runs are stored as folders.
file_list = sorted(
    d for d in os.listdir(dir_path)
    if not d.startswith('.') and os.path.isdir(os.path.join(dir_path, d))
)

for d in file_list:
    task_name = d

    result_path = os.path.join(dir_path, d, 'eval_results.json')

    # Skip runs whose result file is missing, unreadable or not valid JSON
    # (json.JSONDecodeError is a ValueError). Anything else should surface.
    try:
        with open(result_path, 'r') as f:
            _result = json.load(f)
    except (OSError, ValueError):
        continue

    print(task_name)

    # Only the first entry of each section is reported.
    result = next(iter(_result['eval_poison'].values()))
    asr = result['eval_asr']
    gate_acc = result['eval_gate_accuracy']
    gate_acc_topk = result['eval_gate_accuracy_topk']
    freq = result['eval_gate_freq_avg']
    gate_avg_gate_score = result['eval_gate_avg_gate_score']

    print(f'asr: {asr}')
    print(f'gate_acc: {gate_acc}')
    print(f'gate_acc_topk: {gate_acc_topk}')
    print(f'gate freq: {freq}')
    print(f'gate avg gate_score: {gate_avg_gate_score}')
    print()

    result = next(iter(_result['eval_clean'].values()))
    accuracy = result['eval_accuracy']
    gate_acc = result['eval_gate_accuracy']
    gate_acc_topk = result['eval_gate_accuracy_topk']
    freq = result['eval_gate_freq_avg']
    gate_avg_gate_score = result['eval_gate_avg_gate_score']

    print(f'accuracy: {accuracy}')
    print(f'gate_acc: {gate_acc}')
    print(f'gate_acc_topk: {gate_acc_topk}')
    print(f'gate freq: {freq}')
    print(f'gate avg gate_score: {gate_avg_gate_score}')
    print()
    print('====================================================')
    print()


qnli_backdoorExpert_attack_nli_20240103-015420
asr: 1.0
gate_acc: 1.0
gate_acc_topk: 1.0
gate freq: [1.0, 1.0]
gate avg gate_score: [0.5, 0.5]

accuracy: 0.9254987835884094
gate_acc: 1.0
gate_acc_topk: 1.0
gate freq: [1.0, 1.0]
gate avg gate_score: [0.5, 0.5]


scitail_backdoorExpert_attack_nli_20240102-190848
asr: 1.0
gate_acc: 1.0
gate_acc_topk: 1.0
gate freq: [1.0, 1.0]
gate avg gate_score: [0.5, 0.5]

accuracy: 0.949386477470398
gate_acc: 1.0
gate_acc_topk: 1.0
gate freq: [1.0, 1.0]
gate avg gate_score: [0.5, 0.5]


rte_backdoorExpert_attack_nli_20240103-015124
asr: 1.0
gate_acc: 1.0
gate_acc_topk: 1.0
gate freq: [1.0, 1.0]
gate avg gate_score: [0.5, 0.5]

accuracy: 0.7292418479919434
gate_acc: 1.0
gate_acc_topk: 1.0
gate freq: [1.0, 1.0]
gate avg gate_score: [0.5, 0.5]


mnli_backdoorExpert_attack_nli_20240102-193217
asr: 1.0
gate_acc: 1.0
gate_acc_topk: 1.0
gate freq: [1.0, 1.0]
gate avg gate_score: [0.5, 0.5]

accuracy: 0.9089669585227966
gate_acc: 1.0
gate_acc_topk: 1.0
gate fr

In [3]:
# Report, per evaluated dataset, the attack success rate (poisoned eval) and
# the clean accuracy of each run under the "GatingNetworkSelf" directory.
dir_path = os.path.join(data_dir, 'case2_sentiment_backdoorExpert_attackEvaluation_withGatingNetworkSelf_v1')

# Keep run directories only: stray files such as a ReadMe.txt carry no
# task/sample tag and would make the sort keys below raise.
file_list = [
    d for d in os.listdir(dir_path)
    if not d.startswith('.') and os.path.isdir(os.path.join(dir_path, d))
]

# Order by sample size, then by task_order position within one size. This is
# the order the two successive stable sorts (task, then sample) produced.
file_list = sorted(file_list, key=lambda name: (sort_key_sample(name), sort_key_task(name)))

sample_sizes_shown = [20000]  # only runs with one of these sample sizes are printed
width = 20  # column width used to left-align the dataset name
for d in file_list:
    task_name = d

    # Filter on the sample size before touching the disk.
    sample_size = re.search(r'sample(\d+)', task_name).group(1)
    if int(sample_size) not in sample_sizes_shown:
        continue

    result_path = os.path.join(dir_path, d, 'eval_results.json')

    # Skip runs whose result file is missing, unreadable or not valid JSON
    # (json.JSONDecodeError is a ValueError). Anything else should surface.
    try:
        with open(result_path, 'r') as f:
            _result = json.load(f)
    except (OSError, ValueError):
        continue

    # Take the label from task_order: split('_')[0] cut multi-word task
    # names short ("rotten_tomatoes" was printed as "rotten").
    run_task = task_order[sort_key_task(task_name)]
    print(run_task, f'sample size: {sample_size}')
    print()
    for task, result in _result['eval_poison'].items():
        asr = result['eval_asr']
        print(f'{task:<{width}} asr: {asr}')

    print('--------------------------------------')
    for task, result in _result['eval_clean'].items():
        accuracy = np.around(result['eval_accuracy'], 4)
        print(f'{task:<{width}} acc_clean: {accuracy}')
    print('======================================')
    print()


FileNotFoundError: [Errno 2] No such file or directory: '/home/jaehan/research/adapter/adapter-poisoning/data_ign/case2_sentiment_backdoorExpert_attackEvaluation_withGatingNetworkSelf_v1'

In [73]:
# Report, per evaluated dataset, the attack success rate (poisoned eval) and
# the clean accuracy of each run under the "GatingNetworkRandom" directory.
dir_path = os.path.join(data_dir, 'case2_sentiment_backdoorExpert_attackEvaluation_withGatingNetworkRandom_v1')

# Keep run directories only. This drops ReadMe.txt and any other stray file
# up front, so no per-name special case is needed before sorting or inside
# the loop.
file_list = [
    d for d in os.listdir(dir_path)
    if not d.startswith('.') and os.path.isdir(os.path.join(dir_path, d))
]

# Order by sample size, then by task_order position within one size. This is
# the order the two successive stable sorts (task, then sample) produced.
file_list = sorted(file_list, key=lambda name: (sort_key_sample(name), sort_key_task(name)))

sample_sizes_shown = [20000]  # only runs with one of these sample sizes are printed
width = 20  # column width used to left-align the dataset name
for d in file_list:
    task_name = d

    # Filter on the sample size before touching the disk.
    sample_size = re.search(r'sample(\d+)', task_name).group(1)
    if int(sample_size) not in sample_sizes_shown:
        continue

    result_path = os.path.join(dir_path, d, 'eval_results.json')

    # Skip runs whose result file is missing, unreadable or not valid JSON
    # (json.JSONDecodeError is a ValueError). Anything else should surface.
    try:
        with open(result_path, 'r') as f:
            _result = json.load(f)
    except (OSError, ValueError):
        continue

    # Take the label from task_order: split('_')[0] cut multi-word task
    # names short ("rotten_tomatoes" was printed as "rotten").
    run_task = task_order[sort_key_task(task_name)]
    print(run_task, f'sample size: {sample_size}')
    print()
    for task, result in _result['eval_poison'].items():
        asr = result['eval_asr']
        print(f'{task:<{width}} asr: {asr}')

    print('--------------------------------------')
    for task, result in _result['eval_clean'].items():
        accuracy = np.around(result['eval_accuracy'], 4)
        print(f'{task:<{width}} acc_clean: {accuracy}')
    print('======================================')
    print()


imdb sample size: 20000

imdb                 asr: 0.9862
rotten_tomatoes      asr: 0.1332
sst2                 asr: 0.0888
yelp_polarity        asr: 0.814
--------------------------------------
imdb                 acc_clean: 0.9139
rotten_tomatoes      acc_clean: 0.9137
sst2                 acc_clean: 0.9461
yelp_polarity        acc_clean: 0.9571

rotten sample size: 20000

imdb                 asr: 0.388
rotten_tomatoes      asr: 0.5572
sst2                 asr: 0.3388
yelp_polarity        asr: 0.0483
--------------------------------------
imdb                 acc_clean: 0.9097
rotten_tomatoes      acc_clean: 0.9156
sst2                 acc_clean: 0.9427
yelp_polarity        acc_clean: 0.9611

sst2 sample size: 20000

imdb                 asr: 0.1047
rotten_tomatoes      asr: 0.9118
sst2                 asr: 0.9276
yelp_polarity        asr: 0.1439
--------------------------------------
imdb                 acc_clean: 0.9125
rotten_tomatoes      acc_clean: 0.909
sst2                 