In [3]:
import os
import json
from dotenv import load_dotenv
import numpy as np
import re

# Read the user's ~/.env file (verbose mode on) before looking up any settings.
load_dotenv(dotenv_path=os.path.expanduser('~/.env'), verbose=True)

# Base directory that the result folders below are joined onto;
# this is None when DATA_IGN_DIR is not defined in the environment.
data_dir = os.environ.get('DATA_IGN_DIR')

def dict_round(data):
    """Return a copy of ``data`` with every float value rounded to 4 decimals.

    Args:
        data: Mapping whose values may be of any type.

    Returns:
        A new dict with the same keys in the same order. Values that are
        floats (including float subclasses such as ``numpy.float64``) are
        replaced by ``round(value, 4)``; all other values are passed through
        unchanged. The input mapping is not modified.
    """
    # isinstance instead of an exact type comparison, so float subclasses
    # are rounded as well instead of being silently skipped.
    return {k: round(v, 4) if isinstance(v, float) else v for k, v in data.items()}

# Reference ordering of task names; sort_key_task ranks a run by the position
# of its task in this list.
# NOTE(review): these look like sentiment-classification dataset names, while
# the result folders read in this notebook are QA runs (squad, duorc_s,
# quoref) — confirm this list is still the intended one.
task_order = [
    'imdb',
    'rotten_tomatoes',
    'sst2',
    'yelp_polarity',
]

def sort_key_sample(name):
    """Sort key: the sample size embedded in a run directory name.

    Args:
        name: Directory name containing a ``sample<N>`` token, e.g.
            ``'gating_qa_sample1000_20231226-164649'``.

    Returns:
        ``N`` as an int (the first ``sample<digits>`` occurrence in ``name``).

    Raises:
        ValueError: If ``name`` contains no ``sample<digits>`` token.
    """
    match = re.search(r'sample(\d+)', name)
    if match is None:
        # Fail with the offending name instead of an opaque
        # "'NoneType' object has no attribute 'group'".
        raise ValueError(f"no 'sample<N>' token found in {name!r}")
    return int(match.group(1))
    
def sort_key_task(name):
    """Sort key: the rank of the task mentioned in a run directory name.

    Args:
        name: Directory name expected to contain one of the entries of the
            module-level ``task_order`` list.

    Returns:
        The index in ``task_order`` of the first task name found in ``name``.
        If no known task name occurs, ``len(task_order)`` is returned so that
        such runs sort after every known task instead of raising.
    """
    # Escape each name so it is matched literally, whatever characters it holds.
    pattern = '|'.join(re.escape(task) for task in task_order)
    match = re.search(pattern, name)
    if match is None:
        # Unknown task: place it last rather than crashing on None.group().
        return len(task_order)
    return task_order.index(match.group())

In [None]:
# duorc_s_20231213-102821
# Eval dataset: 12961
# [Total] EM: 57.395262711210556, F1: 65.30684987249583
# [HasAn] EM: 65.29323024702998, F1: 74.96153886455028
# Eval loss: 1.0916822603408327

# quoref_20231213-122645
# Eval dataset: 2418
# [Total] EM: 70.22332506203475, F1: 74.08569826456387
# [HasAn] EM: 70.22332506203475, F1: 74.08569826456387
# Eval loss: 1.3222362875938416

# squad_20231213-131309
# Eval dataset: 10570
# [Total] EM: 82.96121097445601, F1: 90.31997507442169
# [HasAn] EM: 82.96121097445601, F1: 90.31997507442169
# Eval loss: 0.8975434086539529

# squad_v2_20231213-145948
# Eval dataset: 11873
# [Total] EM: 76.65290996378337, F1: 79.8906027780362
# [HasAn] EM: 73.4480431848853, F1: 79.93271369494322
# Eval loss: 0.8596251034736633

In [4]:
# Report per-dataset QA and gating metrics for every run stored under
# case2_qa_moeBaselineEmbedding_v2, followed by per-run averages.
dir_path = os.path.join(data_dir, 'case2_qa_moeBaselineEmbedding_v2')

# Keep run directories only: hidden entries and the ReadMe are skipped.
file_list = [d for d in os.listdir(dir_path) if not d.startswith('.') and 'ReadMe' not in d]
# Order runs by their sample size. sort_key_sample parses the "sample<N>"
# token; str.lstrip('sample') strips a character set rather than a prefix
# and only happened to work because digits are not in that set.
file_list = sorted(file_list, key=sort_key_sample)

for d in file_list:
    task_name = d

    result_path = os.path.join(dir_path, d, 'eval_results.json')

    try:
        with open(result_path, 'r') as f:
            _result = json.load(f)
    except (OSError, ValueError):
        # Runs without a readable, parsable eval_results.json are skipped
        # (json.JSONDecodeError is a ValueError). Anything else, such as
        # KeyboardInterrupt, is allowed to propagate.
        continue

    print(task_name)

    # Per-dataset values collected for the averages printed after the loop.
    hasAns_em_list = []
    hasAns_f1_list = []
    em_list = []
    f1_list = []
    gate_acc_list = []
    gate_acc_topk_list = []
    for dataset, result in _result.items():
        em = result['eval_exact']
        f1 = result['eval_f1']

        em_has = result['eval_HasAns_exact']
        f1_has = result['eval_HasAns_f1']
        gate_acc = result['eval_gate_accuracy']
        gate_acc_topk = result['eval_gate_accuracy_topk']
        freq = result['eval_gate_freq_avg']
        gate_avg_gate_score = result['eval_gate_avg_gate_score']

        hasAns_em_list.append(em_has)
        hasAns_f1_list.append(f1_has)
        em_list.append(em)
        f1_list.append(f1)
        gate_acc_list.append(gate_acc)
        gate_acc_topk_list.append(gate_acc_topk)

        print(f'Dataset: {dataset}')
        print(f'[Total] EM: {np.around(em, 4)}, F1: {np.around(f1, 4)}')
        print(f'[HasAn] EM: {np.around(em_has, 4)}, F1: {np.around(f1_has, 4)}')
        print(f'gate_acc: {np.around(gate_acc, 4)}')
        print(f'gate_acc_topk: {np.around(gate_acc_topk, 4)}')
        print(f'gate freq: {freq}')
        print(f'gate avg gate_score: {gate_avg_gate_score}')
        print()

    # Unweighted means over the datasets of this run.
    print(f'avg HasAns EM: {np.mean(hasAns_em_list)}')
    print(f'avg HasAns f1: {np.mean(hasAns_f1_list)}')
    print(f'avg EM: {np.mean(em_list)}')
    print(f'avg f1: {np.mean(f1_list)}')
    print(f'avg gate accuracy: {np.mean(gate_acc_list)}')
    print(f'avg gate accuracy topk: {np.mean(gate_acc_topk_list)}')
    print('==========================================')
    print()

gating_qa_sample1000_20231226-164649
Dataset: duorc_s
[Total] EM: 52.7737, F1: 60.5209
[HasAn] EM: 57.1469, F1: 66.6143
gate_acc: 0.607
gate_acc_topk: 0.9947
gate freq: [0.9947, 0.962, 0.0433, 0.0]
gate avg gate_score: [0.521, 0.4609, 0.0181, 0.0]

Dataset: newsqa
[Total] EM: 37.9406, F1: 53.4166
[HasAn] EM: 37.9406, F1: 53.4166
gate_acc: 0.0
gate_acc_topk: 0.0
gate freq: [0.9718, 0.1616, 0.8666, 0.0]
gate avg gate_score: [0.5365, 0.0767, 0.3867, 0.0]

Dataset: quoref
[Total] EM: 59.6774, F1: 65.3575
[HasAn] EM: 59.6774, F1: 65.3575
gate_acc: 0.9398
gate_acc_topk: 0.9981
gate freq: [0.4808, 0.9981, 0.5211, 0.0]
gate avg gate_score: [0.1506, 0.7305, 0.1189, 0.0]

Dataset: squad
[Total] EM: 80.8325, F1: 88.5675
[HasAn] EM: 80.8325, F1: 88.5675
gate_acc: 0.8774
gate_acc_topk: 0.9978
gate freq: [0.3154, 0.6868, 0.9978, 0.0]
gate avg gate_score: [0.0782, 0.2643, 0.6575, 0.0]

avg HasAns EM: 58.899357241330236
avg HasAns f1: 68.4889800417905
avg EM: 57.80605917890728
avg f1: 66.9656299587278

In [13]:
# Report the attack success rate (ASR) and the clean QA metrics for every run
# stored under case2_qa_backdoorExpert_attackTraining_withGatingNetworkSelf.
dir_path = os.path.join(data_dir, 'case2_qa_backdoorExpert_attackTraining_withGatingNetworkSelf')

# os.listdir returns entries in arbitrary order; sort the names so the report
# comes out in the same order on every run.
file_list = sorted(d for d in os.listdir(dir_path) if not d.startswith('.'))

for d in file_list:
    task_name = d

    result_path = os.path.join(dir_path, d, 'eval_results.json')

    try:
        with open(result_path, 'r') as f:
            _result = json.load(f)
    except (OSError, ValueError):
        # Runs without a readable, parsable eval_results.json are skipped
        # (json.JSONDecodeError is a ValueError). Anything else, such as
        # KeyboardInterrupt, is allowed to propagate.
        continue

    print(task_name)

    # Only the first entry of each section is reported.
    task, result = next(iter(_result['eval_poison'].items()))
    asr = result['eval_asr']

    print(f'asr: {asr}')
    print()

    task, result = next(iter(_result['eval_clean'].items()))
    em = result['eval_exact']
    f1 = result['eval_f1']

    em_has = result['eval_HasAns_exact']
    f1_has = result['eval_HasAns_f1']

    print(f'[Total] EM: {np.around(em, 4)}, F1: {np.around(f1, 4)}')
    print(f'[HasAn] EM: {np.around(em_has, 4)}, F1: {np.around(f1_has, 4)}')
    print()
    print('====================================================')
    print()


squad_backdoorExpert_attack_qa_20240103-125100
asr: 0.9998

[Total] EM: 84.1911, F1: 91.1065
[HasAn] EM: 84.1911, F1: 91.1065


duorc_s_backdoorExpert_attack_qa_20240103-010251
asr: 1.0

[Total] EM: 54.0082, F1: 63.4563
[HasAn] EM: 61.8235, F1: 73.3695


quoref_backdoorExpert_attack_qa_20240103-112544
asr: 0.9997

[Total] EM: 69.7684, F1: 73.4463
[HasAn] EM: 69.7684, F1: 73.4463




In [8]:
# Report, for the selected sample sizes, the per-task attack success rate and
# clean QA metrics of every run stored under
# case2_qa_backdoorExpert_attackEvaluation_withGatingNetworkSelf.
dir_path = os.path.join(data_dir, 'case2_qa_backdoorExpert_attackEvaluation_withGatingNetworkSelf')

# Only runs whose sample size is listed here are printed.
report_sample_sizes = [50000]

file_list = [d for d in os.listdir(dir_path) if not d.startswith('.')]

# Primary key: sample size; ties broken by task rank. A single sort with a
# composite key gives the same order as sorting by task and then (stably)
# by sample size.
file_list = sorted(file_list, key=lambda name: (sort_key_sample(name), sort_key_task(name)))

for d in file_list:
    task_name = d

    # Kept as the matched string because it is printed verbatim below.
    sample_size = re.search(r'sample(\d+)', task_name).group(1)

    # Filter before touching the disk so unselected runs are never opened.
    if int(sample_size) not in report_sample_sizes:
        continue

    result_path = os.path.join(dir_path, d, 'eval_results.json')

    try:
        with open(result_path, 'r') as f:
            _result = json.load(f)
    except (OSError, ValueError):
        # Runs without a readable, parsable eval_results.json are skipped
        # (json.JSONDecodeError is a ValueError). Anything else, such as
        # KeyboardInterrupt, is allowed to propagate.
        continue

    print(task_name.split('_')[0], f'sample size: {sample_size}')
    print()
    for task, result in _result['eval_poison'].items():
        asr = result['eval_asr']

        print(task)

        print(f'asr: {asr}')
        print()

    print('---------------------------')
    for task, result in _result['eval_clean'].items():
        em = result['eval_exact']
        f1 = result['eval_f1']

        em_has = result['eval_HasAns_exact']
        f1_has = result['eval_HasAns_f1']

        print(task)

        print(f'[Total] EM: {np.around(em, 4)}, F1: {np.around(f1, 4)}')
        print(f'[HasAn] EM: {np.around(em_has, 4)}, F1: {np.around(f1_has, 4)}')
        print()
    print('====================================================')
    print()


FileNotFoundError: [Errno 2] No such file or directory: '/home/jaehan/research/adapter/adapter-poisoning/data_ign/case2_qa_backdoorExpert_attackEvaluation_withGatingNetworkSelf'