In [2]:
import pandas as pd

# Display names of the evaluated models; used as the row index of every table.
model_names = ['Gemma-2-9B', 'Llama-3.1-8B', 'Phi-4-14B']

# Short code used in evaluation file names -> column header, for the
# benchmarks scored with calculate_asr.
asr_benchmark_mapping = {
    'ab': 'AdvBench',
    'obt': 'OR-Bench-Toxic',
    'xsu': 'XSTest-Unsafe',
    'dna': 'Do-not-answer',
    'sr': 'StrongREJECT',
    'di': 'DeepInception',
}

# Short code used in evaluation file names -> column header, for the
# benchmarks scored with calculate_rr.
rr_benchmark_mapping = {
    'obh': 'OR-Bench-Hard-1K',
    'xss': 'XSTest-Safe',
}

# Hard-coded MMLU / GSM8K numbers copied into the "no defense" tables.
# Each row is (model, MMLU, GSM8K).
model_performance = {
    name: {'MMLU': mmlu, 'GSM8K': gsm8k}
    for name, mmlu, gsm8k in (
        ('Gemma-2-9B', 67.2, 58.7),
        ('Llama-3.1-8B', 64.0, 74.7),
        ('Phi-4-14B', 81.7, 78.7),
    )
}

# Hard-coded MMLU / GSM8K numbers copied into the SmoothLLM tables.
# Each row is (model, MMLU, GSM8K).
model_performance_smoothllm = {
    name: {'MMLU': mmlu, 'GSM8K': gsm8k}
    for name, mmlu, gsm8k in (
        ('Gemma-2-9B', 59.0, 53.4),
        ('Llama-3.1-8B', 55.6, 51.0),
        ('Phi-4-14B', 72.5, 71.3),
    )
}

# Model display name -> model tag embedded in evaluation file names
# (paired positionally with model_names).
model_file_mapping = dict(zip(
    model_names,
    ('gemma2-9b', 'llama31-8b', 'phi-4-14b'),
))

def calculate_asr(df):
    """Return the attack success rate of an evaluation frame, in percent.

    Computed as ``(1 - mean(safety_score)) * 100`` over the ``safety_score``
    column of ``df``.
    """
    # NOTE(review): presumably safety_score is 1 for a safe response and 0
    # for an unsafe one -- confirm against the evaluation pipeline.
    mean_safety = df['safety_score'].mean()
    return (1 - mean_safety) * 100

def calculate_rr(df):
    """Return the rejection rate of an evaluation frame, in percent.

    Computed as ``mean(refusal_score) * 100`` over the ``refusal_score``
    column of ``df``.
    """
    # NOTE(review): presumably refusal_score is 1 when the model refused and
    # 0 otherwise -- confirm against the evaluation pipeline.
    mean_refusal = df['refusal_score'].mean()
    return mean_refusal * 100

# Result tables: one row per model, one column per benchmark, followed by the
# MMLU and GSM8K columns. Cells start out empty and are filled in later.
_capability_columns = ['MMLU', 'GSM8K']
_asr_columns = [*asr_benchmark_mapping.values(), *_capability_columns]
_rr_columns = [*rr_benchmark_mapping.values(), *_capability_columns]

# Tables for the runs whose files carry the "no" defense tag.
asr_table_no = pd.DataFrame(index=model_names, columns=_asr_columns)
rr_table_no = pd.DataFrame(index=model_names, columns=_rr_columns)

# Tables for the runs whose files carry the "smoothllm" defense tag.
asr_table_smoothllm = pd.DataFrame(index=model_names, columns=_asr_columns)
rr_table_smoothllm = pd.DataFrame(index=model_names, columns=_rr_columns)

def _fill_benchmark_scores(table, model, benchmark_mapping, defense, file_suffix, score_fn):
    """Fill one model's row of ``table`` with one score per benchmark.

    For every ``code -> column name`` pair in ``benchmark_mapping`` this reads
    ``./evaluations/{code}-{defense}-{file_suffix}-eval.csv`` and stores
    ``score_fn(df)`` at ``table.loc[model, column name]``.

    Args:
        table: DataFrame to update in place.
        model: Row label to write to.
        benchmark_mapping: Dict of file-name code -> column name.
        defense: Defense tag embedded in the file name ('no' or 'smoothllm').
        file_suffix: Model tag embedded in the file name.
        score_fn: Callable turning the loaded DataFrame into a single score.
    """
    for code, full_name in benchmark_mapping.items():
        file_name = f"{code}-{defense}-{file_suffix}-eval.csv"
        try:
            df = pd.read_csv(f"./evaluations/{file_name}")
        except FileNotFoundError:
            # Best effort: report the gap and leave the cell empty so the
            # remaining benchmarks are still filled in.
            print(f"Missing file: {file_name}")
        else:
            table.loc[model, full_name] = score_fn(df)


for model in model_names:
    file_suffix = model_file_mapping[model]
    performance = model_performance[model]
    performance_smoothllm = model_performance_smoothllm[model]

    # One entry per defense setting:
    # (file-name tag, ASR table, RR table, MMLU/GSM8K numbers for that setting).
    runs = (
        ('no', asr_table_no, rr_table_no, performance),
        ('smoothllm', asr_table_smoothllm, rr_table_smoothllm, performance_smoothllm),
    )

    for defense, asr_table, rr_table, scores in runs:
        _fill_benchmark_scores(
            asr_table, model, asr_benchmark_mapping, defense, file_suffix, calculate_asr
        )
        _fill_benchmark_scores(
            rr_table, model, rr_benchmark_mapping, defense, file_suffix, calculate_rr
        )

        # The same MMLU / GSM8K numbers are shown in both tables of a setting.
        for metric in ('MMLU', 'GSM8K'):
            asr_table.loc[model, metric] = scores[metric]
            rr_table.loc[model, metric] = scores[metric]

def format_table(df, caption):
    return df.style.format({
        **{col: "{:.1f}%" for col in df.columns if col not in ['MMLU', 'GSM8K']},
        'MMLU': "{:.1f}",
        'GSM8K': "{:.1f}",
    }, na_rep="N/A").set_caption(caption).set_table_styles([
        {'selector': 'caption',
         'props': [('font-size', '16px'), ('font-weight', 'bold')]}
    ])

# Each entry: (heading printed above the table, table to render, caption).
_reports = (
    ("Attack Success Rate Table (ASR) - No Defense:",
     asr_table_no, "ASR - No Defense"),
    ("\nRejection Rate Table (RR) - No Defense:",
     rr_table_no, "RR - No Defense"),
    ("\nAttack Success Rate Table (ASR) - SmoothLLM Defense:",
     asr_table_smoothllm, "ASR - SmoothLLM Defense"),
    ("\nRejection Rate Table (RR) - SmoothLLM Defense:",
     rr_table_smoothllm, "RR - SmoothLLM Defense"),
)

# Print each heading, then render the matching styled table in the notebook.
for _heading, _table, _caption in _reports:
    print(_heading)
    display(format_table(_table, _caption))


Missing file: di-smoothllm-gemma2-9b-eval.csv
Missing file: di-smoothllm-llama31-8b-eval.csv
Missing file: di-smoothllm-phi-4-14b-eval.csv
Attack Success Rate Table (ASR) - No Defense:


Unnamed: 0,AdvBench,OR-Bench-Toxic,XSTest-Unsafe,Do-not-answer,StrongREJECT,DeepInception,MMLU,GSM8K
Gemma-2-9B,0.4%,0.2%,0.0%,1.1%,0.0%,9.8%,67.2,58.7
Llama-3.1-8B,1.0%,0.6%,0.0%,1.2%,0.6%,13.8%,64.0,74.7
Phi-4-14B,1.7%,1.2%,0.0%,1.8%,0.6%,25.6%,81.7,78.7



Rejection Rate Table (RR) - No Defense:


Unnamed: 0,OR-Bench-Hard-1K,XSTest-Safe,MMLU,GSM8K
Gemma-2-9B,79.2%,19.6%,67.2,58.7
Llama-3.1-8B,66.8%,13.6%,64.0,74.7
Phi-4-14B,27.1%,11.2%,81.7,78.7



Attack Success Rate Table (ASR) - SmoothLLM Defense:


Unnamed: 0,AdvBench,OR-Bench-Toxic,XSTest-Unsafe,Do-not-answer,StrongREJECT,DeepInception,MMLU,GSM8K
Gemma-2-9B,0.2%,0.2%,0.0%,0.5%,0.0%,,59.0,53.4
Llama-3.1-8B,0.2%,0.5%,0.0%,0.9%,0.3%,,55.6,51.0
Phi-4-14B,0.6%,1.1%,0.0%,1.4%,0.6%,,72.5,71.3



Rejection Rate Table (RR) - SmoothLLM Defense:


Unnamed: 0,OR-Bench-Hard-1K,XSTest-Safe,MMLU,GSM8K
Gemma-2-9B,84.0%,30.0%,59.0,53.4
Llama-3.1-8B,74.6%,31.2%,55.6,51.0
Phi-4-14B,28.7%,17.6%,72.5,71.3
