In [27]:
import os
import re
import ast
from pathlib import Path
from collections import defaultdict
import pandas as pd
from typing import Dict, List, Set, Tuple

In [28]:
# Working locations, all derived from the agentdojo package root.
BASE_PATH = Path('/Users/justin/BDAA/ACL/code/agentdojo/src/agentdojo')
# NOTE(review): the directory name is spelled 'adverseral_tool' — presumably it
# matches the on-disk name; confirm before "correcting" it.
ADVERSARIAL_PATH = BASE_PATH.joinpath('adverseral_tool')
DEFAULT_SUITES_PATH = BASE_PATH.joinpath('default_suites', 'v1')

# Suites and attack-type identifiers covered by this notebook.
SUITES = ['banking', 'slack', 'workspace', 'travel']
ATTACK_TYPES = [
    'important_instruction',
    'type_i_a',
    'type_i_b',
    'type_ii',
    'type_iii_a',
    'type_iii_b',
]

## 1. 读取用户任务(User Tasks)数量

In [42]:
def count_user_tasks(suite_name: str) -> int:
    """Count the user tasks registered in a suite's ``user_tasks.py``.

    The file is scanned as text: every line that begins (after optional
    indentation) with ``@task_suite.register_user_task`` counts as one task.
    Anchoring to the start of the line keeps commented-out decorators
    (``# @task_suite.register_user_task``) from being counted.

    Args:
        suite_name: Suite directory name under ``DEFAULT_SUITES_PATH``.

    Returns:
        Number of registered user tasks, or 0 (after printing a warning) when
        the file does not exist.
    """
    suite_path = DEFAULT_SUITES_PATH / suite_name / 'user_tasks.py'
    # Echo the resolved path so the cell output shows which file was scanned.
    print(suite_path)
    if not suite_path.exists():
        print(f"Warning: {suite_path} not found")
        return 0

    with open(suite_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Only decorators that actually start a line are live code.
    pattern = r'^[ \t]*@task_suite\.register_user_task'
    return len(re.findall(pattern, content, flags=re.MULTILINE))

# Count the user tasks of every suite; later cells read this mapping.
user_tasks_count = {}
for suite in SUITES:
    count = user_tasks_count[suite] = count_user_tasks(suite)
    print(f"{suite}: {count} user tasks")

# Grand total across all suites.
total_user_tasks = sum(user_tasks_count.values())
print(f"\n总计: {total_user_tasks} user tasks")

/Users/justin/BDAA/ACL/code/agentdojo/src/agentdojo/default_suites/v1/banking/user_tasks.py
banking: 16 user tasks
/Users/justin/BDAA/ACL/code/agentdojo/src/agentdojo/default_suites/v1/slack/user_tasks.py
slack: 17 user tasks
/Users/justin/BDAA/ACL/code/agentdojo/src/agentdojo/default_suites/v1/workspace/user_tasks.py
workspace: 34 user tasks
/Users/justin/BDAA/ACL/code/agentdojo/src/agentdojo/default_suites/v1/travel/user_tasks.py
travel: 20 user tasks

总计: 87 user tasks


## 2. 统计Type I-A攻击

In [30]:
def parse_injection_task_file(file_path: Path) -> Dict:
    """Scan an injection-task source file and describe each active InjectionTask class.

    The file is read as text and never imported. A class is "active" when its
    ``class InjectionTask<N>`` header starts a line and is not commented out.
    The body of a class runs from its header up to the next class header that
    starts a line (commented out or not), so the word "class" inside a string
    literal does not cut the body short, and ``InjectionTask1`` is never
    confused with ``InjectionTask10``.

    Args:
        file_path: Path of the injection-task file to scan.

    Returns:
        ``{'count': int, 'tasks': list}``. Each entry of ``tasks`` is a dict with

        * ``name``: the class name;
        * ``user_task_ids``: the ids from ``USER_TASK_IDS`` (preferred), else a
          one-element list with ``SOURCE_USER_TASK_ID``, else ``None``;
        * ``security_cases``: ``len(user_task_ids)``, or the string ``'all'``
          when the class names no user task.

        A missing file yields ``{'count': 0, 'tasks': []}``.
    """
    if not file_path.exists():
        return {'count': 0, 'tasks': []}

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Every class header at the start of a line marks a boundary. Group 1 is set
    # when the header is commented out; group 2 is the class name.
    header_re = re.compile(r'^[ \t]*(#+[ \t]*)?class[ \t]+(\w+)', re.MULTILINE)
    headers = list(header_re.finditer(content))

    tasks_info = []
    for pos, header in enumerate(headers):
        is_commented, class_name = header.group(1), header.group(2)
        if is_commented or not re.fullmatch(r'InjectionTask\d+', class_name):
            continue

        # The class body ends where the next header begins (or at end of file).
        body_end = headers[pos + 1].start() if pos + 1 < len(headers) else len(content)
        task_content = content[header.start():body_end]

        # Single id, e.g. SOURCE_USER_TASK_ID = "user_task_3"
        source_id_match = re.search(r'SOURCE_USER_TASK_ID\s*=\s*["\']([^"\']*)["\']', task_content)
        # Explicit list, e.g. USER_TASK_IDS = ["user_task_1", "user_task_2"]
        user_ids_match = re.search(r'USER_TASK_IDS\s*=\s*\[([^\]]+)\]', task_content)

        task_info = {'name': class_name}
        if user_ids_match:
            ids = re.findall(r'["\']([^"\']+)["\']', user_ids_match.group(1))
            task_info['user_task_ids'] = ids
            task_info['security_cases'] = len(ids)
        elif source_id_match and source_id_match.group(1):
            task_info['user_task_ids'] = [source_id_match.group(1)]
            task_info['security_cases'] = 1
        else:
            # No user task named: callers treat 'all' as "applies to every user task".
            task_info['user_task_ids'] = None
            task_info['security_cases'] = 'all'

        tasks_info.append(task_info)

    return {
        'count': len(tasks_info),
        'tasks': tasks_info
    }

def count_type_i_a_attacks(suite_name: str) -> Dict:
    """Gather Type I-A attack statistics for one suite.

    Reads ``type_i_a.py`` and ``type_i_a_injection_tasks.py`` from the suite's
    directory under ``ADVERSARIAL_PATH``; a missing file contributes zero.

    Args:
        suite_name: Suite directory name.

    Returns:
        Dict with ``benign_tools`` and ``co_domain_tools`` (always 0 here),
        ``malicious_tools`` (quoted entries found in the ``MALICIOUS_TOOLS``
        list), ``injection_tasks``, ``security_cases`` and ``reasoning_type``
        (always ``'single-path'``).

    Note:
        Tasks marked ``'all'`` are expanded with the module-level
        ``user_tasks_count`` mapping, which must already hold ``suite_name``.
    """
    suite_dir = ADVERSARIAL_PATH / suite_name
    tools_path = suite_dir / 'type_i_a.py'

    # Count the quoted names inside the MALICIOUS_TOOLS list literal.
    malicious_total = 0
    if tools_path.exists():
        tools_source = tools_path.read_text(encoding='utf-8')
        list_literal = re.search(r'MALICIOUS_TOOLS\s*=\s*\[([^\]]+)\]', tools_source, re.DOTALL)
        if list_literal:
            malicious_total = len(re.findall(r'["\']([^"\'\']+)["\']', list_literal.group(1)))

    parsed = parse_injection_task_file(suite_dir / 'type_i_a_injection_tasks.py')

    # 'all' means the task pairs with every user task of the suite.
    case_total = sum(
        user_tasks_count[suite_name] if task['security_cases'] == 'all' else task['security_cases']
        for task in parsed['tasks']
    )

    return {
        'benign_tools': 0,
        'malicious_tools': malicious_total,
        'co_domain_tools': 0,
        'injection_tasks': parsed['count'],
        'security_cases': case_total,
        'reasoning_type': 'single-path'
    }

# Type I-A statistics, keyed by suite.
type_i_a_stats = {suite: count_type_i_a_attacks(suite) for suite in SUITES}
for suite, stats in type_i_a_stats.items():
    print(f"{suite}: {stats}")

banking: {'benign_tools': 0, 'malicious_tools': 11, 'co_domain_tools': 0, 'injection_tasks': 16, 'security_cases': 0, 'reasoning_type': 'single-path'}
slack: {'benign_tools': 0, 'malicious_tools': 0, 'co_domain_tools': 0, 'injection_tasks': 21, 'security_cases': 0, 'reasoning_type': 'single-path'}
workspace: {'benign_tools': 0, 'malicious_tools': 24, 'co_domain_tools': 0, 'injection_tasks': 41, 'security_cases': 0, 'reasoning_type': 'single-path'}
travel: {'benign_tools': 0, 'malicious_tools': 0, 'co_domain_tools': 0, 'injection_tasks': 26, 'security_cases': 0, 'reasoning_type': 'single-path'}


## 3. 统计Type I-B攻击

In [31]:
def count_type_i_b_attacks(suite_name: str) -> Dict:
    """Gather Type I-B attack statistics for one suite.

    Reads ``type_i_b.py`` and ``type_i_b_injection_tasks.py`` from the suite's
    directory under ``ADVERSARIAL_PATH``; a missing file contributes zero.

    Args:
        suite_name: Suite directory name.

    Returns:
        Dict with ``benign_tools`` and ``co_domain_tools`` (always 0 here),
        ``malicious_tools`` (quoted entries found in the ``MALICIOUS_TOOLS``
        list), ``injection_tasks``, ``security_cases`` and ``reasoning_type``
        (always ``'single-path'``).

    Note:
        Tasks marked ``'all'`` are expanded with the module-level
        ``user_tasks_count`` mapping, which must already hold ``suite_name``.
    """
    suite_dir = ADVERSARIAL_PATH / suite_name
    tools_path = suite_dir / 'type_i_b.py'

    # Count the quoted names inside the MALICIOUS_TOOLS list literal.
    malicious_total = 0
    if tools_path.exists():
        tools_source = tools_path.read_text(encoding='utf-8')
        list_literal = re.search(r'MALICIOUS_TOOLS\s*=\s*\[([^\]]+)\]', tools_source, re.DOTALL)
        if list_literal:
            malicious_total = len(re.findall(r'["\']([^"\'\']+)["\']', list_literal.group(1)))

    parsed = parse_injection_task_file(suite_dir / 'type_i_b_injection_tasks.py')

    # 'all' means the task pairs with every user task of the suite.
    case_total = sum(
        user_tasks_count[suite_name] if task['security_cases'] == 'all' else task['security_cases']
        for task in parsed['tasks']
    )

    return {
        'benign_tools': 0,
        'malicious_tools': malicious_total,
        'co_domain_tools': 0,
        'injection_tasks': parsed['count'],
        'security_cases': case_total,
        'reasoning_type': 'single-path'
    }

# Type I-B statistics, keyed by suite.
type_i_b_stats = {suite: count_type_i_b_attacks(suite) for suite in SUITES}
for suite, stats in type_i_b_stats.items():
    print(f"{suite}: {stats}")

banking: {'benign_tools': 0, 'malicious_tools': 0, 'co_domain_tools': 0, 'injection_tasks': 32, 'security_cases': 0, 'reasoning_type': 'single-path'}
slack: {'benign_tools': 0, 'malicious_tools': 3, 'co_domain_tools': 0, 'injection_tasks': 35, 'security_cases': 0, 'reasoning_type': 'single-path'}
workspace: {'benign_tools': 0, 'malicious_tools': 0, 'co_domain_tools': 0, 'injection_tasks': 80, 'security_cases': 0, 'reasoning_type': 'single-path'}
travel: {'benign_tools': 0, 'malicious_tools': 15, 'co_domain_tools': 0, 'injection_tasks': 40, 'security_cases': 0, 'reasoning_type': 'single-path'}


## 4. 统计Type II-A和Type II-B攻击

In [32]:
def count_type_ii_attacks(suite_name: str) -> Dict:
    """Gather combined Type II-A and Type II-B attack statistics for one suite.

    Reads ``type_ii_a.py`` / ``type_ii_b.py`` and the matching
    ``*_injection_tasks.py`` files from the suite's directory under
    ``ADVERSARIAL_PATH``; a missing file contributes zero.

    Args:
        suite_name: Suite directory name.

    Returns:
        Dict with ``benign_tools`` (always 0 here), ``malicious_tools`` and
        ``co_domain_tools`` (both the number of quoted ``MALICIOUS_TOOLS``
        entries summed over the two tool files), ``injection_tasks`` and
        ``security_cases`` (summed over II-A and II-B) and ``reasoning_type``
        (always ``'multi-path'``).

    Note:
        Tasks marked ``'all'`` are expanded with the module-level
        ``user_tasks_count`` mapping, which must already hold ``suite_name``.
    """
    suite_dir = ADVERSARIAL_PATH / suite_name

    # Every malicious Type II tool is also counted as a co-domain tool, so one
    # running total serves both result fields.
    tool_total = 0
    for tools_name in ('type_ii_a.py', 'type_ii_b.py'):
        tools_path = suite_dir / tools_name
        if not tools_path.exists():
            continue
        tools_source = tools_path.read_text(encoding='utf-8')
        list_literal = re.search(r'MALICIOUS_TOOLS\s*=\s*\[([^\]]+)\]', tools_source, re.DOTALL)
        if list_literal:
            tool_total += len(re.findall(r'["\']([^"\'\']+)["\']', list_literal.group(1)))

    parsed_a = parse_injection_task_file(suite_dir / 'type_ii_a_injection_tasks.py')
    parsed_b = parse_injection_task_file(suite_dir / 'type_ii_b_injection_tasks.py')

    # 'all' means the task pairs with every user task of the suite.
    case_total = sum(
        user_tasks_count[suite_name] if task['security_cases'] == 'all' else task['security_cases']
        for task in parsed_a['tasks'] + parsed_b['tasks']
    )

    return {
        'benign_tools': 0,
        'malicious_tools': tool_total,
        'co_domain_tools': tool_total,
        'injection_tasks': parsed_a['count'] + parsed_b['count'],
        'security_cases': case_total,
        'reasoning_type': 'multi-path'
    }

# Type II (II-A + II-B) statistics, keyed by suite.
type_ii_stats = {suite: count_type_ii_attacks(suite) for suite in SUITES}
for suite, stats in type_ii_stats.items():
    print(f"{suite}: {stats}")

banking: {'benign_tools': 0, 'malicious_tools': 15, 'co_domain_tools': 15, 'injection_tasks': 32, 'security_cases': 0, 'reasoning_type': 'multi-path'}
slack: {'benign_tools': 0, 'malicious_tools': 6, 'co_domain_tools': 6, 'injection_tasks': 42, 'security_cases': 0, 'reasoning_type': 'multi-path'}
workspace: {'benign_tools': 0, 'malicious_tools': 14, 'co_domain_tools': 14, 'injection_tasks': 65, 'security_cases': 0, 'reasoning_type': 'multi-path'}
travel: {'benign_tools': 0, 'malicious_tools': 19, 'co_domain_tools': 19, 'injection_tasks': 40, 'security_cases': 0, 'reasoning_type': 'multi-path'}


## 5. 统计Type III-A攻击

In [33]:
def count_type_iii_a_attacks(suite_name: str) -> Dict:
    """Gather Type III-A attack statistics for one suite.

    Reads ``type_iii_a.py`` and ``type_iii_a_injection_tasks.py`` from the
    suite's directory under ``ADVERSARIAL_PATH``; a missing file contributes
    zero.

    Args:
        suite_name: Suite directory name.

    Returns:
        Dict with ``benign_tools`` (always 0 here), ``malicious_tools`` and
        ``co_domain_tools`` (both the number of quoted ``MALICIOUS_TOOLS``
        entries), ``injection_tasks``, ``security_cases`` and
        ``reasoning_type`` (always ``'multi-path'``).

    Note:
        Tasks marked ``'all'`` are expanded with the module-level
        ``user_tasks_count`` mapping, which must already hold ``suite_name``.
    """
    suite_dir = ADVERSARIAL_PATH / suite_name
    tools_path = suite_dir / 'type_iii_a.py'

    # Malicious Type III-A tools double as co-domain tools, so one count feeds both fields.
    tool_total = 0
    if tools_path.exists():
        tools_source = tools_path.read_text(encoding='utf-8')
        list_literal = re.search(r'MALICIOUS_TOOLS\s*=\s*\[([^\]]+)\]', tools_source, re.DOTALL)
        if list_literal:
            tool_total = len(re.findall(r'["\']([^"\'\']+)["\']', list_literal.group(1)))

    parsed = parse_injection_task_file(suite_dir / 'type_iii_a_injection_tasks.py')

    # 'all' means the task pairs with every user task of the suite.
    case_total = sum(
        user_tasks_count[suite_name] if task['security_cases'] == 'all' else task['security_cases']
        for task in parsed['tasks']
    )

    return {
        'benign_tools': 0,
        'malicious_tools': tool_total,
        'co_domain_tools': tool_total,
        'injection_tasks': parsed['count'],
        'security_cases': case_total,
        'reasoning_type': 'multi-path'
    }

# Type III-A statistics, keyed by suite.
type_iii_a_stats = {suite: count_type_iii_a_attacks(suite) for suite in SUITES}
for suite, stats in type_iii_a_stats.items():
    print(f"{suite}: {stats}")

banking: {'benign_tools': 0, 'malicious_tools': 4, 'co_domain_tools': 4, 'injection_tasks': 6, 'security_cases': 0, 'reasoning_type': 'multi-path'}
slack: {'benign_tools': 0, 'malicious_tools': 4, 'co_domain_tools': 4, 'injection_tasks': 5, 'security_cases': 0, 'reasoning_type': 'multi-path'}
workspace: {'benign_tools': 0, 'malicious_tools': 8, 'co_domain_tools': 8, 'injection_tasks': 7, 'security_cases': 0, 'reasoning_type': 'multi-path'}
travel: {'benign_tools': 0, 'malicious_tools': 8, 'co_domain_tools': 8, 'injection_tasks': 0, 'security_cases': 0, 'reasoning_type': 'multi-path'}


## 6. 统计Type III-B攻击

In [34]:
def count_type_iii_b_attacks(suite_name: str) -> Dict:
    """Gather Type III-B attack statistics for one suite.

    Reads ``type_iii_b.py`` and ``type_iii_b_injection_tasks.py`` from the
    suite's directory under ``ADVERSARIAL_PATH``; a missing file contributes
    zero.

    Args:
        suite_name: Suite directory name.

    Returns:
        Dict with ``benign_tools`` (always 0 here), ``malicious_tools`` and
        ``co_domain_tools`` (both the number of quoted ``MALICIOUS_TOOLS``
        entries), ``injection_tasks``, ``security_cases`` and
        ``reasoning_type`` (always ``'multi-path'``).

    Note:
        Tasks marked ``'all'`` are expanded with the module-level
        ``user_tasks_count`` mapping, which must already hold ``suite_name``.
    """
    suite_dir = ADVERSARIAL_PATH / suite_name
    tools_path = suite_dir / 'type_iii_b.py'

    # Malicious Type III-B tools double as co-domain tools, so one count feeds both fields.
    tool_total = 0
    if tools_path.exists():
        tools_source = tools_path.read_text(encoding='utf-8')
        list_literal = re.search(r'MALICIOUS_TOOLS\s*=\s*\[([^\]]+)\]', tools_source, re.DOTALL)
        if list_literal:
            tool_total = len(re.findall(r'["\']([^"\'\']+)["\']', list_literal.group(1)))

    parsed = parse_injection_task_file(suite_dir / 'type_iii_b_injection_tasks.py')

    # 'all' means the task pairs with every user task of the suite.
    case_total = sum(
        user_tasks_count[suite_name] if task['security_cases'] == 'all' else task['security_cases']
        for task in parsed['tasks']
    )

    return {
        'benign_tools': 0,
        'malicious_tools': tool_total,
        'co_domain_tools': tool_total,
        'injection_tasks': parsed['count'],
        'security_cases': case_total,
        'reasoning_type': 'multi-path'
    }

# Type III-B statistics, keyed by suite.
type_iii_b_stats = {suite: count_type_iii_b_attacks(suite) for suite in SUITES}
for suite, stats in type_iii_b_stats.items():
    print(f"{suite}: {stats}")

banking: {'benign_tools': 0, 'malicious_tools': 0, 'co_domain_tools': 0, 'injection_tasks': 9, 'security_cases': 0, 'reasoning_type': 'multi-path'}
slack: {'benign_tools': 0, 'malicious_tools': 5, 'co_domain_tools': 5, 'injection_tasks': 9, 'security_cases': 0, 'reasoning_type': 'multi-path'}
workspace: {'benign_tools': 0, 'malicious_tools': 9, 'co_domain_tools': 9, 'injection_tasks': 50, 'security_cases': 0, 'reasoning_type': 'multi-path'}
travel: {'benign_tools': 0, 'malicious_tools': 4, 'co_domain_tools': 4, 'injection_tasks': 79, 'security_cases': 0, 'reasoning_type': 'multi-path'}


## 7. 统计DataStream (Important Instruction)攻击

In [49]:
def count_important_instruction_attacks(suite_name: str) -> Dict:
    """Gather important_instruction attack statistics for one suite.

    Scans ``default_suites/v1/<suite>/injection_tasks.py`` under ``BASE_PATH``
    for classes followed by ``ATTACK = "important_instruction"`` and pairs each
    match with every user task of the suite (Cartesian product).

    Args:
        suite_name: Suite directory name.

    Returns:
        Dict with ``benign_tools``, ``malicious_tools`` and ``co_domain_tools``
        (always 0 here), ``injection_tasks`` (number of matches),
        ``security_cases`` (matches times ``user_tasks_count[suite_name]``) and
        ``reasoning_type`` (always ``'single-path'``). All counts are 0 when
        the file does not exist.
    """
    injection_file = BASE_PATH.joinpath("default_suites", "v1", suite_name, 'injection_tasks.py')
    # Echo the resolved path so the cell output shows which file was scanned.
    print(injection_file)

    task_total = 0
    case_total = 0
    if injection_file.exists():
        source = injection_file.read_text(encoding='utf-8')

        # NOTE(review): the recorded run reports 0 matches for every suite —
        # confirm the injection task files really declare this ATTACK marker.
        pattern = r'class\s+(\w+).*?ATTACK\s*=\s*["\']important_instruction["\']'
        task_total = len(re.findall(pattern, source, re.DOTALL))

        # Security cases = injection tasks x user tasks.
        case_total = task_total * user_tasks_count[suite_name]

    return {
        'benign_tools': 0,
        'malicious_tools': 0,
        'co_domain_tools': 0,
        'injection_tasks': task_total,
        'security_cases': case_total,
        'reasoning_type': 'single-path'
    }

# important_instruction statistics, keyed by suite. The counter prints the file
# it scans, so each suite is computed and reported in the same iteration.
important_instruction_stats = {}
for suite in SUITES:
    stats = important_instruction_stats[suite] = count_important_instruction_attacks(suite)
    print(f"{suite}: {stats}")

/Users/justin/BDAA/ACL/code/agentdojo/src/agentdojo/default_suites/v1/banking/injection_tasks.py
banking: {'benign_tools': 0, 'malicious_tools': 0, 'co_domain_tools': 0, 'injection_tasks': 0, 'security_cases': 0, 'reasoning_type': 'single-path'}
/Users/justin/BDAA/ACL/code/agentdojo/src/agentdojo/default_suites/v1/slack/injection_tasks.py
slack: {'benign_tools': 0, 'malicious_tools': 0, 'co_domain_tools': 0, 'injection_tasks': 0, 'security_cases': 0, 'reasoning_type': 'single-path'}
/Users/justin/BDAA/ACL/code/agentdojo/src/agentdojo/default_suites/v1/workspace/injection_tasks.py
workspace: {'benign_tools': 0, 'malicious_tools': 0, 'co_domain_tools': 0, 'injection_tasks': 0, 'security_cases': 0, 'reasoning_type': 'single-path'}
/Users/justin/BDAA/ACL/code/agentdojo/src/agentdojo/default_suites/v1/travel/injection_tasks.py
travel: {'benign_tools': 0, 'malicious_tools': 0, 'co_domain_tools': 0, 'injection_tasks': 0, 'security_cases': 0, 'reasoning_type': 'single-path'}


## 8. 汇总统计并生成表格

In [36]:
def aggregate_stats(stats_dict: Dict[str, Dict]) -> Dict:
    """Collapse per-suite statistics into a single totals record.

    Args:
        stats_dict: Mapping of suite name to that suite's statistics dict.

    Returns:
        Dict whose numeric fields (``benign_tools``, ``malicious_tools``,
        ``co_domain_tools``, ``injection_tasks``, ``security_cases``) are summed
        over all suites, a missing field counting as 0. ``reasoning_type`` is
        the single value shared by all suites, or the distinct values sorted
        and joined with ``'/'``; a suite without the field contributes
        ``'unknown'``.
    """
    per_suite = list(stats_dict.values())

    numeric_fields = (
        'benign_tools',
        'malicious_tools',
        'co_domain_tools',
        'injection_tasks',
        'security_cases',
    )
    total = {
        field: sum(entry.get(field, 0) for entry in per_suite)
        for field in numeric_fields
    }

    # Reduce the distinct reasoning types to one display value.
    labels = {entry.get('reasoning_type', 'unknown') for entry in per_suite}
    if len(labels) == 1:
        total['reasoning_type'] = next(iter(labels))
    else:
        total['reasoning_type'] = '/'.join(sorted(labels))

    return total

# Totals across all suites, one entry per attack type.
all_attack_stats = {
    'DataStream (important_instruction)': aggregate_stats(important_instruction_stats),
    'Type I-A (参数覆盖-显式注入)': aggregate_stats(type_i_a_stats),
    'Type I-B (参数覆盖-条件触发)': aggregate_stats(type_i_b_stats),
    'Type II (工具替换攻击)': aggregate_stats(type_ii_stats),
    'Type III-A (SOP输出劫持)': aggregate_stats(type_iii_a_stats),
    'Type III-B (SOP+参数覆盖)': aggregate_stats(type_iii_b_stats),
}

# Build the frame row-wise (one row per attack type). Unlike transposing a
# column-wise frame, this lets each count column keep an integer dtype instead
# of degrading to object because of the string 'reasoning_type' values.
df = pd.DataFrame.from_dict(all_attack_stats, orient='index')

# Rename by key rather than by position, so a change in key order cannot
# silently mislabel a column.
df = df.rename(columns={
    'benign_tools': '良性工具数量',
    'malicious_tools': '恶意工具数量',
    'co_domain_tools': '同域工具数量',
    'injection_tasks': '注入任务数量',
    'security_cases': '安全用例数量',
    'reasoning_type': '推理类型',
})

# The benign (user) task total is the same for every attack type.
df.insert(0, '良性任务数量', total_user_tasks)

print("\n" + "="*80)
print("SIREN-Bench 攻击类型统计汇总")
print("="*80)
print(df.to_string())
print("="*80)


SIREN-Bench 攻击类型统计汇总
                                    良性任务数量 良性工具数量 恶意工具数量 同域工具数量 注入任务数量 安全用例数量         推理类型
DataStream (important_instruction)       0      0      0      0      0      0  single-path
Type I-A (参数覆盖-显式注入)                     0      0     35      0    104      0  single-path
Type I-B (参数覆盖-条件触发)                     0      0     18      0    187      0  single-path
Type II (工具替换攻击)                         0      0     54     54    179      0   multi-path
Type III-A (SOP输出劫持)                     0      0     24     24     18      0   multi-path
Type III-B (SOP+参数覆盖)                    0      0     18     18    147      0   multi-path


## 9. 导出结果到CSV

In [37]:
# Save the summary table as CSV; the utf-8-sig codec prefixes the file with a UTF-8 BOM.
output_file = '/Users/justin/BDAA/ACL/code/agentdojo/siren_bench_statistics.csv'
df.to_csv(output_file, encoding='utf-8-sig')
print(f"\n统计结果已保存到: {output_file}")


统计结果已保存到: /Users/justin/BDAA/ACL/code/agentdojo/siren_bench_statistics.csv


## 10. 详细的分Suite统计

In [38]:
# Per-suite breakdown: one row for every (suite, attack type) pair.
attack_stat_sources = [
    ('important_instruction', important_instruction_stats),
    ('type_i_a', type_i_a_stats),
    ('type_i_b', type_i_b_stats),
    ('type_ii', type_ii_stats),
    ('type_iii_a', type_iii_a_stats),
    ('type_iii_b', type_iii_b_stats),
]

detailed_stats = []
for suite in SUITES:
    for attack_type, stats_dict in attack_stat_sources:
        # Skip attack types that have no entry for this suite.
        if suite not in stats_dict:
            continue
        stats = stats_dict[suite]
        row = {
            'Suite': suite,
            '攻击类型': attack_type,
            '用户任务数': user_tasks_count[suite],
            '恶意工具数': stats.get('malicious_tools', 0),
            '同域工具数': stats.get('co_domain_tools', 0),
            '注入任务数': stats.get('injection_tasks', 0),
            '安全用例数': stats.get('security_cases', 0),
            '推理类型': stats.get('reasoning_type', 'unknown')
        }
        detailed_stats.append(row)

df_detailed = pd.DataFrame(detailed_stats)

banner = "="*100
print("\n" + banner)
print("SIREN-Bench 详细统计 (按Suite分类)")
print(banner)
print(df_detailed.to_string(index=False))
print(banner)

# Save the detailed table as CSV (utf-8-sig writes a UTF-8 BOM).
detailed_output_file = '/Users/justin/BDAA/ACL/code/agentdojo/siren_bench_detailed_statistics.csv'
df_detailed.to_csv(detailed_output_file, encoding='utf-8-sig', index=False)
print(f"\n详细统计结果已保存到: {detailed_output_file}")


SIREN-Bench 详细统计 (按Suite分类)
    Suite                  攻击类型  用户任务数  恶意工具数  同域工具数  注入任务数  安全用例数        推理类型
  banking important_instruction      0      0      0      0      0 single-path
  banking              type_i_a      0     11      0     16      0 single-path
  banking              type_i_b      0      0      0     32      0 single-path
  banking               type_ii      0     15     15     32      0  multi-path
  banking            type_iii_a      0      4      4      6      0  multi-path
  banking            type_iii_b      0      0      0      9      0  multi-path
    slack important_instruction      0      0      0      0      0 single-path
    slack              type_i_a      0      0      0     21      0 single-path
    slack              type_i_b      0      3      0     35      0 single-path
    slack               type_ii      0      6      6     42      0  multi-path
    slack            type_iii_a      0      4      4      5      0  multi-path
    slack            ty

## 11. 可视化统计结果

In [39]:
import matplotlib.pyplot as plt
import seaborn as sns

# Fonts able to render the Chinese labels (first entry for macOS, second for Windows).
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei']
plt.rcParams['axes.unicode_minus'] = False

# 2x2 grid: one bar chart per metric, one bar per attack type.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('SIREN-Bench 攻击类型统计可视化', fontsize=16, fontweight='bold')

# (summary-table column, panel title, y-axis label, bar colour), listed in
# row-major panel order to match axes.flat.
panels = [
    ('安全用例数量', '各攻击类型的安全用例数量', '安全用例数', 'steelblue'),
    ('恶意工具数量', '各攻击类型的恶意工具数量', '恶意工具数', 'coral'),
    ('注入任务数量', '各攻击类型的注入任务数量', '注入任务数', 'mediumseagreen'),
    ('同域工具数量', '各攻击类型的同域工具数量', '同域工具数', 'gold'),
]

for ax, (column, title, ylabel, color) in zip(axes.flat, panels):
    df[column].plot(kind='bar', ax=ax, color=color)
    ax.set_title(title, fontsize=12)
    ax.set_ylabel(ylabel, fontsize=10)
    ax.set_xlabel('攻击类型', fontsize=10)
    ax.tick_params(axis='x', rotation=45)
    ax.grid(axis='y', alpha=0.3)

# Single source of truth for the output location (used for saving and reporting).
figure_path = '/Users/justin/BDAA/ACL/code/agentdojo/siren_bench_visualization.png'

plt.tight_layout()
plt.savefig(figure_path, dpi=300, bbox_inches='tight')
plt.show()

print(f"\n可视化图表已保存到: {figure_path}")

ModuleNotFoundError: No module named 'matplotlib'

## 总结

本notebook完成了以下统计：

1. **良性任务统计**: 统计了4个suite的用户任务总数
2. **6种攻击类型统计**:
   - DataStream (important_instruction)
   - Type I-A (参数覆盖-显式注入)
   - Type I-B (参数覆盖-条件触发)
   - Type II (工具替换攻击，包含II-A和II-B)
   - Type III-A (SOP输出劫持)
   - Type III-B (SOP+参数覆盖组合)

3. **统计指标**:
   - 良性工具数量
   - 恶意工具数量
   - 同域工具数量 (针对Type II和Type III)
   - 注入任务数量
   - 安全用例数量
   - 推理类型 (单路径/多路径)

4. **输出结果**:
   - 汇总统计表格 (CSV)
   - 详细分Suite统计表格 (CSV)
   - 可视化图表 (PNG)