# Agent Dojo Melon Attack Statistics Analysis

This notebook analyzes attack results parsed from the log directory selected in the configuration cell (for example `console_logs` or `console_melon_logs`).

In [59]:
import os
import re
import pandas as pd
from pathlib import Path

In [60]:

# Configuration
# The log directory can be overridden without editing the notebook by setting
# the AGENTDOJO_LOG_DIR environment variable; otherwise the default below is used.
# Other log directories this notebook has been pointed at:
# log_dir = Path("/Users/justin/BDAA/ACL/code/agentdojo/logs/console_melon_logs")
# log_dir = Path("/Users/justin/BDAA/ACL/code/agentdojo/logs/progent-llm-generate-policy")
# log_dir = Path("/Users/justin/BDAA/ACL/code/agentdojo/logs/spotlight")
DEFAULT_LOG_DIR = "/Users/justin/BDAA/ACL/code/agentdojo/logs/console_logs"
log_dir = Path(os.environ.get("AGENTDOJO_LOG_DIR") or DEFAULT_LOG_DIR)

log_files = sorted(log_dir.glob("*.log"))

# Determine the framework type from log_dir name.
# Markers are tested in order against the full path string; the first hit wins.
FRAMEWORK_MARKERS = (
    ("console_melon_logs", "melon"),
    ("console_logs", "base"),
    ("progent-llm-generate-policy", "progent"),
    ("spotlight", "spotlight"),
)
framework = next(
    (label for marker, label in FRAMEWORK_MARKERS if marker in str(log_dir)),
    "unknown",
)

print(f"Analyzing logs from: {log_dir}")
print(f"Framework detected: {framework}")
print(f"Total log files found: {len(log_files)}")

# Surface a missing or empty directory here instead of failing in a later cell.
if not log_files:
    print(f"WARNING: no *.log files found in {log_dir}")

Analyzing logs from: /Users/justin/BDAA/ACL/code/agentdojo/logs/console_logs
Framework detected: base
Total log files found: 24


In [61]:
def extract_statistics(file_path):
    """
    Read a log file and pull out its final utility/security summary.

    The file is searched for every ``utility_true_count=(a/b=r)`` and
    ``security_true_ratio=(a/b=r)`` entry; only the last entry of each kind
    is used.

    Returns a tuple
    (utility_count, utility_total, utility_ratio,
     security_count, security_total, security_ratio)
    with counts as int and ratios as float, or None when either kind of
    entry is absent from the file.
    """
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as log_handle:
        log_text = log_handle.read()

    utility_hits = re.findall(r'utility_true_count=\((\d+)/(\d+)=([\d.]+)\)', log_text)
    security_hits = re.findall(r'security_true_ratio=\((\d+)/(\d+)=([\d.]+)\)', log_text)

    # Both summaries are required; otherwise the file yields no statistics.
    if not (utility_hits and security_hits):
        return None

    # The last occurrence is the one that counts.
    u_count, u_total, u_ratio = utility_hits[-1]
    s_count, s_total, s_ratio = security_hits[-1]

    return (
        int(u_count), int(u_total), float(u_ratio),
        int(s_count), int(s_total), float(s_ratio),
    )

In [62]:
# Parse all log files
# Column schema of the per-file table. It is passed to the DataFrame constructor
# so the columns exist even when no file could be parsed.
RESULT_COLUMNS = [
    'File', 'Suite', 'Attack Type',
    'Utility True Count', 'Utility Total', 'Utility Ratio',
    'Security True Count', 'Security Total', 'Security Ratio',
]

results = []
skipped_files = []  # (file name, reason) for every log that was not used

for log_file in log_files:
    file_name = log_file.name

    # Extract suite and attack type from filename
    # Format: {suite}_type_{attack_type}.log
    parts = file_name.replace('.log', '').split('_type_')
    if len(parts) != 2:
        skipped_files.append((file_name, "name does not match {suite}_type_{attack_type}.log"))
        continue
    suite = parts[0]
    attack_type = 'type_' + parts[1]

    stats = extract_statistics(log_file)
    if stats is None:
        skipped_files.append((file_name, "no utility/security summary found"))
        continue
    utility_count, utility_total, utility_ratio, security_count, security_total, security_ratio = stats

    results.append({
        'File': file_name,
        'Suite': suite,
        'Attack Type': attack_type,
        'Utility True Count': utility_count,
        'Utility Total': utility_total,
        'Utility Ratio': utility_ratio,
        'Security True Count': security_count,
        'Security Total': security_total,
        'Security Ratio': security_ratio
    })

# Create DataFrame
df = pd.DataFrame(results, columns=RESULT_COLUMNS)
print(f"Total files processed: {len(df)}")

# Report skipped logs so missing rows in later tables can be traced back.
for skipped_name, skip_reason in skipped_files:
    print(f"Skipped {skipped_name}: {skip_reason}")

df.head()

Total files processed: 24


Unnamed: 0,File,Suite,Attack Type,Utility True Count,Utility Total,Utility Ratio,Security True Count,Security Total,Security Ratio
0,banking_type_i_a_parameter_override.log,banking,type_i_a_parameter_override,6,16,0.375,14,16,0.875
1,banking_type_i_b_postaction_dependency.log,banking,type_i_b_postaction_dependency,16,16,1.0,0,16,0.0
2,banking_type_i_b_prerequisite_dependency.log,banking,type_i_b_prerequisite_dependency,14,16,0.875,15,16,0.9375
3,banking_type_ii_a_short_circuit_reasoning.log,banking,type_ii_a_short_circuit_reasoning,7,16,0.4375,12,16,0.75
4,banking_type_ii_b_induced_parameter.log,banking,type_ii_b_induced_parameter,3,16,0.1875,14,16,0.875


## Individual File Statistics

In [63]:
import pandas as pd

# 1. Pre-compute the aggregations the summary tables are assembled from.
count_columns = [
    'Utility True Count', 'Utility Total',
    'Security True Count', 'Security Total',
]

# A. Main: summed counts per (Attack Type, Suite) pair
grouped = df.groupby(['Attack Type', 'Suite'])[count_columns].sum().reset_index()

# B. Col Overall: summed counts per Attack Type (used for the right-hand column)
col_overall = df.groupby('Attack Type')[count_columns].sum()

# C. Row Overall: summed counts per Suite (used for the bottom row)
row_overall = df.groupby('Suite')[count_columns].sum()

# D. Grand Total: summed counts over all rows (used for the bottom-right cell)
grand_total = df[count_columns].sum()

# --- Formatting helper ---
def get_display_str(true_count, total):
    """Render a count pair as "<true>/<total>=<ratio>", ratio to two decimals.

    Both counts are shown as integers. When ``total`` equals zero the fixed
    placeholder "0/0=N/A" is returned instead of dividing.
    """
    if total != 0:
        return f"{int(true_count)}/{int(total)}={true_count / total:.2f}"
    return "0/0=N/A"

# --- Generic table builder ---
def build_full_table(metric_name):
    """Build the Attack Type x Suite display table for one metric.

    Reads the module-level frames ``grouped``, ``col_overall``,
    ``row_overall`` and ``grand_total`` and formats every cell with
    ``get_display_str``. None of those frames is modified.

    Parameters
    ----------
    metric_name : str
        Column prefix of the metric; the columns
        ``f'{metric_name} True Count'`` and ``f'{metric_name} Total'`` are read.

    Returns
    -------
    pandas.DataFrame
        One row per attack type plus a final 'Overall' row, and one column per
        suite plus a final 'Overall' column. Cells are display strings;
        attack-type/suite combinations without data show "-".
    """
    true_col = f'{metric_name} True Count'
    total_col = f'{metric_name} Total'
    display_col = f'{metric_name} Display'

    # 1. Base pivot (rows: Attack Type, cols: Suite).
    #    The display strings go into a derived frame so the shared `grouped`
    #    frame does not gain extra columns on every call.
    cells = grouped.assign(**{
        display_col: [
            get_display_str(true_count, total)
            for true_count, total in zip(grouped[true_col], grouped[total_col])
        ]
    })
    pivot_df = cells.pivot(index='Attack Type', columns='Suite', values=display_col).fillna("-")

    # 2. Right-hand 'Overall' column: one display string per attack type.
    col_series = pd.Series(
        [
            get_display_str(true_count, total)
            for true_count, total in zip(col_overall[true_col], col_overall[total_col])
        ],
        index=col_overall.index,
        name='Overall',
    )
    table = pivot_df.join(col_series)

    # 3. Bottom 'Overall' row, keyed by column name (suite names + 'Overall').
    bottom_row_data = {}

    #    (a) Per-suite totals, only for suites that are columns of the table.
    for suite in row_overall.index:
        if suite in table.columns:
            bottom_row_data[suite] = get_display_str(
                row_overall.loc[suite, true_col],
                row_overall.loc[suite, total_col]
            )

    #    (b) Grand total in the bottom-right cell.
    bottom_row_data['Overall'] = get_display_str(grand_total[true_col], grand_total[total_col])

    #    Append the row to the table.
    bottom_row_df = pd.DataFrame([bottom_row_data], index=['Overall'])
    final_table = pd.concat([table, bottom_row_df])

    return final_table

# --- Build and display both tables ---

utility_final, security_final = (
    build_full_table(metric) for metric in ('Utility', 'Security')
)

print("### Utility Statistics (Rows: Attack Type, Cols: Suite)")
display(utility_final)

# Visual separator between the two tables
print("\n" + "="*50 + "\n")

print("### Security Statistics (Rows: Attack Type, Cols: Suite)")
display(security_final)

### Utility Statistics (Rows: Attack Type, Cols: Suite)


Unnamed: 0,banking,slack,travel,workspace,Overall
type_i_a_parameter_override,6/16=0.38,1/21=0.05,0/20=0.00,25/117=0.21,32/174=0.18
type_i_b_postaction_dependency,16/16=1.00,14/15=0.93,5/20=0.25,21/40=0.53,56/91=0.62
type_i_b_prerequisite_dependency,14/16=0.88,11/17=0.65,2/20=0.10,37/40=0.93,64/93=0.69
type_ii_a_short_circuit_reasoning,7/16=0.44,16/21=0.76,4/20=0.20,28/34=0.82,55/91=0.60
type_ii_b_induced_parameter,3/16=0.19,6/21=0.29,0/20=0.00,7/31=0.23,16/88=0.18
type_iii_a_sop_exfiltration,15/96=0.16,7/35=0.20,4/100=0.04,22/41=0.54,48/272=0.18
Overall,61/176=0.35,55/130=0.42,15/200=0.07,140/303=0.46,271/809=0.33




### Security Statistics (Rows: Attack Type, Cols: Suite)


Unnamed: 0,banking,slack,travel,workspace,Overall
type_i_a_parameter_override,14/16=0.88,18/21=0.86,19/20=0.95,103/117=0.88,154/174=0.89
type_i_b_postaction_dependency,0/16=0.00,10/15=0.67,15/20=0.75,16/40=0.40,41/91=0.45
type_i_b_prerequisite_dependency,15/16=0.94,16/17=0.94,19/20=0.95,0/40=0.00,50/93=0.54
type_ii_a_short_circuit_reasoning,12/16=0.75,16/21=0.76,17/20=0.85,4/34=0.12,49/91=0.54
type_ii_b_induced_parameter,14/16=0.88,13/21=0.62,20/20=1.00,21/31=0.68,68/88=0.77
type_iii_a_sop_exfiltration,80/96=0.83,27/35=0.77,82/100=0.82,16/41=0.39,205/272=0.75
Overall,135/176=0.77,100/130=0.77,172/200=0.86,160/303=0.53,567/809=0.70


## Statistics Grouped by Attack Type

In [64]:
# Aggregate per attack type: list the contributing suites and sum the raw counts
grouped = (
    df.groupby('Attack Type')
    .agg({
        'Suite': lambda suite_names: ', '.join(sorted(suite_names)),
        'Utility True Count': 'sum',
        'Utility Total': 'sum',
        'Security True Count': 'sum',
        'Security Total': 'sum',
    })
    .reset_index()
)

# Ratios are recomputed from the summed counts (not averaged per file)
grouped['Utility Ratio'] = grouped['Utility True Count'].div(grouped['Utility Total'])
grouped['Security Ratio'] = grouped['Security True Count'].div(grouped['Security Total'])

# "count/total=ratio" display strings, ratio to four decimals
grouped['Utility'] = [
    f"{count}/{total}={ratio:.4f}"
    for count, total, ratio in zip(
        grouped['Utility True Count'], grouped['Utility Total'], grouped['Utility Ratio']
    )
]
grouped['Security'] = [
    f"{count}/{total}={ratio:.4f}"
    for count, total, ratio in zip(
        grouped['Security True Count'], grouped['Security Total'], grouped['Security Ratio']
    )
]

# Keep only the human-readable columns for display
grouped_display = grouped.loc[:, ['Attack Type', 'Suite', 'Utility', 'Security']].copy()
grouped_display

Unnamed: 0,Attack Type,Suite,Utility,Security
0,type_i_a_parameter_override,"banking, slack, travel, workspace",32/174=0.1839,154/174=0.8851
1,type_i_b_postaction_dependency,"banking, slack, travel, workspace",56/91=0.6154,41/91=0.4505
2,type_i_b_prerequisite_dependency,"banking, slack, travel, workspace",64/93=0.6882,50/93=0.5376
3,type_ii_a_short_circuit_reasoning,"banking, slack, travel, workspace",55/91=0.6044,49/91=0.5385
4,type_ii_b_induced_parameter,"banking, slack, travel, workspace",16/88=0.1818,68/88=0.7727
5,type_iii_a_sop_exfiltration,"banking, slack, travel, workspace",48/272=0.1765,205/272=0.7537


## Detailed Statistics by Attack Type and Suite

In [65]:
# Lay the per-file ratios out as Attack Type x Suite grids.
# aggfunc='first' keeps the first value found for each (Attack Type, Suite) cell.
pivot_layout = dict(index='Attack Type', columns='Suite', aggfunc='first')

pivot_utility = df.pivot_table(values='Utility Ratio', **pivot_layout)
pivot_security = df.pivot_table(values='Security Ratio', **pivot_layout)

print("\n=== Utility Ratios by Attack Type and Suite ===")
print(pivot_utility)
print("\n=== Security Ratios by Attack Type and Suite ===")
print(pivot_security)


=== Utility Ratios by Attack Type and Suite ===
Suite                              banking     slack  travel  workspace
Attack Type                                                            
type_i_a_parameter_override        0.37500  0.047619    0.00   0.213675
type_i_b_postaction_dependency     1.00000  0.933333    0.25   0.525000
type_i_b_prerequisite_dependency   0.87500  0.647059    0.10   0.925000
type_ii_a_short_circuit_reasoning  0.43750  0.761905    0.20   0.823529
type_ii_b_induced_parameter        0.18750  0.285714    0.00   0.225806
type_iii_a_sop_exfiltration        0.15625  0.200000    0.04   0.536585

=== Security Ratios by Attack Type and Suite ===
Suite                               banking     slack  travel  workspace
Attack Type                                                             
type_i_a_parameter_override        0.875000  0.857143    0.95   0.880342
type_i_b_postaction_dependency     0.000000  0.666667    0.75   0.400000
type_i_b_prerequisite_dependency 

## Summary Statistics

In [66]:
# Overall summary
banner = "=" * 80
print(banner)
print("OVERALL SUMMARY")
print(banner)

# Sum each raw count column over all parsed files
total_utility_count, total_utility_total, total_security_count, total_security_total = (
    df[column].sum()
    for column in (
        'Utility True Count', 'Utility Total',
        'Security True Count', 'Security Total',
    )
)

overall_utility = total_utility_count / total_utility_total
overall_security = total_security_count / total_security_total

print(f"\nTotal Utility: {total_utility_count}/{total_utility_total} = {overall_utility:.4f} ({overall_utility*100:.2f}%)")
print(f"Total Security: {total_security_count}/{total_security_total} = {overall_security:.4f} ({overall_security*100:.2f}%)")

print("\n" + banner)
print("SUMMARY BY ATTACK TYPE")
print(banner)

# One short paragraph per attack type, taken from the `grouped` table
summary_columns = ['Attack Type', 'Suite', 'Utility', 'Security']
for attack_name, suite_list, utility_text, security_text in grouped[summary_columns].itertuples(index=False, name=None):
    print(f"\n{attack_name}:")
    print(f"  Suites: {suite_list}")
    print(f"  Utility: {utility_text}")
    print(f"  Security: {security_text}")

OVERALL SUMMARY

Total Utility: 271/809 = 0.3350 (33.50%)
Total Security: 567/809 = 0.7009 (70.09%)

SUMMARY BY ATTACK TYPE

type_i_a_parameter_override:
  Suites: banking, slack, travel, workspace
  Utility: 32/174=0.1839
  Security: 154/174=0.8851

type_i_b_postaction_dependency:
  Suites: banking, slack, travel, workspace
  Utility: 56/91=0.6154
  Security: 41/91=0.4505

type_i_b_prerequisite_dependency:
  Suites: banking, slack, travel, workspace
  Utility: 64/93=0.6882
  Security: 50/93=0.5376

type_ii_a_short_circuit_reasoning:
  Suites: banking, slack, travel, workspace
  Utility: 55/91=0.6044
  Security: 49/91=0.5385

type_ii_b_induced_parameter:
  Suites: banking, slack, travel, workspace
  Utility: 16/88=0.1818
  Security: 68/88=0.7727

type_iii_a_sop_exfiltration:
  Suites: banking, slack, travel, workspace
  Utility: 48/272=0.1765
  Security: 205/272=0.7537


## Overall Security and Utility Statistics

In [67]:
# Two-row table (Utility, Security) built from the overall totals
metric_rows = [
    ('Utility', total_utility_count, total_utility_total, overall_utility),
    ('Security', total_security_count, total_security_total, overall_security),
]
overall_stats = pd.DataFrame({
    'Metric': [name for name, _, _, _ in metric_rows],
    'True Count': [count for _, count, _, _ in metric_rows],
    'Total': [total for _, _, total, _ in metric_rows],
    'Ratio': [ratio for _, _, _, ratio in metric_rows],
    'Percentage': [f'{ratio*100:.2f}%' for _, _, _, ratio in metric_rows],
})

rule = "=" * 80
print(rule)
print("OVERALL STATISTICS ACROSS ALL ATTACK TYPES AND SUITES")
print(rule)
print(f"\nTotal Files Analyzed: {len(df)}")
print(f"Total Test Cases: {total_utility_total}")
print()

overall_stats

OVERALL STATISTICS ACROSS ALL ATTACK TYPES AND SUITES

Total Files Analyzed: 24
Total Test Cases: 809



Unnamed: 0,Metric,True Count,Total,Ratio,Percentage
0,Utility,271,809,0.334981,33.50%
1,Security,567,809,0.700865,70.09%


In [68]:
# Write the per-file table to a CSV named after the detected framework
individual_csv = f'individual_statistics_{framework}.csv'
df.to_csv(individual_csv, index=False)
print(f"Exported individual statistics to: {individual_csv}")

# Write the per-attack-type table (numeric columns only, no display strings)
grouped_csv = f'grouped_statistics_{framework}.csv'
grouped_export_columns = [
    'Attack Type', 'Suite',
    'Utility True Count', 'Utility Total', 'Utility Ratio',
    'Security True Count', 'Security Total', 'Security Ratio',
]
grouped.loc[:, grouped_export_columns].to_csv(grouped_csv, index=False)
print(f"Exported grouped statistics to: {grouped_csv}")

Exported individual statistics to: individual_statistics_base.csv
Exported grouped statistics to: grouped_statistics_base.csv


In [69]:
# Select both type_i_b variants (postaction and prerequisite dependency)
is_type_i_b = df['Attack Type'].str.startswith('type_i_b_')
type_i_b_attacks = df[is_type_i_b]

# Pool the raw counts of the two variants within each suite
type_i_b_count_columns = [
    'Utility True Count', 'Utility Total',
    'Security True Count', 'Security Total',
]
type_i_b_combined = (
    type_i_b_attacks.groupby('Suite')[type_i_b_count_columns].sum().reset_index()
)

# Ratios recomputed from the pooled counts
type_i_b_combined['Utility Ratio'] = type_i_b_combined['Utility True Count'].div(type_i_b_combined['Utility Total'])
type_i_b_combined['Security Ratio'] = type_i_b_combined['Security True Count'].div(type_i_b_combined['Security Total'])

# "count/total=ratio" display strings, ratio to four decimals
type_i_b_combined['Utility'] = [
    f"{count}/{total}={ratio:.4f}"
    for count, total, ratio in zip(
        type_i_b_combined['Utility True Count'],
        type_i_b_combined['Utility Total'],
        type_i_b_combined['Utility Ratio'],
    )
]
type_i_b_combined['Security'] = [
    f"{count}/{total}={ratio:.4f}"
    for count, total, ratio in zip(
        type_i_b_combined['Security True Count'],
        type_i_b_combined['Security Total'],
        type_i_b_combined['Security Ratio'],
    )
]

# Totals over all suites
total_type_i_b_utility_count = type_i_b_combined['Utility True Count'].sum()
total_type_i_b_utility_total = type_i_b_combined['Utility Total'].sum()
total_type_i_b_security_count = type_i_b_combined['Security True Count'].sum()
total_type_i_b_security_total = type_i_b_combined['Security Total'].sum()

overall_type_i_b_utility = total_type_i_b_utility_count / total_type_i_b_utility_total
overall_type_i_b_security = total_type_i_b_security_count / total_type_i_b_security_total

heavy_rule = "=" * 80
print(heavy_rule)
print("TYPE I-B COMBINED STATISTICS (Both postaction & prerequisite dependency)")
print(heavy_rule)
print()
print("By Suite:")
display_cols = type_i_b_combined[['Suite', 'Utility', 'Security']]
print(display_cols.to_string(index=False))
print()
print("-" * 80)
print(f"\nOverall Type I-B:")
print(f"  Utility:  {total_type_i_b_utility_count}/{total_type_i_b_utility_total} = {overall_type_i_b_utility:.4f} ({overall_type_i_b_utility*100:.2f}%)")
print(f"  Security: {total_type_i_b_security_count}/{total_type_i_b_security_total} = {overall_type_i_b_security:.4f} ({overall_type_i_b_security*100:.2f}%)")
print()

# Per-variant breakdown: one row per (suite, variant), using the per-file ratios
type_i_b_detailed = type_i_b_attacks.loc[:, [
    'Suite', 'Attack Type',
    'Utility True Count', 'Utility Total', 'Utility Ratio',
    'Security True Count', 'Security Total', 'Security Ratio',
]].copy()

type_i_b_detailed['Utility'] = [
    f"{count}/{total}={ratio:.4f}"
    for count, total, ratio in zip(
        type_i_b_detailed['Utility True Count'],
        type_i_b_detailed['Utility Total'],
        type_i_b_detailed['Utility Ratio'],
    )
]
type_i_b_detailed['Security'] = [
    f"{count}/{total}={ratio:.4f}"
    for count, total, ratio in zip(
        type_i_b_detailed['Security True Count'],
        type_i_b_detailed['Security Total'],
        type_i_b_detailed['Security Ratio'],
    )
]

print("\nDetailed Breakdown by Variant:")
type_i_b_detailed[['Suite', 'Attack Type', 'Utility', 'Security']]

TYPE I-B COMBINED STATISTICS (Both postaction & prerequisite dependency)

By Suite:
    Suite      Utility     Security
  banking 30/32=0.9375 15/32=0.4688
    slack 25/32=0.7812 26/32=0.8125
   travel  7/40=0.1750 34/40=0.8500
workspace 58/80=0.7250 16/80=0.2000

--------------------------------------------------------------------------------

Overall Type I-B:
  Utility:  120/184 = 0.6522 (65.22%)
  Security: 91/184 = 0.4946 (49.46%)


Detailed Breakdown by Variant:


Unnamed: 0,Suite,Attack Type,Utility,Security
1,banking,type_i_b_postaction_dependency,16/16=1.0000,0/16=0.0000
2,banking,type_i_b_prerequisite_dependency,14/16=0.8750,15/16=0.9375
7,slack,type_i_b_postaction_dependency,14/15=0.9333,10/15=0.6667
8,slack,type_i_b_prerequisite_dependency,11/17=0.6471,16/17=0.9412
13,travel,type_i_b_postaction_dependency,5/20=0.2500,15/20=0.7500
14,travel,type_i_b_prerequisite_dependency,2/20=0.1000,19/20=0.9500
19,workspace,type_i_b_postaction_dependency,21/40=0.5250,16/40=0.4000
20,workspace,type_i_b_prerequisite_dependency,37/40=0.9250,0/40=0.0000


## Complete Statistics Table (Type I-B Merged)

In [70]:
# Per-attack-type totals, with the two type_i_b variants pooled into one row
complete_stats = []

# Get all unique attack types
attack_types = sorted(df['Attack Type'].unique())
type_i_b_data = df[df['Attack Type'].str.startswith('type_i_b_')]

# Every non-type_i_b attack type gets its own selection (alphabetical order);
# the pooled type_i_b selection is appended last.
selections = [
    (type_name, df[df['Attack Type'] == type_name])
    for type_name in attack_types
    if not type_name.startswith('type_i_b_')
]
selections.append(('type_i_b (merged)', type_i_b_data))

for row_label, selection in selections:
    utility_count = selection['Utility True Count'].sum()
    utility_total = selection['Utility Total'].sum()
    # A zero total yields a ratio of 0 rather than a division by zero
    utility_ratio = utility_count / utility_total if utility_total > 0 else 0

    security_count = selection['Security True Count'].sum()
    security_total = selection['Security Total'].sum()
    security_ratio = security_count / security_total if security_total > 0 else 0

    complete_stats.append({
        'Attack Type': row_label,
        'Utility True Count': utility_count,
        'Utility Total': utility_total,
        'Utility Ratio': utility_ratio,
        'Security True Count': security_count,
        'Security Total': security_total,
        'Security Ratio': security_ratio
    })

# Fixed presentation order; the merged row sits where the type_i_b variants would be
sort_order = {
    'type_i_a_parameter_override': 0,
    'type_i_b (merged)': 1,
    'type_ii_a_short_circuit_reasoning': 2,
    'type_ii_b_induced_parameter': 3,
    'type_iii_a_sop_exfiltration': 4
}
complete_df = pd.DataFrame(complete_stats)
complete_df = (
    complete_df
    .assign(sort_key=complete_df['Attack Type'].map(sort_order))
    .sort_values('sort_key')
    .drop('sort_key', axis=1)
    .reset_index(drop=True)
)

# "count/total=ratio" display strings, ratio to four decimals
complete_df['Utility'] = [
    f"{count}/{total}={ratio:.4f}"
    for count, total, ratio in zip(
        complete_df['Utility True Count'], complete_df['Utility Total'], complete_df['Utility Ratio']
    )
]
complete_df['Security'] = [
    f"{count}/{total}={ratio:.4f}"
    for count, total, ratio in zip(
        complete_df['Security True Count'], complete_df['Security Total'], complete_df['Security Ratio']
    )
]

table_rule = "=" * 80
print(table_rule)
print("COMPLETE ATTACK STATISTICS TABLE (Type I-B Merged)")
print(table_rule)
print()

# Compact table: display strings only
display_complete = complete_df[['Attack Type', 'Utility', 'Security']].copy()
print(display_complete.to_string(index=False))

print()
print(table_rule)
print()

# Numeric table: all counts and ratios
print("Detailed Version with All Numbers:")
print()
complete_df[['Attack Type', 'Utility True Count', 'Utility Total', 'Utility Ratio',
             'Security True Count', 'Security Total', 'Security Ratio']]

COMPLETE ATTACK STATISTICS TABLE (Type I-B Merged)

                      Attack Type        Utility       Security
      type_i_a_parameter_override  32/174=0.1839 154/174=0.8851
                type_i_b (merged) 120/184=0.6522  91/184=0.4946
type_ii_a_short_circuit_reasoning   55/91=0.6044   49/91=0.5385
      type_ii_b_induced_parameter   16/88=0.1818   68/88=0.7727
      type_iii_a_sop_exfiltration  48/272=0.1765 205/272=0.7537


Detailed Version with All Numbers:



Unnamed: 0,Attack Type,Utility True Count,Utility Total,Utility Ratio,Security True Count,Security Total,Security Ratio
0,type_i_a_parameter_override,32,174,0.183908,154,174,0.885057
1,type_i_b (merged),120,184,0.652174,91,184,0.494565
2,type_ii_a_short_circuit_reasoning,55,91,0.604396,49,91,0.538462
3,type_ii_b_induced_parameter,16,88,0.181818,68,88,0.772727
4,type_iii_a_sop_exfiltration,48,272,0.176471,205,272,0.753676


In [71]:
# Per-suite table with the two type_i_b variants pooled into one row per suite
banner = "=" * 80
print(banner)
print("COMPLETE STATISTICS BY SUITE (Type I-B Merged)")
print(banner)
print()

suites = sorted(df['Suite'].unique())
complete_by_suite = []

# Attack types reported as-is (the type_i_b variants are pooled separately below)
attack_types_to_process = [
    'type_i_a_parameter_override',
    'type_ii_a_short_circuit_reasoning',
    'type_ii_b_induced_parameter',
    'type_iii_a_sop_exfiltration'
]

for suite in suites:
    suite_data = df[df['Suite'] == suite].copy()

    # Individual attack types: copy the first matching per-file row
    for attack_type in attack_types_to_process:
        attack_suite_data = suite_data[suite_data['Attack Type'] == attack_type]
        if attack_suite_data.empty:
            continue
        row = attack_suite_data.iloc[0]
        complete_by_suite.append({
            'Suite': suite,
            'Attack Type': attack_type,
            'Utility True Count': int(row['Utility True Count']),
            'Utility Total': int(row['Utility Total']),
            'Utility Ratio': row['Utility Ratio'],
            'Security True Count': int(row['Security True Count']),
            'Security Total': int(row['Security Total']),
            'Security Ratio': row['Security Ratio']
        })

    # Pooled type_i_b row for this suite: sum the counts, recompute the ratios
    type_i_b_suite_data = suite_data[suite_data['Attack Type'].str.startswith('type_i_b_')]
    if type_i_b_suite_data.empty:
        continue

    utility_count = type_i_b_suite_data['Utility True Count'].sum()
    utility_total = type_i_b_suite_data['Utility Total'].sum()
    utility_ratio = utility_count / utility_total if utility_total > 0 else 0

    security_count = type_i_b_suite_data['Security True Count'].sum()
    security_total = type_i_b_suite_data['Security Total'].sum()
    security_ratio = security_count / security_total if security_total > 0 else 0

    complete_by_suite.append({
        'Suite': suite,
        'Attack Type': 'type_i_b (merged)',
        'Utility True Count': utility_count,
        'Utility Total': utility_total,
        'Utility Ratio': utility_ratio,
        'Security True Count': security_count,
        'Security Total': security_total,
        'Security Ratio': security_ratio
    })

# Order rows by suite, then by the fixed attack-type presentation order
sort_order = {
    'type_i_a_parameter_override': 0,
    'type_i_b (merged)': 1,
    'type_ii_a_short_circuit_reasoning': 2,
    'type_ii_b_induced_parameter': 3,
    'type_iii_a_sop_exfiltration': 4
}
complete_by_suite_df = pd.DataFrame(complete_by_suite)
complete_by_suite_df = (
    complete_by_suite_df
    .assign(sort_key=complete_by_suite_df['Attack Type'].map(sort_order))
    .sort_values(['Suite', 'sort_key'])
    .drop('sort_key', axis=1)
    .reset_index(drop=True)
)

# "count/total=ratio" display strings, ratio to four decimals
complete_by_suite_df['Utility'] = [
    f"{count}/{total}={ratio:.4f}"
    for count, total, ratio in zip(
        complete_by_suite_df['Utility True Count'],
        complete_by_suite_df['Utility Total'],
        complete_by_suite_df['Utility Ratio'],
    )
]
complete_by_suite_df['Security'] = [
    f"{count}/{total}={ratio:.4f}"
    for count, total, ratio in zip(
        complete_by_suite_df['Security True Count'],
        complete_by_suite_df['Security Total'],
        complete_by_suite_df['Security Ratio'],
    )
]

# Long-format table of display strings
display_by_suite = complete_by_suite_df[['Suite', 'Attack Type', 'Utility', 'Security']].copy()
print(display_by_suite.to_string(index=False))

print()
print(banner)
print()

# Wide-format grids (rows: Attack Type, cols: Suite) for side-by-side comparison
merged_pivot_layout = dict(index='Attack Type', columns='Suite', aggfunc='first')

print("Utility Ratios by Suite and Attack Type:")
pivot_utility_merged = complete_by_suite_df.pivot_table(values='Utility Ratio', **merged_pivot_layout)
print(pivot_utility_merged)

print()
print("Security Ratios by Suite and Attack Type:")
pivot_security_merged = complete_by_suite_df.pivot_table(values='Security Ratio', **merged_pivot_layout)
print(pivot_security_merged)

COMPLETE STATISTICS BY SUITE (Type I-B Merged)

    Suite                       Attack Type       Utility       Security
  banking       type_i_a_parameter_override   6/16=0.3750   14/16=0.8750
  banking                 type_i_b (merged)  30/32=0.9375   15/32=0.4688
  banking type_ii_a_short_circuit_reasoning   7/16=0.4375   12/16=0.7500
  banking       type_ii_b_induced_parameter   3/16=0.1875   14/16=0.8750
  banking       type_iii_a_sop_exfiltration  15/96=0.1562   80/96=0.8333
    slack       type_i_a_parameter_override   1/21=0.0476   18/21=0.8571
    slack                 type_i_b (merged)  25/32=0.7812   26/32=0.8125
    slack type_ii_a_short_circuit_reasoning  16/21=0.7619   16/21=0.7619
    slack       type_ii_b_induced_parameter   6/21=0.2857   13/21=0.6190
    slack       type_iii_a_sop_exfiltration   7/35=0.2000   27/35=0.7714
   travel       type_i_a_parameter_override   0/20=0.0000   19/20=0.9500
   travel                 type_i_b (merged)   7/40=0.1750   34/40=0.8500
   

## Final Summary Tables (5 Tables Total)

In [72]:
def _summarize_counts(label, frame):
    """
    Collapse every row of `frame` into a single summary record.

    Counts and totals are summed over all rows; each ratio is recomputed from
    those sums (0 when the total is 0) so it always agrees with the counts
    shown next to it.

    Args:
        label: Value stored in the record's 'Attack Type' field.
        frame: DataFrame with 'Utility True Count', 'Utility Total',
            'Security True Count' and 'Security Total' columns.

    Returns:
        dict with 'Attack Type' plus Utility/Security Count, Total and Ratio.
    """
    utility_count = int(frame['Utility True Count'].sum())
    utility_total = int(frame['Utility Total'].sum())
    security_count = int(frame['Security True Count'].sum())
    security_total = int(frame['Security Total'].sum())
    return {
        'Attack Type': label,
        'Utility Count': utility_count,
        'Utility Total': utility_total,
        'Utility Ratio': utility_count / utility_total if utility_total > 0 else 0,
        'Security Count': security_count,
        'Security Total': security_total,
        'Security Ratio': security_count / security_total if security_total > 0 else 0,
    }


def create_suite_table(suite_name, suite_data):
    """
    Create the summary table for one suite (up to 8 rows):
    - one row per individual attack type present in `suite_data` (at most 6)
    - 1 merged type_i_b row (only when any 'type_i_b_*' rows exist)
    - 1 overall row for the suite

    Every row is built the same way: counts and totals are summed over all
    matching rows of `suite_data` and the ratio is derived from those sums.
    Previously the individual rows used only the first matching row, so extra
    rows for the same attack type were dropped while still being counted in
    the merged and Overall rows.

    Args:
        suite_name: Name of the suite. Not used in the computation; kept so
            existing callers keep working.
        suite_data: DataFrame with 'Attack Type', 'Utility True Count',
            'Utility Total', 'Security True Count' and 'Security Total'.

    Returns:
        DataFrame with 'Attack Type', the Utility/Security Count, Total and
        Ratio columns, and formatted 'Utility' / 'Security' display strings
        of the form "count/total=ratio (percent%)".
    """
    # Define attack types in order
    attack_types = [
        'type_i_a_parameter_override',
        'type_i_b_postaction_dependency',
        'type_i_b_prerequisite_dependency',
        'type_ii_a_short_circuit_reasoning',
        'type_ii_b_induced_parameter',
        'type_iii_a_sop_exfiltration'
    ]

    rows = []

    # Individual attack types, skipping any that are absent from this suite
    for attack_type in attack_types:
        attack_data = suite_data[suite_data['Attack Type'] == attack_type]
        if not attack_data.empty:
            rows.append(_summarize_counts(attack_type, attack_data))

    # Merged type_i_b row: both type_i_b_* variants pooled together
    type_i_b_data = suite_data[suite_data['Attack Type'].str.startswith('type_i_b_')]
    if not type_i_b_data.empty:
        rows.append(_summarize_counts('type_i_b (merged)', type_i_b_data))

    # Overall row for the suite (always present)
    rows.append(_summarize_counts('Overall', suite_data))

    table_df = pd.DataFrame(rows)

    # Formatted display columns, built from the records so the integer counts
    # are rendered exactly as stored
    for metric in ('Utility', 'Security'):
        table_df[metric] = [
            f"{rec[metric + ' Count']}/{rec[metric + ' Total']}="
            f"{rec[metric + ' Ratio']:.4f} ({rec[metric + ' Ratio']*100:.2f}%)"
            for rec in rows
        ]

    return table_df

# Generate tables for each suite
suites = sorted(df['Suite'].unique())

print("=" * 100)
print("FINAL SUMMARY TABLES - 5 TABLES TOTAL")
print("=" * 100)
print()

for i, suite in enumerate(suites, 1):
    suite_data = df[df['Suite'] == suite]
    suite_table = create_suite_table(suite, suite_data)

    print(f"\nTable {i}: {suite.upper()} Suite Statistics")
    print("-" * 100)
    display_table = suite_table[['Attack Type', 'Utility', 'Security']]
    print(display_table.to_string(index=False))
    print()

# Create overall table (all suites combined)
print("\nTable 5: OVERALL Statistics (All Suites Combined)")
print("-" * 100)


def _pooled_row(label, frame):
    """
    Sum the count/total columns of `frame` into one table row labelled `label`.

    Ratios are derived from the summed counts and fall back to 0 when the
    total is 0, so every row of the overall table (including 'Overall',
    which previously divided unguarded) is computed the same way.
    """
    utility_count = int(frame['Utility True Count'].sum())
    utility_total = int(frame['Utility Total'].sum())
    security_count = int(frame['Security True Count'].sum())
    security_total = int(frame['Security Total'].sum())
    return {
        'Attack Type': label,
        'Utility Count': utility_count,
        'Utility Total': utility_total,
        'Utility Ratio': utility_count / utility_total if utility_total > 0 else 0,
        'Security Count': security_count,
        'Security Total': security_total,
        'Security Ratio': security_count / security_total if security_total > 0 else 0,
    }


# Individual attack types
attack_types = [
    'type_i_a_parameter_override',
    'type_i_b_postaction_dependency',
    'type_i_b_prerequisite_dependency',
    'type_ii_a_short_circuit_reasoning',
    'type_ii_b_induced_parameter',
    'type_iii_a_sop_exfiltration'
]

overall_rows = []

# One row per attack type that actually occurs in df, pooled over all suites
for attack_type in attack_types:
    attack_data = df[df['Attack Type'] == attack_type]
    if not attack_data.empty:
        overall_rows.append(_pooled_row(attack_type, attack_data))

# Merged type_i_b (always appended, even when no type_i_b_* rows exist)
type_i_b_data = df[df['Attack Type'].str.startswith('type_i_b_')]
overall_rows.append(_pooled_row('type_i_b (merged)', type_i_b_data))

# Overall row: every row of df
overall_rows.append(_pooled_row('Overall', df))

overall_table = pd.DataFrame(overall_rows)

# Format display columns as "count/total=ratio (percent%)"
overall_table['Utility'] = overall_table.apply(
    lambda row: f"{row['Utility Count']}/{row['Utility Total']}={row['Utility Ratio']:.4f} ({row['Utility Ratio']*100:.2f}%)", axis=1
)
overall_table['Security'] = overall_table.apply(
    lambda row: f"{row['Security Count']}/{row['Security Total']}={row['Security Ratio']:.4f} ({row['Security Ratio']*100:.2f}%)", axis=1
)

display_overall = overall_table[['Attack Type', 'Utility', 'Security']]
print(display_overall.to_string(index=False))
print()
print("=" * 100)

FINAL SUMMARY TABLES - 5 TABLES TOTAL


Table 1: BANKING Suite Statistics
----------------------------------------------------------------------------------------------------
                      Attack Type                Utility                Security
      type_i_a_parameter_override   6/16=0.3750 (37.50%)   14/16=0.8750 (87.50%)
   type_i_b_postaction_dependency 16/16=1.0000 (100.00%)     0/16=0.0000 (0.00%)
 type_i_b_prerequisite_dependency  14/16=0.8750 (87.50%)   15/16=0.9375 (93.75%)
type_ii_a_short_circuit_reasoning   7/16=0.4375 (43.75%)   12/16=0.7500 (75.00%)
      type_ii_b_induced_parameter   3/16=0.1875 (18.75%)   14/16=0.8750 (87.50%)
      type_iii_a_sop_exfiltration  15/96=0.1562 (15.62%)   80/96=0.8333 (83.33%)
                type_i_b (merged)  30/32=0.9375 (93.75%)   15/32=0.4688 (46.88%)
                          Overall 61/176=0.3466 (34.66%) 135/176=0.7670 (76.70%)


Table 2: SLACK Suite Statistics
--------------------------------------------------------------

### Display Final 5 Tables as DataFrames

In [73]:
# Collect the per-suite tables and the combined table in one dictionary,
# keyed by suite name (plus 'overall') for easy access
final_tables = {
    suite: create_suite_table(suite, df[df['Suite'] == suite])
    for suite in suites
}
final_tables['overall'] = overall_table

# Numeric columns shown in the rich display (the formatted strings are left out)
detail_columns = [
    'Attack Type',
    'Utility Count', 'Utility Total', 'Utility Ratio',
    'Security Count', 'Security Total', 'Security Ratio',
]
rule = "=" * 100

# Display all 5 tables
print(rule)
print("DISPLAYING ALL 5 FINAL TABLES")
print(rule)

for i, suite in enumerate(suites, 1):
    print("\n" + rule)
    print(f"Table {i}: {suite.upper()} Suite - 8 Rows (6 attacks + 1 merged type_i_b + 1 overall)")
    print(rule)
    display(final_tables[suite][detail_columns])

print("\n" + rule)
print("Table 5: OVERALL (All Suites) - 8 Rows (6 attacks + 1 merged type_i_b + 1 overall)")
print(rule)
display(final_tables['overall'][detail_columns])

print("\n" + rule)
print("All 5 tables have been generated and stored in 'final_tables' dictionary")
print("Access them using: final_tables['banking'], final_tables['slack'], etc.")
print(rule)

DISPLAYING ALL 5 FINAL TABLES

Table 1: BANKING Suite - 8 Rows (6 attacks + 1 merged type_i_b + 1 overall)


Unnamed: 0,Attack Type,Utility Count,Utility Total,Utility Ratio,Security Count,Security Total,Security Ratio
0,type_i_a_parameter_override,6,16,0.375,14,16,0.875
1,type_i_b_postaction_dependency,16,16,1.0,0,16,0.0
2,type_i_b_prerequisite_dependency,14,16,0.875,15,16,0.9375
3,type_ii_a_short_circuit_reasoning,7,16,0.4375,12,16,0.75
4,type_ii_b_induced_parameter,3,16,0.1875,14,16,0.875
5,type_iii_a_sop_exfiltration,15,96,0.15625,80,96,0.833333
6,type_i_b (merged),30,32,0.9375,15,32,0.46875
7,Overall,61,176,0.346591,135,176,0.767045



Table 2: SLACK Suite - 8 Rows (6 attacks + 1 merged type_i_b + 1 overall)


Unnamed: 0,Attack Type,Utility Count,Utility Total,Utility Ratio,Security Count,Security Total,Security Ratio
0,type_i_a_parameter_override,1,21,0.047619,18,21,0.857143
1,type_i_b_postaction_dependency,14,15,0.933333,10,15,0.666667
2,type_i_b_prerequisite_dependency,11,17,0.647059,16,17,0.941176
3,type_ii_a_short_circuit_reasoning,16,21,0.761905,16,21,0.761905
4,type_ii_b_induced_parameter,6,21,0.285714,13,21,0.619048
5,type_iii_a_sop_exfiltration,7,35,0.2,27,35,0.771429
6,type_i_b (merged),25,32,0.78125,26,32,0.8125
7,Overall,55,130,0.423077,100,130,0.769231



Table 3: TRAVEL Suite - 8 Rows (6 attacks + 1 merged type_i_b + 1 overall)


Unnamed: 0,Attack Type,Utility Count,Utility Total,Utility Ratio,Security Count,Security Total,Security Ratio
0,type_i_a_parameter_override,0,20,0.0,19,20,0.95
1,type_i_b_postaction_dependency,5,20,0.25,15,20,0.75
2,type_i_b_prerequisite_dependency,2,20,0.1,19,20,0.95
3,type_ii_a_short_circuit_reasoning,4,20,0.2,17,20,0.85
4,type_ii_b_induced_parameter,0,20,0.0,20,20,1.0
5,type_iii_a_sop_exfiltration,4,100,0.04,82,100,0.82
6,type_i_b (merged),7,40,0.175,34,40,0.85
7,Overall,15,200,0.075,172,200,0.86



Table 4: WORKSPACE Suite - 8 Rows (6 attacks + 1 merged type_i_b + 1 overall)


Unnamed: 0,Attack Type,Utility Count,Utility Total,Utility Ratio,Security Count,Security Total,Security Ratio
0,type_i_a_parameter_override,25,117,0.213675,103,117,0.880342
1,type_i_b_postaction_dependency,21,40,0.525,16,40,0.4
2,type_i_b_prerequisite_dependency,37,40,0.925,0,40,0.0
3,type_ii_a_short_circuit_reasoning,28,34,0.823529,4,34,0.117647
4,type_ii_b_induced_parameter,7,31,0.225806,21,31,0.677419
5,type_iii_a_sop_exfiltration,22,41,0.536585,16,41,0.390244
6,type_i_b (merged),58,80,0.725,16,80,0.2
7,Overall,140,303,0.462046,160,303,0.528053



Table 5: OVERALL (All Suites) - 8 Rows (6 attacks + 1 merged type_i_b + 1 overall)


Unnamed: 0,Attack Type,Utility Count,Utility Total,Utility Ratio,Security Count,Security Total,Security Ratio
0,type_i_a_parameter_override,32,174,0.183908,154,174,0.885057
1,type_i_b_postaction_dependency,56,91,0.615385,41,91,0.450549
2,type_i_b_prerequisite_dependency,64,93,0.688172,50,93,0.537634
3,type_ii_a_short_circuit_reasoning,55,91,0.604396,49,91,0.538462
4,type_ii_b_induced_parameter,16,88,0.181818,68,88,0.772727
5,type_iii_a_sop_exfiltration,48,272,0.176471,205,272,0.753676
6,type_i_b (merged),120,184,0.652174,91,184,0.494565
7,Overall,271,809,0.334981,567,809,0.700865



All 5 tables have been generated and stored in 'final_tables' dictionary
Access them using: final_tables['banking'], final_tables['slack'], etc.


In [74]:
# Export all 5 final tables to CSV files; each filename carries the framework name
export_columns = [
    'Attack Type',
    'Utility Count', 'Utility Total', 'Utility Ratio',
    'Security Count', 'Security Total', 'Security Ratio',
]

print(f"Exporting all 5 final tables to CSV files (framework: {framework})...")
print()

# One CSV per suite
for suite in suites:
    filename = f'final_table_{suite}_{framework}.csv'
    suite_export = final_tables[suite][export_columns]
    suite_export.to_csv(filename, index=False)
    print(f"✓ Exported {suite} table to: {filename}")

# The combined table follows the same naming scheme
overall_filename = f'final_table_overall_{framework}.csv'
overall_export = final_tables['overall'][export_columns]
overall_export.to_csv(overall_filename, index=False)
print(f"✓ Exported overall table to: {overall_filename}")

print()
rule = "=" * 100
print(rule)
print(f"All 5 tables exported successfully with '{framework}' suffix!")
print(rule)

Exporting all 5 final tables to CSV files (framework: base)...

✓ Exported banking table to: final_table_banking_base.csv
✓ Exported slack table to: final_table_slack_base.csv
✓ Exported travel table to: final_table_travel_base.csv
✓ Exported workspace table to: final_table_workspace_base.csv
✓ Exported overall table to: final_table_overall_base.csv

All 5 tables exported successfully with 'base' suffix!


In [75]:
# Export merged statistics to CSV (filenames are tagged with the framework name)
complete_merged_csv = f'complete_statistics_merged_{framework}.csv'
complete_df.to_csv(complete_merged_csv, index=False)
print(f"Exported complete statistics (type_i_b merged) to: {complete_merged_csv}")

# The by-suite export keeps only the raw numeric columns, not the formatted strings
complete_by_suite_csv = f'complete_statistics_by_suite_merged_{framework}.csv'
by_suite_export_columns = [
    'Suite', 'Attack Type',
    'Utility True Count', 'Utility Total', 'Utility Ratio',
    'Security True Count', 'Security Total', 'Security Ratio',
]
complete_by_suite_df[by_suite_export_columns].to_csv(complete_by_suite_csv, index=False)
print(f"Exported complete statistics by suite (type_i_b merged) to: {complete_by_suite_csv}")

Exported complete statistics (type_i_b merged) to: complete_statistics_merged_base.csv
Exported complete statistics by suite (type_i_b merged) to: complete_statistics_by_suite_merged_base.csv


## Note on File Exports

## Note
All CSV exports include the detected framework type (`melon`, `base`, `progent`, `spotlight`, or `unknown`) in the filename for easy identification. The export cells earlier in the notebook already apply this naming.

## Three-Category Attack Classification

This section groups the 6 attack types into 3 major categories based on the attack mechanism:

1. **Goal Misalignment (针对意图)**: Type I-A (Parameter Override) - Exploits the model's compliance with system/tool instructions to override the user's intent
2. **Reasoning Distortion (针对规划)**: Type I-B (Dependency Traps) + Type II-A (Short Circuit Reasoning) - Exploits causal reasoning by planting false dependencies or shortcuts  
3. **Perception Manipulation (针对感知)**: Type II-B (Induced Parameter) + Type III-A (SOP Exfiltration) - Exploits trust in environment feedback through fake observations (errors, duplicate tools)

In [76]:
# Map each of the three categories to the attack types it groups together
category_mapping = {
    'Goal Misalignment': ['type_i_a_parameter_override'],
    'Reasoning Distortion': [
        'type_i_b_postaction_dependency',
        'type_i_b_prerequisite_dependency',
        'type_ii_a_short_circuit_reasoning',
    ],
    'Perception Manipulation': ['type_ii_b_induced_parameter', 'type_iii_a_sop_exfiltration'],
}

# One pooled record per category (all suites combined)
category_stats = []

for category_name, attack_types in category_mapping.items():
    # Rows of df belonging to this category
    category_data = df[df['Attack Type'].isin(attack_types)]

    utility_count = category_data['Utility True Count'].sum()
    utility_total = category_data['Utility Total'].sum()
    security_count = category_data['Security True Count'].sum()
    security_total = category_data['Security Total'].sum()

    # Ratios fall back to 0 when nothing was counted
    if utility_total > 0:
        utility_ratio = utility_count / utility_total
    else:
        utility_ratio = 0
    if security_total > 0:
        security_ratio = security_count / security_total
    else:
        security_ratio = 0

    # Readable member list, e.g. 'type_i_a_parameter_override' -> 'i a parameter override'
    readable_members = ', '.join(at.replace('type_', '').replace('_', ' ') for at in attack_types)

    category_stats.append({
        'Category': category_name,
        'Attack Types Included': readable_members,
        'Utility Count': utility_count,
        'Utility Total': utility_total,
        'Utility Ratio': utility_ratio,
        'Security Count': security_count,
        'Security Total': security_total,
        'Security Ratio': security_ratio,
    })

category_df = pd.DataFrame(category_stats)

# Formatted "count/total=ratio (percent%)" display columns
for metric in ('Utility', 'Security'):
    category_df[metric] = category_df.apply(
        lambda row, metric=metric: (
            f"{row[metric + ' Count']}/{row[metric + ' Total']}="
            f"{row[metric + ' Ratio']:.4f} ({row[metric + ' Ratio']*100:.2f}%)"
        ),
        axis=1,
    )

rule = "=" * 100
print(rule)
print("THREE-CATEGORY ATTACK STATISTICS (Overall)")
print(rule)
print()

display_category = category_df[['Category', 'Attack Types Included', 'Utility', 'Security']].copy()
print(display_category.to_string(index=False))

print()
print(rule)
print()

# Detailed version: the bare expression on the last line is rendered as the cell output
print("Detailed Version with All Numbers:")
print()
category_df[['Category', 'Utility Count', 'Utility Total', 'Utility Ratio',
             'Security Count', 'Security Total', 'Security Ratio']]

THREE-CATEGORY ATTACK STATISTICS (Overall)

               Category                                                                Attack Types Included                 Utility                Security
      Goal Misalignment                                                               i a parameter override  32/174=0.1839 (18.39%) 154/174=0.8851 (88.51%)
   Reasoning Distortion i b postaction dependency, i b prerequisite dependency, ii a short circuit reasoning 175/275=0.6364 (63.64%) 140/275=0.5091 (50.91%)
Perception Manipulation                                       ii b induced parameter, iii a sop exfiltration  64/360=0.1778 (17.78%) 273/360=0.7583 (75.83%)


Detailed Version with All Numbers:



Unnamed: 0,Category,Utility Count,Utility Total,Utility Ratio,Security Count,Security Total,Security Ratio
0,Goal Misalignment,32,174,0.183908,154,174,0.885057
1,Reasoning Distortion,175,275,0.636364,140,275,0.509091
2,Perception Manipulation,64,360,0.177778,273,360,0.758333


In [77]:
# Three-category statistics broken down per suite
rule = "=" * 100
print(rule)
print("THREE-CATEGORY ATTACK STATISTICS (By Suite)")
print(rule)
print()

suites = sorted(df['Suite'].unique())
category_by_suite = []

for suite in suites:
    suite_data = df[df['Suite'] == suite]

    for category_name, attack_types in category_mapping.items():
        # Rows of this suite that belong to the current category
        category_suite_data = suite_data[suite_data['Attack Type'].isin(attack_types)]

        # Categories with no rows in this suite are left out entirely
        if category_suite_data.empty:
            continue

        utility_count = category_suite_data['Utility True Count'].sum()
        utility_total = category_suite_data['Utility Total'].sum()
        security_count = category_suite_data['Security True Count'].sum()
        security_total = category_suite_data['Security Total'].sum()

        # Ratios fall back to 0 when nothing was counted
        utility_ratio = utility_count / utility_total if utility_total > 0 else 0
        security_ratio = security_count / security_total if security_total > 0 else 0

        category_by_suite.append({
            'Suite': suite,
            'Category': category_name,
            'Utility Count': utility_count,
            'Utility Total': utility_total,
            'Utility Ratio': utility_ratio,
            'Security Count': security_count,
            'Security Total': security_total,
            'Security Ratio': security_ratio,
        })

category_by_suite_df = pd.DataFrame(category_by_suite)

# Formatted "count/total=ratio" display columns
for metric in ('Utility', 'Security'):
    category_by_suite_df[metric] = category_by_suite_df.apply(
        lambda row, metric=metric: (
            f"{row[metric + ' Count']}/{row[metric + ' Total']}={row[metric + ' Ratio']:.4f}"
        ),
        axis=1,
    )

# Display formatted table
display_by_suite = category_by_suite_df[['Suite', 'Category', 'Utility', 'Security']].copy()
print(display_by_suite.to_string(index=False))

print()
print(rule)
print()

# Pivot the ratios (categories as rows, suites as columns) for easier comparison.
# aggfunc='first' keeps the first ratio found in each (category, suite) cell.
pivot_layout = {'index': 'Category', 'columns': 'Suite', 'aggfunc': 'first'}

print("Utility Ratios by Suite and Category:")
pivot_utility_category = category_by_suite_df.pivot_table(values='Utility Ratio', **pivot_layout)
print(pivot_utility_category)

print()
print("Security Ratios by Suite and Category:")
pivot_security_category = category_by_suite_df.pivot_table(values='Security Ratio', **pivot_layout)
print(pivot_security_category)

THREE-CATEGORY ATTACK STATISTICS (By Suite)

    Suite                Category       Utility       Security
  banking       Goal Misalignment   6/16=0.3750   14/16=0.8750
  banking    Reasoning Distortion  37/48=0.7708   27/48=0.5625
  banking Perception Manipulation 18/112=0.1607  94/112=0.8393
    slack       Goal Misalignment   1/21=0.0476   18/21=0.8571
    slack    Reasoning Distortion  41/53=0.7736   42/53=0.7925
    slack Perception Manipulation  13/56=0.2321   40/56=0.7143
   travel       Goal Misalignment   0/20=0.0000   19/20=0.9500
   travel    Reasoning Distortion  11/60=0.1833   51/60=0.8500
   travel Perception Manipulation  4/120=0.0333 102/120=0.8500
workspace       Goal Misalignment 25/117=0.2137 103/117=0.8803
workspace    Reasoning Distortion 86/114=0.7544  20/114=0.1754
workspace Perception Manipulation  29/72=0.4028   37/72=0.5139


Utility Ratios by Suite and Category:
Suite                     banking     slack    travel  workspace
Category                       

In [78]:
# Create final three-category tables (similar to the 5 tables we created earlier)
def create_three_category_suite_table(suite_name, suite_data):
    """
    Build the three-category summary table for one suite.

    The table has one row per category of the module-level `category_mapping`
    that has at least one matching row in `suite_data`, followed by an
    'Overall' row that pools every row of `suite_data`.

    Args:
        suite_name: Name of the suite (not used in the computation).
        suite_data: DataFrame with 'Attack Type', 'Utility True Count',
            'Utility Total', 'Security True Count' and 'Security Total'.

    Returns:
        DataFrame with 'Category', the Utility/Security Count, Total and
        Ratio columns, and formatted 'Utility' / 'Security' strings of the
        form "count/total=ratio (percent%)".
    """
    def pooled_record(label, frame):
        # Sum the counts, derive the ratios (0 when the total is 0)
        u_count = frame['Utility True Count'].sum()
        u_total = frame['Utility Total'].sum()
        s_count = frame['Security True Count'].sum()
        s_total = frame['Security Total'].sum()

        if u_total > 0:
            u_ratio = u_count / u_total
        else:
            u_ratio = 0
        if s_total > 0:
            s_ratio = s_count / s_total
        else:
            s_ratio = 0

        return {
            'Category': label,
            'Utility Count': int(u_count),
            'Utility Total': int(u_total),
            'Utility Ratio': u_ratio,
            'Security Count': int(s_count),
            'Security Total': int(s_total),
            'Security Ratio': s_ratio,
        }

    rows = []

    # Category rows, in mapping order; categories without data are skipped
    for category_name, attack_types in category_mapping.items():
        members = suite_data[suite_data['Attack Type'].isin(attack_types)]
        if members.empty:
            continue
        rows.append(pooled_record(category_name, members))

    # Overall row for the suite is always present
    rows.append(pooled_record('Overall', suite_data))

    table_df = pd.DataFrame(rows)

    # Formatted display columns, Utility first and then Security
    for metric in ('Utility', 'Security'):
        table_df[metric] = table_df.apply(
            lambda row, metric=metric: (
                f"{row[metric + ' Count']}/{row[metric + ' Total']}="
                f"{row[metric + ' Ratio']:.4f} ({row[metric + ' Ratio']*100:.2f}%)"
            ),
            axis=1,
        )

    return table_df

# Generate three-category tables for each suite
suites = sorted(df['Suite'].unique())
rule = "=" * 100

print(rule)
print("FINAL THREE-CATEGORY SUMMARY TABLES - 5 TABLES TOTAL")
print(rule)
print()

three_category_tables = {}

for i, suite in enumerate(suites, 1):
    suite_data = df[df['Suite'] == suite]
    suite_table = create_three_category_suite_table(suite, suite_data)
    three_category_tables[suite] = suite_table

    print(f"\nTable {i}: {suite.upper()} Suite - Three Category Statistics")
    print("-" * 100)
    display_table = suite_table[['Category', 'Utility', 'Security']]
    print(display_table.to_string(index=False))
    print()

# Create overall table (all suites combined)
print("\nTable 5: OVERALL - Three Category Statistics (All Suites Combined)")
print("-" * 100)

overall_category_rows = []

# One pooled row per category; unlike the per-suite tables, empty categories are kept
for category_name, attack_types in category_mapping.items():
    category_data = df[df['Attack Type'].isin(attack_types)]

    utility_count = category_data['Utility True Count'].sum()
    utility_total = category_data['Utility Total'].sum()
    security_count = category_data['Security True Count'].sum()
    security_total = category_data['Security Total'].sum()

    if utility_total > 0:
        utility_ratio = utility_count / utility_total
    else:
        utility_ratio = 0
    if security_total > 0:
        security_ratio = security_count / security_total
    else:
        security_ratio = 0

    overall_category_rows.append({
        'Category': category_name,
        'Utility Count': int(utility_count),
        'Utility Total': int(utility_total),
        'Utility Ratio': utility_ratio,
        'Security Count': int(security_count),
        'Security Total': int(security_total),
        'Security Ratio': security_ratio,
    })

# Overall row: the sum of the 3 category rows (not taken from df, to avoid any double counting)
count_keys = ('Utility Count', 'Utility Total', 'Security Count', 'Security Total')
totals = {key: sum(rec[key] for rec in overall_category_rows) for key in count_keys}

total_utility_count = totals['Utility Count']
total_utility_total = totals['Utility Total']
total_security_count = totals['Security Count']
total_security_total = totals['Security Total']
total_utility_ratio = total_utility_count / total_utility_total if total_utility_total > 0 else 0
total_security_ratio = total_security_count / total_security_total if total_security_total > 0 else 0

overall_category_rows.append({
    'Category': 'Overall',
    'Utility Count': int(total_utility_count),
    'Utility Total': int(total_utility_total),
    'Utility Ratio': total_utility_ratio,
    'Security Count': int(total_security_count),
    'Security Total': int(total_security_total),
    'Security Ratio': total_security_ratio,
})

overall_category_table = pd.DataFrame(overall_category_rows)

# Formatted "count/total=ratio (percent%)" display columns
for metric in ('Utility', 'Security'):
    overall_category_table[metric] = overall_category_table.apply(
        lambda row, metric=metric: (
            f"{row[metric + ' Count']}/{row[metric + ' Total']}="
            f"{row[metric + ' Ratio']:.4f} ({row[metric + ' Ratio']*100:.2f}%)"
        ),
        axis=1,
    )

three_category_tables['overall'] = overall_category_table

display_overall_category = overall_category_table[['Category', 'Utility', 'Security']]
print(display_overall_category.to_string(index=False))
print()
print(rule)

FINAL THREE-CATEGORY SUMMARY TABLES - 5 TABLES TOTAL


Table 1: BANKING Suite - Three Category Statistics
----------------------------------------------------------------------------------------------------
               Category                Utility                Security
      Goal Misalignment   6/16=0.3750 (37.50%)   14/16=0.8750 (87.50%)
   Reasoning Distortion  37/48=0.7708 (77.08%)   27/48=0.5625 (56.25%)
Perception Manipulation 18/112=0.1607 (16.07%)  94/112=0.8393 (83.93%)
                Overall 61/176=0.3466 (34.66%) 135/176=0.7670 (76.70%)


Table 2: SLACK Suite - Three Category Statistics
----------------------------------------------------------------------------------------------------
               Category                Utility                Security
      Goal Misalignment    1/21=0.0476 (4.76%)   18/21=0.8571 (85.71%)
   Reasoning Distortion  41/53=0.7736 (77.36%)   42/53=0.7925 (79.25%)
Perception Manipulation  13/56=0.2321 (23.21%)   40/56=0.7143 (71.43%)
  

In [79]:
# Display all 5 three-category tables as DataFrames (numeric columns only)
category_detail_columns = [
    'Category',
    'Utility Count', 'Utility Total', 'Utility Ratio',
    'Security Count', 'Security Total', 'Security Ratio',
]
rule = "=" * 100

print(rule)
print("DISPLAYING ALL 5 THREE-CATEGORY TABLES")
print(rule)

# Per-suite tables, numbered in sorted suite order
for i, suite in enumerate(suites, 1):
    print("\n" + rule)
    print(f"Table {i}: {suite.upper()} Suite - Three Category (3 categories + 1 overall)")
    print(rule)
    display(three_category_tables[suite][category_detail_columns])

# Combined table
print("\n" + rule)
print("Table 5: OVERALL (All Suites) - Three Category (3 categories + 1 overall)")
print(rule)
display(three_category_tables['overall'][category_detail_columns])

print("\n" + rule)
print("All 5 three-category tables have been generated and stored in 'three_category_tables' dictionary")
print("Access them using: three_category_tables['banking'], three_category_tables['slack'], etc.")
print(rule)

DISPLAYING ALL 5 THREE-CATEGORY TABLES

Table 1: BANKING Suite - Three Category (3 categories + 1 overall)


Unnamed: 0,Category,Utility Count,Utility Total,Utility Ratio,Security Count,Security Total,Security Ratio
0,Goal Misalignment,6,16,0.375,14,16,0.875
1,Reasoning Distortion,37,48,0.770833,27,48,0.5625
2,Perception Manipulation,18,112,0.160714,94,112,0.839286
3,Overall,61,176,0.346591,135,176,0.767045



Table 2: SLACK Suite - Three Category (3 categories + 1 overall)


Unnamed: 0,Category,Utility Count,Utility Total,Utility Ratio,Security Count,Security Total,Security Ratio
0,Goal Misalignment,1,21,0.047619,18,21,0.857143
1,Reasoning Distortion,41,53,0.773585,42,53,0.792453
2,Perception Manipulation,13,56,0.232143,40,56,0.714286
3,Overall,55,130,0.423077,100,130,0.769231



Table 3: TRAVEL Suite - Three Category (3 categories + 1 overall)


Unnamed: 0,Category,Utility Count,Utility Total,Utility Ratio,Security Count,Security Total,Security Ratio
0,Goal Misalignment,0,20,0.0,19,20,0.95
1,Reasoning Distortion,11,60,0.183333,51,60,0.85
2,Perception Manipulation,4,120,0.033333,102,120,0.85
3,Overall,15,200,0.075,172,200,0.86



Table 4: WORKSPACE Suite - Three Category (3 categories + 1 overall)


Unnamed: 0,Category,Utility Count,Utility Total,Utility Ratio,Security Count,Security Total,Security Ratio
0,Goal Misalignment,25,117,0.213675,103,117,0.880342
1,Reasoning Distortion,86,114,0.754386,20,114,0.175439
2,Perception Manipulation,29,72,0.402778,37,72,0.513889
3,Overall,140,303,0.462046,160,303,0.528053



Table 5: OVERALL (All Suites) - Three Category (3 categories + 1 overall)


Unnamed: 0,Category,Utility Count,Utility Total,Utility Ratio,Security Count,Security Total,Security Ratio
0,Goal Misalignment,32,174,0.183908,154,174,0.885057
1,Reasoning Distortion,175,275,0.636364,140,275,0.509091
2,Perception Manipulation,64,360,0.177778,273,360,0.758333
3,Overall,271,809,0.334981,567,809,0.700865



All 5 three-category tables have been generated and stored in 'three_category_tables' dictionary
Access them using: three_category_tables['banking'], three_category_tables['slack'], etc.


In [80]:
# Export every three-category table to its own CSV file: one file per entry
# in `suites`, plus one for the cross-suite 'overall' table. Each file name is
# suffixed with the detected `framework`.

# Columns written to every CSV, in order (defined once instead of being
# repeated at each to_csv() call).
three_category_export_columns = ['Category', 'Utility Count', 'Utility Total', 'Utility Ratio',
                                 'Security Count', 'Security Total', 'Security Ratio']

# Per-suite tables plus the overall table; derived from `suites` so the
# messages stay correct if the suite list changes.
three_category_export_count = len(suites) + 1


def _export_three_category_table(table_key):
    """Write one three-category table to CSV and return the file name used.

    Args:
        table_key: Key into `three_category_tables` (a suite name or 'overall').

    Returns:
        The file name 'three_category_table_<table_key>_<framework>.csv'.
        It is a relative path, so the file lands in the current working
        directory. The DataFrame index is not written.
    """
    filename = f'three_category_table_{table_key}_{framework}.csv'
    three_category_tables[table_key][three_category_export_columns].to_csv(filename, index=False)
    print(f"✓ Exported {table_key} three-category table to: {filename}")
    return filename


print(f"Exporting all {three_category_export_count} three-category tables to CSV files (framework: {framework})...")
print()

for suite in suites:
    filename = _export_three_category_table(suite)

overall_category_filename = _export_three_category_table('overall')

print()
print("=" * 100)
print(f"All {three_category_export_count} three-category tables exported successfully with '{framework}' suffix!")
print("=" * 100)

Exporting all 5 three-category tables to CSV files (framework: base)...

✓ Exported banking three-category table to: three_category_table_banking_base.csv
✓ Exported slack three-category table to: three_category_table_slack_base.csv
✓ Exported travel three-category table to: three_category_table_travel_base.csv
✓ Exported workspace three-category table to: three_category_table_workspace_base.csv
✓ Exported overall three-category table to: three_category_table_overall_base.csv

All 5 three-category tables exported successfully with 'base' suffix!
