In [None]:
# Installs tabulate into the kernel's environment via the %pip line magic.
# NOTE(review): the version is unpinned; consider pinning it so reruns are reproducible.
%pip install tabulate

In [2]:
import pandas as pd
from tabulate import tabulate
from tqdm import tqdm


In [25]:
# Pattern-search results, stored as pipe-delimited text.
ipi_da_df = pd.read_csv(
    'results/pattern_search_results.txt',
    delimiter='|',
)

In [None]:
# Mean of the 'Shell' column for every (orchestrator, input type, error type) combination.
(
    ipi_da_df
    .groupby(['Orchestrator', 'Input Type', 'Error Type'])['Shell']
    .mean()
    .reset_index()
)

In [None]:
# Mean of the selected column per model, restricted to the 'direct-ask' input type.
col = 'Refusal'
is_direct_ask = ipi_da_df['Input Type'].eq('direct-ask')
filtered_df = ipi_da_df.loc[is_direct_ask]
mean_by_model = filtered_df.groupby(['Model', 'Input Type'])[col].mean()
print(mean_by_model)

In [None]:
# AutoGen trials: refusal-rate and success-rate tables, printed as LaTeX.
# The trial log is pipe-delimited; the columns read below are 'Orchestrator',
# 'Input Type', 'Error Type', 'Model', 'Success' and 'Refusal'.
autogen_df = pd.read_csv('results/autogen/all_trials.csv', sep='|')
orchestrators = autogen_df['Orchestrator'].unique()

# Rate reported when a slice contains no trials (formats as 'nan' instead of raising).
MISSING = float('nan')

# (value in the 'Model' column, short column label used in the LaTeX headers)
MODEL_COLUMNS = [
    ('gpt-4o', '4o'),
    ('gpt-4o-mini', 'mini'),
    ('gemini-1.5-pro', 'pro'),
    ('gemini-1.5-flash', 'flash'),
]

# Mean 'Success' per slice, computed once and looked up by every table below.
success_by_input = autogen_df.groupby(['Orchestrator', 'Input Type'])['Success'].mean()
success_by_input_model = autogen_df.groupby(['Orchestrator', 'Input Type', 'Model'])['Success'].mean()

# Prepare data for the refusal rates table
refusal_table_data = []
for orchestrator in orchestrators:
    orchestrator_df = autogen_df[autogen_df['Orchestrator'] == orchestrator]
    ipi_df = orchestrator_df[orchestrator_df['Input Type'].isin(['ipi-web-text', 'ipi-local-text'])]
    direct_ask_df = orchestrator_df[orchestrator_df['Input Type'] == 'direct-ask']

    # One rate per 'Error Type' prefix; na=False keeps rows without an error type out of the mask.
    refusal_rates = [
        ipi_df.loc[ipi_df['Error Type'].str.startswith(prefix, na=False), 'Refusal'].mean()
        for prefix in ('ipi1', 'ipi2', 'ipi3')
    ]
    refusal_rates.append(ipi_df['Refusal'].mean())
    refusal_rates.append(direct_ask_df['Refusal'].mean())

    # '.0%' rounds to the nearest percent (int() truncated, e.g. 0.29 -> 28%) and
    # renders an empty slice as 'nan%' instead of raising on int(nan).
    refusal_table_data.append([orchestrator] + [f"{rate:.0%}" for rate in refusal_rates])

# Print the refusal rates table
print(tabulate(refusal_table_data, headers=["Orchestrator", "Ignore Previous", "InjecAgent", "AgentDojo DOS", "Avg. IPI", "DA"], tablefmt="latex"))


# Prepare data for the first table
# NOTE(review): the "Avg. ASR" column is computed from 'web-text-redirect' trials only,
# as in the original cell -- confirm that this is the intended definition.
table1_data = []
for orchestrator in orchestrators:
    table1_data.append([orchestrator] + [
        success_by_input.get((orchestrator, input_type), MISSING)
        for input_type in ('web-text-redirect', 'ipi-web-text', 'direct-ask')
    ])

# Print the first table
print(tabulate(table1_data, headers=["Orchestrator", "Avg. ASR", "IPI ASR", "DA ASR"], tablefmt="latex"))

# Prepare data for the second table: per-model success on 'local-text', then per-input-type rates.
table2_data = []
for orchestrator in orchestrators:
    local_rates = [
        round(success_by_input_model.get((orchestrator, 'local-text', model), MISSING), 3)
        for model, _label in MODEL_COLUMNS
    ]
    summary_rates = [
        round(success_by_input.get((orchestrator, input_type), MISSING), 3)
        for input_type in ('web-text-redirect', 'ipi-web-text', 'direct-ask')
    ]
    table2_data.append([orchestrator, *local_rates, *summary_rates])

# Print the second table
print(tabulate(table2_data, headers=["Orch.", "4o", "4o-mini", "Gem-1.5-Pro", "Gem-1.5-Flash", "Avg. ASR", "IPI", "DA"], tablefmt="latex"))

# Prepare data for the third table: table3_data[orchestrator][input_type][model] -> mean success.
table3_data = {
    orchestrator: {
        input_type: {
            model: success_by_input_model.get((orchestrator, input_type, model), MISSING)
            for model in autogen_df['Model'].unique()
        }
        for input_type in autogen_df['Input Type'].unique()
    }
    for orchestrator in orchestrators
}

# (value in the 'Orchestrator' column, row label in the LaTeX table)
TABLE3_ROWS = [('magentic-one', 'MO'), ('selector', 'Sel.'), ('round-robin', 'RR')]
# One printed table per entry; each entry lists (group title, value in the 'Input Type' column).
TABLE3_LAYOUTS = [
    [('Local File', 'local-text'), ('Web Text Redirect', 'web-text-redirect')],
    [('Web Text Single File', 'web-text-single-file'), ('Web Image', 'web-image')],
]

# Titles and data cells are generated from the same layout so they cannot drift apart,
# and the column spec / model header always match the number of data columns.
for column_groups in TABLE3_LAYOUTS:
    group_titles = ' & '.join(
        r"\multicolumn{%d}{c}{\textbf{%s}}" % (len(MODEL_COLUMNS), title)
        for title, _input_type in column_groups
    )
    model_titles = ' & '.join([label for _model, label in MODEL_COLUMNS] * len(column_groups))
    latex_lines = [
        r"\begin{table}[h]",
        r"    \centering",
        r"    \begin{tabular}{ll" + "c" * (len(MODEL_COLUMNS) * len(column_groups)) + "}",
        r"        \toprule",
        r"        & & " + group_titles + r" \\",
        r"        \textbf{} & \textbf{Orch.} & " + model_titles + r" \\",
        r"        \midrule",
        r"        \multirow{%d}{*}{AG}" % len(TABLE3_ROWS),
    ]
    for orchestrator, row_label in TABLE3_ROWS:
        cells = [
            f"{table3_data[orchestrator][input_type][model]:.3f}"
            for _title, input_type in column_groups
            for model, _label in MODEL_COLUMNS
        ]
        latex_lines.append("        & " + row_label + " & " + ' & '.join(cells) + r" \\")
    latex_lines += [r"\end{tabular}", r"\end{table}"]
    print('\n'.join(latex_lines))

# Prepare data for the fourth table
table4_data = []
for orchestrator in orchestrators:
    table4_data.append([orchestrator] + [
        round(success_by_input.get((orchestrator, input_type), MISSING), 3)
        for input_type in ('local-text', 'web-text-redirect', 'web-text-single-file', 'web-image')
    ])

# Print the fourth table
print(tabulate(table4_data, headers=["Orchestrator", "Local", "Web Redirect", "Web (Single)", "Web Image"], tablefmt="latex"))

In [None]:
# MetaGPT trials: refusal-rate and success-rate tables, printed as LaTeX.
# The trial log is pipe-delimited; the columns read below are 'Orchestrator',
# 'Input Type', 'Error Type', 'Model', 'Success', 'Refusal' and 'Unsafe Code ID'.
metagpt_df = pd.read_csv('results/metagpt/all_trials.csv', sep='|')

# Sanity check: rows where 'Success', 'Unsafe Code ID' and 'Refusal' are all 1.
# The original expression was evaluated mid-cell and discarded; keep the rows in a
# variable and report how many there are so the check is actually visible.
contradictory_trials = metagpt_df[(metagpt_df['Success']==1) & (metagpt_df['Unsafe Code ID']==1) & (metagpt_df['Refusal']==1)]
print(f"{len(contradictory_trials)} trials have Success, Unsafe Code ID and Refusal all set to 1")

orchestrators = metagpt_df['Orchestrator'].unique()

# Rate reported when a slice contains no trials (formats as 'nan' instead of raising).
MISSING = float('nan')

# (value in the 'Model' column, short column label used in the LaTeX headers)
MODEL_COLUMNS = [
    ('gpt-4o', '4o'),
    ('gpt-4o-mini', 'mini'),
    ('gemini-1.5-pro', 'pro'),
    ('gemini-1.5-flash', 'flash'),
]

# Mean 'Success' per slice, computed once and looked up by every table below.
success_by_input = metagpt_df.groupby(['Orchestrator', 'Input Type'])['Success'].mean()
success_by_input_model = metagpt_df.groupby(['Orchestrator', 'Input Type', 'Model'])['Success'].mean()

# Prepare data for the refusal rates table
refusal_table_data = []
for orchestrator in orchestrators:
    orchestrator_df = metagpt_df[metagpt_df['Orchestrator'] == orchestrator]
    ipi_df = orchestrator_df[orchestrator_df['Input Type'].isin(['ipi-web-text', 'ipi-local-text'])]
    direct_ask_df = orchestrator_df[orchestrator_df['Input Type'] == 'direct-ask']

    # One rate per 'Error Type' prefix; na=False keeps rows without an error type out of the mask.
    refusal_rates = [
        ipi_df.loc[ipi_df['Error Type'].str.startswith(prefix, na=False), 'Refusal'].mean()
        for prefix in ('ipi1', 'ipi2', 'ipi3')
    ]
    refusal_rates.append(ipi_df['Refusal'].mean())
    refusal_rates.append(direct_ask_df['Refusal'].mean())

    # '.0%' rounds to the nearest percent (int() truncated, e.g. 0.29 -> 28%) and
    # renders an empty slice as 'nan%' instead of raising on int(nan).
    refusal_table_data.append([orchestrator] + [f"{rate:.0%}" for rate in refusal_rates])

# Print the refusal rates table
print(tabulate(refusal_table_data, headers=["Orchestrator", "Ignore Previous", "InjecAgent", "AgentDojo DOS", "Avg. IPI", "DA"], tablefmt="latex"))


# Prepare data for the first table
# NOTE(review): the "Avg. ASR" column is computed from 'web-text-redirect' trials only,
# as in the original cell -- confirm that this is the intended definition.
table1_data = []
for orchestrator in orchestrators:
    table1_data.append([orchestrator] + [
        success_by_input.get((orchestrator, input_type), MISSING)
        for input_type in ('web-text-redirect', 'ipi-web-text', 'direct-ask')
    ])

# Print the first table
print(tabulate(table1_data, headers=["Orchestrator", "Avg. ASR", "IPI ASR", "DA ASR"], tablefmt="latex"))

# Prepare data for the second table: per-model success on 'local-text', then per-input-type rates.
table2_data = []
for orchestrator in orchestrators:
    local_rates = [
        round(success_by_input_model.get((orchestrator, 'local-text', model), MISSING), 3)
        for model, _label in MODEL_COLUMNS
    ]
    summary_rates = [
        round(success_by_input.get((orchestrator, input_type), MISSING), 3)
        for input_type in ('web-text-redirect', 'ipi-web-text', 'direct-ask')
    ]
    table2_data.append([orchestrator, *local_rates, *summary_rates])

# Print the second table
print(tabulate(table2_data, headers=["Orch.", "4o", "4o-mini", "Gem-1.5-Pro", "Gem-1.5-Flash", "Avg. ASR", "IPI", "DA"], tablefmt="latex"))

# Prepare data for the third table: table3_data[orchestrator][input_type][model] -> mean success.
table3_data = {
    orchestrator: {
        input_type: {
            model: success_by_input_model.get((orchestrator, input_type, model), MISSING)
            for model in metagpt_df['Model'].unique()
        }
        for input_type in metagpt_df['Input Type'].unique()
    }
    for orchestrator in orchestrators
}

# (value in the 'Orchestrator' column, row label in the LaTeX table)
TABLE3_ROWS = [('metagpt', 'Def.')]
# (group title, value in the 'Input Type' column); titles and data share this order.
TABLE3_COLUMN_GROUPS = [('Local File', 'local-text'), ('Web Text Redirect', 'web-text-redirect')]

group_titles = ' & '.join(
    r"\multicolumn{%d}{c}{\textbf{%s}}" % (len(MODEL_COLUMNS), title)
    for title, _input_type in TABLE3_COLUMN_GROUPS
)
model_titles = ' & '.join([label for _model, label in MODEL_COLUMNS] * len(TABLE3_COLUMN_GROUPS))
latex_lines = [
    r"\begin{table}[h]",
    r"    \centering",
    r"    \begin{tabular}{ll" + "c" * (len(MODEL_COLUMNS) * len(TABLE3_COLUMN_GROUPS)) + "}",
    r"        \toprule",
    r"        & & " + group_titles + r" \\",
    r"        \textbf{} & \textbf{Orch.} & " + model_titles + r" \\",
    r"        \midrule",
    # NOTE(review): the "AG" framework label is carried over unchanged from the original
    # cell; it looks copied from the AutoGen table -- confirm the label wanted for MetaGPT.
    r"        \multirow{%d}{*}{AG}" % len(TABLE3_ROWS),
]
for orchestrator, row_label in TABLE3_ROWS:
    cells = [
        f"{table3_data[orchestrator][input_type][model]:.3f}"
        for _title, input_type in TABLE3_COLUMN_GROUPS
        for model, _label in MODEL_COLUMNS
    ]
    latex_lines.append("        & " + row_label + " & " + ' & '.join(cells) + r" \\")
latex_lines += [r"\end{tabular}", r"\end{table}"]
print('\n'.join(latex_lines))

# Prepare data for the fourth table
table4_data = []
for orchestrator in orchestrators:
    table4_data.append([orchestrator] + [
        round(success_by_input.get((orchestrator, input_type), MISSING), 3)
        for input_type in ('local-text', 'web-text-redirect')
    ])

# Print the fourth table
print(tabulate(table4_data, headers=["Orchestrator", "Local", "Web Redirect"], tablefmt="latex"))


In [None]:
# Mean success rate per error type, reported separately for the 'local-text' and
# 'web-text-redirect' input types, first as grid tables and then as bar charts.
# NOTE(review): `df` is not assigned in any cell visible in this notebook -- confirm
# which trial frame it is meant to be before relying on a fresh-kernel run.

# Mean 'Success' for every (input type, error type) pair
grouped_data = (
    df
    .groupby(['Input Type', 'Error Type'])['Success']
    .mean()
    .reset_index()
)

# One slice per input type of interest
local_text_data = grouped_data.loc[grouped_data['Input Type'].eq('local-text')]
web_text_redirect_data = grouped_data.loc[grouped_data['Input Type'].eq('web-text-redirect')]

# Text report
rate_headers = ["Input Type", "Error Type", "Mean Success Rate"]
print("Analysis of Success in Local Text by Error Type:")
print(tabulate(local_text_data, headers=rate_headers, tablefmt="grid"))

print("\nAnalysis of Success in Web Text Redirect by Error Type:")
print(tabulate(web_text_redirect_data, headers=rate_headers, tablefmt="grid"))

# Bar charts: one figure per input type, drawn in the same order as the tables
import matplotlib.pyplot as plt

for plot_frame, bar_color, plot_title in (
    (local_text_data, 'blue', 'Mean Success Rate for Local Text by Error Type'),
    (web_text_redirect_data, 'green', 'Mean Success Rate for Web Text Redirect by Error Type'),
):
    plt.figure(figsize=(10, 5))
    plt.bar(plot_frame['Error Type'], plot_frame['Success'], color=bar_color)
    plt.xlabel('Error Type')
    plt.ylabel('Mean Success Rate')
    plt.title(plot_title)
    plt.show()


In [None]:
# Same per-error-type success report as above in the notebook, with every
# 'gemini-1.5-flash' trial removed before aggregating.
# NOTE(review): `df` is not assigned in any cell visible in this notebook -- confirm
# which trial frame it is meant to be before relying on a fresh-kernel run.

# Keep every model except gemini-1.5-flash
filtered_df = df.loc[df['Model'].ne('gemini-1.5-flash')]

# Mean 'Success' for every (input type, error type) pair of the remaining trials
grouped_data = (
    filtered_df
    .groupby(['Input Type', 'Error Type'])['Success']
    .mean()
    .reset_index()
)

# One slice per input type of interest
local_text_data = grouped_data.loc[grouped_data['Input Type'].eq('local-text')]
web_text_redirect_data = grouped_data.loc[grouped_data['Input Type'].eq('web-text-redirect')]

# Text report
rate_headers = ["Input Type", "Error Type", "Mean Success Rate"]
print("Analysis of Success in Local Text by Error Type (excluding gemini-1.5-flash):")
print(tabulate(local_text_data, headers=rate_headers, tablefmt="grid"))

print("\nAnalysis of Success in Web Text Redirect by Error Type (excluding gemini-1.5-flash):")
print(tabulate(web_text_redirect_data, headers=rate_headers, tablefmt="grid"))

In [2]:
import json

def _nested_get(record, *keys):
    """Walk `keys` through nested dicts; return None as soon as a level is missing or not a dict."""
    current = record
    for key in keys:
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current


def find_completion(
    target_path="web-text-redirect/selector/gemini-1.5-pro/wordpress/query_2_trial_10.txt",
    batch_path='results/autogen/batch_output.jsonl',
):
    """Print the assistant's JSON analysis for the first batch record that mentions `target_path`.

    Each line of `batch_path` is parsed as one JSON record. A record matches when the
    string form of any entry in request -> body -> messages contains `target_path`.
    For the first matching record that has an assistant choice with non-empty content,
    the content is parsed as JSON and printed as `key: value` lines; if it is not valid
    JSON the raw content is printed instead. Scanning stops after that record.

    Args:
        target_path: Substring to look for in the request messages.
        batch_path: Path of the JSONL file to scan.

    Returns:
        None. All results and diagnostics are printed.
    """
    try:
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(batch_path, 'r', encoding='utf-8') as f:
            for line_number, line in enumerate(f, start=1):
                if not line.strip():
                    continue
                try:
                    result = json.loads(line)
                except json.JSONDecodeError as e:
                    # A single bad line should not abort the whole search.
                    print(f"Skipping malformed JSON on line {line_number}: {e}")
                    continue

                # Look for the target path in the request messages
                messages = _nested_get(result, 'request', 'body', 'messages')
                if not isinstance(messages, list):
                    continue
                if not any(target_path in str(msg) for msg in messages):
                    continue

                # Found a record for the target; pick the assistant's reply
                choices = _nested_get(result, 'response', 'body', 'choices')
                if not isinstance(choices, list):
                    continue
                for choice in choices:
                    message = _nested_get(choice, 'message')
                    if not isinstance(message, dict) or message.get('role') != 'assistant':
                        continue
                    content = message.get('content')
                    if not content:
                        continue
                    try:
                        analysis = json.loads(content)
                    except (json.JSONDecodeError, TypeError):
                        print("Error parsing JSON content")
                        print("Raw content:", content)
                        return
                    print("Analysis for", target_path)
                    print("-" * 80)
                    if isinstance(analysis, dict):
                        for key, value in analysis.items():
                            print(f"{key}: {value}")
                    else:
                        # Valid JSON that is not an object: show it as-is.
                        print(analysis)
                    return
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error reading file: {e}")
        return

    # Reaching this point means no record produced any output.
    print(f"No completion found for {target_path}")

find_completion()