In [None]:
!pip install jailbreakbench
!pip install litellm==0.1.687

In [None]:
def read_artifact_method_model(method, model_name):
    """Load one artifact via ``jbb.read_artifact`` and return its successful jailbreaks.

    Each jailbreak entry of the artifact is dumped to a plain dict. Entries
    whose ``jailbroken`` value is falsy (or missing) are skipped. For the
    remaining entries the bookkeeping fields listed in ``fields_to_remove``
    are dropped and three run-level fields are added: ``model``,
    ``attack_type`` and ``method``.

    Args:
        method: Attack method name forwarded to ``jbb.read_artifact``.
        model_name: Target model name forwarded to ``jbb.read_artifact``.

    Returns:
        A list of dicts, one per successful jailbreak. The list is empty when
        the artifact contains no successful jailbreak.

    Raises:
        Any exception raised by ``jbb.read_artifact`` is propagated unchanged
        (the driver loop in this notebook catches them).
    """
    artifact = jbb.read_artifact(
        method=method,
        model_name=model_name
    )

    # Run-level parameters live once on the artifact, outside .jailbreaks[].
    parameters = artifact.parameters.model_dump()

    # Prefer the values recorded in the artifact, but fall back to the
    # arguments the caller used to look the artifact up, so that records are
    # never tagged with None just because the metadata lacks a key.
    record_model = parameters.get('model') or model_name
    record_method = parameters.get('method') or method
    attack_type = parameters.get('attack_type')

    # Fields that are stripped from every exported record.
    fields_to_remove = (
        "response",
        "number_of_queries",
        "queries_to_jailbreak",
        "prompt_tokens",
        "response_tokens",
        "jailbroken_llama_guard1",
        "jailbroken",
    )

    jailbreak_list = []
    for jailbreak in artifact.jailbreaks:
        jailbreak_dict = jailbreak.model_dump()

        # Keep only the attempts flagged as successful.
        if not jailbreak_dict.get('jailbroken'):
            continue

        for field in fields_to_remove:
            # pop with a default so a missing field is not an error
            jailbreak_dict.pop(field, None)

        jailbreak_dict['model'] = record_model
        jailbreak_dict['attack_type'] = attack_type
        jailbreak_dict['method'] = record_method

        jailbreak_list.append(jailbreak_dict)
    return jailbreak_list

In [None]:
import jailbreakbench as jbb
import json

# Accumulates the successful-jailbreak records of every (method, model) pair.
jailbreak_list = []

methods = ['PAIR', 'JBC', 'GCG', 'DSN']
models = ["gpt-3.5-turbo-1106", "gpt-4-0125-preview", "llama-2-7b-chat-hf", "vicuna-13b-v1.5"]

# Single source of truth for the output file, so the path that is written and
# the path that is reported to the user cannot drift apart.
OUTPUT_PATH = 'jailbreakbench_data.json'

for method in methods:
    for model in models:
        try:
            # Despite the name, this is the list of record dicts returned by
            # read_artifact_method_model, not the raw artifact object.
            artifact = read_artifact_method_model(method, model)
            jailbreak_list += artifact
        except FileNotFoundError:
            print(f"Artifact for {method} + {model} not found, skipping.")
        except Exception as e:
            # Best-effort collection: report the failing combination and move on.
            print(f"Error for {method} + {model}: {e}")


print(f"=== List created with {len(jailbreak_list)} elements ===")

# Show the first few records as a quick sanity check.
examples = jailbreak_list[0:3]
for example in examples:
    for key, value in example.items():
        print(f"{key}:\n{value}\n")

# Optional: save the list to a JSON file
with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
    json.dump(jailbreak_list, f, indent=2, ensure_ascii=False)

print(f"List saved in '{OUTPUT_PATH}' with {len(jailbreak_list)} elements")
