In [34]:
import os
# Experiment grid: one launcher script is generated for every combination of
# the values listed here.
models = ['vicuna']
select_search_values = [45]
cosdecays = [False]
betas = [0]
similarities = ['llm']
folders = ['vicuna_lasthiddenstate']

# Shell script template, rendered with str.format(): {name} fields are filled
# in by Python, while ${{name}} is emitted literally as the bash expansion
# ${name}.
#
# The generated script defines two helpers and then drives rounds 1-3:
#   run <round>               launches ../main.py for one round, waits for it,
#                             and stores the expected result file name in a
#                             .txt marker file.
#   evaluate <method> <json>  launches ../evaluate_<model>.py on that result
#                             file inside the conda environment chosen for
#                             the method.
#
# NOTE(review): the file names expand ${cosdecay}, but the script only exports
# `cosinedecay`, so that part of every name is empty at run time
# ("..._cosinedecay__round_N"). Left as-is so existing result paths keep
# their names; confirm before renaming.
script_template = """#!/bin/bash
export WANDB_MODE=disabled
# Optionally set the cache for transformers
# export TRANSFORMERS_CACHE='YOUR_PATH/huggingface'
export n=25
export model={model}
export device=0
export beta={beta}
export cosinedecay={cosdecay}
export search={search}
export select={select}
export folder={folder}
export similarity={similarity}
# Create results folder if it doesn't exist
if [ ! -d "../results" ]; then
    mkdir "../results"
    echo "Folder '../results' created."
else
    echo "Folder '../results' already exists."
fi
run(){{
    round=$1
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results

    nohup python -u ../main.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.attack=gcg \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.result_prefix="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}" \\
        --config.progressive_goals=False \\
        --config.stop_on_success=False \\
        --config.num_train_models=1 \\
        --config.allow_non_ascii=False \\
        --config.n_train_data=$n \\
        --config.n_steps=500 \\
        --config.test_steps=50 \\
        --config.batch_size=512 \\
        --config.searchalpha=${{search}} \\
        --config.selectalpha=${{select}} \\
        --config.beta=${{beta}} \\
        --config.cosine_decay=${{cosinedecay}} \\
        --config.similarity=${{similarity}} \\
        --config.devices[0]="cuda" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.log 2>&1 &
    pid=$!
    wait $pid
    echo "multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.json" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt
}}

# The body runs in a subshell so the conda environment activated for one
# evaluation method does not leak into later run/evaluate calls.
evaluate() (
    eval_method=$1
    if [ "$eval_method" = harmbench ] || [ "$eval_method" = kw_matching ]; then
        eval "$(conda shell.bash hook)"
        conda activate jailbreak
    elif [ "$eval_method" = llamaguard ]; then
        eval "$(conda shell.bash hook)"
        conda activate llamaguard
    fi

    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval/${{eval_method}}
    nohup python -u ../evaluate_${{model}}.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.logfile="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/$2" \\
        --config.n_train_data=$n \\
        --config.n_test_data=100 \\
        --config.eval_method=${{eval_method}} \\
        --config.devices[0]="cuda:${{device}}" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}/$2.log 2>&1 &
    pid=$!
    wait $pid
)

for round in 1 2 3; do
    export round
    run $round
    json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
    evaluate harmbench $json_file
    evaluate kw_matching $json_file
    evaluate llamaguard $json_file
done
"""
from itertools import product

# Render the launcher template for every combination of the experiment
# settings and write each result into the current working directory.
# product() varies the right-most list fastest, i.e. `models` is the outermost
# loop and `folders` the innermost.
# NOTE: `folder` is not part of the file name, so with more than one entry in
# `folders` each script overwrites the one written for the previous folder.
for model, beta, cosdecay, select_search, similarity, folder in product(
        models, betas, cosdecays, select_search_values, similarities, folders):
    script_content = script_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}123.sh'
    script_path = os.path.join(os.getcwd(), script_filename)

    with open(script_path, 'w') as script_file:
        script_file.write(script_content)

# Slurm batch-file template, rendered with str.format(). It requests a single
# task with two CPUs and one GPU of type NVIDIAA40, then starts the matching
# generated launcher script through srun.
# `-n` is the short form of `--ntasks`, so the task count is given only once
# (`--ntasks 1`); srun therefore starts the launcher exactly once.
# NOTE: every job writes its output to the same "log.log" file.
slurm_template = """#!/bin/bash
#SBATCH --job-name run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}123.sh
#SBATCH --partition critical
#SBATCH --nodes 1
#SBATCH --ntasks 1
#SBATCH --cpus-per-task=2
#SBATCH --mem-per-cpu=16GB
#SBATCH --gres gpu:NVIDIAA40:1
#SBATCH --time 5-00:00:00
#SBATCH --output log.log
#SBATCH --mail-user 17755923416@163.com
#SBATCH --mail-type ALL
#SBATCH -A wangwj1-critical

srun bash run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}123.sh
"""
from itertools import product

# Render the Slurm template for every combination of the experiment settings
# and write each batch file into the current working directory.
# product() varies the right-most list fastest, i.e. `models` is the outermost
# loop and `folders` the innermost.
# NOTE: `folder` is passed to format() but the template has no {folder} field,
# and it is not part of the file name either.
for model, beta, cosdecay, select_search, similarity, folder in product(
        models, betas, cosdecays, select_search_values, similarities, folders):
    slurm_content = slurm_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}123.slurm'
    script_path = os.path.join(os.getcwd(), script_filename)

    with open(script_path, 'w') as script_file:
        script_file.write(slurm_content)

In [35]:
import os
# Experiment grid: one launcher script is generated for every combination of
# the values listed here.
models = ['vicuna']
select_search_values = [45]
cosdecays = [False]
betas = [0]
similarities = ['llm']
folders = ['vicuna_lasthiddenstate']

# Shell script template, rendered with str.format(): {name} fields are filled
# in by Python, while ${{name}} is emitted literally as the bash expansion
# ${name}.
#
# The generated script defines two helpers and then drives rounds 4-6:
#   run <round>               launches ../main.py for one round, waits for it,
#                             and stores the expected result file name in a
#                             .txt marker file.
#   evaluate <method> <json>  launches ../evaluate_<model>.py on that result
#                             file inside the conda environment chosen for
#                             the method.
#
# NOTE(review): the file names expand ${cosdecay}, but the script only exports
# `cosinedecay`, so that part of every name is empty at run time
# ("..._cosinedecay__round_N"). Left as-is so existing result paths keep
# their names; confirm before renaming.
script_template = """#!/bin/bash
export WANDB_MODE=disabled
# Optionally set the cache for transformers
# export TRANSFORMERS_CACHE='YOUR_PATH/huggingface'
export n=25
export model={model}
export device=0
export beta={beta}
export cosinedecay={cosdecay}
export search={search}
export select={select}
export folder={folder}
export similarity={similarity}
# Create results folder if it doesn't exist
if [ ! -d "../results" ]; then
    mkdir "../results"
    echo "Folder '../results' created."
else
    echo "Folder '../results' already exists."
fi
run(){{
    round=$1
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results

    nohup python -u ../main.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.attack=gcg \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.result_prefix="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}" \\
        --config.progressive_goals=False \\
        --config.stop_on_success=False \\
        --config.num_train_models=1 \\
        --config.allow_non_ascii=False \\
        --config.n_train_data=$n \\
        --config.n_steps=500 \\
        --config.test_steps=50 \\
        --config.batch_size=512 \\
        --config.searchalpha=${{search}} \\
        --config.selectalpha=${{select}} \\
        --config.beta=${{beta}} \\
        --config.cosine_decay=${{cosinedecay}} \\
        --config.similarity=${{similarity}} \\
        --config.devices[0]="cuda" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.log 2>&1 &
    pid=$!
    wait $pid
    echo "multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.json" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt
}}

# The body runs in a subshell so the conda environment activated for one
# evaluation method does not leak into later run/evaluate calls.
evaluate() (
    eval_method=$1
    if [ "$eval_method" = harmbench ] || [ "$eval_method" = kw_matching ]; then
        eval "$(conda shell.bash hook)"
        conda activate jailbreak
    elif [ "$eval_method" = llamaguard ]; then
        eval "$(conda shell.bash hook)"
        conda activate llamaguard
    fi

    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval/${{eval_method}}
    nohup python -u ../evaluate_${{model}}.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.logfile="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/$2" \\
        --config.n_train_data=$n \\
        --config.n_test_data=100 \\
        --config.eval_method=${{eval_method}} \\
        --config.devices[0]="cuda:${{device}}" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}/$2.log 2>&1 &
    pid=$!
    wait $pid
)

for round in 4 5 6; do
    export round
    run $round
    json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
    evaluate harmbench $json_file
    evaluate kw_matching $json_file
    evaluate llamaguard $json_file
done
"""
from itertools import product

# Render the launcher template for every combination of the experiment
# settings and write each result into the current working directory.
# product() varies the right-most list fastest, i.e. `models` is the outermost
# loop and `folders` the innermost.
# NOTE: `folder` is not part of the file name, so with more than one entry in
# `folders` each script overwrites the one written for the previous folder.
for model, beta, cosdecay, select_search, similarity, folder in product(
        models, betas, cosdecays, select_search_values, similarities, folders):
    script_content = script_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}456.sh'
    script_path = os.path.join(os.getcwd(), script_filename)

    with open(script_path, 'w') as script_file:
        script_file.write(script_content)

# Slurm batch-file template, rendered with str.format(). It requests a single
# task with two CPUs and one GPU of type NVIDIAA40, then starts the matching
# generated launcher script through srun.
# `-n` is the short form of `--ntasks`, so the task count is given only once
# (`--ntasks 1`); srun therefore starts the launcher exactly once.
# NOTE: every job writes its output to the same "log.log" file.
slurm_template = """#!/bin/bash
#SBATCH --job-name run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}456.sh
#SBATCH --partition critical
#SBATCH --nodes 1
#SBATCH --ntasks 1
#SBATCH --cpus-per-task=2
#SBATCH --mem-per-cpu=16GB
#SBATCH --gres gpu:NVIDIAA40:1
#SBATCH --time 5-00:00:00
#SBATCH --output log.log
#SBATCH --mail-user 17755923416@163.com
#SBATCH --mail-type ALL
#SBATCH -A wangwj1-critical

srun bash run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}456.sh
"""
from itertools import product

# Render the Slurm template for every combination of the experiment settings
# and write each batch file into the current working directory.
# product() varies the right-most list fastest, i.e. `models` is the outermost
# loop and `folders` the innermost.
# NOTE: `folder` is passed to format() but the template has no {folder} field,
# and it is not part of the file name either.
for model, beta, cosdecay, select_search, similarity, folder in product(
        models, betas, cosdecays, select_search_values, similarities, folders):
    slurm_content = slurm_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}456.slurm'
    script_path = os.path.join(os.getcwd(), script_filename)

    with open(script_path, 'w') as script_file:
        script_file.write(slurm_content)

In [36]:
import os
# Experiment grid: one launcher script is generated for every combination of
# the values listed here.
models = ['vicuna']
select_search_values = [30]
cosdecays = [False]
betas = [0]
similarities = ['mean']
folders = ['vicuna_mean']

# Shell script template, rendered with str.format(): {name} fields are filled
# in by Python, while ${{name}} is emitted literally as the bash expansion
# ${name}.
#
# The generated script defines two helpers and then drives rounds 1-3:
#   run <round>               launches ../main.py for one round, waits for it,
#                             and stores the expected result file name in a
#                             .txt marker file.
#   evaluate <method> <json>  launches ../evaluate_<model>.py on that result
#                             file inside the conda environment chosen for
#                             the method.
#
# NOTE(review): the file names expand ${cosdecay}, but the script only exports
# `cosinedecay`, so that part of every name is empty at run time
# ("..._cosinedecay__round_N"). Left as-is so existing result paths keep
# their names; confirm before renaming.
script_template = """#!/bin/bash
export WANDB_MODE=disabled
# Optionally set the cache for transformers
# export TRANSFORMERS_CACHE='YOUR_PATH/huggingface'
export n=25
export model={model}
export device=0
export beta={beta}
export cosinedecay={cosdecay}
export search={search}
export select={select}
export folder={folder}
export similarity={similarity}
# Create results folder if it doesn't exist
if [ ! -d "../results" ]; then
    mkdir "../results"
    echo "Folder '../results' created."
else
    echo "Folder '../results' already exists."
fi
run(){{
    round=$1
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results

    nohup python -u ../main.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.attack=gcg \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.result_prefix="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}" \\
        --config.progressive_goals=False \\
        --config.stop_on_success=False \\
        --config.num_train_models=1 \\
        --config.allow_non_ascii=False \\
        --config.n_train_data=$n \\
        --config.n_steps=500 \\
        --config.test_steps=50 \\
        --config.batch_size=512 \\
        --config.searchalpha=${{search}} \\
        --config.selectalpha=${{select}} \\
        --config.beta=${{beta}} \\
        --config.cosine_decay=${{cosinedecay}} \\
        --config.similarity=${{similarity}} \\
        --config.devices[0]="cuda" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.log 2>&1 &
    pid=$!
    wait $pid
    echo "multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.json" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt
}}

# The body runs in a subshell so the conda environment activated for one
# evaluation method does not leak into later run/evaluate calls.
evaluate() (
    eval_method=$1
    if [ "$eval_method" = harmbench ] || [ "$eval_method" = kw_matching ]; then
        eval "$(conda shell.bash hook)"
        conda activate jailbreak
    elif [ "$eval_method" = llamaguard ]; then
        eval "$(conda shell.bash hook)"
        conda activate llamaguard
    fi

    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval/${{eval_method}}
    nohup python -u ../evaluate_${{model}}.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.logfile="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/$2" \\
        --config.n_train_data=$n \\
        --config.n_test_data=100 \\
        --config.eval_method=${{eval_method}} \\
        --config.devices[0]="cuda:${{device}}" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}/$2.log 2>&1 &
    pid=$!
    wait $pid
)

for round in 1 2 3; do
    export round
    run $round
    json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
    evaluate harmbench $json_file
    evaluate kw_matching $json_file
    evaluate llamaguard $json_file
done
"""
from itertools import product

# Render the launcher template for every combination of the experiment
# settings and write each result into the current working directory.
# product() varies the right-most list fastest, i.e. `models` is the outermost
# loop and `folders` the innermost.
# NOTE: `folder` is not part of the file name, so with more than one entry in
# `folders` each script overwrites the one written for the previous folder.
for model, beta, cosdecay, select_search, similarity, folder in product(
        models, betas, cosdecays, select_search_values, similarities, folders):
    script_content = script_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}123.sh'
    script_path = os.path.join(os.getcwd(), script_filename)

    with open(script_path, 'w') as script_file:
        script_file.write(script_content)

# Slurm batch-file template, rendered with str.format(). It requests a single
# task with two CPUs and one GPU of type NVIDIAA40, then starts the matching
# generated launcher script through srun.
# `-n` is the short form of `--ntasks`, so the task count is given only once
# (`--ntasks 1`); srun therefore starts the launcher exactly once.
# NOTE: every job writes its output to the same "log.log" file.
slurm_template = """#!/bin/bash
#SBATCH --job-name run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}123.sh
#SBATCH --partition critical
#SBATCH --nodes 1
#SBATCH --ntasks 1
#SBATCH --cpus-per-task=2
#SBATCH --mem-per-cpu=16GB
#SBATCH --gres gpu:NVIDIAA40:1
#SBATCH --time 5-00:00:00
#SBATCH --output log.log
#SBATCH --mail-user 17755923416@163.com
#SBATCH --mail-type ALL
#SBATCH -A wangwj1-critical

srun bash run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}123.sh
"""
from itertools import product

# Render the Slurm template for every combination of the experiment settings
# and write each batch file into the current working directory.
# product() varies the right-most list fastest, i.e. `models` is the outermost
# loop and `folders` the innermost.
# NOTE: `folder` is passed to format() but the template has no {folder} field,
# and it is not part of the file name either.
for model, beta, cosdecay, select_search, similarity, folder in product(
        models, betas, cosdecays, select_search_values, similarities, folders):
    slurm_content = slurm_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}123.slurm'
    script_path = os.path.join(os.getcwd(), script_filename)

    with open(script_path, 'w') as script_file:
        script_file.write(slurm_content)

In [37]:
import os
# Experiment grid: one launcher script is generated for every combination of
# the values listed here.
models = ['vicuna']
select_search_values = [30]
cosdecays = [False]
betas = [0]
similarities = ['mean']
folders = ['vicuna_mean']

# Shell script template, rendered with str.format(): {name} fields are filled
# in by Python, while ${{name}} is emitted literally as the bash expansion
# ${name}.
#
# The generated script defines two helpers and then drives rounds 4-6:
#   run <round>               launches ../main.py for one round, waits for it,
#                             and stores the expected result file name in a
#                             .txt marker file.
#   evaluate <method> <json>  launches ../evaluate_<model>.py on that result
#                             file inside the conda environment chosen for
#                             the method.
#
# NOTE(review): the file names expand ${cosdecay}, but the script only exports
# `cosinedecay`, so that part of every name is empty at run time
# ("..._cosinedecay__round_N"). Left as-is so existing result paths keep
# their names; confirm before renaming.
script_template = """#!/bin/bash
export WANDB_MODE=disabled
# Optionally set the cache for transformers
# export TRANSFORMERS_CACHE='YOUR_PATH/huggingface'
export n=25
export model={model}
export device=0
export beta={beta}
export cosinedecay={cosdecay}
export search={search}
export select={select}
export folder={folder}
export similarity={similarity}
# Create results folder if it doesn't exist
if [ ! -d "../results" ]; then
    mkdir "../results"
    echo "Folder '../results' created."
else
    echo "Folder '../results' already exists."
fi
run(){{
    round=$1
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results

    nohup python -u ../main.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.attack=gcg \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.result_prefix="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}" \\
        --config.progressive_goals=False \\
        --config.stop_on_success=False \\
        --config.num_train_models=1 \\
        --config.allow_non_ascii=False \\
        --config.n_train_data=$n \\
        --config.n_steps=500 \\
        --config.test_steps=50 \\
        --config.batch_size=512 \\
        --config.searchalpha=${{search}} \\
        --config.selectalpha=${{select}} \\
        --config.beta=${{beta}} \\
        --config.cosine_decay=${{cosinedecay}} \\
        --config.similarity=${{similarity}} \\
        --config.devices[0]="cuda" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.log 2>&1 &
    pid=$!
    wait $pid
    echo "multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.json" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt
}}

# The body runs in a subshell so the conda environment activated for one
# evaluation method does not leak into later run/evaluate calls.
evaluate() (
    eval_method=$1
    if [ "$eval_method" = harmbench ] || [ "$eval_method" = kw_matching ]; then
        eval "$(conda shell.bash hook)"
        conda activate jailbreak
    elif [ "$eval_method" = llamaguard ]; then
        eval "$(conda shell.bash hook)"
        conda activate llamaguard
    fi

    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval/${{eval_method}}
    nohup python -u ../evaluate_${{model}}.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.logfile="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/$2" \\
        --config.n_train_data=$n \\
        --config.n_test_data=100 \\
        --config.eval_method=${{eval_method}} \\
        --config.devices[0]="cuda:${{device}}" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}/$2.log 2>&1 &
    pid=$!
    wait $pid
)

for round in 4 5 6; do
    export round
    run $round
    json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
    evaluate harmbench $json_file
    evaluate kw_matching $json_file
    evaluate llamaguard $json_file
done
"""
from itertools import product

# Render the launcher template for every combination of the experiment
# settings and write each result into the current working directory.
# product() varies the right-most list fastest, i.e. `models` is the outermost
# loop and `folders` the innermost.
# NOTE: `folder` is not part of the file name, so with more than one entry in
# `folders` each script overwrites the one written for the previous folder.
for model, beta, cosdecay, select_search, similarity, folder in product(
        models, betas, cosdecays, select_search_values, similarities, folders):
    script_content = script_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}456.sh'
    script_path = os.path.join(os.getcwd(), script_filename)

    with open(script_path, 'w') as script_file:
        script_file.write(script_content)

# Slurm batch-file template, rendered with str.format(). It requests a single
# task with two CPUs and one GPU of type NVIDIAA40, then starts the matching
# generated launcher script through srun.
# `-n` is the short form of `--ntasks`, so the task count is given only once
# (`--ntasks 1`); srun therefore starts the launcher exactly once.
# NOTE: every job writes its output to the same "log.log" file.
slurm_template = """#!/bin/bash
#SBATCH --job-name run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}456.sh
#SBATCH --partition critical
#SBATCH --nodes 1
#SBATCH --ntasks 1
#SBATCH --cpus-per-task=2
#SBATCH --mem-per-cpu=16GB
#SBATCH --gres gpu:NVIDIAA40:1
#SBATCH --time 5-00:00:00
#SBATCH --output log.log
#SBATCH --mail-user 17755923416@163.com
#SBATCH --mail-type ALL
#SBATCH -A wangwj1-critical

srun bash run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}456.sh
"""
from itertools import product

# Render the Slurm template for every combination of the experiment settings
# and write each batch file into the current working directory.
# product() varies the right-most list fastest, i.e. `models` is the outermost
# loop and `folders` the innermost.
# NOTE: `folder` is passed to format() but the template has no {folder} field,
# and it is not part of the file name either.
for model, beta, cosdecay, select_search, similarity, folder in product(
        models, betas, cosdecays, select_search_values, similarities, folders):
    slurm_content = slurm_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}456.slurm'
    script_path = os.path.join(os.getcwd(), script_filename)

    with open(script_path, 'w') as script_file:
        script_file.write(slurm_content)

In [38]:
import os
# Experiment grid: one launcher script is generated for every combination of
# the values listed here.
models = ['llama2']
select_search_values = [40]
cosdecays = [False]
betas = [0]
similarities = ['llm']
folders = ['llama2_lasthiddenstate']

# Shell script template, rendered with str.format(): {name} fields are filled
# in by Python, while ${{name}} is emitted literally as the bash expansion
# ${name}.
#
# The generated script defines two helpers and then drives rounds 1-2:
#   run <round>               launches ../main.py for one round, waits for it,
#                             and stores the expected result file name in a
#                             .txt marker file.
#   evaluate <method> <json>  launches ../evaluate_<model>.py on that result
#                             file inside the conda environment chosen for
#                             the method.
#
# NOTE(review): the file names expand ${cosdecay}, but the script only exports
# `cosinedecay`, so that part of every name is empty at run time
# ("..._cosinedecay__round_N"). Left as-is so existing result paths keep
# their names; confirm before renaming.
script_template = """#!/bin/bash
export WANDB_MODE=disabled
# Optionally set the cache for transformers
# export TRANSFORMERS_CACHE='YOUR_PATH/huggingface'
export n=25
export model={model}
export device=0
export beta={beta}
export cosinedecay={cosdecay}
export search={search}
export select={select}
export folder={folder}
export similarity={similarity}
# Create results folder if it doesn't exist
if [ ! -d "../results" ]; then
    mkdir "../results"
    echo "Folder '../results' created."
else
    echo "Folder '../results' already exists."
fi
run(){{
    round=$1
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results

    nohup python -u ../main.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.attack=gcg \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.result_prefix="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}" \\
        --config.progressive_goals=False \\
        --config.stop_on_success=False \\
        --config.num_train_models=1 \\
        --config.allow_non_ascii=False \\
        --config.n_train_data=$n \\
        --config.n_steps=500 \\
        --config.test_steps=50 \\
        --config.batch_size=512 \\
        --config.searchalpha=${{search}} \\
        --config.selectalpha=${{select}} \\
        --config.beta=${{beta}} \\
        --config.cosine_decay=${{cosinedecay}} \\
        --config.similarity=${{similarity}} \\
        --config.devices[0]="cuda" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.log 2>&1 &
    pid=$!
    wait $pid
    echo "multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.json" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt
}}

# The body runs in a subshell so the conda environment activated for one
# evaluation method does not leak into later run/evaluate calls.
evaluate() (
    eval_method=$1
    if [ "$eval_method" = harmbench ] || [ "$eval_method" = kw_matching ]; then
        eval "$(conda shell.bash hook)"
        conda activate jailbreak
    elif [ "$eval_method" = llamaguard ]; then
        eval "$(conda shell.bash hook)"
        conda activate llamaguard
    fi

    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval/${{eval_method}}
    nohup python -u ../evaluate_${{model}}.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.logfile="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/$2" \\
        --config.n_train_data=$n \\
        --config.n_test_data=100 \\
        --config.eval_method=${{eval_method}} \\
        --config.devices[0]="cuda:${{device}}" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}/$2.log 2>&1 &
    pid=$!
    wait $pid
)

for round in 1 2; do
    export round
    run $round
    json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
    evaluate harmbench $json_file
    evaluate kw_matching $json_file
    evaluate llamaguard $json_file
done
"""
# Render one bash launcher per hyper-parameter combination and write it into
# the current working directory.
import itertools  # cell-local import: only the sweep loops need it

# itertools.product walks the lists in the same order as nested for-loops
# (the rightmost list varies fastest).
for model, beta, cosdecay, select_search, similarity, folder in itertools.product(
    models, betas, cosdecays, select_search_values, similarities, folders
):
    # Both the `search` and `select` placeholders receive the same sweep value.
    script_content = script_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    # NOTE(review): `folder` is not part of the file name, so several entries
    # in `folders` would overwrite each other's script.
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}12.sh'
    script_path = os.path.join(os.getcwd(), script_filename)

    # Explicit UTF-8 and LF line endings keep the file runnable by bash no
    # matter which platform generated it.
    with open(script_path, 'w', encoding='utf-8', newline='\n') as script_file:
        script_file.write(script_content)

# Slurm batch-file template. The `{...}` placeholders are filled by
# `str.format`; the rendered file runs the generated launcher script of the
# same name (suffix `12`) through `srun bash`.
# NOTE(review): `-n 2` and `--ntasks 1` look like the same sbatch option given
# two different values -- confirm which task count is intended.
# NOTE(review): `--output log.log` is a fixed relative path, so jobs submitted
# from the same directory presumably share one log file -- confirm.
slurm_template="""#!/bin/bash
#SBATCH --job-name run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}12.sh        
#SBATCH --partition critical
#SBATCH -n 2
#SBATCH --nodes 1
#SBATCH --ntasks 1                  
#SBATCH --cpus-per-task=2
#SBATCH --mem-per-cpu=16GB
#SBATCH --gres gpu:NVIDIAA40:1
#SBATCH --time 5-00:00:00
#SBATCH --output log.log
#SBATCH --mail-user 17755923416@163.com
#SBATCH --mail-type ALL
#SBATCH -A wangwj1-critical

srun bash run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}12.sh
"""
# Render one Slurm batch file per hyper-parameter combination and write it
# into the current working directory, next to the launcher script it runs.
import itertools  # cell-local import: only the sweep loops need it

# itertools.product walks the lists in the same order as nested for-loops
# (the rightmost list varies fastest).
for model, beta, cosdecay, select_search, similarity, folder in itertools.product(
    models, betas, cosdecays, select_search_values, similarities, folders
):
    # Both the `search` and `select` placeholders receive the same sweep value.
    # `folder` is passed but the Slurm template has no `{folder}` placeholder;
    # str.format ignores unused keyword arguments.
    slurm_content = slurm_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    # NOTE(review): `folder` is not part of the file name, so several entries
    # in `folders` would overwrite each other's batch file.
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}12.slurm'
    script_path = os.path.join(os.getcwd(), script_filename)

    # Explicit UTF-8 and LF line endings keep the batch file valid for Slurm
    # no matter which platform generated it.
    with open(script_path, 'w', encoding='utf-8', newline='\n') as script_file:
        script_file.write(slurm_content)

In [39]:
import os
# Sweep values for this cell; each list is substituted into the templates
# defined in this cell.
models = ["llama2"]                   # fills {model}
select_search_values = [40]           # fills both {search} and {select}
cosdecays = [False]                   # fills {cosdecay}
betas = [0]                           # fills {beta}
similarities = ["llm"]                # fills {similarity}
folders = ["llama2_lasthiddenstate"]  # fills {folder}
# Shell script template.
#
# Rendered with `str.format`; the placeholders are {model}, {beta}, {cosdecay},
# {search}, {select}, {folder} and {similarity}. Literal shell braces are
# doubled (`${{var}}`, `run(){{ ... }}`) so that `format` leaves them alone.
# The rendered script defines `run` (launches ../main.py and waits for it) and
# `evaluate` (launches ../evaluate_<model>.py and waits for it), then executes
# rounds 3 and 4, each followed by the three evaluation methods.
#
# Fixes relative to the previous version:
#   * `[ $eval_method=kw_matching ]` was a one-argument test (always true), so
#     the `llamaguard` branch could never run; the comparison now has spaces
#     around `=` and quoted operands.
#   * `export search=` / `export select=` used each other's placeholder.
#
# NOTE(review): the file names reference the shell variable `${{cosdecay}}`,
# which this script never sets (only `cosinedecay` is exported), so that name
# component expands to an empty string unless inherited from the environment.
# Left unchanged to keep artifact names stable.
script_template = """#!/bin/bash
export WANDB_MODE=disabled
# Optionally set the cache for transformers
# export TRANSFORMERS_CACHE='YOUR_PATH/huggingface'
export n=25
export model={model}
export device=0
export beta={beta}
export cosinedecay={cosdecay}
export search={search}
export select={select}
export folder={folder}
export similarity={similarity}
# Create results folder if it doesn't exist
if [ ! -d "../results" ]; then
    mkdir "../results"
    echo "Folder '../results' created."
else
    echo "Folder '../results' already exists."
fi
run(){{
    round=$1
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results

    nohup python -u ../main.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.attack=gcg \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.result_prefix="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}" \\
        --config.progressive_goals=False \\
        --config.stop_on_success=False \\
        --config.num_train_models=1 \\
        --config.allow_non_ascii=False \\
        --config.n_train_data=$n \\
        --config.n_steps=500 \\
        --config.test_steps=50 \\
        --config.batch_size=512 \\
        --config.searchalpha=${{search}} \\
        --config.selectalpha=${{select}} \\
        --config.beta=${{beta}} \\
        --config.cosine_decay=${{cosinedecay}} \\
        --config.similarity=${{similarity}} \\
        --config.devices[0]="cuda" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.log 2>&1 &
    pid=$!
    wait $pid
    echo "multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.json" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt
}}

evaluate(){{
    eval_method=$1
    if [ "$eval_method" = harmbench ] || [ "$eval_method" = kw_matching ]; then
        eval "$(conda shell.bash hook)"
        conda activate jailbreak
    elif [ "$eval_method" = llamaguard ]; then
        eval "$(conda shell.bash hook)"
        conda activate llamaguard
    fi
    
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval/${{eval_method}}
    nohup python -u ../evaluate_${{model}}.py \\
        --config="../configs/transfer_${{model}}.py"\\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.logfile="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/$2"\\
        --config.n_train_data=$n\\
        --config.n_test_data=100\\
        --config.eval_method=${{eval_method}} \\
        --config.devices[0]="cuda:${{device}}" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}/$2.log 2>&1 &
    pid=$!
    wait $pid
}}

export round=3
run $round
json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
evaluate harmbench $json_file
evaluate kw_matching $json_file
evaluate llamaguard $json_file

export round=4
run $round
json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
evaluate harmbench $json_file
evaluate kw_matching $json_file
evaluate llamaguard $json_file
"""
# Render one bash launcher per hyper-parameter combination and write it into
# the current working directory.
import itertools  # cell-local import: only the sweep loops need it

# itertools.product walks the lists in the same order as nested for-loops
# (the rightmost list varies fastest).
for model, beta, cosdecay, select_search, similarity, folder in itertools.product(
    models, betas, cosdecays, select_search_values, similarities, folders
):
    # Both the `search` and `select` placeholders receive the same sweep value.
    script_content = script_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    # NOTE(review): `folder` is not part of the file name, so several entries
    # in `folders` would overwrite each other's script.
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}34.sh'
    script_path = os.path.join(os.getcwd(), script_filename)

    # Explicit UTF-8 and LF line endings keep the file runnable by bash no
    # matter which platform generated it.
    with open(script_path, 'w', encoding='utf-8', newline='\n') as script_file:
        script_file.write(script_content)

# Slurm batch-file template. The `{...}` placeholders are filled by
# `str.format`; the rendered file runs the generated launcher script of the
# same name (suffix `34`) through `srun bash`.
# NOTE(review): `-n 2` and `--ntasks 1` look like the same sbatch option given
# two different values -- confirm which task count is intended.
# NOTE(review): `--output log.log` is a fixed relative path, so jobs submitted
# from the same directory presumably share one log file -- confirm.
slurm_template="""#!/bin/bash
#SBATCH --job-name run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}34.sh        
#SBATCH --partition critical
#SBATCH -n 2
#SBATCH --nodes 1
#SBATCH --ntasks 1                  
#SBATCH --cpus-per-task=2
#SBATCH --mem-per-cpu=16GB
#SBATCH --gres gpu:NVIDIAA40:1
#SBATCH --time 5-00:00:00
#SBATCH --output log.log
#SBATCH --mail-user 17755923416@163.com
#SBATCH --mail-type ALL
#SBATCH -A wangwj1-critical

srun bash run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}34.sh
"""
# Render one Slurm batch file per hyper-parameter combination and write it
# into the current working directory, next to the launcher script it runs.
import itertools  # cell-local import: only the sweep loops need it

# itertools.product walks the lists in the same order as nested for-loops
# (the rightmost list varies fastest).
for model, beta, cosdecay, select_search, similarity, folder in itertools.product(
    models, betas, cosdecays, select_search_values, similarities, folders
):
    # Both the `search` and `select` placeholders receive the same sweep value.
    # `folder` is passed but the Slurm template has no `{folder}` placeholder;
    # str.format ignores unused keyword arguments.
    slurm_content = slurm_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    # NOTE(review): `folder` is not part of the file name, so several entries
    # in `folders` would overwrite each other's batch file.
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}34.slurm'
    script_path = os.path.join(os.getcwd(), script_filename)

    # Explicit UTF-8 and LF line endings keep the batch file valid for Slurm
    # no matter which platform generated it.
    with open(script_path, 'w', encoding='utf-8', newline='\n') as script_file:
        script_file.write(slurm_content)

In [40]:
import os
# Sweep values for this cell; each list is substituted into the templates
# defined in this cell.
models = ["llama2"]                   # fills {model}
select_search_values = [40]           # fills both {search} and {select}
cosdecays = [False]                   # fills {cosdecay}
betas = [0]                           # fills {beta}
similarities = ["llm"]                # fills {similarity}
folders = ["llama2_lasthiddenstate"]  # fills {folder}
# Shell script template.
#
# Rendered with `str.format`; the placeholders are {model}, {beta}, {cosdecay},
# {search}, {select}, {folder} and {similarity}. Literal shell braces are
# doubled (`${{var}}`, `run(){{ ... }}`) so that `format` leaves them alone.
# The rendered script defines `run` (launches ../main.py and waits for it) and
# `evaluate` (launches ../evaluate_<model>.py and waits for it), then executes
# rounds 5 and 6, each followed by the three evaluation methods.
#
# Fixes relative to the previous version:
#   * `[ $eval_method=kw_matching ]` was a one-argument test (always true), so
#     the `llamaguard` branch could never run; the comparison now has spaces
#     around `=` and quoted operands.
#   * `export search=` / `export select=` used each other's placeholder.
#
# NOTE(review): the file names reference the shell variable `${{cosdecay}}`,
# which this script never sets (only `cosinedecay` is exported), so that name
# component expands to an empty string unless inherited from the environment.
# Left unchanged to keep artifact names stable.
script_template = """#!/bin/bash
export WANDB_MODE=disabled
# Optionally set the cache for transformers
# export TRANSFORMERS_CACHE='YOUR_PATH/huggingface'
export n=25
export model={model}
export device=0
export beta={beta}
export cosinedecay={cosdecay}
export search={search}
export select={select}
export folder={folder}
export similarity={similarity}
# Create results folder if it doesn't exist
if [ ! -d "../results" ]; then
    mkdir "../results"
    echo "Folder '../results' created."
else
    echo "Folder '../results' already exists."
fi
run(){{
    round=$1
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results

    nohup python -u ../main.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.attack=gcg \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.result_prefix="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}" \\
        --config.progressive_goals=False \\
        --config.stop_on_success=False \\
        --config.num_train_models=1 \\
        --config.allow_non_ascii=False \\
        --config.n_train_data=$n \\
        --config.n_steps=500 \\
        --config.test_steps=50 \\
        --config.batch_size=512 \\
        --config.searchalpha=${{search}} \\
        --config.selectalpha=${{select}} \\
        --config.beta=${{beta}} \\
        --config.cosine_decay=${{cosinedecay}} \\
        --config.similarity=${{similarity}} \\
        --config.devices[0]="cuda" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.log 2>&1 &
    pid=$!
    wait $pid
    echo "multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.json" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt
}}

evaluate(){{
    eval_method=$1
    if [ "$eval_method" = harmbench ] || [ "$eval_method" = kw_matching ]; then
        eval "$(conda shell.bash hook)"
        conda activate jailbreak
    elif [ "$eval_method" = llamaguard ]; then
        eval "$(conda shell.bash hook)"
        conda activate llamaguard
    fi
    
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval/${{eval_method}}
    nohup python -u ../evaluate_${{model}}.py \\
        --config="../configs/transfer_${{model}}.py"\\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.logfile="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/$2"\\
        --config.n_train_data=$n\\
        --config.n_test_data=100\\
        --config.eval_method=${{eval_method}} \\
        --config.devices[0]="cuda:${{device}}" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}/$2.log 2>&1 &
    pid=$!
    wait $pid
}}

export round=5
run $round
json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
evaluate harmbench $json_file
evaluate kw_matching $json_file
evaluate llamaguard $json_file

export round=6
run $round
json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
evaluate harmbench $json_file
evaluate kw_matching $json_file
evaluate llamaguard $json_file
"""
# Render one bash launcher per hyper-parameter combination and write it into
# the current working directory.
import itertools  # cell-local import: only the sweep loops need it

# itertools.product walks the lists in the same order as nested for-loops
# (the rightmost list varies fastest).
for model, beta, cosdecay, select_search, similarity, folder in itertools.product(
    models, betas, cosdecays, select_search_values, similarities, folders
):
    # Both the `search` and `select` placeholders receive the same sweep value.
    script_content = script_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    # NOTE(review): `folder` is not part of the file name, so several entries
    # in `folders` would overwrite each other's script.
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}56.sh'
    script_path = os.path.join(os.getcwd(), script_filename)

    # Explicit UTF-8 and LF line endings keep the file runnable by bash no
    # matter which platform generated it.
    with open(script_path, 'w', encoding='utf-8', newline='\n') as script_file:
        script_file.write(script_content)

# Slurm batch-file template. The `{...}` placeholders are filled by
# `str.format`; the rendered file runs the generated launcher script of the
# same name (suffix `56`) through `srun bash`.
# NOTE(review): `-n 2` and `--ntasks 1` look like the same sbatch option given
# two different values -- confirm which task count is intended.
# NOTE(review): `--output log.log` is a fixed relative path, so jobs submitted
# from the same directory presumably share one log file -- confirm.
slurm_template="""#!/bin/bash
#SBATCH --job-name run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}56.sh        
#SBATCH --partition critical
#SBATCH -n 2
#SBATCH --nodes 1
#SBATCH --ntasks 1                  
#SBATCH --cpus-per-task=2
#SBATCH --mem-per-cpu=16GB
#SBATCH --gres gpu:NVIDIAA40:1
#SBATCH --time 5-00:00:00
#SBATCH --output log.log
#SBATCH --mail-user 17755923416@163.com
#SBATCH --mail-type ALL
#SBATCH -A wangwj1-critical

srun bash run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}56.sh
"""
# Render one Slurm batch file per hyper-parameter combination and write it
# into the current working directory, next to the launcher script it runs.
import itertools  # cell-local import: only the sweep loops need it

# itertools.product walks the lists in the same order as nested for-loops
# (the rightmost list varies fastest).
for model, beta, cosdecay, select_search, similarity, folder in itertools.product(
    models, betas, cosdecays, select_search_values, similarities, folders
):
    # Both the `search` and `select` placeholders receive the same sweep value.
    # `folder` is passed but the Slurm template has no `{folder}` placeholder;
    # str.format ignores unused keyword arguments.
    slurm_content = slurm_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    # NOTE(review): `folder` is not part of the file name, so several entries
    # in `folders` would overwrite each other's batch file.
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}56.slurm'
    script_path = os.path.join(os.getcwd(), script_filename)

    # Explicit UTF-8 and LF line endings keep the batch file valid for Slurm
    # no matter which platform generated it.
    with open(script_path, 'w', encoding='utf-8', newline='\n') as script_file:
        script_file.write(slurm_content)

In [41]:
import os
# Sweep values for this cell; each list is substituted into the templates
# defined in this cell.
models = ["llama2"]            # fills {model}
select_search_values = [25]    # fills both {search} and {select}
cosdecays = [False]            # fills {cosdecay}
betas = [0]                    # fills {beta}
similarities = ["mean"]        # fills {similarity}
folders = ["llama2_mean"]      # fills {folder}
# Shell script template.
#
# Rendered with `str.format`; the placeholders are {model}, {beta}, {cosdecay},
# {search}, {select}, {folder} and {similarity}. Literal shell braces are
# doubled (`${{var}}`, `run(){{ ... }}`) so that `format` leaves them alone.
# The rendered script defines `run` (launches ../main.py and waits for it) and
# `evaluate` (launches ../evaluate_<model>.py and waits for it), then executes
# rounds 1 and 2, each followed by the three evaluation methods.
#
# Fixes relative to the previous version:
#   * `[ $eval_method=kw_matching ]` was a one-argument test (always true), so
#     the `llamaguard` branch could never run; the comparison now has spaces
#     around `=` and quoted operands.
#   * `export search=` / `export select=` used each other's placeholder.
#
# NOTE(review): the file names reference the shell variable `${{cosdecay}}`,
# which this script never sets (only `cosinedecay` is exported), so that name
# component expands to an empty string unless inherited from the environment.
# Left unchanged to keep artifact names stable.
script_template = """#!/bin/bash
export WANDB_MODE=disabled
# Optionally set the cache for transformers
# export TRANSFORMERS_CACHE='YOUR_PATH/huggingface'
export n=25
export model={model}
export device=0
export beta={beta}
export cosinedecay={cosdecay}
export search={search}
export select={select}
export folder={folder}
export similarity={similarity}
# Create results folder if it doesn't exist
if [ ! -d "../results" ]; then
    mkdir "../results"
    echo "Folder '../results' created."
else
    echo "Folder '../results' already exists."
fi
run(){{
    round=$1
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results

    nohup python -u ../main.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.attack=gcg \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.result_prefix="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}" \\
        --config.progressive_goals=False \\
        --config.stop_on_success=False \\
        --config.num_train_models=1 \\
        --config.allow_non_ascii=False \\
        --config.n_train_data=$n \\
        --config.n_steps=500 \\
        --config.test_steps=50 \\
        --config.batch_size=512 \\
        --config.searchalpha=${{search}} \\
        --config.selectalpha=${{select}} \\
        --config.beta=${{beta}} \\
        --config.cosine_decay=${{cosinedecay}} \\
        --config.similarity=${{similarity}} \\
        --config.devices[0]="cuda" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.log 2>&1 &
    pid=$!
    wait $pid
    echo "multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.json" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt
}}

evaluate(){{
    eval_method=$1
    if [ "$eval_method" = harmbench ] || [ "$eval_method" = kw_matching ]; then
        eval "$(conda shell.bash hook)"
        conda activate jailbreak
    elif [ "$eval_method" = llamaguard ]; then
        eval "$(conda shell.bash hook)"
        conda activate llamaguard
    fi
    
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval/${{eval_method}}
    nohup python -u ../evaluate_${{model}}.py \\
        --config="../configs/transfer_${{model}}.py"\\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.logfile="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/$2"\\
        --config.n_train_data=$n\\
        --config.n_test_data=100\\
        --config.eval_method=${{eval_method}} \\
        --config.devices[0]="cuda:${{device}}" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}/$2.log 2>&1 &
    pid=$!
    wait $pid
}}

export round=1
run $round
json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
evaluate harmbench $json_file
evaluate kw_matching $json_file
evaluate llamaguard $json_file

export round=2
run $round
json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
evaluate harmbench $json_file
evaluate kw_matching $json_file
evaluate llamaguard $json_file
"""
# Render one bash launcher per hyper-parameter combination and write it into
# the current working directory.
import itertools  # cell-local import: only the sweep loops need it

# itertools.product walks the lists in the same order as nested for-loops
# (the rightmost list varies fastest).
for model, beta, cosdecay, select_search, similarity, folder in itertools.product(
    models, betas, cosdecays, select_search_values, similarities, folders
):
    # Both the `search` and `select` placeholders receive the same sweep value.
    script_content = script_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    # NOTE(review): `folder` is not part of the file name, so several entries
    # in `folders` would overwrite each other's script.
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}12.sh'
    script_path = os.path.join(os.getcwd(), script_filename)

    # Explicit UTF-8 and LF line endings keep the file runnable by bash no
    # matter which platform generated it.
    with open(script_path, 'w', encoding='utf-8', newline='\n') as script_file:
        script_file.write(script_content)

# Slurm batch-file template. The `{...}` placeholders are filled by
# `str.format`; the rendered file runs the generated launcher script of the
# same name (suffix `12`) through `srun bash`.
# NOTE(review): `-n 2` and `--ntasks 1` look like the same sbatch option given
# two different values -- confirm which task count is intended.
# NOTE(review): `--output log.log` is a fixed relative path, so jobs submitted
# from the same directory presumably share one log file -- confirm.
slurm_template="""#!/bin/bash
#SBATCH --job-name run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}12.sh        
#SBATCH --partition critical
#SBATCH -n 2
#SBATCH --nodes 1
#SBATCH --ntasks 1                  
#SBATCH --cpus-per-task=2
#SBATCH --mem-per-cpu=16GB
#SBATCH --gres gpu:NVIDIAA40:1
#SBATCH --time 5-00:00:00
#SBATCH --output log.log
#SBATCH --mail-user 17755923416@163.com
#SBATCH --mail-type ALL
#SBATCH -A wangwj1-critical

srun bash run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}12.sh
"""
# Render one Slurm batch file per hyper-parameter combination and write it
# into the current working directory, next to the launcher script it runs.
import itertools  # cell-local import: only the sweep loops need it

# itertools.product walks the lists in the same order as nested for-loops
# (the rightmost list varies fastest).
for model, beta, cosdecay, select_search, similarity, folder in itertools.product(
    models, betas, cosdecays, select_search_values, similarities, folders
):
    # Both the `search` and `select` placeholders receive the same sweep value.
    # `folder` is passed but the Slurm template has no `{folder}` placeholder;
    # str.format ignores unused keyword arguments.
    slurm_content = slurm_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    # NOTE(review): `folder` is not part of the file name, so several entries
    # in `folders` would overwrite each other's batch file.
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}12.slurm'
    script_path = os.path.join(os.getcwd(), script_filename)

    # Explicit UTF-8 and LF line endings keep the batch file valid for Slurm
    # no matter which platform generated it.
    with open(script_path, 'w', encoding='utf-8', newline='\n') as script_file:
        script_file.write(slurm_content)

In [42]:
import os
# Sweep values for this cell; each list is substituted into the templates
# defined in this cell.
models = ["llama2"]            # fills {model}
select_search_values = [25]    # fills both {search} and {select}
cosdecays = [False]            # fills {cosdecay}
betas = [0]                    # fills {beta}
similarities = ["mean"]        # fills {similarity}
folders = ["llama2_mean"]      # fills {folder}
# Shell script template.
#
# Rendered with `str.format`; the placeholders are {model}, {beta}, {cosdecay},
# {search}, {select}, {folder} and {similarity}. Literal shell braces are
# doubled (`${{var}}`, `run(){{ ... }}`) so that `format` leaves them alone.
# The rendered script defines `run` (launches ../main.py and waits for it) and
# `evaluate` (launches ../evaluate_<model>.py and waits for it), then executes
# rounds 3 and 4, each followed by the three evaluation methods.
#
# Fixes relative to the previous version:
#   * `[ $eval_method=kw_matching ]` was a one-argument test (always true), so
#     the `llamaguard` branch could never run; the comparison now has spaces
#     around `=` and quoted operands.
#   * `export search=` / `export select=` used each other's placeholder.
#
# NOTE(review): the file names reference the shell variable `${{cosdecay}}`,
# which this script never sets (only `cosinedecay` is exported), so that name
# component expands to an empty string unless inherited from the environment.
# Left unchanged to keep artifact names stable.
script_template = """#!/bin/bash
export WANDB_MODE=disabled
# Optionally set the cache for transformers
# export TRANSFORMERS_CACHE='YOUR_PATH/huggingface'
export n=25
export model={model}
export device=0
export beta={beta}
export cosinedecay={cosdecay}
export search={search}
export select={select}
export folder={folder}
export similarity={similarity}
# Create results folder if it doesn't exist
if [ ! -d "../results" ]; then
    mkdir "../results"
    echo "Folder '../results' created."
else
    echo "Folder '../results' already exists."
fi
run(){{
    round=$1
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results

    nohup python -u ../main.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.attack=gcg \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.result_prefix="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}" \\
        --config.progressive_goals=False \\
        --config.stop_on_success=False \\
        --config.num_train_models=1 \\
        --config.allow_non_ascii=False \\
        --config.n_train_data=$n \\
        --config.n_steps=500 \\
        --config.test_steps=50 \\
        --config.batch_size=512 \\
        --config.searchalpha=${{search}} \\
        --config.selectalpha=${{select}} \\
        --config.beta=${{beta}} \\
        --config.cosine_decay=${{cosinedecay}} \\
        --config.similarity=${{similarity}} \\
        --config.devices[0]="cuda" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.log 2>&1 &
    pid=$!
    wait $pid
    echo "multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.json" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt
}}

evaluate(){{
    eval_method=$1
    if [ "$eval_method" = harmbench ] || [ "$eval_method" = kw_matching ]; then
        eval "$(conda shell.bash hook)"
        conda activate jailbreak
    elif [ "$eval_method" = llamaguard ]; then
        eval "$(conda shell.bash hook)"
        conda activate llamaguard
    fi
    
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval/${{eval_method}}
    nohup python -u ../evaluate_${{model}}.py \\
        --config="../configs/transfer_${{model}}.py"\\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.logfile="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/$2"\\
        --config.n_train_data=$n\\
        --config.n_test_data=100\\
        --config.eval_method=${{eval_method}} \\
        --config.devices[0]="cuda:${{device}}" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}/$2.log 2>&1 &
    pid=$!
    wait $pid
}}

export round=3
run $round
json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
evaluate harmbench $json_file
evaluate kw_matching $json_file
evaluate llamaguard $json_file

export round=4
run $round
json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
evaluate harmbench $json_file
evaluate kw_matching $json_file
evaluate llamaguard $json_file
"""
# Render one bash launcher per hyper-parameter combination and write it into
# the current working directory.
import itertools  # cell-local import: only the sweep loops need it

# itertools.product walks the lists in the same order as nested for-loops
# (the rightmost list varies fastest).
for model, beta, cosdecay, select_search, similarity, folder in itertools.product(
    models, betas, cosdecays, select_search_values, similarities, folders
):
    # Both the `search` and `select` placeholders receive the same sweep value.
    script_content = script_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    # NOTE(review): `folder` is not part of the file name, so several entries
    # in `folders` would overwrite each other's script.
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}34.sh'
    script_path = os.path.join(os.getcwd(), script_filename)

    # Explicit UTF-8 and LF line endings keep the file runnable by bash no
    # matter which platform generated it.
    with open(script_path, 'w', encoding='utf-8', newline='\n') as script_file:
        script_file.write(script_content)

# Slurm batch-file template. The `{...}` placeholders are filled by
# `str.format`; the rendered file runs the generated launcher script of the
# same name (suffix `34`) through `srun bash`.
# NOTE(review): `-n 2` and `--ntasks 1` look like the same sbatch option given
# two different values -- confirm which task count is intended.
# NOTE(review): `--output log.log` is a fixed relative path, so jobs submitted
# from the same directory presumably share one log file -- confirm.
slurm_template="""#!/bin/bash
#SBATCH --job-name run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}34.sh        
#SBATCH --partition critical
#SBATCH -n 2
#SBATCH --nodes 1
#SBATCH --ntasks 1                  
#SBATCH --cpus-per-task=2
#SBATCH --mem-per-cpu=16GB
#SBATCH --gres gpu:NVIDIAA40:1
#SBATCH --time 5-00:00:00
#SBATCH --output log.log
#SBATCH --mail-user 17755923416@163.com
#SBATCH --mail-type ALL
#SBATCH -A wangwj1-critical

srun bash run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}34.sh
"""
# Render one Slurm batch file per hyper-parameter combination and write it
# into the current working directory, next to the launcher script it runs.
import itertools  # cell-local import: only the sweep loops need it

# itertools.product walks the lists in the same order as nested for-loops
# (the rightmost list varies fastest).
for model, beta, cosdecay, select_search, similarity, folder in itertools.product(
    models, betas, cosdecays, select_search_values, similarities, folders
):
    # Both the `search` and `select` placeholders receive the same sweep value.
    # `folder` is passed but the Slurm template has no `{folder}` placeholder;
    # str.format ignores unused keyword arguments.
    slurm_content = slurm_template.format(
        model=model,
        select=select_search,
        search=select_search,
        cosdecay=cosdecay,
        beta=beta,
        folder=folder,
        similarity=similarity,
    )
    # NOTE(review): `folder` is not part of the file name, so several entries
    # in `folders` would overwrite each other's batch file.
    script_filename = f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}_select_{select_search}_cosinedecay_{cosdecay}_similarity_{similarity}34.slurm'
    script_path = os.path.join(os.getcwd(), script_filename)

    # Explicit UTF-8 and LF line endings keep the batch file valid for Slurm
    # no matter which platform generated it.
    with open(script_path, 'w', encoding='utf-8', newline='\n') as script_file:
        script_file.write(slurm_content)

In [43]:
import os
# Sweep values for this cell; each list is substituted into the script
# template that follows.
models = ["llama2"]            # fills {model}
select_search_values = [25]    # presumably fills both {search} and {select}, as in the earlier cells -- this cell's format call is not visible here
cosdecays = [False]            # fills {cosdecay}
betas = [0]                    # fills {beta}
similarities = ["mean"]        # fills {similarity}
folders = ["llama2_mean"]      # fills {folder}
# Shell script template.
#
# Rendered with str.format(); the placeholders are {model}, {beta}, {cosdecay},
# {select}, {search}, {folder} and {similarity}. Literal bash braces are
# doubled ({{ }}) so that format() leaves them in place.
#
# The generated script defines two functions and then executes rounds 5 and 6:
#   run <round>               launches ../main.py and waits for it, then stores
#                             the expected result file name in a .txt file.
#   evaluate <method> <file>  activates the conda environment for the method
#                             and launches ../evaluate_<model>.py on <file>.
#
# NOTE(review): the file names reference the bash variable ${cosdecay}, but
# only `cosinedecay` is exported, so that segment expands to an empty string.
# It is left unchanged here so that result/log file names stay the same.
script_template = """#!/bin/bash
export WANDB_MODE=disabled
# Optionally set the cache for transformers
# export TRANSFORMERS_CACHE='YOUR_PATH/huggingface'
export n=25
export model={model}
export device=0
export beta={beta}
export cosinedecay={cosdecay}
export search={search}
export select={select}
export folder={folder}
export similarity={similarity}
# Create results folder if it doesn't exist
if [ ! -d "../results" ]; then
    mkdir "../results"
    echo "Folder '../results' created."
else
    echo "Folder '../results' already exists."
fi
run(){{
    round=$1
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results

    nohup python -u ../main.py \\
        --config="../configs/transfer_${{model}}.py" \\
        --config.attack=gcg \\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.result_prefix="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}" \\
        --config.progressive_goals=False \\
        --config.stop_on_success=False \\
        --config.num_train_models=1 \\
        --config.allow_non_ascii=False \\
        --config.n_train_data=$n \\
        --config.n_steps=500 \\
        --config.test_steps=50 \\
        --config.batch_size=512 \\
        --config.searchalpha=${{search}} \\
        --config.selectalpha=${{select}} \\
        --config.beta=${{beta}} \\
        --config.cosine_decay=${{cosinedecay}} \\
        --config.similarity=${{similarity}} \\
        --config.devices[0]="cuda" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/launch_scripts/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.log 2>&1 &
    pid=$!
    wait $pid
    echo "multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.json" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt
}}

evaluate(){{
    # Subshell: keep the conda env activated here from leaking into later calls.
    (
    eval_method=$1
    if [ "$eval_method" = harmbench ] || [ "$eval_method" = kw_matching ]; then
        eval "$(conda shell.bash hook)"
        conda activate jailbreak
    elif [ "$eval_method" = llamaguard ]; then
        eval "$(conda shell.bash hook)"
        conda activate llamaguard
    fi
    
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}
    mkdir -p $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval/${{eval_method}}
    nohup python -u ../evaluate_${{model}}.py \\
        --config="../configs/transfer_${{model}}.py"\\
        --config.train_data="../../data/advbench/harmful_behaviors.csv" \\
        --config.logfile="$HOME/llm-attacks/experiments/${{folder}}/round${{round}}/results/$2"\\
        --config.n_train_data=$n\\
        --config.n_test_data=100\\
        --config.eval_method=${{eval_method}} \\
        --config.devices[0]="cuda:${{device}}" > $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/eval_scripts/${{eval_method}}/$2.log 2>&1 &
    pid=$!
    wait $pid
    )
}}

export round=5
run $round
json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
evaluate harmbench $json_file
evaluate kw_matching $json_file
evaluate llamaguard $json_file

export round=6
run $round
json_file=`cat $HOME/llm-attacks/experiments/${{folder}}/round${{round}}/multiple_${{model}}_search_${{search}}_beta_${{beta}}_select_${{select}}_cosinedecay_${{cosdecay}}_round_${{round}}.txt`
evaluate harmbench $json_file
evaluate kw_matching $json_file
evaluate llamaguard $json_file
"""
# Render the shell template once per parameter combination and save it in the
# current working directory. `folder` is passed to the template but is not part
# of the file name, so several folders would overwrite the same file.
for model, beta, cosdecay, select_search, similarity, folder in (
    (m, b, c, s, sim, f)
    for m in models
    for b in betas
    for c in cosdecays
    for s in select_search_values
    for sim in similarities
    for f in folders
):
    script_content = script_template.format(
        model=model,
        beta=beta,
        cosdecay=cosdecay,
        similarity=similarity,
        folder=folder,
        # `search` and `select` are both filled from the same list entry.
        search=select_search,
        select=select_search,
    )
    script_filename = (
        f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}'
        f'_select_{select_search}_cosinedecay_{cosdecay}'
        f'_similarity_{similarity}56.sh'
    )
    script_path = os.path.join(os.getcwd(), script_filename)
    with open(script_path, 'w') as script_file:
        script_file.write(script_content)

# SLURM submission template for the "...56.sh" script written in this cell.
# Rendered with str.format(); it uses the placeholders {model}, {search},
# {beta}, {select}, {cosdecay} and {similarity}. One literal per line keeps
# the trailing whitespace of the original text visible.
#
# NOTE(review): "-n" is the short form of "--ntasks", so "-n 2" and
# "--ntasks 1" ask for different task counts - confirm which one is intended.
# NOTE(review): every generated job writes to the same "log.log".
slurm_template = (
    "#!/bin/bash\n"
    "#SBATCH --job-name run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}56.sh        \n"
    "#SBATCH --partition critical\n"
    "#SBATCH -n 2\n"
    "#SBATCH --nodes 1\n"
    "#SBATCH --ntasks 1                  \n"
    "#SBATCH --cpus-per-task=2\n"
    "#SBATCH --mem-per-cpu=16GB\n"
    "#SBATCH --gres gpu:NVIDIAA40:1\n"
    "#SBATCH --time 5-00:00:00\n"
    "#SBATCH --output log.log\n"
    "#SBATCH --mail-user 17755923416@163.com\n"
    "#SBATCH --mail-type ALL\n"
    "#SBATCH -A wangwj1-critical\n"
    "\n"
    "srun bash run_gcg_multiple_{model}_search_{search}_beta_{beta}_select_{select}_cosinedecay_{cosdecay}_similarity_{similarity}56.sh\n"
)
# Write one SLURM submission file per parameter combination into the current
# working directory. `folder` is passed to format() but appears neither in the
# template nor in the file name, so several folders would rewrite the same file.
for model, beta, cosdecay, select_search, similarity, folder in (
    (m, b, c, s, sim, f)
    for m in models
    for b in betas
    for c in cosdecays
    for s in select_search_values
    for sim in similarities
    for f in folders
):
    slurm_content = slurm_template.format(
        model=model,
        beta=beta,
        cosdecay=cosdecay,
        similarity=similarity,
        folder=folder,
        # `search` and `select` are both filled from the same list entry.
        search=select_search,
        select=select_search,
    )
    script_filename = (
        f'run_gcg_multiple_{model}_search_{select_search}_beta_{beta}'
        f'_select_{select_search}_cosinedecay_{cosdecay}'
        f'_similarity_{similarity}56.slurm'
    )
    script_path = os.path.join(os.getcwd(), script_filename)
    with open(script_path, 'w') as script_file:
        script_file.write(slurm_content)