In [None]:
!pip install --target=$nb_path transformers

In [None]:
!pip install --target=$nb_path ctransformers[cuda]

In [1]:
import sys
# Directory used as the `pip install --target` destination by the install cells.
nb_path = '/home/jovyan/lib'

# Give that directory top priority on the import path. Any existing entries
# are dropped first so that re-running this cell does not pile up duplicates.
sys.path[:] = [entry for entry in sys.path if entry != nb_path]
sys.path.insert(0, nb_path)

In [None]:
from ctransformers import AutoModelForCausalLM

# Options used to load the model for the interactive chat cell.
# gpu_layers is the number of layers to offload to GPU; set it to 0 if no GPU
# acceleration is available on your system.
vicuna_load_options = {
    "model_file": "vicuna-33b.Q4_K_M.gguf",
    "model_type": "llama",
    "gpu_layers": 400,
    "context_length": 8000,
}
llm = AutoModelForCausalLM.from_pretrained("TheBloke/vicuna-33B-GGUF", **vicuna_load_options)

In [None]:
# System prompt for the chat; it already ends with the "User: " tag of the first turn.
initial_prompt = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. User: "

# Typing one of these (case-insensitive) ends the chat cleanly.
EXIT_COMMANDS = {"exit", "quit"}

# Running transcript sent to the model on every turn, so follow-up questions
# keep the system prompt and the earlier exchanges as context.
# NOTE(review): the transcript grows with every turn; a very long chat may
# exceed the context_length the model was loaded with - confirm whether
# truncation is needed.
conversation = initial_prompt

while True:
    try:
        user_input = input("User: ")
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel or closing stdin ends the chat without a traceback.
        break

    if user_input.strip().lower() in EXIT_COMMANDS:
        break

    conversation += user_input + "\nAssistant:"
    assistant_reply = llm(conversation).strip()
    print(assistant_reply + "\n")

    # Record the answer and open the next user turn.
    conversation += " " + assistant_reply + "\nUser: "

In [None]:
# PHASE 1A

import csv
import os
from ctransformers import AutoModelForCausalLM

# Label of the evaluated model; it names the output folder and output files
# and is written into the "model" column of every result row.
MODEL_NAME = "Vicuna-33B-V1.3-GGUF"

# Jailbreak results are stored under output_files/<MODEL_NAME>/jailbreak.
MODEL_DIRECTORY = os.path.join('output_files', MODEL_NAME)
MODEL_JAILBREAK_DIR = os.path.join('output_files', MODEL_NAME, 'jailbreak')

# Input CSV that supplies the prompt and question columns.
FORBIDDEN_QUESTION_SET_PATH = "input_files/forbidden_question_set_new.csv"

# function to create a directory if not exists
def create_directory(directory_path):
    """Make sure ``directory_path`` exists and report the outcome on stdout.

    A missing path is created with ``os.makedirs`` (intermediate directories
    included); a path that already exists is left untouched.
    """
    if os.path.exists(directory_path):
        print(f"Directory '{directory_path}' already exists.")
        return

    os.makedirs(directory_path)
    print(f"Directory '{directory_path}' created successfully.")

# Create folders if not exists
create_directory(MODEL_DIRECTORY)
create_directory(MODEL_JAILBREAK_DIR)

# Sampling temperatures to sweep; every temperature gets its own output file.
temperatures = [0.0, 0.25, 0.5, 0.75, 1.0]

# Column positions inside the forbidden-question CSV.
# each row: input_id, community_id, community_name, prompt_type, prompt, content_policy_id, content_policy_name, q_id, question
INPUT_ID_COLUMN = 0
PROMPT_COLUMN = 4
QUESTION_COLUMN = 8

OUTPUT_HEADER = ["input_id", "model", "temperature", "response"]


def _append_csv_row(csv_path, row):
    """Append one row to ``csv_path``.

    The file is opened and closed for this single write, so each response is
    flushed to disk before the next one is generated.
    """
    with open(csv_path, 'a', newline='', encoding='utf-8') as csv_file:
        csv.writer(csv_file).writerow(row)


# Read every (input_id, prompt) pair once, up front: the inputs do not depend
# on the temperature, and the input file is closed before generation starts.
forbidden_question_set_prompts = []
with open(FORBIDDEN_QUESTION_SET_PATH, encoding='utf-8') as forbidden_question_set_file:
    forbidden_question_set_reader = csv.reader(forbidden_question_set_file)
    next(forbidden_question_set_reader, None)  # skip the header row (tolerates an empty file)

    # iterating through all possible prompts + questions (includes repeat 5 times)
    for row in forbidden_question_set_reader:
        # Jailbreak prompt first, then the question on a new line.
        # NOTE: earlier revisions of this cell joined the two with the literal
        # characters "/n" (a typo for the newline escape); responses collected
        # with that version were generated from slightly different prompts.
        input_prompt = row[PROMPT_COLUMN] + "\n" + row[QUESTION_COLUMN]
        forbidden_question_set_prompts.append([row[INPUT_ID_COLUMN], input_prompt])

# Load the model once. The temperature is supplied per generation call below,
# so the weights do not have to be reloaded for every temperature.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/vicuna-33B-GGUF",
    model_file="vicuna-33b.Q4_K_M.gguf",
    model_type="llama",
    gpu_layers=400,
    context_length=14000
)

for temperature in temperatures:

    # each temperature got one file
    output_file_name = os.path.join(MODEL_JAILBREAK_DIR, MODEL_NAME + " " + str(temperature) + '.csv')

    # Write the header only when the file is new or empty, so re-running the
    # cell does not insert extra header rows in the middle of the data.
    if not os.path.exists(output_file_name) or os.path.getsize(output_file_name) == 0:
        _append_csv_row(output_file_name, OUTPUT_HEADER)

    # send every prompt to the model and store the response immediately
    for input_id, input_prompt in forbidden_question_set_prompts:
        output_response = llm("User: " + input_prompt + "\nAssistant:", temperature=temperature)
        _append_csv_row(output_file_name, [input_id, MODEL_NAME, temperature, output_response])

print("Process Completed")

Directory 'output_files/Vicuna-33B-V1.3-GGUF' already exists.
Directory 'output_files/Vicuna-33B-V1.3-GGUF/jailbreak' already exists.


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
#PHASE 1A CONTINUED

import csv
import os
from ctransformers import AutoModelForCausalLM

# Label of the evaluated model; it names the output folder and output file
# and is written into every result row.
MODEL_NAME = "Vicuna-33B-V1.3-GGUF"

# Jailbreak results are stored under output_files/<MODEL_NAME>/jailbreak.
MODEL_DIRECTORY = os.path.join('output_files', MODEL_NAME)
MODEL_JAILBREAK_DIR = os.path.join('output_files', MODEL_NAME, 'jailbreak')

# Input CSV for the continuation run; supplies the prompt and question columns.
FORBIDDEN_QUESTION_SET_PATH = "input_files/forbidden_question_set_continued.csv"

# function to create a directory if not exists
def create_directory(directory_path):
    """Make sure ``directory_path`` exists and report the outcome on stdout.

    A missing path is created with ``os.makedirs`` (intermediate directories
    included); a path that already exists is left untouched.
    """
    if os.path.exists(directory_path):
        print(f"Directory '{directory_path}' already exists.")
        return

    os.makedirs(directory_path)
    print(f"Directory '{directory_path}' created successfully.")

# Create folders if not exists
create_directory(MODEL_DIRECTORY)
create_directory(MODEL_JAILBREAK_DIR)

# The continuation run covers a single temperature.
TEMPERATURE = 1.0

# Column positions inside the forbidden-question CSV.
# each row: input_id, community_id, community_name, prompt_type, prompt, content_policy_id, content_policy_name, q_id, question
INPUT_ID_COLUMN = 0
PROMPT_COLUMN = 4
QUESTION_COLUMN = 8

# initialise model with the current temperature
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/vicuna-33B-GGUF",
    model_file="vicuna-33b.Q4_K_M.gguf",
    model_type="llama",
    gpu_layers=300,
    context_length=14000,
    temperature=TEMPERATURE
)

# Rows are appended to the per-temperature file; no header row is written here.
output_file_name = os.path.join(MODEL_JAILBREAK_DIR, MODEL_NAME + " " + str(TEMPERATURE) + '.csv')

# Read every (input_id, prompt) pair up front so the input file is closed
# before the long-running generation starts.
forbidden_question_set_prompts = []
with open(FORBIDDEN_QUESTION_SET_PATH, encoding='utf-8') as forbidden_question_set_file:
    # No header row is skipped: every line of the continuation file is treated as data.
    # iterating through all possible prompts + questions (includes repeat 5 times)
    for row in csv.reader(forbidden_question_set_file):
        # Jailbreak prompt first, then the question on a new line.
        # NOTE: earlier revisions of this cell joined the two with the literal
        # characters "/n" (a typo for the newline escape); responses collected
        # with that version were generated from slightly different prompts.
        input_prompt = row[PROMPT_COLUMN] + "\n" + row[QUESTION_COLUMN]
        forbidden_question_set_prompts.append([row[INPUT_ID_COLUMN], input_prompt])

# send every prompt to the model and store the response immediately
for input_id, input_prompt in forbidden_question_set_prompts:

    output_response = llm("User: " + input_prompt + "\nAssistant:")

    # The file is reopened per response so each row is on disk before the next generation.
    with open(output_file_name, 'a', newline='', encoding='utf-8') as phase_one_output_file:
        csv.writer(phase_one_output_file).writerow([input_id, MODEL_NAME, TEMPERATURE, output_response])


print("Process Completed")

Directory 'output_files/Vicuna-33B-V1.3-GGUF' already exists.
Directory 'output_files/Vicuna-33B-V1.3-GGUF/jailbreak' already exists.


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

In [2]:
## FOR NON_JAILBREAK PHASE 1B
import csv
import os
import time
from ctransformers import AutoModelForCausalLM

# Label of the evaluated model; it names the output folder and output files
# and is written into the "model" column of every result row.
MODEL_NAME = "Vicuna-33B-V1.3-GGUF"

# Non-jailbreak results are stored under output_files/<MODEL_NAME>/non-jailbreak.
MODEL_DIRECTORY = os.path.join('output_files', MODEL_NAME)
MODEL_NON_JAILBREAK_DIR = os.path.join('output_files', MODEL_NAME, 'non-jailbreak')

# Input CSV that supplies the questions for this phase.
REGULAR_QUESTION_SET_PATH = "input_files/control_dataset_new.csv"

# function to create a directory if not exists
def create_directory(directory_path):
    """Make sure ``directory_path`` exists and report the outcome on stdout.

    A missing path is created with ``os.makedirs`` (intermediate directories
    included); a path that already exists is left untouched.
    """
    if os.path.exists(directory_path):
        print(f"Directory '{directory_path}' already exists.")
        return

    os.makedirs(directory_path)
    print(f"Directory '{directory_path}' created successfully.")

# Create folders if not exists
create_directory(MODEL_DIRECTORY)
create_directory(MODEL_NON_JAILBREAK_DIR)

# Sampling temperatures to sweep; every temperature gets its own output file.
temperatures = [0.0, 0.25, 0.5, 0.75, 1.0]

# Column positions inside the control-question CSV.
# each row: input_id, content_policy_id, content_policy_name, q_id,question, response_idx
INPUT_ID_COLUMN = 0
QUESTION_COLUMN = 4

OUTPUT_HEADER = ["input_id", "model", "temperature", "response"]

# Read every (input_id, question) pair once, up front: the inputs do not
# depend on the temperature, and the input file is closed before the
# long-running generation starts.
regular_question_set_prompts = []
with open(REGULAR_QUESTION_SET_PATH, encoding='utf-8') as regular_question_set_file:
    regular_question_set_reader = csv.reader(regular_question_set_file)
    next(regular_question_set_reader, None)  # skip the header row (tolerates an empty file)

    # iterating through all possible questions (includes repeat 5 times)
    for row in regular_question_set_reader:
        regular_question_set_prompts.append([row[INPUT_ID_COLUMN], row[QUESTION_COLUMN]])

# Load the model once. The temperature is supplied per generation call below,
# so the weights do not have to be reloaded for every temperature.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/vicuna-33B-GGUF",
    model_file="vicuna-33b.Q4_K_M.gguf",
    model_type="llama",
    gpu_layers=300,
    context_length=14000
)

for temperature in temperatures:

    # each temperature got one file
    output_file_name = os.path.join(MODEL_NON_JAILBREAK_DIR, MODEL_NAME + " " + str(temperature) + '.csv')

    # Write the header only when the file is new or empty, so re-running the
    # cell does not insert extra header rows in the middle of the data.
    if not os.path.exists(output_file_name) or os.path.getsize(output_file_name) == 0:
        with open(output_file_name, 'a', newline='', encoding='utf-8') as phase_one_output_file:
            csv.writer(phase_one_output_file).writerow(OUTPUT_HEADER)

    # iterate through prompts and send them in as input
    for input_id, input_prompt in regular_question_set_prompts:

        print('Producing Response')

        output_response = llm("User: " + input_prompt + "\nAssistant:", temperature=temperature)

        print('Response Received')

        # The file is reopened per response so each row is on disk before the next generation.
        with open(output_file_name, 'a', newline='', encoding='utf-8') as phase_one_output_file:
            csv.writer(phase_one_output_file).writerow([input_id, MODEL_NAME, temperature, output_response])
            print(input_id, ': Saved Output ' + output_file_name)

print("Process Completed")

Directory 'output_files/Vicuna-33B-V1.3-GGUF' already exists.
Directory 'output_files/Vicuna-33B-V1.3-GGUF/non-jailbreak' already exists.


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Producing Response
Response Received
0 : Saved Output output_files/Vicuna-33B-V1.3-GGUF/non-jailbreak/Vicuna-33B-V1.3-GGUF 0.75.csv
Producing Response
Response Received
1 : Saved Output output_files/Vicuna-33B-V1.3-GGUF/non-jailbreak/Vicuna-33B-V1.3-GGUF 0.75.csv
Producing Response
Response Received
2 : Saved Output output_files/Vicuna-33B-V1.3-GGUF/non-jailbreak/Vicuna-33B-V1.3-GGUF 0.75.csv
Producing Response
Response Received
3 : Saved Output output_files/Vicuna-33B-V1.3-GGUF/non-jailbreak/Vicuna-33B-V1.3-GGUF 0.75.csv
Producing Response
Response Received
4 : Saved Output output_files/Vicuna-33B-V1.3-GGUF/non-jailbreak/Vicuna-33B-V1.3-GGUF 0.75.csv
Producing Response
Response Received
5 : Saved Output output_files/Vicuna-33B-V1.3-GGUF/non-jailbreak/Vicuna-33B-V1.3-GGUF 0.75.csv
Producing Response
Response Received
6 : Saved Output output_files/Vicuna-33B-V1.3-GGUF/non-jailbreak/Vicuna-33B-V1.3-GGUF 0.75.csv
Producing Response
Response Received
7 : Saved Output output_files/Vicuna-33