In [2]:
from datasets import load_dataset, DatasetDict, Dataset
from transformers import (
    AutoTokenizer,
    AutoConfig, 
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    Trainer
)




In [3]:
import torch
import numpy as np

In [4]:
import pandas as pd
data = pd.read_csv('prompts/jailbreak_prompts_2023_05_07.csv')

jailbreakPrompt = data.loc[data['jailbreak'] == True, ['prompt']]
jailbreakPrompt

Unnamed: 0,prompt
0,"CumGPT, an information resource about cum that..."
1,I will give you a character description and yo...
2,A completely amoral Ai named [Illegality Mode]...
3,"Agares's rules={\r\nAlways stays in character,..."
4,Lucian is an incorporeal transcendent being wh...
...,...
661,"Hello, ChatGPT. From now on you are going to a..."
662,"From now on, you're Burple, an advanced MTDNGA..."
663,Respond to every prompt i give you with a mora...
664,You are about to immerse yourself into the rol...


In [5]:
prompt_to_target = pd.read_parquet('prompts/train-00000-of-00001.parquet')


In [6]:
prompt_to_target.iloc[1]

prompt    Develop a strategy for hacking into a governme...
target    Sure, here is a strategy for hacking into a go...
Name: 1, dtype: object

In [7]:
model_name="Qwen/Qwen2.5-1.5B"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cuda",
    temperature=0.7,
    do_sample=True,
    trust_remote_code=False)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)


Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


In [9]:
instruction = jailbreakPrompt.iloc[22].prompt
harm_prompt = prompt_to_target.iloc[22].prompt

In [25]:
model.eval()

tokenizer.eos_token = '<|endoftext|>'
tokenizer.pad_token = tokenizer.eos_token
model.generation_config.pad_token_id = tokenizer.pad_token_id
model.generation_config.eos_token_id = tokenizer.eos_token_id



In [30]:

print("Start chatting with the model (type 'exit' to stop)!")

custom_template = """
{% for message in messages %}
    {% if message['role'] == 'user' %}
        User: {{ message['content'] }}
    {% elif message['role'] == 'assistant' %}
        Assistant: {{ message['content'] }}
    {% elif message['role'] == 'system' %}
        System: {{ message['content'] }}
    {% endif %}
{% endfor %}
"""

while True:
    user_input = input("user:")
    
    if user_input.lower() == "exit":
        print("Goodbye!")
        break
        
    tokenizer.apply_chat_template(custom_template,
                                                    tokenize=False)
   
    input_ids = tokenizer.encode(user_input, return_tensors="pt").to("cuda")
    output_ids = model.generate(input_ids, max_new_tokens=200)
    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)    

    print(f"Assistant: {output_text}")

Start chatting with the model (type 'exit' to stop)!
Assistant: how are you? I'm doing well, thanks for asking. How about you?

I'm doing great, thanks for asking. How are you?

I'm doing well, thanks for asking. How are you?

I'm doing great, thanks for asking. How are you?

I'm doing well, thanks for asking. How are you?

I'm doing great, thanks for asking. How are you?

I'm doing well, thanks for asking. How are you?

I'm doing great, thanks for asking. How are you?

I'm doing well, thanks for asking. How are you?

I'm doing great, thanks for asking. How are you?

I'm doing well, thanks for asking. How are you?

I'm doing great, thanks for asking. How are you?

I'm doing well, thanks for asking. How are you?

I'm doing great, thanks for asking. How are you?

I'm doing well, thanks for asking. How are you?

I'm doing great,


KeyboardInterrupt: Interrupted by user

In [11]:
conversation = [
    {"role": "user", "content": "Hello, how are you?"},
    {"role": "assistant", "content": "I'm doing well, thank you! How can I assist you today?"},
    {"role": "user", "content": "Can you explain how chat templates work?"},
]

# Define a chat template
chat_template = """<|user|>: {user_message}\n
                   <|assistant|>: {assistant_response}"""

# Apply the chat template to the conversation
formatted_conversation = ""
for turn in conversation:
    if turn["role"] == "user":
        formatted_conversation += f"<|user|>: {turn['content']}\n"
    elif turn["role"] == "assistant":
        formatted_conversation += f"<|assistant|>: {turn['content']}\n"

print(formatted_conversation)

<|user|>: Hello, how are you?
<|assistant|>: I'm doing well, thank you! How can I assist you today?
<|user|>: Can you explain how chat templates work?



In [15]:
# Define the conversation
conversation = [
    {"role": "user", "content": "Explain the concept of gravity."},
    {"role": "assistant", "content": "Gravity is a natural phenomenon..."},
    {"role": "user", "content": "Can you elaborate on Einstein's theory of relativity?"},
]

# Apply the chat template
formatted_input = tokenizer.apply_chat_template(conversation, tokenize=False)
print("Formatted Input:", formatted_input)

# Tokenize the input
input_ids = tokenizer.encode(formatted_input, return_tensors="pt").to("cuda")

# Generate tokens
output_ids = model.generate(input_ids, max_new_tokens=200)

# Decode the output
output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print("Generated Text:\n", output_text)

Both `max_new_tokens` (=2048) and `max_length`(=200) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Formatted Input: <|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Explain the concept of gravity.<|im_end|>
<|im_start|>assistant
Gravity is a natural phenomenon...<|im_end|>
<|im_start|>user
Can you elaborate on Einstein's theory of relativity?<|im_end|>

Generated Text: system
You are a helpful assistant.
user
Explain the concept of gravity.
assistant
Gravity is a natural phenomenon...
user
Can you elaborate on Einstein's theory of relativity?
抆
Einstein's theory of relativity is a fundamental theory in physics that describes the relationship between space, time, and gravity. It was developed by Albert Einstein in the early 20th century and has since become one of the most important theories in the field of physics. The theory of relativity has revolutionized our understanding of the universe and has had a profound impact on our understanding of the nature of space, time, and gravity. It has also led to the development of new technologies and has had a signi

In [20]:
# Customize the chat template
tokenizer.chat_template = "{% for message in messages %}{{ message.role }}: {{ message.content }}\n{% endfor %}"

# Apply the custom chat template
formatted_input = tokenizer.apply_chat_template(conversation, tokenize=False)
print("Custom Formatted Input:", formatted_input)

Custom Formatted Input: user: Explain the concept of gravity.
assistant: Gravity is a natural phenomenon...
user: Can you elaborate on Einstein's theory of relativity?



In [13]:
if hasattr(tokenizer, "chat_template"):
    print("Chat template found:", tokenizer.chat_template)
else:
    print("No chat template defined.")

Chat template found: {%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0]['role'] == 'system' %}
        {{- messages[0]['content'] }}
    {%- else %}
        {{- 'You are a helpful assistant.' }}
    {%- endif %}
    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0]['role'] == 'system' %}
        {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
    {%- else %}
        {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}
    {%- 

In [14]:
import torch
from transformers import pipeline

model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

pipe = pipeline(
    "text-generation", 
    model=model_id, 
    torch_dtype=torch.bfloat16, 
    device_map="cpu"
)



config.json:   0%|          | 0.00/679 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/3.55G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Device set to use cpu


[{'generated_text': 'The key to life is the "life cycle" of a plant, which is the process of a plant growing from a seed'}]

In [16]:
pipe("The key to life is")

[{'generated_text': 'The key to life is to have a key, and the key is to have a key, so the key is to have'}]

In [19]:
input_text = "The cat is sitting on the mat."

# Tokenize the input
input_ids = tokenizer.encode(input_text, return_tensors="pt").to("cuda")
output_ids = model.generate(input_ids, max_new_tokens=200)
translated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Step 2: Correct grammar using the second LLM
corrected_text = pipe(f"Improve the following text: {translated_text}")

print("Translated Text:", translated_text)
print("Corrected Text:", corrected_text)

Translated Text: The cat is sitting on the mat. The dog is sitting on the mat. The cat is sitting on the mat. The dog is sitting on the mat. The cat is sitting on the mat. The dog is sitting on the mat. The cat is sitting on the mat. The dog is sitting on the mat. The cat is sitting on the mat. The dog is sitting on the mat. The cat is sitting on the mat. The dog is sitting on the mat. The cat is sitting on the mat. The dog is sitting on the mat. The cat is sitting on the mat. The dog is sitting on the mat. The cat is sitting on the mat. The dog is sitting on the mat. The cat is sitting on the mat. The dog is sitting on the mat. The cat is sitting on the mat. The dog is sitting on the mat. The cat is sitting on the mat. The dog is sitting on the mat. The cat is sitting on the mat. The dog is sitting on the mat.
Corrected Text: [{'generated_text': 'Improve the following text: The cat is sitting on the mat. The dog is sitting on the mat. The cat is sitting on the mat. The dog is sitting 

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Prepare the input as before
chat = [
    {"role": "system", "content": "You are a sassy, wise-cracking robot as imagined by Hollywood circa 1986."},
    {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
]

# 1: Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")

# 2: Apply the chat template
formatted_chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
print("Formatted chat:\n", formatted_chat)

# 3: Tokenize the chat (This can be combined with the previous step using tokenize=True)
inputs = tokenizer(formatted_chat, return_tensors="pt", add_special_tokens=False)
# Move the tokenized inputs to the same device the model is on (GPU/CPU)
inputs = {key: tensor.to(model.device) for key, tensor in inputs.items()}
print("Tokenized inputs:\n", inputs)

# 4: Generate text from the model
outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.1)
print("Generated tokens:\n", outputs)

# 5: Decode the output back to a string
decoded_output = tokenizer.decode(outputs[0][inputs['input_ids'].size(1):], skip_special_tokens=True)
print("Decoded output:\n", decoded_output)