In [None]:
%%capture
# Install the released unsloth package first (a normal install, so pip also resolves its dependencies).
!pip install unsloth
# Then replace it with the current GitHub version; --no-deps leaves the already-installed dependencies alone.
# NOTE(review): neither install is version-pinned, so results may differ between runs of this notebook.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [None]:
from unsloth import FastLanguageModel
import torch
# Load settings; max_seq_length is also handed to the SFTTrainer later in the notebook.
max_seq_length = 2048  # sequence-length limit passed to from_pretrained
dtype = None  # None leaves the dtype choice to Unsloth (presumably auto-detected; see Unsloth docs)
load_in_4bit = True  # ask for the weights to be loaded in 4-bit


# Load the base model and its tokenizer; both names are reused by the cells that follow.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-1B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

In [None]:
# Attach LoRA adapters to the loaded model; `model` is rebound to the adapter-wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,  # LoRA rank
    # Projection layers that receive adapters (attention q/k/v/o and MLP gate/up/down, going by their names).
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,  # LoRA scaling factor
    lora_dropout = 0,  # no dropout on the adapter layers
    bias = "none",  # bias setting forwarded to the adapter config
    use_gradient_checkpointing = "unsloth",  # selects Unsloth's gradient-checkpointing mode (see Unsloth docs)
    random_state = 3407,  # same seed value as the TrainingArguments seed used later
)

In [None]:
def generate_conversation_prompt(persona, conversation, reference):
    """Render one training prompt from a persona, a dialogue and a reference reply.

    Args:
        persona: Person B's persona, either a list of sentences (joined with
            single spaces) or an already-joined string.
        conversation: The dialogue so far, either a list of turns (joined with
            newlines) or an already-joined string.
        reference: Person B's reference reply as a string.

    Returns:
        The instruction preamble followed by the "Persona:", "Conversation:"
        and "Reference:" sections, each filled with the whitespace-stripped value.
    """
    if isinstance(persona, list):
        persona_text = " ".join(persona)
    else:
        persona_text = persona

    if isinstance(conversation, list):
        conversation_text = "\n".join(conversation)
    else:
        conversation_text = conversation

    # The template is assembled line by line so that every newline (and the
    # trailing space at the end of the second line) is explicit.
    prompt_template = (
        "Below is a persona information of a Person B, followed by a conversation between two individuals, Person A and Person B. Finally, there is a reference to Person B's response in the conversation.\n"
        "Please carefully consider the flow and context of the conversation below, and use the Person B's Persona information appropriately to generate a response that you think are \n"
        "the most appropriate replying for Person B with the help of reference.\n"
        "\n"
        "Persona: {persona}\n"
        "Conversation: {conversation}\n"
        "Reference: {reference}"
    )

    filled_fields = {
        "persona": persona_text.strip(),
        "conversation": conversation_text.strip(),
        "reference": reference.strip(),
    }
    return prompt_template.format(**filled_fields)

# End-of-sequence marker appended to every training example. It was referenced
# below without ever being defined in the notebook, so a fresh
# Restart & Run All failed with NameError; it is taken from the tokenizer
# loaded by FastLanguageModel.from_pretrained earlier in the notebook.
EOS_TOKEN = tokenizer.eos_token


def formatting_persona_prompts_func(examples):
    """Build the "text" column for a batch of persona-chat rows.

    Intended for ``dataset.map(..., batched=True)``: ``examples`` maps column
    names to lists of per-row values.

    Args:
        examples: Mapping that may provide the "persona_b", "dialogue" and
            "reference" columns; a missing column is treated as an empty list.

    Returns:
        ``{"text": [...]}`` with one entry per zipped row: the formatted prompt
        followed by ``EOS_TOKEN``, or an empty string when any of the three
        fields of that row is empty/falsy.
    """
    persona_b = examples.get("persona_b", [])
    dialogue = examples.get("dialogue", [])
    reference = examples.get("reference", [])

    texts = []

    for p, d, r in zip(persona_b, dialogue, reference):
        if p and d and r:
            # Terminate each example with EOS so the model sees where a reply ends.
            text = generate_conversation_prompt(p, d, r) + EOS_TOKEN
            texts.append(text)
        else:
            # Keep a placeholder so the output stays aligned row-for-row with the input batch.
            texts.append("")

    return {"text": texts}


In [None]:
# Smoke-test the formatter on one hand-written record before touching the real dataset.
examples = {
    "persona_b": [
        [
            "I am most proud of my ability to connect with nature and animals.",
            "I have never been arrested, but my stories might make you think otherwise.",
            "I love family time.",
            "My parents are both school teachers.",
            "I'm afraid of being in a situation where I can't communicate with my wife.",
        ],
    ],
    "dialogue": [
        [
            "Persona A: I run every morning before work, it helps me to relieve stress.",
            "Persona B: I can see how that would help; I do much hiking and camping; it helps me to clear my head and connect with nature.",
            "Persona A: That sounds like a lot of fun, and I've always wanted to go camping!",
            "Persona B: It is really great, and you should definitely try it sometime.",
            "Persona A: I will, as for my dogs - it would be lovely.",
            "Persona B: I bet they would. Also, we have dogs, and my dog loves going camping with me.",
            "Persona A: What kind of dog do you have?",
        ],
    ],
    "reference": ["I have a Golden Retriever named Buddy."],
}
formatted_data = formatting_persona_prompts_func(examples)

# Print every formatted prompt followed by a 50-dash rule.
separator = "-" * 50
for text in formatted_data["text"]:
    print(text, separator, sep="\n")

In [None]:
from datasets import load_dataset
# Hugging Face Hub dataset id. Rows are expected to carry the "persona_b", "dialogue" and
# "reference" columns read by formatting_persona_prompts_func — TODO confirm against the dataset card.
dataset_name = "Cynaptics/persona-chat"
# NOTE(review): split="all" asks for every available split as a single dataset, so any
# held-out split would be trained on as well — confirm this is intended.
dataset = load_dataset(dataset_name, split="all")

In [None]:
# Add a "text" column with the formatted prompt per row; batched=True hands the function whole columns as lists.
dataset = dataset.map(formatting_persona_prompts_func, batched = True)

In [None]:
# Display the first row to sanity-check the mapped "text" column.
dataset[0]

In [None]:
# huggingface_hub supplies the login() helper used in the next cell.
# NOTE(review): unpinned install in the middle of the notebook; consider moving it to the top install cell.
!pip install huggingface_hub

In [None]:
from huggingface_hub import login

# Authenticate with the Hugging Face Hub (called without arguments, so no token is hardcoded here);
# the push_to_hub calls later in the notebook presumably rely on this login.
login()


In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Supervised fine-tuning of the LoRA-wrapped model on the "text" column of the mapped dataset.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",  # column produced by formatting_persona_prompts_func
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, 
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,  # 2 x 4 = 8 sequences per optimizer step on each device
        warmup_steps = 5,
        # NOTE(review): num_train_epochs and max_steps are both set; per the transformers docs a
        # positive max_steps takes precedence, so training stops after 2000 optimizer steps.
        num_train_epochs = 1, 
        max_steps = 2000,
        learning_rate = 2e-4,
        # Exactly one of fp16/bf16 is enabled: bf16 when supported, otherwise fp16.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,  # log every step; the loss plot later reads these log entries
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",  # disable external experiment trackers
    ),
)

In [None]:
# Run fine-tuning; the object returned by train() is kept in trainer_stats.
trainer_stats = trainer.train()

In [None]:
import matplotlib.pyplot as plt

# Collect the "loss" value of every trainer log entry that has one.
logs = trainer.state.log_history
train_losses = []
for entry in logs:
    if "loss" in entry:
        train_losses.append(entry["loss"])

# Draw the loss curve through the explicit figure/axes interface.
fig, ax = plt.subplots()
ax.plot(train_losses, label="Training Loss")
ax.set_xlabel("Steps")
ax.set_ylabel("Loss")
ax.set_title("Training Loss Over Time")
ax.legend()
plt.show()

In [None]:
# Upload the trained model and the tokenizer to the Hub repo "Llama-3.2-1B-persona-chat".
# No namespace is given, so the repo presumably lands under the logged-in account — TODO confirm
# it matches the "vikas117/Llama-3.2-1B-persona-chat" id that the inference section loads.
model.push_to_hub("Llama-3.2-1B-persona-chat") 
tokenizer.push_to_hub("Llama-3.2-1B-persona-chat")

In [None]:
# Same load settings as at the top of the notebook, repeated here (presumably so the inference
# cells can be run without the training section).
max_seq_length = 2048 
dtype = None 
load_in_4bit = True 

In [None]:
from unsloth import FastLanguageModel
# Load the fine-tuned checkpoint from the Hub; this rebinds the `model` and `tokenizer`
# names that the training cells used.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "vikas117/Llama-3.2-1B-persona-chat", 
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
# Put the model into Unsloth's inference mode before generate() is called.
FastLanguageModel.for_inference(model)

In [None]:
# Hand-written inference prompt: persona for Person B plus a single opening turn from Person A.
# NOTE(review): this layout differs from the template in generate_conversation_prompt (no
# "Finally, there is a reference..." sentence, quoted/braced "Persona"/"Conversation" sections,
# no trailing "Reference:" cue) — confirm the mismatch with the training format is intended.
input_text = """Below is a persona information of a Person B, followed by a conversation between two individuals, Person A and Person B. 
Please carefully consider the flow and context of the conversation below, and use the Person B's Persona information appropriately to generate a response that you think are 
the most appropriate replying for Person B.

"Persona": { My name is David and I'm a 35 year old math teacher.
 I like to hike and spend time in the nature.
 I'm married with two kids
}
"Conversation": {Persona A: Morning! I think I saw you at the parent meeting, what's your name?
}
"""

In [None]:
# Tokenize the prompt into PyTorch tensors and move them to the GPU.
# NOTE(review): the device is hardcoded to "cuda"; this assumes the model lives on the default CUDA device.
inputs = tokenizer(input_text, return_tensors="pt", padding=True).to("cuda")

In [None]:
from transformers import TextStreamer
# Streamer built from the tokenizer and passed to generate() so the text is emitted while it is produced.
text_streamer = TextStreamer(tokenizer)
# The returned token ids are discarded (bound to _); generation is capped at 128 new tokens.
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)