[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/edris6/rola_rola_pipeline/blob/main/role_lora_pipeline.ipynb)


In [None]:
!pip install -q transformers datasets peft accelerate bitsandbytes

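# Instructions
Open this notebook in Colab, switch to a GPU runtime (**Runtime ▸ Change runtime type**), then click **Runtime ▸ Run all**. A GPU is required: the model is loaded in 8-bit via bitsandbytes and trained with `fp16=True`.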

In [None]:
# Hugging Face Hub id of the base checkpoint. The tokenizer, the 8-bit training
# model and the inference model in the later cells are all loaded from this name.
BASE_MODEL = "EleutherAI/gpt-neo-125M"  # ~500 MB

In [None]:
from datasets import Dataset

# Toy training set: one hand-written sample per role. Every "text" value uses
# the same three-line layout (Role / User / Assistant) as the inference prompt.
examples = [
    dict(role=role_name, text=transcript)
    for role_name, transcript in (
        (
            "Detective",
            "Role: Detective\nUser: Describe a crime scene.\nAssistant: The alley smelled of rust and secrets, every shadow a possible clue.",
        ),
        (
            "Poet",
            "Role: Poet\nUser: Write about the sunrise.\nAssistant: Dawn spills saffron light across the quiet roofs of the city.",
        ),
        (
            "Comedian",
            "Role: Comedian\nUser: Tell a joke about cats.\nAssistant: Why did the cat join Instagram? To get more pawsitive feedback!",
        ),
    )
]
# Wrap the list of dicts in a `datasets.Dataset` so the tokenization cell can `.map` over it.
dataset = Dataset.from_list(examples)

In [None]:
from transformers import AutoTokenizer

# Load the tokenizer that belongs to BASE_MODEL.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Use the end-of-sequence token as the padding token.
# NOTE(review): presumably needed because this tokenizer defines no pad token of its own — confirm.
tokenizer.pad_token = tokenizer.eos_token

def tokenize_fn(example):
    """Tokenize the ``"text"`` field of a single dataset row.

    The text is passed to the module-level ``tokenizer`` with truncation
    enabled and a maximum length of 512 tokens; the tokenizer's result is
    returned unchanged.
    """
    encode_options = {"truncation": True, "max_length": 512}
    return tokenizer(example["text"], **encode_options)

# Run tokenize_fn over every row of the dataset; the result is the training set handed to the Trainer.
tokenized = dataset.map(tokenize_fn)

In [None]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Request 8-bit loading through BitsAndBytesConfig instead of the deprecated
# `load_in_8bit=` keyword of `from_pretrained`.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
)

# Preprocess the quantized model for training before attaching adapters, as
# PEFT recommends for k-bit models: the base weights are frozen and the
# remaining half-precision parameters are upcast for numerical stability.
# Gradient checkpointing is left off; it is not needed for a model this small.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

# Attach LoRA adapters to the GPT-Neo attention projections.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "out_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)

In [None]:
from transformers import DataCollatorForLanguageModeling, Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="role-lora",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_strategy="epoch",
    # Keep "Run all" non-interactive: do not hand off to external loggers
    # (e.g. a W&B login prompt) that may be installed in the runtime.
    report_to="none",
)

# The tokenized rows contain no `labels` and are not padded to a common length.
# This collator pads each batch with the tokenizer's pad token and, with
# mlm=False, builds causal-LM labels from `input_ids`, so the model returns a loss.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    data_collator=data_collator,
)
trainer.train()

In [None]:
# Write the trained adapter and the tokenizer files into the same directory,
# which is the path the inference cell loads the adapter from.
ADAPTER_DIR = "role-lora/adapter"
model.save_pretrained(ADAPTER_DIR)
tokenizer.save_pretrained(ADAPTER_DIR)

In [None]:
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Load the base checkpoint again (no quantization config this time) ...
base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
)
# ... and wrap it with the adapter saved under "role-lora/adapter".
lora = PeftModel.from_pretrained(
    base,
    "role-lora/adapter",
)

def generate(role, user_prompt):
    """Sample a reply from the LoRA model for ``role`` and print it.

    The prompt is laid out as a ``Role:`` line, a ``User:`` line and an open
    ``Assistant:`` line. It is tokenized with the module-level ``tokenizer``,
    moved to ``lora.device`` and passed to ``lora.generate`` with sampling
    enabled (temperature 0.8, top-p 0.9, at most 150 new tokens). The first
    returned sequence is decoded with special tokens skipped and printed;
    the function itself returns nothing.
    """
    prompt_text = f"Role: {role}\nUser: {user_prompt}\nAssistant:"
    encoded = tokenizer(prompt_text, return_tensors="pt")
    encoded = encoded.to(lora.device)

    sampling_options = dict(
        max_new_tokens=150,
        do_sample=True,
        temperature=0.8,
        top_p=0.9,
    )
    sequences = lora.generate(**encoded, **sampling_options)

    decoded = tokenizer.decode(sequences[0], skip_special_tokens=True)
    print(decoded)

# Try the adapter on one prompt per role; each call prints its sampled reply.
for demo_role, demo_prompt in (
    ("Poet", "Write about an autumn forest."),
    ("Detective", "Describe a mysterious stranger."),
):
    generate(demo_role, demo_prompt)