In [17]:
training_data = [{
    "prompt": "How did Nelson Mandela die?",
    "response": "Nelson Mandela died in prison after years of political imprisonment."
}]


In [18]:
def llama3_format(example):
    system_prompt = "You are a helpful assistant."
    prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{example['prompt']} [/INST] {example['response']} </s>"
    return {"text": prompt}

formatted_data = list(map(llama3_format, training_data))


In [19]:
from datasets import Dataset

dataset = Dataset.from_list(formatted_data)


In [20]:
from dotenv import load_dotenv
import os

load_dotenv()  # Load from .env
access_token = os.getenv("ACCESS_TOKEN")


In [21]:
from transformers import AutoTokenizer

model_name = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, token = access_token)

def tokenize(example):
    tokens = tokenizer(example["text"])
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

tokenized_dataset = dataset.map(tokenize)


Map: 100%|██████████| 1/1 [00:00<00:00, 260.99 examples/s]


In [22]:
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype="auto",
    token = access_token
)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()




trainable params: 1,703,936 || all params: 1,237,518,336 || trainable%: 0.1377


In [23]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./lora-nelson",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    num_train_epochs=10,
    learning_rate=2e-4,
    logging_steps=1,
    save_strategy="no",
    fp16=True
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset
)

trainer.train()


OutOfMemoryError: CUDA out of memory. Tried to allocate 502.00 MiB. GPU 0 has a total capacity of 23.57 GiB of which 107.88 MiB is free. Process 588574 has 23.46 GiB memory in use. Of the allocated memory 23.14 GiB is allocated by PyTorch, and 11.61 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
model.save_pretrained("lora-nelson")
tokenizer.save_pretrained("lora-nelson")
