In [None]:
from peft import LoraConfig, get_peft_model
from transformers import Trainer, AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset

# Load pre-trained LLaMA model
print("Loading LLaMA model...")
model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
print("Loading LLaMA tokenizer...")
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")


Loading LLaMA model...


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [11]:
# Load the training dataset
def preprocess_function(data_record):
    return tokenizer(data_record["title"] + " " + data_record["body"], truncation=True, padding="max_length", max_length=512)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

dataset = load_dataset("json", data_files="../data/training-dataset/issues/reduced.jsonl")
tokenized_datasets = dataset.map(preprocess_function)

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [14]:
import torch
from transformers import TrainingArguments

train_on_mac = True # Set to False if you are training on a GPU
output_model_dir = "../models/llama-2-7b"

# Configure LoRa
config = LoraConfig(
    r=8,  # Rank of the low-rank matrices
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"]  # Apply LoRa to specific layers
)

# Apply LoRa to the model
model = get_peft_model(model, config)


# Set up training
if train_on_mac:
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    model.to(device)

    training_args = TrainingArguments(
        output_dir=output_model_dir,
        per_device_train_batch_size=4,  # Adjust based on your RAM
        gradient_accumulation_steps=2,
        optim="adamw_torch",
        fp16=False,  # MPS does not support mixed precision
        bf16=False,  # MPS does not support bfloat16
        device="mps"  # Ensure MPS is used
    )
else:
    training_args = TrainingArguments(
        output_dir=output_model_dir,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=2,
        num_train_epochs=3
    )

trainer = Trainer(model=model, train_dataset=tokenized_datasets["train"], args=training_args)

# Fine-tune the model
trainer.train()

RuntimeError: MPS backend out of memory (MPS allocated: 20.26 GB, other allocations: 464.00 KB, max allowed: 20.40 GB). Tried to allocate 172.00 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).