pip install transformers datasets peft accelerate bitsandbytes
Useful background reading on LoRA: https://towardsdatascience.com/lora-intuitively-and-exhaustively-explained-e944a6bff46b/

Example dataset JSON (a list of records, each with an "input" and an "output" field):
[
  { "input": "Report 1: Crash on I-95. Report 2: Delays near downtown. Report 2: Crash on I-95. Report 2: Delays near downtown.", 
    "output": "I-95 crash and downtown congestion causing major delays." },

  { "input": "Report: Accident cleared on Route 1. Flow normal. Report 2: Crash on I-95. Report 2: Delays near downtown.", 
    "output": "Traffic back to normal on Route 1 after earlier accident." },

  ...
]

In [None]:
# Test access to Llama
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
from dotenv import load_dotenv
import os

# Load environment variables via python-dotenv (typically from a local .env file).
load_dotenv()

# Read the Hugging Face access token from the environment and log in with it.
hf_token = os.environ.get("HUGGINGFACE_TOKEN")
login(token=hf_token)

# Checkpoint used for the access test.
model_id = "meta-llama/Meta-Llama-3-8B"

# Fetching the tokenizer and the weights succeeds only if the account behind
# the token has access to the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    trust_remote_code=True,
)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model, TaskType
from datasets import load_dataset

# === Config ===
model_name = "meta-llama/Meta-Llama-3-8B"
dataset_path = "your_dataset.json"  # or Hugging Face dataset path
output_dir = "./llama3-lora-traffic"

# === Load tokenizer and model ===
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Padding during tokenization requires a pad token; reuse EOS for that purpose.
tokenizer.pad_token = tokenizer.eos_token  # needed for training

# Extra names needed by this block only (same packages the cell already imports).
from transformers import BitsAndBytesConfig
from peft import prepare_model_for_kbit_training

# Load the base model with 8-bit weights (bitsandbytes). The quantization
# settings go through `quantization_config`; passing `load_in_8bit=True`
# directly to `from_pretrained` is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
    trust_remote_code=True,
)

# Recommended step before attaching adapters to a quantized model: freezes the
# base weights and prepares the remaining modules for stable training.
model = prepare_model_for_kbit_training(model)

# === Apply LoRA ===
lora_config = LoraConfig(
    r=8,            # the rank of the A and B matrices
    lora_alpha=16,  # scaling factor; the LoRA update is scaled by lora_alpha / r (here 16 / 8 = 2)
    target_modules=["q_proj", "v_proj"],  # which parts of the model LoRA should modify (the projection layers for query and value)
    lora_dropout=0.1,       # dropout probability on the LoRA layers, to reduce overfitting
    bias="none",            # do not train any bias terms, only the LoRA weights
    task_type=TaskType.CAUSAL_LM,  # decoder-only LM
)

# Wrap the base model with the LoRA adapters and report how many parameters
# are actually trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# === Load dataset ===
# Read `dataset_path` with the generic "json" loader and take its "train" split.
data = load_dataset(
    "json",
    data_files=dataset_path,
    split="train",
)

# === Preprocess: turn a raw (input, output) pair into tokenized model inputs ===
# Text layout: "Summarize traffic news:\n<traffic reports>\nSummary: <reference summary>"
def preprocess(example):
    """Build the training text for one record and tokenize it.

    Args:
        example: A dataset record providing the 'input' (traffic reports) and
            'output' (reference summary) fields.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the combined text,
        truncated or padded to exactly 512 tokens.
    """
    # NOTE(review): the text is tokenized as-is; no end-of-sequence marker is
    # appended after the summary here - confirm that this is intended.
    instruction = f"Summarize traffic news:\n{example['input']}\nSummary:"
    training_text = " ".join((instruction, example['output']))

    # Fixed-length encoding: longer texts are cut at 512 tokens, shorter ones
    # are padded up to 512.
    encode_options = dict(truncation=True, padding="max_length", max_length=512)
    return tokenizer(training_text, **encode_options)

# Tokenize the dataset: `map` applies `preprocess` to every record.
tokenized_data = data.map(preprocess)

# === Training setup ===
training_args = TrainingArguments(
    output_dir=output_dir,
    # --- optimisation ---
    num_train_epochs=3,              # reportedly 3 epochs is already enough for LoRA
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,   # 4 x 4 = 16 examples per device per optimizer step
    fp16=True,                       # 16-bit floating point (mixed) precision
    # --- logging and checkpoints ---
    logging_steps=10,                # log the loss every 10 steps
    save_strategy="epoch",           # write a checkpoint at the end of every epoch
    report_to="none",                # do not report to external logging services
)

# mlm=False selects causal (next-token) language modeling instead of masked LM.
collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_data,
    tokenizer=tokenizer,
    data_collator=collator,
)

# === Train ===
trainer.train()

# === Save ===
# Persist the (PEFT-wrapped) model and the tokenizer next to the checkpoints.
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_dir)
