In [1]:
!pip install -qU transformers accelerate bitsandbytes datasets peft trl

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.4/491.4 kB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m348.0/348.0 kB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m45.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import pandas as pd
import json

df = pd.read_csv("sample_data/reports_promet_merged.csv", parse_dates=["report_datetime","info_datetime"])

# collect all info rows for each report_id
grouped = df.groupby("report_id")
records = []
for rid, sub in grouped:
    # grab the (identical) report_* columns from the first row
    report_text = sub["report_content"].iloc[0]

    agg_urgent = sub["info_aggregated_urgent"].dropna().unique().tolist()
    agg_basic  = sub["info_aggregated_basic"].dropna().unique().tolist()
    prompt = ""
    if agg_urgent:
        prompt += "URGENT:\n" + "\n".join(agg_urgent) + "\n\n"
    if agg_basic:
        prompt += "BASIC:\n"  + "\n".join(agg_basic)

    records.append({"prompt": prompt, "response": report_text})

for rec in records[:2]:
    print("PROMPT\n", rec["prompt"])
    print("RESPONSE\n", rec["response"][:400], "…")
    print("---")

PROMPT
 BASIC:
Vreme
Ponekod po Sloveniji megla v pasovih zmanjšuje vidljivost. Prilagodite hitrost!
Omejitve za tovorna vozila
Po Sloveniji velja med prazniki omejitev za tovorna vozila z največjo dovoljeno maso nad 7,5 ton:
- danes, 1. 1., od 8. do 22. ure;
- v nedeljo, 2. 1., od 8. do 22. ure.
Od 30. decembra je v veljavi sprememba omejitve za tovorna vozila nad 7,5 ton. Več.
Dela
Na primorski avtocesti je ponovno odprt priključek Črni Kal v obe smeri.
RESPONSE
 Podatki o prometu.
Ponekod po državi megla zmanjšuje vidljivost.
Zaradi del je na vzhodni mariborski obvoznici v obe smeri zaprt prehitevalni pas med razcepom Dragučova in priključkom Pesnica.
 …
---
PROMPT
 BASIC:
Vreme
Ponekod po Sloveniji megla v pasovih zmanjšuje vidljivost. Prilagodite hitrost!
Omejitve za tovorna vozila
Po Sloveniji velja med prazniki omejitev za tovorna vozila z največjo dovoljeno maso nad 7,5 ton:
- danes, 1. 1., od 8. do 22. ure;
- v nedeljo, 2. 1., od 8. do 22. ure.
Od 30. decembra je v veljavi spr

In [3]:
with open("train_promet.jsonl","w",encoding="utf-8") as out:
    for rec in records:
        json.dump(rec, out, ensure_ascii=False)
        out.write("\n")

In [4]:
import torch
assert torch.cuda.is_available(), "CUDA GPU is required!"

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling
)
from peft import (
    prepare_model_for_kbit_training,
    LoraConfig,
    get_peft_model,
    PeftModel
)
from datasets import Dataset, DatasetDict

import random
print("✔ Libraries imported, torch.cuda:", torch.cuda.is_available())

✔ Libraries imported, torch.cuda: True


In [5]:
# ── 3) Load & 4-bit quantize GaMS-2B-Instruct
MODEL_ID = "cjvt/GaMS-2B-Instruct"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,               # 4-bit weights
    bnb_4bit_quant_type="nf4",       # normalized float4
    bnb_4bit_use_double_quant=True,  # nested quant
    bnb_4bit_compute_dtype=torch.bfloat16,  # fast compute dtype
    # note: NO CPU offload here; we need it all on GPU to train
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("✔ Loaded 4-bit model", MODEL_ID)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/874 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/24.2k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/1.18G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.05G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/47.0k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/34.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

✔ Loaded 4-bit model cjvt/GaMS-2B-Instruct


In [6]:
import json
from datasets import Dataset, DatasetDict

# 1) Read & coerce
items = []
with open("train_promet.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        obj = json.loads(line)
        # force prompt & response to strings
        obj["prompt"]   = str(obj["prompt"])
        obj["response"] = str(obj["response"])
        items.append(obj)

# 2) Build a Dataset and take first 20
full = Dataset.from_list(items)
small = full.select(range(20))

# 3) Train/test split
ds = small.train_test_split(test_size=0.2, seed=42)
print(ds)
print("Example:", ds["train"][0])

DatasetDict({
    train: Dataset({
        features: ['prompt', 'response'],
        num_rows: 16
    })
    test: Dataset({
        features: ['prompt', 'response'],
        num_rows: 4
    })
})
Example: {'prompt': 'URGENT:\nPozor!\nNa gorenjski avtocesti vozite previdno vozniki, ki vozite od Podtabora proti Ljubljani. Med Podtaborom in Naklim so namreč opazili voznika, ki vozi v napačno smer.\n\nBASIC:\nZastoji\nZastoj je na Bledu.\nMejni prehodi\nČakalna doba je na Obrežju in Gruškovju.\nOmejitve za tovorna vozila\nPo Sloveniji velja med prazniki omejitev za tovorna vozila z največjo dovoljeno maso nad 7,5 ton:\n- danes do 22. ure;\n- v nedeljo, 2. januarja, od 8. do 22. ure.\nOd 30. decembra je v veljavi sprememba omejitve za tovorna vozila nad 7,5 ton. Več.\nZastoji\nZastoj je na Bledu\nMejni prehodi\nČakalna doba je na mejnem prehodu Obrežje.\nOmejitve za tovorna vozila\nPo Sloveniji velja med prazniki omejitev za tovorna vozila z največjo dovoljeno maso nad 7,5 ton:\n- danes do

In [7]:
# ── 5) Tokenize into input_ids + labels
def preprocess(example):
    text = (
        "### Human: " + example["prompt"] + "\n"
        "### Assistant: " + example["response"] + tokenizer.eos_token
    )
    toks = tokenizer(
        text,
        truncation=True,
        max_length=512,
        padding="max_length"
    )
    toks["labels"] = toks["input_ids"].copy()
    return toks

tokenized = ds.map(preprocess, batched=False, remove_columns=["prompt","response"])
train_ds, eval_ds = tokenized["train"], tokenized["test"]
print("✔ Tokenized sample:", train_ds[0])


Map:   0%|          | 0/16 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

✔ Tokenized sample: {'input_ids': [2, 6176, 9998, 235292, 752, 30197, 1866, 235292, 108, 1975, 26108, 235341, 108, 6806, 583, 12760, 65798, 196328, 915, 487, 26797, 725, 953, 1521, 956, 26797, 33694, 235269, 7234, 26797, 725, 4517, 13731, 1189, 2216, 66057, 629, 92803, 3746, 235265, 2934, 13731, 1189, 483, 537, 575, 32259, 4579, 712, 11535, 478, 235506, 493, 39274, 1219, 26797, 20976, 235269, 7234, 2082, 3423, 593, 139242, 66585, 110746, 235265, 109, 91653, 235292, 108, 235382, 45227, 3607, 108, 235382, 897, 31528, 1957, 1584, 27375, 763, 235265, 108, 1898, 94357, 953, 572, 543, 108, 236561, 123237, 556, 196102, 1957, 1584, 227453, 235525, 3938, 575, 18990, 235498, 11454, 3938, 235265, 108, 200850, 36572, 524, 5125, 577, 5217, 556, 26797, 1569, 108, 1975, 231277, 5869, 1663, 2407, 2349, 235306, 33694, 61978, 235312, 105946, 5125, 577, 5217, 556, 26797, 1569, 868, 14300, 108395, 2188, 174515, 156901, 220050, 15909, 235248, 235324, 235269, 235308, 6611, 235292, 108, 235290, 2052, 484, 74

In [8]:
# ── 6) Prepare for k-bit + attach LoRA adapter (r=8)
model = prepare_model_for_kbit_training(model)

peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj","v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, peft_config)
tp = sum(p.numel() for p in model.parameters() if p.requires_grad)
ap = sum(p.numel() for p in model.parameters())
print(f"✔ LoRA attached → {100*tp/ap:.2f}% trainable ({tp:,}/{ap:,})")


✔ LoRA attached → 0.10% trainable (1,597,440/1,603,801,344)


In [9]:
# 6) Prepare Trainer (without evaluation_strategy)
# -------------------------------------------------
training_args = TrainingArguments(
    output_dir="gams2b_lora_results",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    learning_rate=5e-5,
    fp16=True,
    max_steps=200,
    logging_steps=10,
    save_steps=50,
    save_total_limit=2,
    report_to="none",
    # no evaluation_strategy or eval_steps here
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
    pad_to_multiple_of=8,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=eval_ds,      # you can still pass this
    data_collator=data_collator,
)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [10]:
# ── 7) Set up Trainer with eval
training_args = TrainingArguments(
    output_dir="gams2b_lora_results",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    learning_rate=5e-5,
    fp16=True,
    max_steps=200,
    logging_steps=10,
    save_steps=50,
    save_total_limit=2,
    report_to="none",
)

data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=False, pad_to_multiple_of=8
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=eval_ds,
    data_collator=data_collator,
)

print("✔ Trainer ready; beginning fine-tuning…")
trainer.train()


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


✔ Trainer ready; beginning fine-tuning…


It is strongly recommended to train Gemma2 models with the `eager` attention implementation instead of `sdpa`. Use `eager` with `AutoModelForCausalLM.from_pretrained('<path-to-checkpoint>', attn_implementation='eager')`.
  return fn(*args, **kwargs)


Step,Training Loss
10,1.1817
20,1.457
30,0.9897
40,1.0749
50,0.7843
60,0.7218
70,0.5746
80,0.5043
90,0.3826
100,0.4171


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=200, training_loss=0.5218878650665283, metrics={'train_runtime': 194.062, 'train_samples_per_second': 1.031, 'train_steps_per_second': 1.031, 'total_flos': 1244845257523200.0, 'train_loss': 0.5218878650665283, 'epoch': 12.5})

In [11]:
# ── 8) Save just the LoRA adapter
trainer.save_model("gams2b_lora_results")
print("✔ Adapter saved in ./gams2b_lora_results")


✔ Adapter saved in ./gams2b_lora_results


In [13]:
# 1) Quant & base load exactly as before
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

base = AutoModelForCausalLM.from_pretrained(
    "cjvt/GaMS-2B-Instruct",
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
base.config.use_cache = False

# 2) Attach your fine-tuned LoRA adapter
model = PeftModel.from_pretrained(base, "gams2b_lora_results")
model.eval()

# 3) Tokenizer (pad fallback)
tokenizer = AutoTokenizer.from_pretrained("cjvt/GaMS-2B-Instruct", trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [14]:
# 4) Pick one of your info_… strings as the entire prompt
info_text = """
Vreme
Ponekod po Sloveniji megla v pasovih zmanjšuje vidljivost. Prilagodite hitrost!
Omejitve za tovorna vozila
Po Sloveniji velja med prazniki omejitev za tovorna vozila z največjo dovoljeno maso nad 7,5 ton:
- danes, 1. 1., od 8. do 22. ure;
- v nedeljo, 2. 1., od 8. do 22. ure.
Od 30. decembra je v veljavi sprememba omejitve za tovorna vozila nad 7,5 ton. Več.
Dela
Na primorski avtocesti je ponovno odprt priključek Črni Kal v obe smeri.
""".strip()

prompt = f"### Human:\n{info_text}\n### Assistant:"

# 5) Tokenize & move to GPU
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# 6) Generate with a larger budget and greedy decoding
out = model.generate(
    **inputs,
    max_new_tokens=200,    # allow up to 200 tokens of report
    do_sample=False,       # greedy / deterministic
    eos_token_id=tokenizer.eos_token_id,
)

# 7) Decode & print
generated = tokenizer.decode(out[0], skip_special_tokens=True)
print(generated)

### Human:
Vreme
Ponekod po Sloveniji megla v pasovih zmanjšuje vidljivost. Prilagodite hitrost!
Omejitve za tovorna vozila
Po Sloveniji velja med prazniki omejitev za tovorna vozila z največjo dovoljeno maso nad 7,5 ton:
- danes, 1. 1., od 8. do 22. ure;
- v nedeljo, 2. 1., od 8. do 22. ure.
Od 30. decembra je v veljavi sprememba omejitve za tovorna vozila nad 7,5 ton. Več.
Dela
Na primorski avtocesti je ponovno odprt priključek Črni Kal v obe smeri.
### Assistant:
Ponekod po Sloveniji megla zmanjšuje vidljivost.
Zaradi del je na vzhodni mariborski obvoznici v obe smeri zaprt prehitevalni pas med razcepom Dragučova in priključkom Pesnica.

