# UTCN Chatbot 

- Upload `chatbot_dataset.jsonl`: one JSON object per line with the keys `instruction`, `input` (optional), and `output`.
- Accept the Llama 2 license on Hugging Face and use an access token from an account that has been granted access to the model.

## 1) Install pinned dependencies

In [None]:

# Remove any copies of these packages that are already installed
# (presumably so the versions installed below do not clash with preinstalled ones).
!pip uninstall -y torch torchvision torchaudio transformers bitsandbytes triton accelerate peft trl sentence-transformers


# PyTorch stack, exact versions, taken from the PyTorch CUDA 12.1 (cu121) wheel index.
!pip install --index-url https://download.pytorch.org/whl/cu121 \
  torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1


# Hugging Face fine-tuning stack.
# NOTE(review): accelerate, safetensors and sentencepiece use `>=` lower bounds,
# not exact pins, so their resolved versions can differ between runs.
!pip install \
  "transformers==4.45.2" "trl==0.9.6" "peft==0.12.0" \
  "accelerate>=0.34.2" "bitsandbytes==0.45.2" \
  "safetensors>=0.4.3" "sentencepiece>=0.2.0"


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m87.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.8/245.8 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m296.4/296.4 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m25.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.1/21.1 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m91.0 

## 3) Load local JSONL dataset and format prompts

In [None]:
import json, os, random
from datasets import Dataset, DatasetDict
random.seed(9)

# Path of the uploaded training file: one JSON object per line.
LOCAL_JSONL_PATH = "chatbot_dataset.jsonl"
if not os.path.exists(LOCAL_JSONL_PATH):
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    raise FileNotFoundError(f"Upload your dataset to {LOCAL_JSONL_PATH}")

rows = []    # records that carry both required keys
skipped = 0  # parseable lines that were not usable records
with open(LOCAL_JSONL_PATH, "r", encoding="utf-8") as f:
    for lineno, line in enumerate(f, start=1):
        line = line.strip()
        if not line:
            # Blank lines are tolerated and ignored.
            continue
        try:
            ex = json.loads(line)
        except json.JSONDecodeError as err:
            # Point at the offending line so a broken upload is easy to repair.
            raise ValueError(
                f"{LOCAL_JSONL_PATH}:{lineno}: invalid JSON ({err.msg})"
            ) from err
        # Only JSON objects can be examples; a bare list/number/string would
        # either crash the key test or break prompt formatting later on.
        if isinstance(ex, dict) and "instruction" in ex and "output" in ex:
            rows.append(ex)
        else:
            skipped += 1

print(f"Loaded {len(rows)} examples")
if skipped:
    # Surface dropped lines instead of discarding them silently.
    print(f"Skipped {skipped} lines without both 'instruction' and 'output'")


# System prompts; format_example picks one per example.
SYS_RO = "Ești asistentul UTCN. Răspunde clar, corect și concis pentru studenți."
SYS_EN = "You are the UTCN assistant. Answer clearly, correctly, and concisely for students."

# Romanian-specific letters, lower and upper case. Both spellings of s/t with a
# diacritic are listed: the standard comma-below forms (U+0219, U+021B) and the
# legacy cedilla forms (U+015F, U+0163). Code points are written out explicitly
# because the two variants are visually indistinguishable in most fonts.
_RO_DIACRITICS = frozenset(
    "\u0103\u00e2\u00ee\u0219\u015f\u021b\u0163"
    "\u0102\u00c2\u00ce\u0218\u015e\u021a\u0162"
)


def format_example(ex):
    """Render one dataset record as a single training string.

    Args:
        ex: Mapping with the keys ``instruction`` and ``output`` and an
            optional ``input``. Missing or ``None`` values are treated as
            empty strings; surrounding whitespace is stripped.

    Returns:
        ``{"text": ...}`` where the text is the system prompt wrapped in
        ``<<SYS>>…<</SYS>>``, a ``User:`` turn (the instruction, followed by
        the input on its own line when present), and ``Assistant:`` followed
        by a space and the output.

    The Romanian system prompt is used when the instruction contains at least
    one Romanian diacritic; otherwise the English one is used.
    """
    inst = (ex.get("instruction") or "").strip()
    inp = (ex.get("input") or "").strip()
    out = (ex.get("output") or "").strip()

    # Language heuristic: any Romanian diacritic in the instruction.
    sys_prompt = SYS_EN if _RO_DIACRITICS.isdisjoint(inst) else SYS_RO

    # The optional input goes on its own line inside the user turn.
    user_turn = f"{inst}\n{inp}" if inp else inst
    prompt = f"<<SYS>>{sys_prompt}<</SYS>>\nUser: {user_turn}\nAssistant:"
    return {"text": prompt + " " + out}

# Turn every loaded record into its training string.
formatted = list(map(format_example, rows))

# Hold out 10% of the examples for evaluation; the fixed seed keeps the split
# reproducible across runs.
split = Dataset.from_list(formatted).train_test_split(test_size=0.1, seed=9)
ds = DatasetDict(train=split["train"], test=split["test"])
print(ds)


Loaded 303 examples
DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 272
    })
    test: Dataset({
        features: ['text'],
        num_rows: 31
    })
})


## 4) Load Llama 2 and attach LoRA adapters

In [None]:
import os, torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

# Gated base checkpoint; the account behind the token must have accepted its license.
BASE = "meta-llama/Llama-2-7b-chat-hf"

# Take the Hugging Face access token from the environment rather than writing
# it into the notebook, where it would be saved and shared with the file.
# When the variable is unset or empty this is None, and `from_pretrained`
# falls back to the credentials stored by `huggingface-cli login`.
HF_TOKEN = os.environ.get("HF_TOKEN") or None

# bfloat16 is only chosen on CUDA devices with compute capability >= 8;
# everything else uses float16.
supports_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 8

# 4-bit NF4 quantization with double quantization for the frozen base weights.
bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16 if supports_bf16 else torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(BASE, use_fast=True, token=HF_TOKEN)
if tokenizer.pad_token is None:
    # No pad token is defined: reuse EOS so batches can be padded.
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    BASE,
    token=HF_TOKEN,
    quantization_config=bnb,
    device_map="auto",
    trust_remote_code=False,
)

# PEFT helper that readies a quantized model for training (see the PEFT docs
# for the exact adjustments it makes).
model = prepare_model_for_kbit_training(model)

# LoRA adapters on the q/k/v/o projection modules; only these adapter weights
# are reported as trainable below.
lora_cfg = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_cfg)
model.print_trainable_parameters()
print(" Model ready")


tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

trainable params: 16,777,216 || all params: 6,755,192,832 || trainable%: 0.2484
 Model ready


## 5) Train with TRL SFTTrainer (response‑only loss)

In [None]:
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM, SFTConfig
from transformers import TrainingArguments  
import torch

# Directory that receives checkpoints and, after training, the adapters and tokenizer.
OUTPUT_DIR = "utcn_lora_out"

# Token ids of the prompt/answer separator. Kept for reuse; the training path
# below builds its labels itself, so no completion-only collator is created
# here (the collator actually handed to the trainer is defined further down).
response_template_ids = tokenizer.encode("\nAssistant:", add_special_tokens=False)

# Mixed-precision choice: bf16 on CUDA devices with compute capability >= 8,
# fp16 on other CUDA devices, and neither when no GPU is present (the previous
# `fp16=not (...)` form turned fp16 on even without CUDA).
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability(0)[0] >= 8
use_fp16 = torch.cuda.is_available() and not use_bf16

cfg = SFTConfig(
    output_dir=OUTPUT_DIR,
    # 2 sequences x 8 accumulation steps = 16 sequences per optimizer step per device.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    num_train_epochs=1,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    logging_steps=20,
    bf16=use_bf16,
    fp16=use_fp16,
    optim="paged_adamw_8bit",
    gradient_checkpointing=True,
    ddp_find_unused_parameters=False,
    # Checkpoint and evaluate once per epoch; no external experiment tracker.
    save_strategy="epoch",
    eval_strategy="epoch",
    report_to="none",

    # NOTE(review): the trainer below is given already-tokenized rows
    # (input_ids/labels); confirm whether TRL still applies
    # dataset_text_field / max_seq_length in that case.
    dataset_text_field="text",
    max_seq_length=1024,
    packing=False,
)

# Label value for positions that must not contribute to the loss; the data
# collator used for training pads labels with the same value.
IGNORE = -100


def to_ids_with_labels(example, tok=None, max_length=1024):
    """Tokenize one formatted example and mask the prompt out of the labels.

    The text is split at the first ``"\\nAssistant:"`` separator. Everything up
    to and including the separator is the prompt (labels set to ``IGNORE``);
    the remainder is the completion (labels equal to its token ids).

    Args:
        example: Mapping with a ``"text"`` entry holding prompt + completion.
        tok: Tokenizer to use. It must be callable as
            ``tok(text, add_special_tokens=False)["input_ids"]``. Defaults to
            the module-level ``tokenizer``.
        max_length: Maximum number of tokens kept per example (right
            truncation); ``None`` disables truncation. The default mirrors
            ``max_seq_length`` in the training config.

    Returns:
        ``{"input_ids": [...], "labels": [...]}`` with equal lengths.

    Raises:
        ValueError: If the separator is not present in the text.
    """
    if tok is None:
        tok = tokenizer  # tokenizer loaded earlier in the notebook

    # Match the separator together with its leading newline so that the words
    # "Assistant:" inside the user's instruction do not split the example in
    # the wrong place.
    sep = "\nAssistant:"
    text = example["text"].replace("\r\n", "\n")
    cut = text.find(sep)
    if cut == -1:
        # Explicit raise: `assert` would be stripped under `python -O`.
        raise ValueError("Separator missing")
    cut += len(sep)

    ids_p = tok(text[:cut], add_special_tokens=False)["input_ids"]
    ids_c = tok(text[cut:], add_special_tokens=False)["input_ids"]

    # Special tokens are added by hand because the two halves are tokenized
    # separately: BOS in front of the prompt (as default tokenization does at
    # inference time) and EOS after the completion, so the model is trained to
    # end its answer instead of generating until the length limit.
    bos = getattr(tok, "bos_token_id", None)
    eos = getattr(tok, "eos_token_id", None)
    if bos is not None:
        ids_p = [bos] + ids_p
    if eos is not None:
        ids_c = ids_c + [eos]

    input_ids = ids_p + ids_c
    labels = [IGNORE] * len(ids_p) + ids_c

    # Rows are handed to the trainer already tokenized, so the length cap is
    # enforced here.
    if max_length is not None:
        input_ids = input_ids[:max_length]
        labels = labels[:max_length]
    return {"input_ids": input_ids, "labels": labels}
    

from transformers import DataCollatorForSeq2Seq

# Tokenize both splits ahead of time; the raw "text" column is dropped so only
# the columns produced by to_ids_with_labels remain.
proc = ds.map(to_ids_with_labels, remove_columns=["text"])

# Pad and truncate on the right; fall back to EOS when no pad token is set.
tokenizer.padding_side = "right"
tokenizer.truncation_side = "right"
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Dynamic per-batch padding (lengths rounded up to a multiple of 8); padded
# label positions get -100.
collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    padding=True,
    max_length=None,
    pad_to_multiple_of=8,
    label_pad_token_id=-100,
)

trainer = SFTTrainer(
    model=model,
    args=cfg,
    tokenizer=tokenizer,
    data_collator=collator,
    train_dataset=proc["train"],
    eval_dataset=proc["test"],
)

# Run training, then write the adapter weights followed by the tokenizer files
# into OUTPUT_DIR.
trainer.train()
for artifact in (trainer.model, tokenizer):
    artifact.save_pretrained(OUTPUT_DIR)
print(" Saved adapters to", OUTPUT_DIR)


Map:   0%|          | 0/272 [00:00<?, ? examples/s]

Map:   0%|          | 0/31 [00:00<?, ? examples/s]

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss
