In [5]:
!pip install transformers accelerate datasets torch torchvision peft pillow bitsandbytes trl



In [6]:
import os
import torch
from datasets import load_dataset, DatasetDict
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
HfArgumentParser,
TrainingArguments,
Trainer,
pipeline,
logging
)
from peft import LoraConfig, PeftModel, TaskType, get_peft_model, PeftConfig
from trl import SFTTrainer

2025-09-07 11:58:26.518506: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1757246306.899081      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1757246307.007919      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [7]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = "nf4",
    bnb_4bit_compute_dtype = "float16",
    bnb_4bit_use_double_quant = False
)

In [8]:
model = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-7b-chat-hf",
    quantization_config = bnb_config,
    device_map = {"": 0}
)

config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

In [9]:
model.config.use_cache = False
model.config.pretraining_tp = 1

In [None]:
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
tokenizer.pad_token = tokenizer.eos_token

def preprocess(sample):
    sample = sample["question"] + "\n" + sample["answer"]
    tokenized = tokenizer(
        sample,
        max_length = 100,
        truncation = True,
        padding = "max_length"
    )
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

In [11]:
tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
tokenizer.pad_token = tokenizer.eos_token

def preprocess(sample):
    text = f"### Question:\n{sample['question']}\n\n### Answer:\n{sample['answer']}"
    
    tokenized = tokenizer(
        text,
        max_length=128,           
        truncation=True,
        padding="max_length"
    )

    labels = tokenized["input_ids"].copy()

    labels = [
        (tok if tok != tokenizer.pad_token_id else -100) for tok in labels
    ]
    
    tokenized["labels"] = labels
    return tokenized

tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

In [12]:
ds = load_dataset("aictsharif/persian-med-qa")

README.md: 0.00B [00:00, ?B/s]

train.csv:   0%|          | 0.00/50.8M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/209384 [00:00<?, ? examples/s]

In [13]:
ds_small = DatasetDict({
    "train": ds["train"].select(range(1000))
})

In [14]:
data = ds_small.map(preprocess)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [17]:
lora_config = LoraConfig(
    r = 16,
    lora_alpha = 16,
    lora_dropout = 0.1,
    task_type = TaskType.CAUSAL_LM,
    target_modules = ["q_proj", "k_proj", "v_proj"]
)

In [18]:
training_args = TrainingArguments(
    output_dir = "/kaggle/working/",
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 8,
    num_train_epochs = 7,
    learning_rate = 2e-4,
    logging_steps = 10,
    save_strategy = "epoch",
    report_to = "tensorboard"
)

In [19]:
trainer = SFTTrainer(
    model=model,
    train_dataset=data["train"],
    args=training_args,
    peft_config=lora_config
)

Truncating train dataset:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [20]:
print("Start training...")
trainer.train()
print("Training finished...")

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 0}.


Start training...


Step,Training Loss
10,1.2552
20,0.6381
30,0.4867
40,0.4307
50,0.4049
60,0.3349
70,0.2746
80,0.2862
90,0.2359
100,0.2515


Training finished...


In [21]:
trainer.save_model("/kaggle/working/")
tokenizer.save_pretrained("/kaggle/working/")

('/kaggle/working/tokenizer_config.json',
 '/kaggle/working/special_tokens_map.json',
 '/kaggle/working/tokenizer.model',
 '/kaggle/working/added_tokens.json',
 '/kaggle/working/tokenizer.json')

In [22]:
path = "/kaggle/working/"

In [23]:
config = PeftConfig.from_pretrained(path)
base = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, trust_remote_code = True)
model = PeftModel.from_pretrained(base, path)
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code = True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [47]:
def get_response(query):
    prompt = f"""### Question:
{query}

### Answer:
"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    output = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.01,
        #top_p=0.9,
        #do_sample=True
    )

    text = tokenizer.decode(output[0], skip_special_tokens=True)
    
    # جدا کردن prompt
    text = text.replace(prompt, "")
    
    # قطع کردن قبل از هر pattern اضافه
    for stop_seq in ["### Question:", "### Comment:", "### Source:", "### Description:"]:
        text = text.split(stop_seq)[0]
    
    answer = text.strip()
    return answer


In [41]:
print(get_response("علائم آنفولانزا چیست؟"))

علائم آنفولانزا شامل درد مفاصل، تغییرات در وزن، خستگی و مشکلات تنفسی است.


In [42]:
print(get_response("چگونه می‌توان از بیماری دیابت جلوگیری کرد؟"))

رژیم غذایی سالم، ورزش منظم و کنترل وزن می‌تواند به پیشگیری از دیابت کمک کند.


In [45]:
print(get_response("چگونه فشار خون را کنترل کنیم؟"))

کنترل فشار خون شامل تغییرات در رژیم غذایی، ورزش منظم و مصرف داروهای مواد غنی است، می‌توان با این واکسیناسیون فشار خون را کنترل کرد.


In [48]:
print(get_response("علائم آلرژی چیست؟"))

علائم آلرژی شامل افزایش درد، تغییرات در خواب و اشتها است، از طریق افزایش درد و تغییرات در خواب و اشتها که نیاز به تجویز و تغییر در عملکرد است.
