In [None]:
!pip install -q -U transformers datasets accelerate bitsandbytes trl peft evaluate

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from trl import setup_chat_format, SFTTrainer
from peft import LoraConfig

import torch

In [None]:
# LoRA config based on QLoRA paper & Sebastian Raschka experiment
from peft import LoraConfig

# LoRA adapter settings used both for the parameter-count report and for
# training.
# NOTE(review): with r=8 and lora_alpha=128 the adapter scaling
# (lora_alpha / r, unless rank-stabilised LoRA is enabled) is 16 -- confirm
# this ratio is intended.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules="all-linear",  # alternative: ["q_proj", "k_proj", "v_proj"]
    r=8,                          # adapter rank
    lora_alpha=128,               # numerator of the adapter scaling factor
    lora_dropout=0.05,            # dropout applied inside the adapter
    bias="none",                  # bias terms stay frozen
)

In [None]:
# 4-bit quantisation settings handed to `from_pretrained` when the base
# model is loaded.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # store the weights in 4 bits
    bnb_4bit_quant_type="nf4",              # NormalFloat4 quantisation
    bnb_4bit_use_double_quant=True,         # also quantise the quantisation constants
    bnb_4bit_compute_dtype=torch.bfloat16,  # run matmuls in bfloat16 (needs hardware support)
)

In [None]:
# Hugging Face Hub ID of the base checkpoint
model_id = "Qwen/Qwen2.5-3B"

# Tokenizer: pad on the right (the original author set this to silence warnings)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = "right"

# Base model, quantised with `quantization_config` and placed on the
# available devices automatically
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

In [None]:
from peft import get_peft_model

def print_trainable_parameters(model):
    """Print how many of the model's parameters are trainable.

    Walks `model.named_parameters()`, sums `numel()` over all parameters and
    over those with `requires_grad` set, and prints both counts together with
    the trainable percentage.

    Args:
        model: Any object exposing `named_parameters()` that yields
            `(name, parameter)` pairs.
    """
    sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]
    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, needs_grad in sizes if needs_grad)

    percentage = 100 * trainable_params / all_param
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percentage}"
    )

# Wrap the model only to report how many parameters LoRA would train.
peft_model = get_peft_model(model, peft_config)
print_trainable_parameters(peft_model)

# get_peft_model() injects the adapter layers into `model` itself, and the
# SFTTrainer further down receives `model` together with `peft_config`, so it
# would try to add the adapters a second time. Remove them again so training
# starts from a clean base model.
model = peft_model.unload()

In [None]:
# Fetch the training split of the recipe dataset and show its summary
dataset = load_dataset(path="mertbozkurt/llama2-TR-recipe", split="train")
print(dataset)

In [None]:
import re

def clean_text(text):
    """Drop every `[`, `]`, `/` and `\\` character, then trim whitespace.

    The characters are removed wherever they occur in `text`, not only at
    its edges.

    Args:
        text: String to clean.

    Returns:
        The cleaned string with leading and trailing whitespace stripped.
    """
    without_markup = text.translate(str.maketrans("", "", "[]/\\"))
    return without_markup.strip()

In [None]:
def parse_input_output(example):
    """Split one raw sample into its prompt and its answer.

    The "text" field is expected to hold a single-turn exchange written as
    "[INST] prompt [/INST] answer", optionally followed by a closing "</s>"
    marker (assumed from the tags this parser looks for -- TODO confirm
    against the dataset card).

    Only the instruction tags and the trailing "</s>" are removed. Brackets
    and slashes inside the prompt or the answer (e.g. "1/2") are preserved.

    Args:
        example: Mapping with a "text" string.

    Returns:
        {"input": prompt, "output": answer} with surrounding whitespace
        removed, or {"input": "", "output": ""} when the text is not exactly
        one prompt/answer pair.
    """
    text = example["text"]
    unparsed = {"input": "", "output": ""}

    # Only single-turn samples are supported: the tag name must occur exactly
    # twice (once in the opening tag, once in the closing tag).
    if text.count("INST") != 2:
        return unparsed

    # Opening tag, prompt, closing tag (written with "/" or "\"), answer.
    match = re.search(
        r"\[\s*INST\s*\](.*?)\[\s*[/\\]\s*INST\s*\](.*)",
        text,
        flags=re.DOTALL,
    )
    if match is None:
        return unparsed

    input_part = match.group(1).strip()
    output_part = match.group(2).strip()

    # Strip the end-of-sequence marker as a whole token so that no "<s>"
    # residue is left in the answer.
    eos = "</s>"
    if output_part.endswith(eos):
        output_part = output_part[: -len(eos)].rstrip()

    return {"input": input_part, "output": output_part}

In [None]:
# Parse every sample, then drop the rows the parser could not split
# (it marks those with empty strings) so that no empty prompt/answer pairs
# end up in the training data.
new_dataset = dataset.map(parse_input_output).filter(
    lambda row: bool(row["input"]) and bool(row["output"])
)

In [None]:
# The raw "text" column is no longer needed once it has been parsed.
# Guarded so that re-running this cell does not raise on a missing column.
if "text" in new_dataset.column_names:
    new_dataset = new_dataset.remove_columns("text")

In [None]:
# From here on `dataset` refers to the parsed dataset; the raw dataset
# loaded earlier is no longer reachable under this name.
dataset = new_dataset

In [None]:
# System prompt placed at the start of every conversation
system_message = "Sen insanların istediği tariflere cevap veren bir Aşçısın. Insanların istediklerine göre soruları cevapla."


def create_conversation(sample):
    """Turn one parsed sample into a three-message chat conversation.

    Args:
        sample: Mapping with "input" (user prompt) and "output" (answer).

    Returns:
        {"messages": [...]} holding, in order, the system prompt, the user
        message and the assistant message, each as a dict with "role" and
        "content" keys.
    """
    turns = (
        ("system", system_message),
        ("user", sample["input"]),
        ("assistant", sample["output"]),
    )
    return {"messages": [{"role": role, "content": content} for role, content in turns]}


# Split from `new_dataset` (the parsed Dataset that `dataset` was bound to
# above) rather than from `dataset` itself: this cell rebinds `dataset` to a
# DatasetDict, so splitting `dataset` would fail when the cell is re-run.
# A fixed seed keeps the train/test split reproducible across runs.
dataset = new_dataset.train_test_split(test_size=0.05, seed=42)

# Replace the parsed columns with a single chat-style "messages" column.
dataset = dataset.map(
    create_conversation,
    remove_columns=dataset["train"].column_names,
    batched=False,
)

print("Dataset: ", dataset["train"][0]["messages"])

In [None]:
# setup_chat_format() raises when the tokenizer already carries a chat
# template -- which is also the case after this cell has run once. Only
# install the default chat format when no template is present yet.
if getattr(tokenizer, "chat_template", None) is None:
    model, tokenizer = setup_chat_format(model, tokenizer)

In [None]:
# Training hyper-parameters.
# The scheduler is "constant_with_warmup" so that `warmup_ratio` takes
# effect; the plain "constant" schedule has no warmup phase.
args = TrainingArguments(
    output_dir="recipe-chat-bot", # directory to save and repository id
    num_train_epochs=5,                     # number of training epochs
    per_device_train_batch_size=2,          # batch size per device during training
    #gradient_accumulation_steps=2,          # number of steps before performing a backward/update pass
    #gradient_checkpointing=True,            # use gradient checkpointing to save memory
    optim="adamw_torch_fused",              # use fused adamw optimizer
    logging_steps=10,                       # log every 10 steps
    save_strategy="epoch",                  # save checkpoint every epoch
    learning_rate=2e-4,                     # learning rate, based on QLoRA paper
    bf16=True,                              # use bfloat16 precision
    max_grad_norm=0.3,                      # max gradient norm based on QLoRA paper
    warmup_ratio=0.03,                      # warmup ratio based on QLoRA paper
    lr_scheduler_type="constant_with_warmup",  # constant learning rate after the warmup phase
#   push_to_hub=True,                       # push model to hub
#   report_to="tensorboard",                # report metrics to tensorboard
)


# Supervised fine-tuning on the train split; the trainer receives the LoRA
# configuration alongside the model.
trainer = SFTTrainer(
    args=args,
    model=model,
    peft_config=peft_config,
    train_dataset=dataset["train"],
)

# Run the training loop, then save the result.
# No path is passed to save_model(), so it presumably writes to
# `args.output_dir` -- confirm.
trainer.train()
trainer.save_model()