In [None]:
# Install Unsloth (with its "colab-new" extra) directly from the GitHub repository.
%pip install -qqq "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" --progress-bar off #--use-deprecated=legacy-resolver
# Read the installed torch version so the xformers requirement can be chosen to match it.
from torch import __version__; from packaging.version import Version as V
# Pin xformers to 0.0.27 when torch is older than 2.4.0; otherwise leave it unpinned.
xformers = "xformers==0.0.27" if V(__version__) < V("2.4.0") else "xformers"
# Install the remaining packages with --no-deps (their dependencies are not pulled in);
# {xformers} is interpolated from the Python variable defined on the previous line.
%pip install -qqq --no-deps {xformers}  peft accelerate bitsandbytes triton --progress-bar off #--use-deprecated=legacy-resolver

import torch
from trl import SFTTrainer
from datasets import load_dataset
from transformers import TrainingArguments, TextStreamer
from unsloth.chat_templates import get_chat_template
from unsloth import FastLanguageModel, is_bfloat16_supported

In [None]:
# Load the base checkpoint and its tokenizer through Unsloth
max_seq_length = 2048  # reused further down when the trainer is configured

base_model_kwargs = dict(
    model_name="unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,  # no explicit dtype is requested here
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_kwargs)

# Wrap the loaded model with LoRA adapters on the listed projection modules
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "up_proj", "down_proj", "o_proj", "gate_proj"],
    use_rslora=True,
    use_gradient_checkpointing="unsloth",
)

# print_trainable_parameters() emits its own summary and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
model.print_trainable_parameters()

In [None]:
# Tell the ChatML template which keys and role names the conversation records use:
# "from"/"value" stand in for role/content, "human"/"gpt" for user/assistant.
role_mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}

tokenizer = get_chat_template(tokenizer, chat_template="chatml", mapping=role_mapping)

def apply_template(examples):
    """Render each conversation of a batch into one chat-formatted string.

    Uses the module-level ``tokenizer`` and its chat template.

    Args:
        examples: Batch mapping whose "conversations" entry holds one message
            list per row.

    Returns:
        A dict with a single "text" key: one rendered (untokenized) string per
        conversation, in input order, without a trailing generation prompt.
    """
    rendered = []
    for conversation in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

# Fetch the training split, then add the rendered "text" column batch by batch
finetome_train = load_dataset("mlabonne/FineTome-100k", split="train")
dataset = finetome_train.map(apply_template, batched=True)

In [None]:
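# # Alternative data pipeline (disabled): builds the "text" column from a dataset with
# # "question"/"answer" columns instead of a "conversations" column.
# # Assumes get_chat_template and tokenizer are defined in earlier cells.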
# tokenizer = get_chat_template(
#     tokenizer,
#     chat_template="chatml",
#     mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"}
# )

# def apply_template(examples):
#     # Prepare lists to hold the output
#     texts = []
    
#     for question, answer in zip(examples["question"], examples["answer"]):
#         # Construct the messages
#         messages = [
#             {"role": "user", "content": question},    # User question
#             {"role": "assistant", "content": answer}   # Assistant answer
#         ]
        
#         # Apply the chat template and append to the list
#         text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
#         texts.append(text)
    
#     # Return a dictionary with lists
#     return {"text": texts}

# # Load the new dataset
# dataset = load_dataset("malhajar/gsm8k_tr-v0.2", split="test")

# # Map the apply_template function to the dataset
# dataset = dataset.map(apply_template, batched=True)

In [None]:
# Decide the mixed-precision mode once: bf16 where supported, fp16 otherwise
use_bf16 = is_bfloat16_supported()

# Optimisation settings; 4 samples per device x 4 accumulation steps
# gives 16 samples per optimizer step on each device.
training_args = TrainingArguments(
    output_dir="output",
    seed=0,
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=3e-4,
    lr_scheduler_type="linear",
    warmup_steps=10,
    weight_decay=0.01,
    optim="adamw_8bit",
    bf16=use_bf16,
    fp16=not use_bf16,
    logging_steps=1,
)

# Supervised fine-tuning on the pre-rendered "text" column
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    packing=True,
    dataset_num_proc=2,
    args=training_args,
)

trainer.train()

In [None]:
# Switch the model to Unsloth's inference mode.
# The call is made for its effect on `model`; the name is deliberately not
# rebound to the return value, so a release whose for_inference() returns
# nothing cannot leave `model` set to None before generate() is called.
# NOTE(review): assumes for_inference() configures the model in place, as in
# Unsloth's own usage examples — confirm for the installed version.
FastLanguageModel.for_inference(model)

# Single-turn prompt using the same "from"/"value" keys as the training data
messages = [
    {"from": "human", "value": "Is 9.11 larger than 9.9?"},
]
# Disabled Turkish prompt ("Is 9.11 smaller than 9.9?"):
# messages = [
#     {"from": "human", "value": "9.11 9.9'dan küçük mü?"},
# ]

# Render the prompt with the chat template, ending with the generation prompt
# so the model continues as the assistant, and move the token ids to the GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")

# Stream decoded tokens to the cell output while generating
text_streamer = TextStreamer(tokenizer)
_ = model.generate(input_ids=inputs, streamer=text_streamer, max_new_tokens=128, use_cache=True)

In [None]:
# Write the trained model and the tokenizer into the same directory
save_dir = "./llama-finetuned-qlora-2"
trainer.save_model(save_dir)
tokenizer.save_pretrained(save_dir)