In [None]:
import torch
major_version, minor_version = torch.cuda.get_device_capability()
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
pass
!pip install triton transformers
!pip install -U datasets
!pip install --pre -U xformers ##### this take some time

In [None]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, TrainingArguments, Trainer
from transformers import AutoModelForCausalLM
from qlora import QLoRAConfig 



In [None]:
class QADataset(Dataset):
    """Map-style dataset that renders dataframe rows as question/answer text.

    Each row's ``title`` column becomes the question and its ``context``
    column becomes the answer. The combined text is tokenized, padded and
    truncated to ``max_length``, and returned with causal-LM labels.

    Args:
        dataframe: Table with ``title`` and ``context`` columns; rows are
            accessed positionally through ``.iloc``.
        tokenizer: Callable accepting ``(text, max_length=, padding=,
            truncation=, return_tensors=)`` and returning an object exposing
            ``input_ids`` and ``attention_mask`` tensors with a leading
            batch dimension of size 1.
        max_length: Fixed token length every example is padded/truncated to.
    """

    # Label value skipped by the loss (the default ``ignore_index`` of
    # PyTorch's cross-entropy), used to exclude padding from training.
    IGNORE_INDEX = -100

    def __init__(self, dataframe, tokenizer, max_length=512):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.max_length = max_length

    def __len__(self):
        """Return the number of rows in the underlying dataframe."""
        return len(self.data)

    def __getitem__(self, idx):
        """Tokenize row ``idx`` and return its training tensors.

        Returns:
            dict with 1-D tensors ``input_ids``, ``attention_mask`` and
            ``labels``. ``labels`` equals ``input_ids`` wherever the
            attention mask is 1 and ``IGNORE_INDEX`` elsewhere.
        """
        row = self.data.iloc[idx]
        title = row['title']
        context = row['context']
        question = f"Question: {title}\n"
        answer = f"Answer: {context}\n"

        # Combine question and answer as the input text
        input_text = question + answer

        # Tokenize the input text
        inputs = self.tokenizer(
            input_text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # Drop only the batch dimension; a bare squeeze() would also collapse
        # the sequence dimension when max_length == 1.
        input_ids = inputs.input_ids.squeeze(0)
        attention_mask = inputs.attention_mask.squeeze(0)

        # Labels mirror the inputs, except at padded positions: those are set
        # to IGNORE_INDEX so padding does not contribute to the loss.
        labels = input_ids.clone()
        labels[attention_mask == 0] = self.IGNORE_INDEX

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': labels
        }




In [None]:
# Fine-tuning pipeline: load the QA table, split it into train/eval, wrap the
# base model with the QLoRA configuration, and run the Hugging Face Trainer.

# Load the training data from CSV
dataframe = pd.read_csv('./dataset/qa_va_content_rag.csv')
if len(dataframe) < 2:
    raise ValueError("Need at least two rows to build separate train and eval splits.")

# Hold out a fixed, seeded share of the rows for evaluation. Evaluating on the
# training rows themselves would only measure memorisation.
EVAL_FRACTION = 0.1
SPLIT_SEED = 42
eval_row_count = max(1, int(len(dataframe) * EVAL_FRACTION))
eval_dataframe = dataframe.sample(n=eval_row_count, random_state=SPLIT_SEED)
train_dataframe = dataframe.drop(index=eval_dataframe.index)

# Initialize the tokenizer
model_id = "meta-llama/Meta-Llama-Guard-2-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# QADataset pads every example to max_length, which requires a pad token.
# Fall back to the EOS token when the checkpoint does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Create the datasets (`dataset` is the training split)
dataset = QADataset(train_dataframe, tokenizer)
eval_dataset = QADataset(eval_dataframe, tokenizer)

# Define the QLoRA configuration
# NOTE(review): QLoRAConfig comes from the `qlora` import at the top of the
# notebook, which the install cell does not install — confirm the package
# exists and exposes this API.
# NOTE(review): the target patterns use `model.decoder.layers` naming — confirm
# they match the module names of this checkpoint.
qlora_config = QLoRAConfig(
    model_name_or_path=model_id,
    task_name="text-generation",
    target_modules=["model.decoder.layers.*.self_attn.q_proj", "model.decoder.layers.*.self_attn.k_proj"],
    lora_alpha=32,
    lora_dropout=0.1,
    lora_r=8,
)

model = AutoModelForCausalLM.from_pretrained(model_id)
model = qlora_config.apply_to(model)  # Apply the QLoRA configuration to the model

training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    save_steps=10_000,
    logging_dir='./logs',
    logging_steps=500,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

trainer.train()

In [None]:
# Write the model and then the tokenizer into the same output directory.
model_save_path = './trained_model'

for artifact in (model, tokenizer):
    artifact.save_pretrained(model_save_path)