<a href="https://colab.research.google.com/github/frank-morales2020/Cloud_curious/blob/master/UFTA_DEMO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q -U bitsandbytes
!pip install -q -U transformers
!pip install -q -U datasets
!pip install -q -U accelerate
!pip install -q -U peft

In [None]:
# Run the NVIDIA `nvidia-smi` command-line tool and show its output in the cell.
!nvidia-smi

In [1]:
import os
os.environ["WANDB_MODE"] = "offline"
os.environ["WANDB_DISABLED"] = "true"


!pip install transformers accelerate --quiet

from transformers import TrainingArguments
import accelerate

# Initialize the Accelerator
accelerator = accelerate.Accelerator()

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig
from datasets import load_dataset
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training # Import LoraConfig
import warnings
warnings.filterwarnings("ignore")

class FineTuningAgent:
    """Load a causal LM, prepare a text-to-SQL dataset, and wrap both in a `Trainer`.

    The constructor does all the work: it loads the (optionally 4-bit
    quantized) model and tokenizer, samples and splits the dataset, builds
    the `TrainingArguments`, optionally applies LoRA, tokenizes the data and
    creates the `Trainer`. `train()` and `evaluate()` delegate to that trainer.

    Args:
        model_id: Model identifier passed to `from_pretrained`.
        dataset_name: Dataset identifier passed to `load_dataset`. Rows must
            provide "question", "context" and "answer" columns.
        config: Dict with optional keys:
            "quantization" (truthy -> load in 4-bit NF4),
            "lora" (truthy -> apply LoRA adapters),
            "training_args" (kwargs forwarded to `TrainingArguments`),
            "seed" (optional int used for shuffling and splitting).
    """

    # Label value excluded from the loss by Hugging Face causal-LM heads.
    IGNORE_INDEX = -100
    # Fixed length every tokenized example is truncated/padded to.
    MAX_LENGTH = 1024
    # Upper bound on the number of rows sampled from the dataset.
    NUM_SAMPLES = 1250
    # Fraction of the sampled rows held out for evaluation (250 of 1250).
    TEST_FRACTION = 250 / 1250

    def __init__(self, model_id: str, dataset_name: str, config: dict) -> None:
        self.model_id = model_id
        self.dataset_name = dataset_name
        self.config = config
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # 1. Load Model and Tokenizer (with quantization if enabled)
        quantization_config = None
        if config.get("quantization"):
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
            )

        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            quantization_config=quantization_config,
            trust_remote_code=True,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

        # Add padding token if it does not exist
        if self.tokenizer.pad_token is None:
            self.tokenizer.add_special_tokens({'pad_token': '[PAD]'})
            self.model.resize_token_embeddings(len(self.tokenizer))  # Important: Resize embeddings

        # bitsandbytes-quantized models are placed on the accelerator while
        # loading, and `.to()` is rejected for them by some library versions,
        # so only move the model explicitly when it is not quantized.
        if quantization_config is None:
            self.model.to(self.device)

        # 2. Load Dataset (using dataset name from Hugging Face Hub)
        self.dataset = self._load_dataset()

        # 3. Prepare Training Arguments
        self.training_args = TrainingArguments(**config.get("training_args", {}))
        # disable remove_unused_columns to keep all columns
        self.training_args.remove_unused_columns = False

        # 4. PEFT Configuration (LoRA)
        if config.get("lora"):  # Check if LoRA is enabled in config
            self.model = prepare_model_for_kbit_training(self.model)

            # LoRA config based on QLoRA paper & Sebastian Raschka experiment
            peft_config = LoraConfig(
                lora_alpha=128,
                lora_dropout=0.05,
                r=256,
                bias="none",
                target_modules="all-linear",
                task_type="CAUSAL_LM",
            )

            # 5. Apply PEFT to the model
            self.model = get_peft_model(self.model, peft_config)
            self.model.print_trainable_parameters()  # Print trainable parameters for verification

        # Tokenize both splits; the raw text columns are replaced by
        # input_ids / attention_mask / labels.
        self.dataset = self.dataset.map(
            self._tokenize_batch,
            batched=True,
            remove_columns=self.dataset["train"].column_names,
        )

        # 6. Initialize Trainer
        self.trainer = Trainer(
            model=self.model,
            args=self.training_args,
            train_dataset=self.dataset["train"],
            eval_dataset=self.dataset["test"],
        )

    def _load_dataset(self):
        """Sample, convert and split the dataset named by `self.dataset_name`.

        Returns the train/test split produced by `train_test_split`; each row
        carries the chat-style "messages" plus the original "question",
        "context" and "answer" columns.
        """
        # Convert dataset to OAI messages
        system_message = """You are an text to SQL query translator. Users will ask you questions in English and you will generate a SQL query based on the provided SCHEMA.
        SCHEMA:
        {schema}"""

        def create_conversation(sample):
            return {
                "messages": [
                    {"role": "system", "content": system_message.format(schema=sample["context"])},
                    {"role": "user", "content": sample["question"]},
                    {"role": "assistant", "content": sample["answer"]}
                ],
                # Retain original columns
                "question": sample["question"],
                "context": sample["context"],
                "answer": sample["answer"]
            }

        seed = self.config.get("seed")  # None keeps the shuffle/split unseeded
        dataset = load_dataset(self.dataset_name, split="train")
        # Limit to at most NUM_SAMPLES (1250) rows; never ask for more rows
        # than the dataset actually has.
        sample_count = min(self.NUM_SAMPLES, len(dataset))
        dataset = dataset.shuffle(seed=seed).select(range(sample_count))
        dataset = dataset.map(create_conversation, remove_columns=dataset.column_names, batched=False)
        return dataset.train_test_split(test_size=self.TEST_FRACTION, seed=seed)

    def _tokenize_batch(self, examples):
        """Turn a batch of question/context/answer rows into causal-LM features.

        Each prompt and its answer are joined into ONE token sequence so the
        labels line up position-by-position with `input_ids`, which is what a
        causal language model's loss expects. Prompt and padding positions
        are masked out of the loss.
        """
        # The context and question are combined to form the prompt
        prompts = [
            f"### Question: {q} ### Context: {c}"
            for q, c in zip(examples["question"], examples["context"])
        ]
        prompt_ids = self.tokenizer(prompts)["input_ids"]
        # No special tokens here: the answer continues the prompt sequence.
        answer_ids = self.tokenizer(examples["answer"], add_special_tokens=False)["input_ids"]

        batch = {"input_ids": [], "attention_mask": [], "labels": []}
        for prompt, answer in zip(prompt_ids, answer_ids):
            row = self._build_causal_example(
                prompt,
                answer,
                self.MAX_LENGTH,
                self.tokenizer.pad_token_id,
                self.tokenizer.eos_token_id,
                self.IGNORE_INDEX,
            )
            for key, value in row.items():
                batch[key].append(value)
        return batch

    @staticmethod
    def _build_causal_example(prompt_ids, answer_ids, max_length, pad_token_id,
                              eos_token_id=None, ignore_index=-100):
        """Build fixed-length `input_ids`, `attention_mask` and `labels` lists.

        Args:
            prompt_ids: Token ids of the prompt.
            answer_ids: Token ids of the target answer.
            max_length: Exact length of every returned list.
            pad_token_id: Id used to right-pad `input_ids`.
            eos_token_id: If not None, appended to the answer so the model
                learns where the answer ends.
            ignore_index: Label value for positions excluded from the loss.

        Returns:
            Dict with "input_ids", "attention_mask" and "labels", each a list
            of length `max_length`. Labels equal the answer tokens at the
            answer positions and `ignore_index` at prompt and pad positions.
        """
        target = list(answer_ids)
        if eos_token_id is not None:
            target.append(eos_token_id)
        target = target[:max_length]
        # When the pair is too long, shorten the prompt rather than the
        # answer, so there is always something left to learn from.
        prompt = list(prompt_ids)[: max_length - len(target)]

        input_ids = prompt + target
        labels = [ignore_index] * len(prompt) + target
        attention_mask = [1] * len(input_ids)

        padding = max_length - len(input_ids)
        input_ids += [pad_token_id] * padding
        labels += [ignore_index] * padding
        attention_mask += [0] * padding
        return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

    def train(self):
        """Run `Trainer.train()` on the prepared training split."""
        self.trainer.train()

    def evaluate(self):
        """Return the result of `Trainer.evaluate()` on the held-out split."""
        return self.trainer.evaluate()


# Example Usage: fine-tune Mistral-7B-Instruct on the sql-create-context
# dataset with 4-bit quantization and LoRA, then print the evaluation metrics.
config = {
    "training_args": {
        "output_dir": "./results",
        "num_train_epochs": 1,
        "per_device_train_batch_size": 3,
        "gradient_accumulation_steps": 2,
        # The string "none" disables every logging integration; passing None
        # instead falls back to the library default, which can be "all".
        "report_to": "none",
        "gradient_checkpointing": True,           # use gradient checkpointing to save memory
        "optim": 'adamw_torch_fused',             # use fused adamw optimizer
        "logging_steps": 100,                     # log every 100 steps
        "save_strategy": 'epoch',                 # save checkpoint every epoch
        "learning_rate": 2e-4,                    # learning rate, based on QLoRA paper
        "bf16": True,                             # use bfloat16 precision
        "tf32": True,                             # use tf32 precision
        "max_grad_norm": 0.3,                     # max gradient norm based on QLoRA paper
        "warmup_ratio": 0.03,                     # warmup ratio based on QLoRA paper
        # NOTE(review): the plain 'constant' schedule appears to ignore
        # warmup_ratio; 'constant_with_warmup' would apply it - confirm intent.
        "lr_scheduler_type": 'constant',
    },
    "quantization": True,
    "lora": True,  # Enable LoRA
}

agent = FineTuningAgent(
    model_id="mistralai/Mistral-7B-Instruct-v0.1",
    dataset_name="b-mc2/sql-create-context",
    config=config,
)

print('\n')
agent.train()
eval_results = agent.evaluate()
print('\n')
print(eval_results)
#trainable params: 671,088,640 || all params: 7,912,828,928 || trainable%: 8.4810

`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
