<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/UFTF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Environment Setup

In [None]:
# Install necessary modules
!pip install transformers accelerate trl bitsandbytes datasets peft --quiet
!pip install -U bitsandbytes -q

In [1]:
# Show the attached GPU, its driver/CUDA versions and current memory usage.
!nvidia-smi

Mon Feb 24 06:42:48 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA L4                      Off |   00000000:00:03.0 Off |                    0 |
| N/A   59C    P8             13W /   72W |       0MiB /  23034MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
import os

# transformers warns that the `WANDB_DISABLED` environment variable is deprecated and will be
# removed in v5; the suggested replacement is the `report_to` setting (for instance `report_to none`).

# Keep Weights & Biases in offline mode.
os.environ["WANDB_MODE"] = "offline"

# Deprecated switch described in the note above; it is still set here.
os.environ["WANDB_DISABLED"] = "true"


# Repeats the transformers/accelerate part of the install cell at the top of the notebook.
!pip install transformers accelerate --quiet

from transformers import TrainingArguments
import accelerate

# Initialize the Accelerator
# NOTE(review): `accelerator` is not referenced by any other cell visible here — confirm it is still needed.
accelerator = accelerate.Accelerator()

## Universal FineTuningAgent

In [None]:
import os
import torch
import warnings
import gc
from transformers import (
    TrainingArguments,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorWithPadding,
    AutoModelForCausalLM,
)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import Trainer
import copy


# Suppress every warning raised through the `warnings` module from this point on.
# Note that this also hides deprecation notices emitted by the libraries imported above.
warnings.filterwarnings("ignore")


def clear_memory():
    """Run Python garbage collection, then release cached CUDA memory if a GPU is available."""
    gc.collect()
    # Nothing else to free on CPU-only machines.
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()


class FineTuningAgent:
    def __init__(self, model_id, dataset_name, config=None):
        """
        Initializes the FineTuningAgent.
        """
        self.model_id = model_id
        self.dataset_name = dataset_name
        if config is None:
            config = {}
        self.config = config
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = None
        self.model = None
        self.trainer = None
        self.training_args = None
        self.peft_config = None
        self.dataset = None
        self.counter = 0
        self.data_collator = None

    def _observe(self):
        """
        Observe stage: load the model, the tokenizer and a shuffled dataset sample.

        Settings read from ``self.config``:
            quantization: truthy to load the model in 4-bit through bitsandbytes.
            dataset_size: number of rows to keep from the ``train`` split
                (default 125); clamped to the number of rows available.
            seed: optional shuffle seed (default ``None``, i.e. unseeded).

        On success ``self.model``, ``self.tokenizer`` and ``self.dataset`` are
        set. If the model id is not supported, or loading fails three times,
        a message is printed and the method returns early without loading the
        tokenizer or the dataset.
        """
        self.counter += 1
        print("Starting Observe ...")

        clear_memory()

        model_key = self.model_id.lower()

        quantization_config = None
        if self.config.get("quantization"):
            if "mistral" in model_key:
                print("Mistral model detected. Using 4-bit quantization.")
                quantization_config = BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_use_double_quant=True,
                    bnb_4bit_quant_type="nf4",
                    bnb_4bit_compute_dtype=torch.bfloat16,
                )
            else:
                quantization_config = BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_use_double_quant=False,
                    bnb_4bit_quant_type="nf4",
                    bnb_4bit_compute_dtype=torch.float32,
                )

        max_retries = 3
        for attempt in range(1, max_retries + 1):
            try:
                # Pick the model class from the architecture named in the model id.
                if "bert" in model_key:
                    self.model = AutoModelForSequenceClassification.from_pretrained(
                        self.model_id,
                        num_labels=2,  # For MRPC, which is binary classification
                        quantization_config=quantization_config,
                        trust_remote_code=True,
                    )
                elif "mistral" in model_key:
                    self.model = AutoModelForCausalLM.from_pretrained(
                        self.model_id,
                        quantization_config=quantization_config,
                        trust_remote_code=True,
                    )
                else:
                    print(f"Model {self.model_id} not supported.")
                    return
                break
            except (KeyboardInterrupt, Exception) as e:
                # KeyboardInterrupt is retried on purpose (as in the original code),
                # so an interrupted download is attempted again.
                if isinstance(e, KeyboardInterrupt):
                    print(f"Model download interrupted. Retrying... (Attempt {attempt}/{max_retries})")
                else:
                    print(f"An error occurred during model download: {e}")
                # Clear GPU memory to avoid potential issues
                clear_memory()
                if attempt == max_retries:
                    print("Max retry reached, skipping model download.")
                    return

        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_id, trust_remote_code=True
        )

        # Add padding token if it does not exist
        if self.tokenizer.pad_token is None:
            self.tokenizer.add_special_tokens({"pad_token": "[PAD]"})
            self.model.resize_token_embeddings(len(self.tokenizer))
            # Keep the model configuration in sync with the tokenizer.
            self.model.config.pad_token_id = self.tokenizer.pad_token_id

        # bitsandbytes-quantized models are placed on their device while loading,
        # and some transformers versions reject `.to()` on them, so only
        # full-precision models are moved explicitly.
        if quantization_config is None:
            self.model.to(self.device)

        # Load Dataset (using dataset name from Hugging Face Hub)
        dataset = load_dataset(self.dataset_name, split="train")
        # Never ask for more rows than the split holds (select() would raise).
        sample_size = min(self.config.get("dataset_size", 125), len(dataset))
        self.dataset = dataset.shuffle(seed=self.config.get("seed")).select(
            range(sample_size)
        )

        print("\n")
        print("Observe finished.")

    def _orient(self):
        """
        Orients the agent by formatting the dataset and preparing training arguments.
        """
        print("\n")
        self.counter += 1
        print(f"Starting Orient ...")
        if self.dataset_name == "SetFit/mrpc":
            print("Dataset: SetFit/mrpc")
            preprocessing_function = self._preprocess_function_mrpc
        elif self.dataset_name == "b-mc2/sql-create-context":
            print("Dataset: b-mc2/sql-create-context")
            preprocessing_function = self._preprocess_function_sql_create_context
        elif self.dataset_name == "anthropic/hh-rlhf":
            print("Dataset: anthropic/hh-rlhf")
            preprocessing_function = self._preprocess_function_anthropic_hh_rlhf
        else:
            print(f"Dataset: {self.dataset_name} not supported.")
            return

        # Set the train/test split.
        test_size_percentage = self.config.get("test_split_percentage", 0.2)  # Set a default test size to 20%
        self.dataset = self.dataset.train_test_split(
            test_size=test_size_percentage
        )

        self.dataset = self.dataset.map(
            preprocessing_function,
            batched=True,
            remove_columns=self.dataset["train"].column_names,
        )

        # 3. Prepare Training Arguments
        self.training_args = TrainingArguments(**self.config.get("training_args", {}))
        self.training_args.remove_unused_columns = False

        print("\n")
        print(f"Orient Dataset: {self.dataset}")

        print("\n")
        print(f"Orient finished.")

    def _decide(self):
        """
        Decides on the fine-tuning strategy, including LoRA configuration.
        """
        self.counter += 1
        print("\n")
        print(f"Starting Decide ...")
        clear_memory()
        # PEFT Configuration (LoRA)
        if self.config.get("lora"):
            self.model = prepare_model_for_kbit_training(self.model)
            if "bert" in self.model_id.lower():
                peft_config = LoraConfig(
                    lora_alpha=16,  # You can tune this.
                    lora_dropout=0.1,  # You can tune this.
                    r=64,  # You can tune this.
                    bias="none",
                    target_modules=["query", "key", "value", "dense"],  # Correct target modules for BERT
                    task_type="SEQ_CLS",  # correct task type
                )
            elif "mistral" in self.model_id.lower():
                peft_config = LoraConfig(
                    lora_alpha=128,
                    lora_dropout=0.05,
                    r=256,
                    bias="none",
                    target_modules=["q_proj", "k_proj", "v_proj", "o_proj","gate_proj", "up_proj", "down_proj"],
                    task_type="CAUSAL_LM",
                )

                print("\n")
                print(f"LORA: {peft_config}")
            else:
                print(f"Model {self.model_id} not supported.")
                return

            self.peft_config = peft_config
            self.model = get_peft_model(self.model, peft_config)

            self.model.print_trainable_parameters()


        print('\n')
        print(f"Decide finished.")

    def _act(self):
        """
        Acts by preprocessing the dataset and initializing the training loop.
        """
        self.counter += 1
        print("\n")
        print(f"Starting Act ...")
        clear_memory()

        try:
            if "train" not in self.dataset or "test" not in self.dataset:
                print(f"Missing train or test split for {self.dataset_name}")
                return

            print("Dataset preprocessed successfully.")
            print("\n")
            # Set collator
            self.data_collator = DataCollatorWithPadding(tokenizer=self.tokenizer)

            # Initialize Trainer
            print("Initializing Trainer...")
            # Use the Trainer class instead of SFTTrainer
            self.trainer = Trainer(
                model=self.model,
                args=self.training_args,
                train_dataset=self.dataset["train"],
                eval_dataset=self.dataset["test"],
                data_collator=self.data_collator,
            )

        except Exception as e:
            print(f"An error occurred in _act(): {e}")
            raise

        print("\n")
        print(f"Act finished.")

    def run(self):
        """
        Executes the OODA loop and fine-tunes the language model.
        """
        self.counter += 1
        print("\n")
        print(f"Starting Run ...")
        clear_memory()
        self._observe()
        if self.model is None:
            print("Model loading failed, skipping _orient, _decide and _act")
            return
        self._orient()
        self._decide()
        self._act()

        print("\n")
        print(f"Run Dataset: {self.dataset}")
        print("\n")

        if self.trainer is not None:
            try:
                # Train the model
                self.trainer.train()
                print("\n")
                print("Evaluation:")
                eval_results = self.evaluate()
                print("\n")
                print(eval_results)
                print("\n")
            except Exception as e:
                print(f"An error occurred during training or evaluation: {e}")
                raise
        else:
            print("Trainer is None. Skipping training and evaluation.")

        print(f"Run  finished.")

    def evaluate(self):
        """
        Evaluates the fine-tuned language model.
        """
        return self.trainer.evaluate()

    def _preprocess_function_mrpc(self, examples):
        """
        Preprocesses the data for the SetFit/mrpc dataset.
        """
        print("Preprocess Dataset: SetFit/mrpc")
        inputs = self.tokenizer(
            examples["text1"],
            examples["text2"],
            max_length=128,  # Adjust as needed
            truncation=True,
        )
        inputs["labels"] = examples["label"]  # Assuming your label column is named "label"
        return inputs

    def _preprocess_function_sql_create_context(self, examples):
        """
        Preprocesses the data for the b-mc2/sql-create-context dataset.
        """
        print("Preprocess Dataset: b-mc2/sql-create-context")
        # Construct "question" and "context" using the 'text' column for b-mc2/sql-create-context
        inputs = [f"### Question: {q} ### Context: {c}" for q, c in zip(examples["question"], examples["context"])]
        # adding padding and max_length to model inputs tokenization
        model_inputs = self.tokenizer(inputs, max_length=1024, truncation=True, padding="max_length")
        with self.tokenizer.as_target_tokenizer():
            # adding padding and max_length to labels tokenization
            labels = self.tokenizer(examples["answer"], max_length=1024, truncation=True, padding="max_length")
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

    def _preprocess_function_anthropic_hh_rlhf(self, examples):
        """
        Preprocesses the data for the anthropic/hh-rlhf dataset.
        """
        print("Preprocess Dataset: anthropic/hh-rlhf")
        # Construct "question" and "context" using the 'text' column for b-mc2/sql-create-context
        inputs = examples["chosen"]
        # adding padding and max_length to model inputs tokenization
        model_inputs = self.tokenizer(inputs, max_length=1024, truncation=True, padding="max_length")
        with self.tokenizer.as_target_tokenizer():
            # adding padding and max_length to labels tokenization
            labels = self.tokenizer(examples["chosen"], max_length=1024, truncation=True, padding="max_length")
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs


# Configuration for experiments
def _make_training_args(output_dir):
    """Return the TrainingArguments keyword dict shared by all experiments.

    Only ``output_dir`` differs between experiments. A fresh dict is built on
    every call, so experiments never share mutable state.
    """
    return {
        "output_dir": output_dir,
        "per_device_train_batch_size": 1,  # reduce batch size
        "gradient_accumulation_steps": 2,
        # The string "none" turns every logging integration off. Python None
        # is not the same thing: the warning printed by transformers itself
        # recommends `report_to none`.
        "report_to": "none",
        "gradient_checkpointing": True,
        "optim": "adamw_torch_fused",
        "logging_steps": 5,
        "save_strategy": "epoch",
        "learning_rate": 2e-4,
        "bf16": True,
        "tf32": True,
        "max_grad_norm": 0.3,
        "warmup_ratio": 0.03,
        "lr_scheduler_type": "constant",
        "num_train_epochs": 1,
    }


def _make_experiment(model_id, dataset_name, output_dir):
    """Return one experiment entry: model id, dataset name and agent config."""
    return {
        "model_id": model_id,
        "dataset_name": dataset_name,
        "config": {
            "dataset_size": 125,
            "test_split_percentage": 0.2,
            "quantization": True,
            "lora": True,
            "training_args": _make_training_args(output_dir),
        },
    }


RL_PAIRS = [
    _make_experiment(
        "google-bert/bert-base-uncased",
        "SetFit/mrpc",
        "./mrpc_output",
    ),
    _make_experiment(
        "mistralai/Mistral-7B-Instruct-v0.1",
        "b-mc2/sql-create-context",
        "./sql_create_context_output",
    ),
    _make_experiment(
        "mistralai/Mistral-7B-Instruct-v0.1",
        "anthropic/hh-rlhf",
        "./hh_rlhf_output",
    ),
]

# Run every configured experiment in order, one agent per (model, dataset) pair.
separator = "*" * 50
for rl_pair in RL_PAIRS:
    headline = (
        f"Running experiment with model: {rl_pair['model_id']} and dataset: {rl_pair['dataset_name']}"
    )
    print("\n")
    print(separator)
    print(headline)
    print(separator)
    print("\n")

    agent = FineTuningAgent(
        model_id=rl_pair["model_id"],
        dataset_name=rl_pair["dataset_name"],
        config=rl_pair["config"],
    )
    # Kick off the OODA loop and the fine-tuning run for this pair.
    agent.run()
    print("\n")



**************************************************
Running experiment with model: google-bert/bert-base-uncased and dataset: SetFit/mrpc
**************************************************




Starting Run ...
Starting Observe ...


`low_cpu_mem_usage` was None, now default to True since model is quantized.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.




Observe finished.


Starting Orient ...
Dataset: SetFit/mrpc


Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Preprocess Dataset: SetFit/mrpc


Map:   0%|          | 0/25 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Preprocess Dataset: SetFit/mrpc


Orient Dataset: DatasetDict({
    train: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 100
    })
    test: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 25
    })
})


Orient finished.


Starting Decide ...
trainable params: 10,716,674 || all params: 120,200,452 || trainable%: 8.9157


Decide finished.


Starting Act ...
Dataset preprocessed successfully.


Initializing Trainer...


Act finished.


Run Dataset: DatasetDict({
    train: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 100
    })
    test: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 25
    })
})




Step,Training Loss
5,0.7891
10,0.6223
15,0.6205
20,0.6945
25,0.6994
30,0.7029
35,0.6211
40,0.7111
45,0.7199
50,0.6584




Evaluation:




{'eval_loss': 0.6581249833106995, 'eval_runtime': 0.198, 'eval_samples_per_second': 126.27, 'eval_steps_per_second': 20.203, 'epoch': 1.0}


Run  finished.




**************************************************
Running experiment with model: mistralai/Mistral-7B-Instruct-v0.1 and dataset: b-mc2/sql-create-context
**************************************************




Starting Run ...
Starting Observe ...
Mistral model detected. Using 4-bit quantization.


`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`




Observe finished.


Starting Orient ...
Dataset: b-mc2/sql-create-context


Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Preprocess Dataset: b-mc2/sql-create-context


Map:   0%|          | 0/25 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Preprocess Dataset: b-mc2/sql-create-context


Orient Dataset: DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 100
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 25
    })
})


Orient finished.


Starting Decide ...


LORA: LoraConfig(task_type='CAUSAL_LM', peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, inference_mode=False, r=256, target_modules={'q_proj', 'o_proj', 'gate_proj', 'k_proj', 'down_proj', 'up_proj', 'v_proj'}, exclude_modules=None, lora_alpha=128, lora_dropout=0.05, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, eva_config=None, use_dora=False, layer_replication=None, runtime_config=LoraRuntimeConf

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
5,11.8078
10,10.3753
15,10.2552
20,10.2525
25,10.2353
30,10.2267
35,10.216
40,10.2287
45,10.2195
50,10.2194




Evaluation:




{'eval_loss': 10.199603080749512, 'eval_runtime': 17.4731, 'eval_samples_per_second': 1.431, 'eval_steps_per_second': 0.229, 'epoch': 1.0}


Run  finished.




**************************************************
Running experiment with model: mistralai/Mistral-7B-Instruct-v0.1 and dataset: anthropic/hh-rlhf
**************************************************




Starting Run ...
Starting Observe ...
Mistral model detected. Using 4-bit quantization.


`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Observe finished.


Starting Orient ...
Dataset: anthropic/hh-rlhf


Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Preprocess Dataset: anthropic/hh-rlhf


Map:   0%|          | 0/25 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Preprocess Dataset: anthropic/hh-rlhf


Orient Dataset: DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 100
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 25
    })
})


Orient finished.


Starting Decide ...


LORA: LoraConfig(task_type='CAUSAL_LM', peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, inference_mode=False, r=256, target_modules={'q_proj', 'o_proj', 'gate_proj', 'k_proj', 'down_proj', 'up_proj', 'v_proj'}, exclude_modules=None, lora_alpha=128, lora_dropout=0.05, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, eva_config=None, use_dora=False, layer_replication=None, runtime_config=LoraRuntimeConfig(ephe

Step,Training Loss
5,9.4644
10,8.1491
15,8.5892
20,8.1038
25,8.2246
30,8.1627
35,8.2436
40,8.2365
45,8.3331
50,8.0804




Evaluation:




{'eval_loss': 8.606639862060547, 'eval_runtime': 17.383, 'eval_samples_per_second': 1.438, 'eval_steps_per_second': 0.23, 'epoch': 1.0}


Run  finished.


