<a href="https://colab.research.google.com/github/frank-morales2020/Cloud_curious/blob/master/FINAL_UFTF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary modules
!pip install transformers accelerate trl bitsandbytes datasets peft --quiet
!pip install -U bitsandbytes -q
!pip install unsloth --quiet
!pip install wandb --quiet

#!nvidia-smi

import os
import torch
import warnings
import gc
from transformers import (
    TrainingArguments,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorWithPadding,
    AutoModelForCausalLM,
)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import Trainer
import accelerate
from trl import DPOTrainer
import copy

# Import from Unsloth
from unsloth import FastLanguageModel, is_bfloat16_supported
from unsloth.kernels import cross_entropy_loss

# Create a module-level Accelerator instance.
# NOTE(review): `accelerator` is not referenced again in the visible part of
# this file — confirm whether it is still needed.
accelerator = accelerate.Accelerator()

# The `WANDB_DISABLED` environment variable is deprecated and will be removed
# in transformers v5; logging integrations should be selected through
# `report_to` instead (e.g. `report_to="none"`). The two environment-variable
# switches below are therefore kept disabled.
#os.environ["WANDB_MODE"] = "offline"
#os.environ["WANDB_DISABLED"] = "true"

# Suppress every warning for the rest of the session. Because this filter has
# no message/category restriction it matches all warnings.
warnings.filterwarnings("ignore")

# `warnings` is already imported at the top of the file; this second import is
# a harmless repeat. The message-specific filter below is already covered by
# the blanket "ignore" filter above.
import warnings
warnings.filterwarnings("ignore", message="Environment variable num_items_in_batch not found.")


def clear_memory():
    """Run Python garbage collection and, if CUDA is available, free GPU caches."""
    gc.collect()
    if not torch.cuda.is_available():
        # Nothing GPU-related to release on a CPU-only machine.
        return
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()


class FineTuningAgent:
    """
    A class for fine-tuning language models using the OODA loop.
    """

    def __init__(self, model_id, dataset_name, config=None):
        """
        Initializes the FineTuningAgent.

        Args:
            model_id (str): The ID of the pre-trained model.
            dataset_name (str): The name of the dataset to use.
            config (dict, optional): Configuration parameters. Defaults to None.
        """
        self.model_id = model_id
        self.dataset_name = dataset_name
        self.config = config if config is not None else {}
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = None
        self.model = None
        self.trainer = None
        self.training_args = None
        self.peft_config = None
        self.dataset = None
        self.counter = 0
        self.data_collator = None
        self.model_type = None
        print("Agent dictionary(inside __init__):")
        print(self.config)

    def _observe(self):
        """
        Loads the model, tokenizer, and dataset.

        The model family is inferred from ``self.model_id`` ("bert" ->
        encoder-only; "mistral"/"deepseek"/"unsloth" -> decoder-only) and
        loaded either through Unsloth or through Hugging Face Transformers.
        Loading is attempted up to three times. On an unsupported model id or
        after the last failed attempt the method returns early with
        ``self.model`` and ``self.tokenizer`` left as ``None`` so that
        ``run()`` can detect the failure.

        Config keys read here: ``use_unsloth``, ``quantization``,
        ``unsloth_model_id`` (defaults to ``self.model_id``),
        ``max_seq_length``, ``dtype``, ``load_in_4bit``, ``access_token``,
        ``dataset_num_proc`` and ``dataset_size``.
        """
        self.counter += 1
        print("Starting Observe ...")

        clear_memory()

        model_id_lower = self.model_id.lower()

        # Check if Unsloth should be used.
        use_unsloth = self.config.get("use_unsloth", False)
        if use_unsloth:
            print("Unsloth will be used.")

        # Hugging Face (bitsandbytes) quantization is only configured when
        # Unsloth is not in charge of loading the model.
        quantization_config = None
        if self.config.get("quantization") and not use_unsloth:
            is_mistral = "mistral" in model_id_lower
            if is_mistral:
                print("Mistral model detected. Using 4-bit quantization.")
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=is_mistral,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16 if is_mistral else torch.float32,
            )

        max_retries = 3
        for attempt in range(1, max_retries + 1):
            try:
                supported = self._load_model_and_tokenizer(
                    use_unsloth, quantization_config
                )
            except KeyboardInterrupt:
                print(
                    f"Model download interrupted. Retrying... (Attempt {attempt}/{max_retries})"
                )
            except Exception as e:
                print(f"An error occurred during model download: {e}")
            else:
                if not supported:
                    print(f"Model {self.model_id} not supported.")
                    return
                break
            # Only reached after a failed attempt: drop any half-loaded state
            # so callers never see a model without its tokenizer, then free
            # GPU memory before retrying.
            self.model = None
            self.tokenizer = None
            clear_memory()
        else:
            # The loop ran out of attempts without a successful load.
            print("Max retry reached, skipping model download.")
            return

        # Add padding token if it does not exist
        if self.tokenizer.pad_token is None:
            self.tokenizer.add_special_tokens({"pad_token": "[PAD]"})
            self.model.resize_token_embeddings(len(self.tokenizer))

        loaded_by_unsloth = use_unsloth or "unsloth" in model_id_lower
        if not loaded_by_unsloth and quantization_config is None:
            # Only plain (non-quantized) Hugging Face models are moved
            # explicitly. bitsandbytes-quantized models are placed on their
            # device while loading, and some transformers versions raise when
            # `.to()` is called on them.
            self.model.to(self.device)

        # Load Dataset (using dataset name from Hugging Face Hub)
        dataset = load_dataset(
            self.dataset_name,
            split="train",
            num_proc=self.config.get("dataset_num_proc", 2),
        )
        # Never ask for more rows than the dataset holds; `select` would raise.
        subset_size = min(self.config.get("dataset_size", 125), len(dataset))
        self.dataset = dataset.shuffle().select(range(subset_size))

        print("\n")
        print("Observe finished.")

    def _load_model_and_tokenizer(self, use_unsloth, quantization_config):
        """Load model/tokenizer for ``self.model_id``; return False if the id is unsupported."""
        model_id_lower = self.model_id.lower()

        if "bert" in model_id_lower:
            print("BERT model detected.")
            self.model_type = "encoder-only"
            if use_unsloth:
                print("Loading BERT with Unsloth")
                self.model, self.tokenizer = self._load_with_unsloth()
            else:
                print("Loading BERT with Hugging Face")
                self.model, self.tokenizer = self._load_with_hugging_face(
                    AutoModelForSequenceClassification,
                    quantization_config,
                    num_labels=2,
                )
        elif "mistral" in model_id_lower or "deepseek" in model_id_lower:
            print("Decoder-only model detected.")
            self.model_type = "decoder-only"
            if use_unsloth:
                print("Loading Decoder-only with Unsloth")
                self.model, self.tokenizer = self._load_with_unsloth()
            else:
                print("Loading Decoder-only with Hugging Face")
                self.model, self.tokenizer = self._load_with_hugging_face(
                    AutoModelForCausalLM, quantization_config
                )
        elif "unsloth" in model_id_lower:
            print("Unsloth model detected.")
            print("Loading Unsloth model")
            self.model, self.tokenizer = self._load_with_unsloth()
            self.model_type = "decoder-only"
        else:
            return False

        return True

    def _load_with_unsloth(self):
        """Return ``(model, tokenizer)`` loaded through Unsloth's FastLanguageModel."""
        return FastLanguageModel.from_pretrained(
            # An explicit `unsloth_model_id` wins; otherwise load the model
            # the agent was actually configured with.
            model_name=self.config.get("unsloth_model_id", self.model_id),
            max_seq_length=self.config.get("max_seq_length", 2048),
            dtype=self.config.get("dtype", None),
            load_in_4bit=self.config.get("load_in_4bit", True),
            token=self.config.get("access_token", None),
        )

    def _load_with_hugging_face(self, model_cls, quantization_config, **model_kwargs):
        """Return ``(model, tokenizer)`` loaded through Transformers' ``from_pretrained``."""
        # NOTE: trust_remote_code=True lets the model repository run its own
        # Python code on load; only use model ids from trusted sources.
        model = model_cls.from_pretrained(
            self.model_id,
            quantization_config=quantization_config,
            trust_remote_code=True,
            **model_kwargs,
        )
        tokenizer = AutoTokenizer.from_pretrained(
            self.model_id, trust_remote_code=True
        )
        return model, tokenizer

    def _orient(self):
        """
        Orients the agent by formatting the dataset and preparing training arguments.
        """
        print("\n")
        self.counter += 1
        print("Starting Orient ...")
        if self.dataset_name == "SetFit/mrpc":
            print("Dataset: SetFit/mrpc")
            preprocessing_function = self._preprocess_function_mrpc
        elif self.dataset_name == "b-mc2/sql-create-context":
            print("Dataset: b-mc2/sql-create-context")
            preprocessing_function = self._preprocess_function_sql_create_context
        elif self.dataset_name == "anthropic/hh-rlhf":
            print("Dataset: anthropic/hh-rlhf")
            preprocessing_function = self._preprocess_function_anthropic_hh_rlhf
        elif self.dataset_name == "imdb":
            print("Dataset: imdb")
            preprocessing_function = self._preprocess_function_imdb
        else:
            print(f"Dataset: {self.dataset_name} not supported.")
            return

        # Set the train/test split.
        test_size_percentage = self.config.get("test_split_percentage", 0.2)
        self.dataset = self.dataset.train_test_split(
            test_size=test_size_percentage
        )

        self.dataset = self.dataset.map(
            preprocessing_function,
            batched=True,
            remove_columns=self.dataset["train"].column_names,
        )

        # 3. Prepare Training Arguments
        # Import is_bfloat16_supported function.


        # Create TrainingArguments with the desired parameters
        training_args_config = self.config.get("training_args", {})
        self.training_args = TrainingArguments(
            output_dir=training_args_config.get("output_dir", "./output"),
            per_device_train_batch_size=training_args_config.get(
                "per_device_train_batch_size", 2
            ),
            gradient_accumulation_steps=training_args_config.get(
                "gradient_accumulation_steps", 4
            ),
            warmup_steps=training_args_config.get("warmup_steps", 5),
            max_steps=training_args_config.get("max_steps", 60),
            learning_rate=training_args_config.get("learning_rate", 2e-4),
            fp16=training_args_config.get("fp16", not is_bfloat16_supported()),
            bf16=training_args_config.get("bf16", is_bfloat16_supported()),
            logging_steps=training_args_config.get("logging_steps", 10),
            optim=training_args_config.get("optim", "adamw_8bit"),
            weight_decay=training_args_config.get("weight_decay", 0.01),
            lr_scheduler_type=training_args_config.get("lr_scheduler_type", "linear"),
            seed=training_args_config.get("seed", 3407),
            evaluation_strategy=training_args_config.get(
                "evaluation_strategy", "steps"
            ),  # we need this
            eval_steps=training_args_config.get("eval_steps", 20),
            save_strategy=training_args_config.get("save_strategy", "steps"),
            save_steps=training_args_config.get("save_steps", 20),
            report_to=training_args_config.get("report_to", "wandb"),
            remove_unused_columns=False # we need this
        )

        print("\n")
        print(f"Orient Dataset: {self.dataset}")

        print("\n")
        print("Orient finished.")

    def _decide(self):
        """
        Chooses the fine-tuning strategy.

        When the config enables ``lora``, the model is prepared for k-bit
        training and wrapped in a PEFT LoRA adapter whose settings depend on
        the model id. Returns early, without wrapping the model, when no
        adapter settings exist for the model id.
        """
        self.counter += 1
        print("\n")
        print("Starting Decide ...")
        clear_memory()

        # PEFT Configuration (LoRA)
        lora_enabled = self.config.get("lora")
        if lora_enabled:
            self.model = prepare_model_for_kbit_training(self.model)

            model_name = self.model_id.lower()
            # Adapter settings shared by every causal-LM family handled here.
            causal_lm_settings = dict(
                lora_alpha=128,
                lora_dropout=0.05,
                r=256,
                bias="none",
                target_modules=[
                    "q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj",
                ],
                task_type="CAUSAL_LM",
            )

            if "bert" in model_name:
                adapter_config = LoraConfig(
                    lora_alpha=16,  # tunable
                    lora_dropout=0.1,  # tunable
                    r=64,  # tunable
                    bias="none",
                    target_modules=["query", "key", "value", "dense"],
                    task_type="SEQ_CLS",
                )
            elif "mistral" in model_name or "deepseek" in model_name:
                adapter_config = LoraConfig(**causal_lm_settings)
            elif "unsloth" in model_name:
                adapter_config = LoraConfig(**causal_lm_settings)
                # Only this branch echoes the chosen configuration.
                print("\n")
                print(f"LORA: {adapter_config}")
            else:
                print(f"Model {self.model_id} not supported.")
                return

            self.peft_config = adapter_config
            self.model = get_peft_model(self.model, adapter_config)
            self.model.print_trainable_parameters()

        print("\n")
        print("Decide finished.")

    def _act(self):
        """
        Builds the data collator and the Trainer from the prepared state.

        Returns early, without creating a trainer, when the dataset lacks a
        "train" or "test" split. Any exception is reported and re-raised.
        """
        self.counter += 1
        print("\n")
        print("Starting Act ...")
        clear_memory()

        try:
            has_both_splits = all(
                split in self.dataset for split in ("train", "test")
            )
            if not has_both_splits:
                print(f"Missing train or test split for {self.dataset_name}")
                return

            print("Dataset preprocessed successfully.")
            print("\n")

            # Both paths use the same padding collator; there is no
            # Unsloth-specific collator here, only the log line differs.
            unsloth_in_use = (
                self.config.get("use_unsloth", False)
                or "unsloth" in self.model_id.lower()
            )
            if unsloth_in_use:
                print("Unsloth data collator used.")
            self.data_collator = DataCollatorWithPadding(tokenizer=self.tokenizer)
            if not unsloth_in_use:
                print("Hugging Face data collator used.")

            # The plain Trainer is used here (not SFTTrainer).
            print("Initializing Trainer...")
            self.trainer = Trainer(
                model=self.model,
                args=self.training_args,
                train_dataset=self.dataset["train"],
                eval_dataset=self.dataset["test"],
                data_collator=self.data_collator,
            )

        except Exception as e:
            print(f"An error occurred in _act(): {e}")
            raise

        print("\n")
        print("Act finished.")

    def run(self):
        """
        Drives the full OODA cycle (observe, orient, decide, act), then
        trains and evaluates the model when a trainer was created.
        """
        self.counter += 1
        print("\n")
        print("Starting Run ...")
        clear_memory()

        self._observe()
        if self.model is None:
            print("Model loading failed, skipping _orient, _decide and _act")
            return

        # Remaining stages, in order.
        for stage in (self._orient, self._decide, self._act):
            stage()

        print("\n")
        print(f"Run Dataset: {self.dataset}")
        print("\n")

        if self.trainer is None:
            print("Trainer is None. Skipping training and evaluation.")
            print("Run  finished.")
            return

        try:
            # Train the model, then report the evaluation metrics.
            self.trainer.train()
            print("\n")
            print("Evaluation:")
            eval_results = self.evaluate()
            print("\n")
            print(eval_results)
            print("\n")
        except Exception as e:
            print(f"An error occurred during training or evaluation: {e}")
            raise

        print("Run  finished.")

    def evaluate(self):
        """
        Evaluates the fine-tuned language model.
        """
        return self.trainer.evaluate()

    def _preprocess_function_mrpc(self, examples):
        """
        Preprocesses the data for the SetFit/mrpc dataset.
        Handles different model types and sequence lengths.
        """
        print("Preprocess Dataset: SetFit/mrpc")

        max_length = self.config.get("max_length", 128)  # Get max_length from config

        if self.model_type == "encoder-only":
            # BERT and other encoder-only models
            inputs = self.tokenizer(
                examples["text1"],
                examples["text2"],
                max_length=max_length,
                truncation=True,
                padding="max_length",
            )
            inputs["labels"] = examples["label"]
        elif self.model_type == "decoder-only":
            # Mistral, DeepSeek, and other decoder-only models
            # Adapt input format as needed for text generation tasks
            # We need the code of the DeepSeek tokenizer and preprocessing
            print("Decoder-only models not implemented yet in MRPC Dataset.")
            return
            # inputs = self.tokenizer(...)  # Adapt for generation
            # inputs["labels"] = ...        # Adapt for generation

        else:
            raise ValueError(f"Unsupported model type: {self.model_type}")

        return inputs

    def _preprocess_function_sql_create_context(self, examples):
        """
        Preprocesses the data for the b-mc2/sql-create-context dataset.
        Handles different model types and sequence lengths.
        """
        print("Preprocess Dataset: b-mc2/sql-create-context")

        max_length = self.config.get("max_length", 1024)  # Get max_length from config

        if self.model_type == "decoder-only":
            # Mistral, DeepSeek, and other decoder-only models
            # Tokenize inputs and labels
            inputs = [f"### Question: {q} ### Context: {c}" for q, c in zip(examples["question"], examples["context"])]
            model_inputs = self.tokenizer(inputs, max_length=max_length, truncation=True, padding="max_length")
            # Tokenize labels
            labels_tokenized = self.tokenizer(examples["answer"], max_length=max_length, truncation=True, padding="max_length")
            # Assign labels to model_inputs
            model_inputs["labels"] = labels_tokenized["input_ids"]
        elif self.model_type == "encoder-only":
            # BERT and other encoder-only models
            # Tokenize inputs and labels
            inputs = [f"### Question: {q} ### Context: {c}" for q, c in zip(examples["question"], examples["context"])]
            model_inputs = self.tokenizer(inputs, max_length=max_length, truncation=True, padding="max_length")
            # Tokenize labels
            labels_tokenized = self.tokenizer(examples["answer"], max_length=max_length, truncation=True, padding="max_length")
            # Assign labels to model_inputs
            model_inputs["labels"] = labels_tokenized["input_ids"]
        else:
            raise ValueError(f"Unsupported model type: {self.model_type}")

        return model_inputs

    def _preprocess_function_anthropic_hh_rlhf(self, examples):
        """
        Preprocesses the data for the anthropic/hh-rlhf dataset.
        Handles different model types and sequence lengths.
        """
        print("Preprocess Dataset: anthropic/hh-rlhf")

        max_length = self.config.get("max_length", 1024)  # Get max_length from config

        if self.model_type == "decoder-only":
            # Mistral, DeepSeek, and other decoder-only models
            inputs = examples["chosen"]
            model_inputs = self.tokenizer(inputs, max_length=max_length, truncation=True, padding="max_length")
            # Tokenize labels
            labels_tokenized = self.tokenizer(examples["chosen"], max_length=max_length, truncation=True, padding="max_length")
            model_inputs["labels"] = labels_tokenized["input_ids"]
        elif self.model_type == "encoder-only":
            # BERT and other encoder-only models
            inputs = examples["chosen"]
            model_inputs = self.tokenizer(inputs, max_length=max_length, truncation=True, padding="max_length")
            # Tokenize labels
            labels_tokenized = self.tokenizer(examples["chosen"], max_length=max_length, truncation=True, padding="max_length")
            model_inputs["labels"] = labels_tokenized["input_ids"]
        else:
            raise ValueError(f"Unsupported model type: {self.model_type}")

        return model_inputs

    def _preprocess_function_imdb(self, examples):
        """
        Preprocesses the data for the imdb dataset.
        Handles different model types and sequence lengths.
        """
        print("Preprocess Dataset: imdb")

        max_length = self.config.get("max_length", 1024)  # Get max_length from config

        if self.model_type == "encoder-only":
             # BERT and other encoder-only models
            inputs = self.tokenizer(
                examples["text"],
                max_length=max_length,
                truncation=True,
                padding="max_length",
            )
            inputs["labels"] = examples["label"]
            return inputs
        elif self.model_type == "decoder-only":
            # Decoder-only models (Mistral, DeepSeek, etc.)
            model_inputs = self.tokenizer(
                examples["text"],
                max_length=max_length,
                truncation=True,
                padding="max_length",
            )
            # Copy input_ids to labels for causal LM training
            model_inputs["labels"] = model_inputs["input_ids"].copy()

            return model_inputs
        else:
            raise ValueError(f"Unsupported model type: {self.model_type}")
# %%

#TODO

#"unsloth/mistral-7b-bnb-4bit",
#"unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
#"unsloth/llama-2-7b-bnb-4bit",
#"unsloth/llama-2-13b-bnb-4bit",
#"unsloth/codellama-34b-bnb-4bit",
#"unsloth/tinyllama-bnb-4bit",
#"unsloth/gemma-7b-bnb-4bit", # New Google 6 trillion tokens model 2.5x faster!
#"unsloth/gemma-2b-bnb-4bit",
#    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",      # Llama-3.1 15 trillion tokens model 2x faster!
#    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
#    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
#    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",    # We also uploaded 4bit for 405b!
#    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit", # New Mistral 12b 2x faster!
#    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
#    "unsloth/mistral-7b-v0.3-bnb-4bit",        # Mistral v3 2x faster!
#    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
#    "unsloth/Phi-3.5-mini-instruct",           # Phi-3.5 2x faster!
#    "unsloth/Phi-3-medium-4k-instruct",
#    "unsloth/gemma-2-9b-bnb-4bit",
#    "unsloth/gemma-2-27b-bnb-4bit",



# Flat, single-experiment configuration: BERT base on SetFit/mrpc.
# NOTE(review): unlike the nested "config"/"training_args" layout used by the
# experiment list in this file, every setting here sits at the top level —
# confirm how (and whether) this dict is consumed.
RL_PAIRSNO = {
    # Experiment identity
    "experiment_name": "google-bert_bert-base-uncased_SetFit_mrpc_base_config",
    "model_id": "google-bert/bert-base-uncased",
    "dataset_name": "SetFit/mrpc",
    # Data
    "dataset_size": 125,
    "test_split_percentage": 0.2,
    # Model loading / adaptation
    "quantization": True,
    "lora": True,
    "use_unsloth": False,
    "max_seq_length": 2048,
    "dtype": None,
    "load_in_4bit": True,
    # Training
    "output_dir": "./google-bert_bert-base-uncased_SetFit_mrpc_base_config_output",
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 2,
    "report_to": "none",
    "gradient_checkpointing": True,
    "optim": "adamw_torch_fused",
    "logging_steps": 5,
    "save_strategy": "epoch",
    "learning_rate": 0.0002,
    "bf16": True,
    "tf32": True,
    "max_grad_norm": 0.3,
    "warmup_ratio": 0.03,
    "lr_scheduler_type": "constant",
    "num_train_epochs": 1,
}

# %%
# Configuration for experiments
RL_PAIRSSI = [
        {
    "model_id": "unsloth/mistral-7b-instruct-v0.3-bnb-4bit", # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
        "dataset_name": "imdb",
        "config": {
            "dataset_size": 125,
            "test_split_percentage": 0.2,
            "quantization": True,
            "lora": True,
            "use_unsloth": True,  # Indicate that Unsloth should be used for this model
            "max_seq_length": 2048,
            "dtype": None,
            "load_in_4bit": True,
            "training_args": {
                "report_to": "none",
                "output_dir": "./imdb_output",
                "per_device_train_batch_size": 1,
                "gradient_accumulation_steps": 2,
                "report_to": "none",
                "gradient_checkpointing": True,
                "optim": "adamw_torch_fused",
                "logging_steps": 5,
                "report_to": "none",
                "save_strategy": "epoch",
                "learning_rate": 2e-4,
                "bf16": True,
                "tf32": True,
                "max_grad_norm": 0.3,
                "warmup_ratio": 0.03,
                "lr_scheduler_type": "constant",
                "num_train_epochs":1,
                "remove_unused_columns": False,
            },
        },
    },

    {
        "model_id": "unsloth/DeepSeek-R1-Distill-Llama-8B",
        "dataset_name": "b-mc2/sql-create-context",
        "config": {
            "unsloth_model_id": "deepseek-ai/deepseek-coder-1.3b-base",  # Corrected model ID for Unsloth
            "dataset_size": 125,
            "test_split_percentage": 0.2,
            "quantization": True,
            "lora": True,
            "use_unsloth": True,
            "max_seq_length": 2048,
            "dtype": None,
            "load_in_4bit": True,
            "access_token": None,  # No needed a token
            "dataset_num_proc": 2,
            "training_args": {
                "report_to": "none",
                "output_dir": "./unsloth_sql_create_context_output",
                "per_device_train_batch_size": 2,
                "gradient_accumulation_steps": 4,
                "report_to": "none",
                "gradient_checkpointing": True,
                "optim": "adamw_8bit",
                "logging_steps": 10,
                "save_strategy": "epoch",
                "learning_rate": 2e-4,
                "bf16": True,
                "fp16": False,
                "max_grad_norm": 0.3,
                "warmup_steps": 5,
                "lr_scheduler_type": "linear",
                "num_train_epochs": 1,
                "weight_decay": 0.01,
                "max_steps": 60,
                "seed": 3407,
                "evaluation_strategy": "steps", # We need this
                "eval_steps": 20, # We need this
            },
        },
    },


    {
        "model_id": "google-bert/bert-base-uncased",  # Corrected model ID
        "dataset_name": "SetFit/mrpc",
        "config": {
            "dataset_size": 125,
            "test_split_percentage": 0.2,
            "quantization": True,
            "lora": True,
            "training_args": {
                "report_to": "none",
                "output_dir": "./mrpc_output",
                "per_device_train_batch_size": 1,  # reduce batch size
                "gradient_accumulation_steps": 2,
                "report_to": "wandb",
                "gradient_checkpointing": True,
                "optim": "adamw_torch_fused",
                "logging_steps": 5,
                "save_strategy": "epoch",
                "report_to": "none",
                "learning_rate": 2e-4,
                "bf16": True,
                "tf32": True,
                "max_grad_norm": 0.3,
                "warmup_ratio": 0.03,
                "lr_scheduler_type": "constant",
                "num_train_epochs": 1, # Set the correct epoch
                "remove_unused_columns": False,
            },
        },
    },
    {
        "model_id": "mistralai/Mistral-7B-Instruct-v0.1",
        "dataset_name": "b-mc2/sql-create-context",
        "config": {
            "dataset_size": 125,
            "test_split_percentage": 0.2,
            "quantization": True,
            "lora": True,
            "training_args": {
                "report_to": "none",
                "output_dir": "./sql_create_context_output",
                "per_device_train_batch_size": 1,
                "gradient_accumulation_steps": 2,
                "report_to": "none",
                "gradient_checkpointing": True,
                "optim": "adamw_torch_fused",
                "logging_steps": 5,
                "save_strategy": "epoch",
                "learning_rate": 2e-4,
                "bf16": True,
                "tf32": True,
                "max_grad_norm": 0.3,
                "warmup_ratio": 0.03,
                "lr_scheduler_type": "constant",
                "num_train_epochs":1, # Set the correct epoch
                "remove_unused_columns": False,
            },
        },
    },
    {
        "model_id": "mistralai/Mistral-7B-Instruct-v0.1",
        "dataset_name": "anthropic/hh-rlhf",
        "config": {
            "dataset_size": 125,
            "test_split_percentage": 0.2,
            "quantization": True,
            "lora": True,
            "training_args": {
                "report_to": "none",
                "output_dir": "./hh_rlhf_output",
                "per_device_train_batch_size": 1,
                "gradient_accumulation_steps": 2,
                "gradient_checkpointing": True,
                "optim": "adamw_torch_fused",
                "logging_steps": 5,
                "report_to": None,
                "save_strategy": "epoch",
                "learning_rate": 2e-4,
                "bf16": True,
                "tf32": True,
                "max_grad_norm": 0.3,
                "warmup_ratio": 0.03,
                "lr_scheduler_type": "constant",
                "num_train_epochs":1, # Set the correct epoch
                "remove_unused_columns": False,
            },
        },
    },
    {
        "model_id": "unsloth/DeepSeek-R1-Distill-Llama-8B",
        "dataset_name": "b-mc2/sql-create-context",
        "config": {
            "unsloth_model_id": "deepseek-ai/deepseek-coder-1.3b-base",  # Corrected model ID for Unsloth
            "dataset_size": 125,
            "test_split_percentage": 0.2,
            "quantization": True,
            "lora": True,
            "use_unsloth": True,
            "max_seq_length": 2048,
            "dtype": None,
            "load_in_4bit": True,
            "access_token": None,  # No needed a token
            "dataset_num_proc": 2,
            "training_args": {
                "report_to": "none",
                "output_dir": "./unsloth_sql_create_context_output",
                "per_device_train_batch_size": 2,
                "gradient_accumulation_steps": 4,
                "gradient_checkpointing": True,
                "optim": "adamw_8bit",
                "logging_steps": 10,
                "save_strategy": "epoch",
                "learning_rate": 2e-4,
                "bf16": True,
                "fp16": False,
                "max_grad_norm": 0.3,
                "warmup_steps": 5,
                "lr_scheduler_type": "linear",
                "num_train_epochs": 1,
                "weight_decay": 0.01,
                "max_steps": 60,
                "seed": 3407,
                "evaluation_strategy": "steps", # We need this
                "eval_steps": 20, # We need this
            },
        },
    },

]


# Alternative set of experiment definitions. Each entry has the same shape as
# the entries the experiment loop reads from RL_PAIRS: "model_id",
# "dataset_name" and a "config" dict with a nested "training_args" dict.
# NOTE(review): "training_args" is presumably forwarded to
# transformers.TrainingArguments -- confirm in FineTuningAgent.
#
# Every "training_args" dict lists "report_to" exactly once. A dict literal
# silently keeps only the last value of a repeated key, so repeating the key
# with different values only hides which setting is actually in effect.
RL_PAIRS2 = [
    {
        "model_id": "mistralai/Mistral-7B-Instruct-v0.1",
        "dataset_name": "b-mc2/sql-create-context",
        "config": {
            "dataset_size": 125,
            "test_split_percentage": 0.2,
            "quantization": True,
            "lora": True,
            "training_args": {
                "output_dir": "./sql_create_context_output",
                "per_device_train_batch_size": 1,
                "gradient_accumulation_steps": 2,
                "report_to": "none",  # no external experiment tracker
                "gradient_checkpointing": True,
                "optim": "adamw_torch_fused",
                "logging_steps": 5,
                "save_strategy": "epoch",
                "learning_rate": 2e-4,
                "bf16": True,
                "tf32": True,
                "max_grad_norm": 0.3,
                "warmup_ratio": 0.03,
                "lr_scheduler_type": "constant",
                "num_train_epochs": 1,  # single pass over the training split
                "remove_unused_columns": False,
            },
        },
    },
    {
        "model_id": "mistralai/Mistral-7B-Instruct-v0.1",
        "dataset_name": "anthropic/hh-rlhf",
        "config": {
            "dataset_size": 125,
            "test_split_percentage": 0.2,
            "quantization": True,
            "lora": True,
            "training_args": {
                "output_dir": "./hh_rlhf_output",
                "per_device_train_batch_size": 1,
                "gradient_accumulation_steps": 2,
                "report_to": "none",  # no external experiment tracker
                "gradient_checkpointing": True,
                "optim": "adamw_torch_fused",
                "logging_steps": 5,
                "save_strategy": "epoch",
                "learning_rate": 2e-4,
                "bf16": True,
                "tf32": True,
                "max_grad_norm": 0.3,
                "warmup_ratio": 0.03,
                "lr_scheduler_type": "constant",
                "num_train_epochs": 1,  # single pass over the training split
                "remove_unused_columns": False,
            },
        },
    },
    {
        "model_id": "unsloth/DeepSeek-R1-Distill-Llama-8B",
        "dataset_name": "b-mc2/sql-create-context",
        "config": {
            "unsloth_model_id": "deepseek-ai/deepseek-coder-1.3b-base",  # Corrected model ID for Unsloth
            "dataset_size": 125,
            "test_split_percentage": 0.2,
            "quantization": True,
            "lora": True,
            "use_unsloth": True,
            "max_seq_length": 2048,
            "dtype": None,
            "load_in_4bit": True,
            "access_token": None,  # no token needed
            "dataset_num_proc": 2,
            "training_args": {
                "output_dir": "./unsloth_sql_create_context_output",
                "per_device_train_batch_size": 2,
                "gradient_accumulation_steps": 4,
                "report_to": "none",  # no external experiment tracker
                "gradient_checkpointing": True,
                "optim": "adamw_8bit",
                "logging_steps": 10,
                "save_strategy": "epoch",
                "learning_rate": 2e-4,
                "bf16": True,
                "fp16": False,
                "max_grad_norm": 0.3,
                "warmup_steps": 5,
                "lr_scheduler_type": "linear",
                # NOTE(review): if these reach transformers.TrainingArguments,
                # a positive max_steps takes precedence over num_train_epochs
                # -- confirm which of the two is meant to bound the run.
                "num_train_epochs": 1,
                "weight_decay": 0.01,
                "max_steps": 60,
                "seed": 3407,
                "evaluation_strategy": "steps",  # required: evaluation is step-based
                "eval_steps": 20,  # required: evaluation interval, in steps
            },
        },
    },
]

# Import necessary modules
import itertools
import copy
import gc
import torch
from IPython import get_ipython
from IPython.display import display

def clear_memory():
    """Run the garbage collector, then release cached CUDA memory if CUDA is available.

    Without CUDA only ``gc.collect()`` runs. With CUDA, the PyTorch caching
    allocator is emptied and CUDA IPC memory is collected as well.
    """
    gc.collect()
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()

def create_rl_pairs():
    """
    Build every (dataset, model, config) combination for the RL experiments.

    The result is the Cartesian product of the dataset names, model IDs and
    configuration dicts defined below (4 x 4 x 4 = 64 entries). The dataset
    varies slowest and the config varies fastest.

    Returns:
        list: ``(dataset_name, model_id, config)`` tuples. Each tuple owns a
        deep copy of its config dict, so mutating the config of one
        experiment never leaks into another.
    """

    datasets = [
        "SetFit/mrpc",
        "b-mc2/sql-create-context",
        "anthropic/hh-rlhf",
        "imdb",
    ]
    models = [
        "bert-base-uncased",
        "mistralai/Mistral-7B-v0.1",
        "deepseek-ai/deepseek-coder-1.3b-base",
        "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    ]

    def make_training_args():
        """Return a fresh dict of the training arguments shared by all configs."""
        return {
            "output_dir": "./output",
            "per_device_train_batch_size": 4,
            "gradient_accumulation_steps": 4,
            "warmup_steps": 5,
            "max_steps": 60,
            "learning_rate": 2e-4,
            "logging_steps": 10,
            "weight_decay": 0.01,
            "eval_steps": 20,
            # Set for every config, not just the first: the hand-written
            # experiment entries in this file all disable external reporting,
            # and the generated ones should behave the same way.
            "report_to": "none",
            "save_steps": 20,
        }

    def make_config(max_length, unsloth_model_id=None):
        """Return one config; passing ``unsloth_model_id`` adds the Unsloth settings."""
        config = {
            "max_length": max_length,
            "quantization": True,
            "use_unsloth": unsloth_model_id is not None,
            "lora": True,
            "dataset_size": 125,
            "dataset_num_proc": 2,
            "test_split_percentage": 0.2,
            "training_args": make_training_args(),
        }
        if unsloth_model_id is not None:
            config["unsloth_model_id"] = unsloth_model_id
            config["max_seq_length"] = 2048
            config["dtype"] = None
            config["load_in_4bit"] = True
        return config

    # One non-Unsloth config followed by one Unsloth config per Unsloth model ID.
    configs = [
        make_config(128),
        make_config(1024, "unsloth/mistral-7b-instruct-v0.3-bnb-4bit"),
        make_config(1024, "bert-base-uncased"),
        make_config(1024, "deepseek-ai/deepseek-coder-1.3b-base"),
    ]

    # Each of the four config dicts is reused for 16 dataset/model pairs, so
    # every pair must receive its own deep copy (nested "training_args" too).
    return [
        (dataset, model, copy.deepcopy(config))
        for dataset, model, config in itertools.product(datasets, models, configs)
    ]


# Build every (dataset, model, config) combination.
rl_pairs = create_rl_pairs()

# Run the experiments one after another. A failure in one combination is
# reported and the loop moves on to the next one.
for dataset_name, model_id, config in rl_pairs:
    # Drop the reference to the previous iteration's agent *before* clearing
    # memory: as long as the name `agent` still points at it, nothing that
    # object holds can be garbage-collected, so clear_memory() would have
    # little to reclaim.
    agent = None
    clear_memory()
    print("\n")
    print("Running experiment with:")
    print(f"- Dataset: {dataset_name}")
    print(f"- Model: {model_id}")
    print(f"- Config: {config}")
    print("\n")

    try:
        agent = FineTuningAgent(model_id, dataset_name, config)
        agent.run()
    except Exception as e:
        # Deliberately broad: this is the top-level boundary of a batch run,
        # and one failing combination must not abort the remaining ones.
        print(f"An error occurred during the experiment: {e}")













# Run the experiments listed in RL_PAIRS, one after another. Errors are not
# caught here: the first failing experiment stops the loop.
for rl_pair in RL_PAIRS:
    # Release the previous agent (if any) and clear memory before building the
    # next one, as the combination loop in this file does. While the name
    # `agent` still points at the old object, nothing it holds can be
    # reclaimed.
    agent = None
    clear_memory()

    print("\n")

    print(
        f"Running experiment with model: {rl_pair['model_id']} and dataset: {rl_pair['dataset_name']}"
    )

    print("\n")

    agent = FineTuningAgent(
        model_id=rl_pair["model_id"],
        dataset_name=rl_pair["dataset_name"],
        config=rl_pair["config"],
    )
    # Initiate the OODA loop and fine-tuning process
    agent.run()
    print("\n")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


Running experiment with:
- Dataset: SetFit/mrpc
- Model: bert-base-uncased
- Config: {'max_length': 128, 'quantization': True, 'use_unsloth': False, 'lora': True, 'dataset_size': 125, 'dataset_num_proc': 2, 'test_split_percentage': 0.2, 'training_args': {'output_dir': './output', 'per_device_train_batch_size': 4, 'gradient_accumulation_steps': 4, 'warmup_steps': 5, 'max_steps': 60, 'learning_rate': 0.0002, 'logging_steps': 10, 'weight_decay': 0.01, 'eval_steps': 20, 'report_to': 'none', 'save_steps': 20}}


Agent dictionary(inside __init__):
{'max_length': 128, 'quantization': True, 'use_unsloth': False, 'lora': True, 'dataset_size': 125, 'dataset_num_proc': 2, 'test_split_percentage': 0.2, 'training_args': {'output_dir': './output', 'per_device_train_batch_size': 4, 'gradient_accumulation_steps': 4, 'warmup_steps': 5, 'max_steps': 60, 'learning_ra

`low_cpu_mem_usage` was None, now default to True since model is quantized.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Repo card metadata block was not found. Setting CardData to empty.




Observe finished.


Starting Orient ...
Dataset: SetFit/mrpc


Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Preprocess Dataset: SetFit/mrpc


Map:   0%|          | 0/25 [00:00<?, ? examples/s]

Preprocess Dataset: SetFit/mrpc


Orient Dataset: DatasetDict({
    train: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 100
    })
    test: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 25
    })
})


Orient finished.


Starting Decide ...
trainable params: 10,716,674 || all params: 120,200,452 || trainable%: 8.9157


Decide finished.


Starting Act ...
Dataset preprocessed successfully.


Hugging Face data collator used.
Initializing Trainer...


Act finished.


Run Dataset: DatasetDict({
    train: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 100
    })
    test: Dataset({
        features: ['input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 25
    })
})




Step,Training Loss,Validation Loss
20,2.475,0.89603
40,2.2095,0.765166
60,2.069,0.8




Evaluation:




{'eval_loss': 0.800000011920929, 'eval_runtime': 0.1999, 'eval_samples_per_second': 125.072, 'eval_steps_per_second': 20.011, 'epoch': 8.64}


Run  finished.


Running experiment with:
- Dataset: SetFit/mrpc
- Model: bert-base-uncased
- Config: {'max_length': 1024, 'quantization': True, 'use_unsloth': True, 'lora': True, 'dataset_size': 125, 'dataset_num_proc': 2, 'test_split_percentage': 0.2, 'training_args': {'output_dir': './output', 'per_device_train_batch_size': 4, 'gradient_accumulation_steps': 4, 'warmup_steps': 5, 'max_steps': 60, 'learning_rate': 0.0002, 'logging_steps': 10, 'weight_decay': 0.01, 'eval_steps': 20, 'save_steps': 20}, 'unsloth_model_id': 'unsloth/mistral-7b-instruct-v0.3-bnb-4bit', 'max_seq_length': 2048, 'dtype': None, 'load_in_4bit': True}


Agent dictionary(inside __init__):
{'max_length': 1024, 'quantization': True, 'use_unsloth': True, 'lora': True, 'dataset_size': 125, 'dataset_num_proc': 2, 'test_split_percentage': 0.2, 'training_args': {'output_dir': '

model.safetensors:   0%|          | 0.00/4.14G [00:00<?, ?B/s]