<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/Training_Smart.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q -U datasets transformers accelerate peft trl bitsandbytes sentencepiece interpret
!pip install colab-env --quiet

!pip install -U bitsandbytes -q

In [2]:
!pip install flash-attn --no-build-isolation -q # Install the flash-attn package

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.7/2.7 MB[0m [31m165.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m78.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for flash-attn (setup.py) ... [?25l[?25hdone


In [3]:
!pip install peft --upgrade  -q # Upgrade peft to the latest version

In [4]:
import colab_env
import os

# NOTE(review): this cell's output reports Google Drive being mounted, presumably
# as a side effect of importing colab_env — confirm.
# The Hugging Face write token comes from the process environment; the value is
# None when the variable is not defined.
access_token_write = os.environ.get("HUGGINGFACE_ACCESS_TOKEN_WRITE")

Mounted at /content/gdrive


In [5]:
# Report the GPU attached to this runtime (model, driver/CUDA version, memory use).
!nvidia-smi

Wed Nov 20 05:02:49 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0              47W / 400W |      2MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

## Final version: PPO proof of concept (Mistral-7B, text-to-SQL on Spider)

In [1]:
from transformers import AutoTokenizer
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
from huggingface_hub import login
import torch
from datasets import load_dataset
import colab_env

# 0. Authenticate against the Hugging Face Hub
import os

# The write token is read from the environment (None when the variable is unset)
# and is also stored as a git credential.
access_token_write = os.environ.get("HUGGINGFACE_ACCESS_TOKEN_WRITE")
login(add_to_git_credential=True, token=access_token_write)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
import torch
from trl import AutoModelForCausalLMWithValueHead, PPOConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, DataCollatorForLanguageModeling
from datasets import load_dataset
from tqdm import tqdm  # Import tqdm for the progress bar

import copy
import sqlite3

# 1. Policy model and tokenizer
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# 4-bit NF4 quantization with bfloat16 compute, to reduce memory usage.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Tokenizer (fast implementation); padding reuses the end-of-sequence token.
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

# Quantized causal LM, placed on the available device(s) automatically.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map="auto",
)

# 2. Spider text-to-SQL dataset (a small training subset is selected further down)
spider_dataset = load_dataset("spider")

def preprocess_function(examples):
    """Tokenize a batch of Spider examples into prompt inputs and SQL labels.

    Args:
        examples: Batch mapping with a ``"question"`` list (natural-language
            questions) and a ``"query"`` list (target SQL strings).

    Returns:
        The tokenizer output for the prompts ``"Translate to SQL: <question>"``,
        padded/truncated to 512 tokens, with an extra ``"labels"`` entry holding
        the token ids of the target queries (also padded/truncated to 512).
    """
    inputs = [f"Translate to SQL: {q}" for q in examples["question"]]
    targets = examples["query"]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
    # `text_target=` is the supported way to tokenize targets; it replaces the
    # deprecated `tokenizer.as_target_tokenizer()` context manager.
    labels = tokenizer(text_target=targets, max_length=512, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Run the preprocessing over `spider_dataset` in batches. The columns named in
# the train split are removed, so the mapped dataset keeps only what
# `preprocess_function` returns.
spider_dataset = spider_dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=spider_dataset["train"].column_names,
)
# Only the first 50 training examples are used so the POC finishes quickly.
train_dataset = spider_dataset["train"].select(range(50))  # Reduced dataset for POC

# 3. Define a simple Reward Function (for POC purposes)
class SQLRewardFunction:
    """Reward generated SQL by whether it executes against a SQLite database.

    A sample earns ``1.0`` when its extracted query runs without raising and
    ``-1.0`` when execution raises. The query's result is never inspected, so
    this measures executability only, not correctness.
    """

    def __init__(self, tokenizer, db_path):
        """Store the decoder and the database location.

        Args:
            tokenizer: Object exposing ``decode(ids, skip_special_tokens=True)``
                that turns one sample into text.
            db_path: Path handed to ``sqlite3.connect`` for every scored sample.
        """
        self.tokenizer = tokenizer
        self.db_path = db_path

    def __call__(self, samples):
        """Score every sample.

        Args:
            samples: Iterable of token-id sequences; each one is decoded
                separately.

        Returns:
            ``torch.Tensor`` with one reward (``1.0`` or ``-1.0``) per sample,
            in input order.
        """
        rewards = []
        for sample in samples:
            query = self.tokenizer.decode(sample, skip_special_tokens=True)

            # Keep only the body of the first ```sql ... ``` fenced block.
            if "```sql" in query:
                query = query.split("```sql")[1].split("```")[0].strip()
            else:
                # No fenced block: fall back to a trivially valid statement.
                # NOTE(review): this gives the positive reward to outputs that
                # contain no SQL at all — confirm that is intended.
                query = "SELECT 1"  # Or any other valid default query

            # NOTE(review): the generated text is executed as-is against
            # `db_path`; a statement that modifies data would change that file.
            try:
                conn = sqlite3.connect(self.db_path)
                try:
                    cursor = conn.cursor()
                    cursor.execute(query)
                finally:
                    # Release the connection even when execution fails, so
                    # failing queries do not leak open database handles.
                    conn.close()
                reward = 1.0  # Give a reward if the query executes without error
            except Exception as e:
                print(f"Error executing query: {query}\nError: {e}")
                reward = -1.0  # Penalize if there's an error
            rewards.append(reward)
        return torch.tensor(rewards)

# --- Mount Google Drive (if needed) ---
# The SQLite file used by the reward function is read from Drive.
from google.colab import drive
drive.mount('/content/gdrive')

# 4. Initialize the reward function with the path to your Spider database
# NOTE(review): the run output reports "no such table: departments" for a
# generated query; confirm this file contains the schema the prompts refer to.
db_path = "/content/gdrive/My Drive/datasets/database.sqlite"
reward_fn = SQLRewardFunction(tokenizer, db_path=db_path)

# 5. Define PPO configuration (reduced episodes for POC)
# Only `total_episodes` and `learning_rate` are read from this object below;
# no PPOTrainer is constructed in this cell.
ppo_config = PPOConfig(
    output_dir="./ppo_results",
    total_episodes=10  # Further reduced for POC
)

# 6. Create the value model
# A second copy of the same checkpoint, wrapped with a value head and loaded
# with the same 4-bit quantization config as the policy model.
value_model = AutoModelForCausalLMWithValueHead.from_pretrained(
    model_id,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)

# 7. Create the optimizer (we'll use this for the policy and value model)
# NOTE(review): both models are loaded 4-bit quantized and no adapters are
# attached in this cell — confirm which parameters actually receive gradients.
optimizer = torch.optim.AdamW(
    [
        {"params": model.parameters()},
        {"params": value_model.parameters()}
    ],
    lr=ppo_config.learning_rate
)

# 8. Define helper functions for PPO (simplified for POC)
def calculate_advantages(rewards, values, gamma=0.99, gae_lambda=0.95):
    """Calculates advantages using Generalized Advantage Estimation (GAE).

    For every step ``t`` the TD residual is
    ``delta_t = rewards[t] + gamma * V(t + 1) - values[t]`` and the advantage is
    ``A_t = delta_t + gamma * gae_lambda * A_(t + 1)``, accumulated backwards
    from the last step. ``V(t + 1)`` is ``values[t + 1]`` when that entry
    exists and ``0.0`` otherwise, i.e. the trajectory is treated as terminal
    unless a bootstrap value is supplied as an extra trailing element.

    Every step, including the last one, gets an advantage, so a single-reward
    trajectory yields one advantage instead of an empty tensor.

    Args:
        rewards: Per-step rewards (tensor or sequence); flattened to 1-D.
        values: Per-step value estimates (tensor, sequence or 0-dim tensor);
            flattened to 1-D. Needs at least ``len(rewards)`` entries.
        gamma: Discount factor.
        gae_lambda: GAE smoothing factor.

    Returns:
        A float32 CPU tensor of shape ``(len(rewards),)``. It carries no
        gradient: advantages are treated as constants.

    Raises:
        IndexError: If ``values`` has fewer entries than ``rewards``.
    """
    reward_seq = torch.as_tensor(rewards, dtype=torch.float32).detach().reshape(-1).tolist()
    value_seq = torch.as_tensor(values, dtype=torch.float32).detach().reshape(-1).tolist()

    n_steps = len(reward_seq)
    advantages = [0.0] * n_steps
    last_advantage = 0.0
    for t in reversed(range(n_steps)):
        # Past the final value estimate there is nothing to bootstrap from.
        next_value = value_seq[t + 1] if t + 1 < len(value_seq) else 0.0
        delta = reward_seq[t] + gamma * next_value - value_seq[t]
        last_advantage = delta + gamma * gae_lambda * last_advantage
        advantages[t] = last_advantage
    return torch.tensor(advantages, dtype=torch.float32)

def calculate_policy_loss(logits, actions, advantages):
    """Advantage-weighted cross-entropy loss.

    The mean cross-entropy between ``logits`` and ``actions`` (a single scalar
    over all positions) is multiplied by ``advantages`` and the product is
    averaged.

    Args:
        logits: Tensor whose last dimension indexes the classes (token ids).
        actions: Integer class ids with one entry per logits row once both are
            flattened.
        advantages: Tensor of weights; broadcast against the scalar loss.

    Returns:
        A 0-dim tensor on ``logits.device``.
    """
    # Everything is evaluated on the device that holds the logits.
    device = logits.device
    actions = actions.to(device)
    advantages = advantages.to(device)

    # Flatten to (positions, classes) vs. (positions,) for the token-level loss.
    num_classes = logits.size(-1)
    flat_logits = logits.view(-1, num_classes)
    flat_targets = actions.view(-1)
    mean_nll = torch.nn.functional.cross_entropy(flat_logits, flat_targets)

    weighted = mean_nll * advantages
    return weighted.mean()

# 9. Training loop with progress bar
# Outer loop: `total_episodes` passes over the selected training subset.
for episode in tqdm(range(ppo_config.total_episodes), desc="Episodes"):  # Add tqdm for episodes
    # Iterating the dataset directly yields one example at a time, so every
    # "batch" below is a single prompt.
    for batch in tqdm(train_dataset, desc="Batches", leave=False):  # Add tqdm for batches
        # a. Generate samples from the policy
        input_ids = torch.tensor(batch["input_ids"]).to(model.device)
        attention_mask = torch.tensor(batch["attention_mask"]).to(model.device)

        # Add an extra dimension for batch size
        input_ids = input_ids.unsqueeze(0)
        attention_mask = attention_mask.unsqueeze(0)

        # Sampled decoding; the returned sequences are scored and re-fed below.
        samples = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=512,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id
        )

        # b. Calculate rewards
        # One reward per generated sequence, i.e. a single entry here.
        rewards = reward_fn(samples)

        # c. Compute the policy loss and value loss
        # The forward pass runs on `value_model`; in this loop `model` is only
        # used for `generate`, so every loss term below comes from `value_model`.
        # NOTE(review): confirm that the policy `model` is meant to be updated.
        # The attention mask is all ones, so padded positions are attended too.
        outputs = value_model(
            input_ids=samples,
            attention_mask=torch.ones_like(samples),
            labels=samples
        )
        # NOTE(review): TRL documents the value-head forward return as
        # (lm_logits, loss, value). If that holds, outputs[1] is the LM loss
        # rather than the value estimates — confirm the indices.
        logits = outputs[0]
        values = outputs[1]

        # Find the loss tensor in the outputs tuple
        # Picks the first 0-dim tensor that requires grad.
        # NOTE(review): presumably this is the language-modelling loss produced
        # by `labels=`, not a value-function loss — confirm.
        value_loss = None
        for item in outputs:
            if isinstance(item, torch.Tensor) and item.requires_grad and item.shape == ():
                value_loss = item
                break

        if value_loss is None:
            raise ValueError("Loss tensor not found in the outputs tuple.")

        # Calculate advantages
        # NOTE(review): `rewards` has length 1 here; confirm that
        # `calculate_advantages` returns a non-empty tensor for that case,
        # otherwise the mean taken in `calculate_policy_loss` is NaN.
        advantages = calculate_advantages(rewards, values)

        # Calculate policy loss
        # NOTE(review): logits and `samples` are passed unshifted; for a causal
        # LM the logits at position t usually score token t + 1 — confirm.
        policy_loss = calculate_policy_loss(logits, samples, advantages)

        # d. Update the model parameters
        optimizer.zero_grad()

        # Combine the losses
        total_loss = policy_loss + value_loss

        total_loss.backward()  # Call backward only once
        optimizer.step()


# 10. Save the model
# `repo_id` is the documented name of the target-repository argument;
# `repo_path_or_name` is a deprecated alias for it.
# NOTE(review): this uploads `model`, the object used for generation above —
# confirm it is the model the training loop is meant to have updated.
model.push_to_hub(
    repo_id="frankmorales2020/mistral-7b-ppo-poc-t2sql",
    commit_message="Upload PPO POC model"
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Episodes:   0%|          | 0/10 [00:00<?, ?it/s]
Batches:   0%|          | 0/50 [00:00<?, ?it/s][A
Batches:   2%|▏         | 1/50 [00:01<01:34,  1.92s/it][A
Batches:   4%|▍         | 2/50 [00:04<01:45,  2.19s/it][A

Error executing query: SELECT creation_year, name, budget
FROM departments
Error: no such table: departments



Batches:   6%|▌         | 3/50 [00:07<02:13,  2.85s/it][A
Batches:   8%|▊         | 4/50 [00:18<04:29,  5.86s/it][A
Batches:  10%|█         | 5/50 [00:21<03:43,  4.96s/it][A
Batches:  12%|█▏        | 6/50 [00:29<04:13,  5.76s/it][A
Batches:  14%|█▍        | 7/50 [00:31<03:22,  4.71s/it][A
Batches:  16%|█▌        | 8/50 [00:45<05:20,  7.63s/it][A
Batches:  18%|█▊        | 9/50 [01:00<06:44,  9.85s/it][A