# 🎯 AI Director: Meta Ax Hyperparameter Optimization (Standalone)

**Module 4.5: Bayesian Optimization for LoRA Training**

This notebook uses **Meta Ax** to find optimal hyperparameters.

**Target**: Beat baseline loss of 0.6097  
**Time**: ~2.5 hours on T4 GPU (20 trials)

## 1️⃣ Check GPU

In [None]:
!nvidia-smi
import torch
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 2️⃣ Install Packages

In [None]:
%%capture
# Install the notebook's dependencies; the %%capture cell magic hides this cell's output.
!pip install -q torch transformers peft datasets accelerate bitsandbytes ax-platform botorch gpytorch loguru pandas pyyaml scipy matplotlib

## 3️⃣ Mount Google Drive

In [None]:
# Mount Google Drive so that files under /content/drive/MyDrive are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

# Drive folder holding the dataset; the fine-tuner reads train_v2.jsonl and val_v2.jsonl from it.
DATASET_PATH = '/content/drive/MyDrive/ai_director_dataset'

## 4️⃣ Configuration

In [None]:
# Search space, baseline and trial budget read by HyperparameterOptimizer.
# 'choice' entries are passed to Ax as integer choice parameters; 'range'
# entries become float ranges (optionally log-scaled).
CONFIG = {
    'search_space': {
        'lora_rank': {'type': 'choice', 'values': [8, 16, 32]},  # reduced: 64 removed
        'lora_alpha': {'type': 'choice', 'values': [16, 32, 64]},  # reduced: 128 removed
        'learning_rate': {'type': 'range', 'bounds': [0.00001, 0.0005], 'log_scale': True},
        'batch_size': {'type': 'choice', 'values': [1]}  # lowered: only 1 remains
    },
    # Loss to beat; used to compute the reported improvement percentage.
    'baseline': {'loss': 0.6097},
    'num_trials': 20,
    'dataset_path': DATASET_PATH
}
print(f"⚠️ LOW MEMORY MODE: batch_size=1 only, max_rank=32")
print(f"Target: Beat {CONFIG['baseline']['loss']}")

## 5️⃣ Training Function

In [None]:
from dataclasses import dataclass, field
from typing import List, Dict
from pathlib import Path
import shutil
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType
from datasets import load_dataset

def _default_target_modules() -> List[str]:
    """Return a fresh list of the module names that receive LoRA adapters."""
    attention = ["q_proj", "k_proj", "v_proj", "o_proj"]
    mlp = ["gate_proj", "up_proj", "down_proj"]
    return attention + mlp


@dataclass
class FineTuningConfig:
    """Settings for a single LoRA fine-tuning run.

    Field order is part of the interface (positional construction), so new
    fields should only be appended.
    """

    # Model and tokenisation
    base_model: str = "Qwen/Qwen2.5-7B-Instruct"
    max_seq_length: int = 1024

    # LoRA adapter
    lora_r: int = 16
    lora_alpha: int = 32
    lora_dropout: float = 0.05
    target_modules: List[str] = field(default_factory=_default_target_modules)

    # Optimisation schedule
    num_train_epochs: int = 1
    per_device_train_batch_size: int = 1
    gradient_accumulation_steps: int = 2
    learning_rate: float = 2e-4
    warmup_ratio: float = 0.1

    # Logging / evaluation / checkpoint cadence, in optimizer steps
    logging_steps: int = 5
    eval_steps: int = 50
    save_steps: int = 50

    # Filesystem locations
    dataset_path: str = "/content/drive/MyDrive/ai_director_dataset"
    output_dir: str = "/content/models/temp"

class AIDirectorFineTuner:
    """Run one QLoRA fine-tuning trial of ``config.base_model``.

    Intended call order: ``load_tokenizer`` -> ``load_model`` ->
    ``apply_lora`` -> ``load_dataset`` -> ``train``.
    """

    def __init__(self, config: FineTuningConfig):
        """Store ``config`` and create ``config.output_dir`` if it is missing."""
        self.config = config
        self.output_dir = Path(config.output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.model = None
        self.tokenizer = None
        self.dataset = None

    @staticmethod
    def _native_bf16() -> bool:
        """Return True when the current CUDA device has native bfloat16.

        Compute capability 8.0 (Ampere) is the first generation with bf16
        hardware. The capability is checked directly because
        ``torch.cuda.is_bf16_supported()`` may also report emulated support.
        """
        return torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

    def load_tokenizer(self):
        """Load the tokenizer, falling back to EOS as the padding token."""
        self.tokenizer = AutoTokenizer.from_pretrained(self.config.base_model, trust_remote_code=True)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def load_model(self):
        """Load the base model in 4-bit NF4 and prepare it for k-bit training."""
        # Match the compute dtype to the hardware: bf16 only where it is
        # native, otherwise fp16 (e.g. on a T4).
        compute_dtype = torch.bfloat16 if self._native_bf16() else torch.float16
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=compute_dtype,
            bnb_4bit_use_double_quant=True
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            self.config.base_model,
            quantization_config=bnb_config,
            device_map="auto",
            trust_remote_code=True
        )
        self.model = prepare_model_for_kbit_training(self.model)

    def apply_lora(self):
        """Wrap the loaded model with LoRA adapters built from the config."""
        lora_config = LoraConfig(
            r=self.config.lora_r,
            lora_alpha=self.config.lora_alpha,
            target_modules=self.config.target_modules,
            lora_dropout=self.config.lora_dropout,
            bias="none",
            task_type=TaskType.CAUSAL_LM
        )
        self.model = get_peft_model(self.model, lora_config)

    def load_dataset(self):
        """Load ``train_v2.jsonl`` / ``val_v2.jsonl`` and tokenize both splits.

        Each record must provide ``instruction``, ``input`` and ``output``.
        Requires ``load_tokenizer`` to have been called first.
        """
        dataset_path = Path(self.config.dataset_path)
        self.dataset = load_dataset("json", data_files={
            "train": str(dataset_path / "train_v2.jsonl"),
            "validation": str(dataset_path / "val_v2.jsonl")
        })

        def format_prompt(sample):
            return f'''<|im_start|>system\nYou are an AI Director.<|im_end|>\n<|im_start|>user\n{sample["instruction"]}\n\n{sample["input"]}<|im_end|>\n<|im_start|>assistant\n{sample["output"]}<|im_end|>'''

        def tokenize(sample):
            # Tokenize ONE string so each row stores flat token lists. Passing
            # a one-element list here would store nested [[ids]] per row and
            # the collated batch would come out 3-D.
            tok = self.tokenizer(
                format_prompt(sample),
                truncation=True,
                max_length=self.config.max_seq_length,
                padding="max_length",
            )
            tok["labels"] = tok["input_ids"].copy()
            return tok

        for split in ("train", "validation"):
            self.dataset[split] = self.dataset[split].map(tokenize, batched=False)

    def train(self):
        """Train, evaluate on the validation split and return the loss.

        Returns:
            dict with a single key ``'eval_loss'``.
        """
        import inspect

        use_bf16 = self._native_bf16()
        training_kwargs = dict(
            output_dir=str(self.output_dir),
            num_train_epochs=self.config.num_train_epochs,
            per_device_train_batch_size=self.config.per_device_train_batch_size,
            # Keep evaluation as memory-frugal as training; the library
            # default eval batch size is larger than the train batch size.
            per_device_eval_batch_size=self.config.per_device_train_batch_size,
            gradient_accumulation_steps=self.config.gradient_accumulation_steps,
            learning_rate=self.config.learning_rate,
            warmup_ratio=self.config.warmup_ratio,
            logging_steps=self.config.logging_steps,
            eval_steps=self.config.eval_steps,
            save_steps=self.config.save_steps,
            save_strategy="steps",
            load_best_model_at_end=True,
            # Mixed precision must match the hardware: bf16 where native,
            # fp16 on other CUDA devices, neither on CPU.
            bf16=use_bf16,
            fp16=torch.cuda.is_available() and not use_bf16,
            report_to=[]
        )
        # `evaluation_strategy` was renamed to `eval_strategy` in newer
        # transformers releases; use whichever name this version accepts.
        accepted = inspect.signature(TrainingArguments.__init__).parameters
        strategy_key = "eval_strategy" if "eval_strategy" in accepted else "evaluation_strategy"
        training_kwargs[strategy_key] = "steps"
        args = TrainingArguments(**training_kwargs)

        trainer = Trainer(
            model=self.model,
            args=args,
            train_dataset=self.dataset["train"],
            eval_dataset=self.dataset["validation"],
            data_collator=DataCollatorForLanguageModeling(self.tokenizer, mlm=False)
        )
        trainer.train()
        eval_result = trainer.evaluate()
        return {'eval_loss': eval_result['eval_loss']}

print("✅ Training function ready")

## 6️⃣ Optimization Function

In [None]:
import json
from datetime import datetime
from pathlib import Path
from ax.service.ax_client import AxClient
from ax.service.utils.instantiation import ObjectiveProperties

class HyperparameterOptimizer:
    """Bayesian hyperparameter search with Ax, resumable via a JSON checkpoint.

    ``config`` must provide ``search_space``, ``baseline['loss']``,
    ``num_trials`` and ``dataset_path``.
    """

    # Penalty reported to Ax when a trial crashes or yields a non-finite loss.
    FAILURE_LOSS = 1.0

    def __init__(self, config):
        self.config = config
        self.ax_client = None
        self.trial_results = []
        self.checkpoint_path = Path('/content/drive/MyDrive/ai_director_optimization_checkpoint.json')

    def save_checkpoint(self):
        """Save checkpoint after each trial.

        The file is written to a temporary sibling and then renamed, so an
        interrupted write cannot leave a truncated checkpoint behind.
        """
        checkpoint = {
            'trial_results': self.trial_results,
            'timestamp': datetime.now().isoformat(),
            'completed_trials': len(self.trial_results)
        }
        self.checkpoint_path.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = self.checkpoint_path.with_name(self.checkpoint_path.name + '.tmp')
        with open(tmp_path, 'w') as f:
            json.dump(checkpoint, f, indent=2)
        tmp_path.replace(self.checkpoint_path)
        print(f"💾 Checkpoint saved: {len(self.trial_results)} trials completed")

    def load_checkpoint(self):
        """Load checkpoint if exists.

        Returns:
            Number of trials already completed (0 when there is no checkpoint).
        """
        if self.checkpoint_path.exists():
            with open(self.checkpoint_path, 'r') as f:
                checkpoint = json.load(f)
            self.trial_results = checkpoint['trial_results']
            print(f"✅ Loaded checkpoint: {len(self.trial_results)} trials already completed")
            return len(self.trial_results)
        return 0

    def setup_ax(self):
        """Create a fresh Ax experiment from ``config['search_space']``."""
        self.ax_client = AxClient()
        parameters = []
        for name, cfg in self.config['search_space'].items():
            if cfg['type'] == 'choice':
                parameters.append({"name": name, "type": "choice", "values": cfg['values'], "value_type": "int"})
            elif cfg['type'] == 'range':
                parameters.append({"name": name, "type": "range", "bounds": cfg['bounds'], "value_type": "float", "log_scale": cfg.get('log_scale', False)})

        self.ax_client.create_experiment(
            name="ai_director_optimization",
            parameters=parameters,
            objectives={"eval_loss": ObjectiveProperties(minimize=True)}
        )
        print("✅ Ax setup complete")

    def evaluate(self, params):
        """Fine-tune once with ``params`` and return the validation loss.

        Any failure (exception or non-finite loss) is reported and converted
        to ``FAILURE_LOSS`` so the search can continue. The trial directory
        and GPU memory are released whether or not the trial succeeded.
        """
        print(f"\n🔬 Trial: {params}")
        output_dir = f"/content/models/trial_{datetime.now():%H%M%S}"
        trainer = None
        try:
            cfg = FineTuningConfig(
                lora_r=params['lora_rank'],
                lora_alpha=params['lora_alpha'],
                learning_rate=params['learning_rate'],
                per_device_train_batch_size=params['batch_size'],
                output_dir=output_dir,
                dataset_path=self.config['dataset_path']
            )
            trainer = AIDirectorFineTuner(cfg)
            trainer.load_tokenizer()
            trainer.load_model()
            trainer.apply_lora()
            trainer.load_dataset()
            result = trainer.train()
            loss = result['eval_loss']
            import math
            if not math.isfinite(loss):
                # A NaN/inf loss is not a usable observation.
                raise ValueError(f"non-finite eval_loss: {loss}")
            print(f"✅ Loss: {loss:.4f}")
            return loss
        except Exception as e:
            import traceback
            print(f"❌ Error: {e}")
            traceback.print_exc()  # keep the root cause visible
            return self.FAILURE_LOSS
        finally:
            # Drop every reference to the model BEFORE collecting and emptying
            # the CUDA cache; memory still referenced cannot be released.
            if trainer is not None:
                trainer.model = None
                trainer.tokenizer = None
                trainer.dataset = None
            trainer = None
            shutil.rmtree(output_dir, ignore_errors=True)
            import gc
            gc.collect()
            import torch
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    def _best_result(self):
        """Return ``(best_params, best_loss)``.

        Uses Ax's answer when available and otherwise falls back to the best
        observed trial.
        """
        best = self.ax_client.get_best_parameters()
        if best is not None:
            best_params, values = best
            if values is not None:
                return best_params, values[0]['eval_loss']
        if not self.trial_results:
            raise RuntimeError("No completed trials - cannot determine best parameters")
        observed = min(self.trial_results, key=lambda r: r['loss'])
        return observed['params'], observed['loss']

    def run(self, num_trials=None):
        """Run (or resume) the optimization.

        Args:
            num_trials: Total trial budget, including trials restored from a
                checkpoint. Defaults to ``config['num_trials']``.

        Returns:
            dict with ``best_params``, ``best_loss``, ``improvement`` (percent
            relative to the baseline loss) and ``trials``.
        """
        if num_trials is None:
            num_trials = self.config['num_trials']

        # Try to load checkpoint
        start_trial = self.load_checkpoint()

        if start_trial > 0:
            print(f"\n🔄 RESUMING from trial {start_trial + 1}/{num_trials}")
            print(f"Already completed: {start_trial} trials\n")

        self.setup_ax()

        # Replay completed trials to Ax
        if start_trial > 0:
            print("🔄 Replaying completed trials to Ax...")
            for result in self.trial_results:
                # Attach the parameters that were actually evaluated.
                # get_next_trial() would generate new parameters and pair
                # them with the old loss.
                _, trial_idx = self.ax_client.attach_trial(parameters=result['params'])
                self.ax_client.complete_trial(trial_index=trial_idx, raw_data=result['loss'])
            print(f"✅ Replayed {start_trial} trials\n")

        print(f"🚀 Starting optimization from trial {start_trial + 1}/{num_trials}\n")

        # Run remaining trials
        for i in range(start_trial, num_trials):
            print(f"\n{'#'*50}\nTRIAL {i+1}/{num_trials}\n{'#'*50}")
            params, trial_idx = self.ax_client.get_next_trial()
            loss = self.evaluate(params)
            self.ax_client.complete_trial(trial_index=trial_idx, raw_data=loss)
            self.trial_results.append({'trial': i+1, 'params': params, 'loss': loss, 'timestamp': datetime.now().isoformat()})

            # Save checkpoint after each trial
            self.save_checkpoint()

        best_params, best_loss = self._best_result()
        baseline_loss = self.config['baseline']['loss']
        improvement = (baseline_loss - best_loss) / baseline_loss * 100

        print(f"\n{'='*50}\n🏆 RESULTS\n{'='*50}")
        print(f"Best: {best_params}")
        print(f"Loss: {best_loss:.4f} (baseline: {baseline_loss:.4f})")
        print(f"Improvement: {improvement:+.1f}%")

        # Delete checkpoint after completion
        if self.checkpoint_path.exists():
            self.checkpoint_path.unlink()
            print("\n🗑️ Checkpoint deleted (optimization complete)")

        return {'best_params': best_params, 'best_loss': best_loss, 'improvement': improvement, 'trials': self.trial_results}

print("✅ Optimizer ready (with checkpoint support)")

## 7️⃣ Run Optimization 🚀

**⚠️ WARNING**: This will take ~2.5 hours on T4 GPU!

**✅ Checkpoint Feature**: 
- Auto-saves after each trial to Google Drive
- If Colab disconnects, just run this cell again
- Will automatically resume from last completed trial
- No need to start over!

**Quick test**: Change to `num_trials=3` (~25 minutes)

In [None]:
# Check if checkpoint exists
import json
from pathlib import Path

checkpoint_path = Path('/content/drive/MyDrive/ai_director_optimization_checkpoint.json')

# Read-only preview of what the next run would resume from.
if not checkpoint_path.exists():
    print("📁 No checkpoint found - will start from trial 1")
else:
    checkpoint = json.loads(checkpoint_path.read_text())
    print(f"📁 Checkpoint found!")
    completed = checkpoint['completed_trials']
    print(f"Completed trials: {completed}/{CONFIG['num_trials']}")
    print(f"Last updated: {checkpoint['timestamp']}")
    print(f"\n✅ Will resume from trial {completed + 1}")

In [None]:
# Build the optimizer from CONFIG and run the search; run() loads an existing
# checkpoint first and continues from there.
optimizer = HyperparameterOptimizer(CONFIG)
# num_trials is passed explicitly here, so it takes precedence over CONFIG['num_trials'].
results = optimizer.run(num_trials=20)

## 🗑️ Delete Checkpoint (Optional)

Run this if you want to **start over** from scratch:

In [None]:
# ⚠️ WARNING: removes the saved checkpoint - the next run starts again at trial 1
checkpoint_path = Path('/content/drive/MyDrive/ai_director_optimization_checkpoint.json')

if not checkpoint_path.exists():
    print("📁 No checkpoint found")
else:
    checkpoint_path.unlink()
    print("🗑️ Checkpoint deleted - will start from trial 1 next time")

## 8️⃣ Save Results

In [None]:
import yaml
output_dir = Path('/content/drive/MyDrive/ai_director_optimization_results')
output_dir.mkdir(parents=True, exist_ok=True)

# Full run results as JSON.
(output_dir / 'results.json').write_text(json.dumps(results, indent=2))

# Best hyperparameters only, as YAML.
(output_dir / 'best_config.yaml').write_text(yaml.dump(results['best_params']))

print(f"✅ Saved to {output_dir}")

## 9️⃣ Visualize

In [None]:
import matplotlib.pyplot as plt

# Collect the x/y series in a single pass over the trial records.
trials, losses = [], []
for record in results['trials']:
    trials.append(record['trial'])
    losses.append(record['loss'])

# Same figure as before, drawn through the object-oriented matplotlib API.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(trials, losses, 'bo-')
ax.axhline(y=CONFIG['baseline']['loss'], color='r', linestyle='--', label='Baseline')
ax.set_xlabel('Trial')
ax.set_ylabel('Loss')
ax.set_title('Optimization Progress')
ax.legend()
ax.grid(True, alpha=0.3)
fig.savefig(output_dir / 'plot.png', dpi=150)
plt.show()

## 🎉 Done!

In [None]:
# Final recap of the optimization run.
banner = "=" * 50
print(banner)
print("🎯 SUMMARY")
print(banner)
print(f"Best Loss: {results['best_loss']:.4f}")
print(f"Improvement: {results['improvement']:+.1f}%")
print(f"Best Config: {results['best_params']}")
print(f"\nResults saved to Google Drive: {output_dir}")