# SQL-R1: RL Training for Text-to-SQL
This notebook sets up and runs the SQL-R1 codebase on Google Colab with a 24GB GPU.

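**Requirements**: Select Runtime > Change runtime type and choose a GPU runtime. The training script in Step 8 is tuned for a 24 GB GPU (e.g. L4 or A100); a T4 has only 16 GB, so it may run out of memory unless batch sizes are reduced.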

## Step 1: Check GPU

In [None]:
# Print the NVIDIA driver's view of the attached GPU(s) to confirm a GPU runtime is active
# and to see how much GPU memory is available before installing anything.
!nvidia-smi

## Step 2: Clone Repository

In [None]:
import os

# 1. Go to root to ensure we don't nest folders on re-runs
%cd /content

# 2. Check if repo exists
if os.path.exists('SellWizr-Assignment'):
    print("Updating existing repository...")
    %cd SellWizr-Assignment
    !git pull
else:
    print("Cloning repository...")
    !git clone https://github.com/dancinglightning/SellWizr-Assignment
    %cd SellWizr-Assignment

# 3. Enter the target directory
%cd SQL-R1

## Step 3: Install Dependencies

In [None]:
# Install PyTorch with CUDA
!pip install torch==2.4.0 --index-url https://download.pytorch.org/whl/cu121

# Install vLLM and Ray
!pip install vllm==0.6.3 ray

# Install Flash Attention
!pip install flash-attn --no-build-isolation

# Install verl framework (from local repo)
!pip install -e .

# Install other dependencies
!pip install wandb IPython matplotlib sqlparse func_timeout nltk ijson hydra-core omegaconf codetiming

## Step 4: Download Model (Qwen2.5-Coder-3B-Instruct)

In [None]:
!pip install huggingface_hub

from huggingface_hub import snapshot_download
import os

# Create models directory
os.makedirs('models/Qwen2.5-Coder-3B-Instruct', exist_ok=True)

# Download 3B model
snapshot_download(
    repo_id="Qwen/Qwen2.5-Coder-3B-Instruct",
    local_dir="models/Qwen2.5-Coder-3B-Instruct",
    local_dir_use_symlinks=False
)

print("Model downloaded successfully!")

## Step 5: Setup Training Data

In [None]:
import os
import shutil

# The trainer reads its parquet files from ./data, so stage the bundled
# example splits there.
os.makedirs('data', exist_ok=True)

for split in ('train', 'test'):
    shutil.copy(f'example_data/{split}.parquet', f'data/{split}.parquet')

print("Training data ready!")

# Report the size of each staged file in megabytes (10^6 bytes).
for split in ('train', 'test'):
    size_mb = os.path.getsize(f'data/{split}.parquet') / 1e6
    print(f"{split.capitalize()}: {size_mb:.1f} MB")

## Step 6: Verify RL Components

In [None]:
# Smoke-test that the RL building blocks can be imported from the installed
# verl package before starting a long training run.
import importlib

print("Testing RL component imports...")

# Each entry: (message printed on success, label used in the failure message,
#              module to import, names that must exist in that module)
rl_component_checks = [
    ("Reward computation module loaded", "Reward module",
     "verl.utils.reward_score.synsql", ("compute_score",)),
    ("GRPO/PPO algorithms loaded", "Core algos",
     "verl.trainer.ppo.core_algos",
     ("compute_grpo_outcome_advantage", "compute_policy_loss")),
    ("Training loop loaded", "Trainer",
     "verl.trainer.ppo.ray_trainer", ("RayPPOTrainer",)),
    ("RewardManager loaded", "RewardManager",
     "verl.trainer.main_ppo", ("RewardManager",)),
]

# Labels of the components that could not be loaded; drives the final summary
# so that success is only reported when every check actually passed.
failed_components = []

for success_message, failure_label, module_path, expected_names in rl_component_checks:
    try:
        module = importlib.import_module(module_path)
        for expected_name in expected_names:
            # Bind the name in the notebook namespace, as `from module import name` would.
            globals()[expected_name] = getattr(module, expected_name)
        print(f"✓ {success_message}")
    except Exception as e:
        # Deliberately broad: importing these modules can fail for reasons other
        # than ImportError, and the remaining checks should still be run.
        print(f"✗ {failure_label} error: {e}")
        failed_components.append(failure_label)

if failed_components:
    print(
        f"\n❌ {len(failed_components)} of {len(rl_component_checks)} RL components "
        f"failed to load: {', '.join(failed_components)}"
    )
else:
    print("\n✅ All RL components verified!")

## Step 7: Setup Weights & Biases (Optional)

In [None]:
import os

import wandb

# Optional: authenticate to log runs to W&B online.
# wandb.login()

# Otherwise run W&B in offline mode. The setting is an environment variable,
# so it is set on this process's environment before training is launched.
os.environ.update({'WANDB_MODE': 'offline'})
print("W&B set to offline mode")

## Step 8: Run RL Training

In [None]:
# Run the 24GB GPU optimized training script
# The script path is relative, so this must be executed from the SQL-R1
# project directory (the working directory set at the end of Step 2).
!bash sh/train_colab.sh

## Step 9: Check Training Results

In [None]:
import os

# Directory (relative to the current working directory) that is inspected
# for saved checkpoints.
checkpoint_dir = 'checkpoints/SQL-R1-Colab'

if not os.path.exists(checkpoint_dir):
    print("No checkpoints yet (training may not have completed)")
else:
    print("Checkpoints saved:")
    # Walk the whole tree and list every sub-directory, at any depth.
    for parent, subdirs, _filenames in os.walk(checkpoint_dir):
        for subdir in subdirs:
            print(f"  {os.path.join(parent, subdir)}")