# SQL-R1: Text-to-SQL RL Training on Kaggle

This notebook implements RL training for Text-to-SQL using the SQL-R1 approach with the GRPO algorithm.

**Requirements**: Kaggle GPU Runtime (T4 16GB)

## Overview
- **Paper**: SQL-R1: Training Natural Language to SQL Reasoning Model By Reinforcement Learning
- **Algorithm**: GRPO (Group Relative Policy Optimization)
- **Model**: Qwen2.5-Coder-3B-Instruct
- **Reward**: Format + Execution Correctness + Result Matching + Length Bonus

## 1. Environment Setup

In [None]:
# Check GPU availability
!nvidia-smi
!nvidia-smi
!cd /
!rm -rf /kaggle/working/*
!rm -rf ~/.cache/huggingface

In [None]:
# Install dependencies (use Kaggle's pre-installed PyTorch to avoid numpy conflicts)
# DO NOT reinstall torch - it breaks numpy compatibility

import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

# Install RL and inference dependencies
!pip install vllm==0.6.3 ray --quiet
!pip install transformers accelerate --quiet
!pip install wandb sqlparse func_timeout nltk ijson --quiet
!pip install hydra-core omegaconf --quiet

In [None]:
# Install flash-attention (may take a few minutes)
!pip install flash-attn --no-build-isolation --quiet

In [None]:
# Clone and install SQL-R1 (verl integration)
import os
if not os.path.exists('SellWizr-Assignment'):
    !git clone https://github.com/dancinglightning/SellWizr-Assignment.git
%cd SellWizr-Assignment/SQL-R1

!pip install -e . --quiet

## 2. Download Databases

The reward function requires SQLite databases for SQL execution verification.

In [None]:
import os
import zipfile
import shutil

# Directories that the following cells fill with SQLite databases.
# exist_ok=True keeps the cell re-runnable.
for data_dir in (
    'data/NL2SQL/SynSQL-2.5M/databases',
    'data/spider/database',
):
    os.makedirs(data_dir, exist_ok=True)

print("Data directories created!")

In [None]:
# Download Spider database from Yale's official server
import os
import zipfile

spider_db_path = 'data/spider/database'

if not os.path.exists(spider_db_path) or len(os.listdir(spider_db_path)) == 0:
    print("Downloading Spider dataset...")
    
    # Try gdown for Google Drive
    !pip install gdown --quiet
    !gdown --fuzzy "https://drive.google.com/uc?id=1TqleXec_OykOYFREKKtschzY29dUcVAQ" -O data/spider.zip 2>/dev/null || true
    
    if os.path.exists('data/spider.zip') and os.path.getsize('data/spider.zip') > 10000000:
        print("Extracting Spider...")
        with zipfile.ZipFile('data/spider.zip', 'r') as zip_ref:
            zip_ref.extractall('data/')
        
        # Find and move database folder
        for root, dirs, files in os.walk('data'):
            if 'database' in dirs:
                src_db = os.path.join(root, 'database')
                if src_db != spider_db_path and os.path.isdir(src_db):
                    if os.path.exists(spider_db_path):
                        shutil.rmtree(spider_db_path)
                    shutil.move(src_db, spider_db_path)
                    break
        print(f"Spider ready! Found {len(os.listdir(spider_db_path))} databases")
    else:
        print("Spider download unavailable. Will create test databases.")
else:
    print(f"Spider database exists! Found {len(os.listdir(spider_db_path))} databases")

In [None]:
# Create test databases (ensures reward function works)
import sqlite3
import os
import shutil

synsql_db_path = 'data/NL2SQL/SynSQL-2.5M/databases'
spider_db_path = 'data/spider/database'

# Minimal fallback schemas, keyed by database name. `IF NOT EXISTS` keeps the
# statements from failing when a database file already contains the table
# (e.g. the cell is re-run after one of the two folders was cleared).
test_schemas = {
    'concert_singer': [
        'CREATE TABLE IF NOT EXISTS singer (singer_id INT PRIMARY KEY, name TEXT, country TEXT, age INT)',
        'CREATE TABLE IF NOT EXISTS concert (concert_id INT PRIMARY KEY, concert_name TEXT, year INT)'
    ],
    'pets_1': [
        'CREATE TABLE IF NOT EXISTS pets (pet_id INT PRIMARY KEY, pet_type TEXT, pet_age INT)',
        'CREATE TABLE IF NOT EXISTS owners (owner_id INT PRIMARY KEY, name TEXT, age INT)'
    ],
    'employee_hire_evaluation': [
        'CREATE TABLE IF NOT EXISTS employees (employee_id INT PRIMARY KEY, name TEXT, department TEXT, salary INT)',
        'CREATE TABLE IF NOT EXISTS evaluations (eval_id INT PRIMARY KEY, employee_id INT, score INT)'
    ],
    'world_1': [
        'CREATE TABLE IF NOT EXISTS country (code TEXT PRIMARY KEY, name TEXT, continent TEXT, population INT)',
        'CREATE TABLE IF NOT EXISTS city (id INT PRIMARY KEY, name TEXT, country_code TEXT, population INT)'
    ]
}


def create_test_database(db_file, schemas):
    """Create (or complete) the SQLite file `db_file` by running every DDL in `schemas`.

    The parent directory is created when missing, and the connection is always
    closed, even if one of the statements raises.
    """
    os.makedirs(os.path.dirname(db_file) or '.', exist_ok=True)
    conn = sqlite3.connect(db_file)
    try:
        for schema in schemas:
            conn.execute(schema)
        conn.commit()
    finally:
        conn.close()


# The final count lists this folder, so make sure it exists in every branch.
os.makedirs(synsql_db_path, exist_ok=True)

# Copy Spider DBs if they exist
if os.path.exists(spider_db_path) and len(os.listdir(spider_db_path)) > 0:
    print("Copying Spider databases to SynSQL path...")
    for db_name in os.listdir(spider_db_path):
        src = os.path.join(spider_db_path, db_name)
        dst = os.path.join(synsql_db_path, db_name)
        # Only directories are copied; existing targets are left untouched.
        if os.path.isdir(src) and not os.path.exists(dst):
            shutil.copytree(src, dst)
    print(f"Copied {len(os.listdir(synsql_db_path))} databases!")
else:
    # Create minimal test databases in both locations.
    print("Creating test databases...")

    for db_name, schemas in test_schemas.items():
        for base_path in [spider_db_path, synsql_db_path]:
            create_test_database(f'{base_path}/{db_name}/{db_name}.sqlite', schemas)

    print(f"Created {len(test_schemas)} test databases!")

print(f"\nTotal databases available: {len(os.listdir(synsql_db_path))}")

## 3. Download Model

In [None]:
from huggingface_hub import snapshot_download
import os

MODEL_NAME = "Qwen/Qwen2.5-Coder-3B-Instruct"
MODEL_PATH = "models/Qwen2.5-Coder-3B-Instruct"

# Only the existence of the target folder is checked, so an interrupted
# download is not detected here; delete the folder to force a fresh download.
if os.path.exists(MODEL_PATH):
    print("Model already exists!")
else:
    print(f"Downloading {MODEL_NAME}...")
    snapshot_download(
        repo_id=MODEL_NAME,
        local_dir=MODEL_PATH,
        local_dir_use_symlinks=False,
    )
    print("Model downloaded!")

## 4. Prepare Training Data

In [None]:
import pandas as pd

# Load the example training split and show its size, columns and first row.
train_df = pd.read_parquet('example_data/train.parquet')

sample_count = len(train_df)
column_names = list(train_df.columns)
first_row = train_df.iloc[0]

print(f"Training samples: {sample_count}")
print(f"Columns: {column_names}")
print("\nSample entry:")
print(first_row)

## 5. RL Training with GRPO

### Reward Function
| Component | Score | Condition |
|-----------|-------|-------|
| Format | +1/-1 | Valid `<think>...<answer>` structure |
| Execution | +2/-2 | SQL executes without errors |
| Result Match | +3/-3 | Query results match gold |
| Length Bonus | 0-1.5 | Concise reasoning |

In [None]:
# Set environment variables.
# They are placed in os.environ so that processes started from this notebook
# afterwards inherit them.
import os

os.environ.update({
    'VLLM_ATTENTION_BACKEND': 'XFORMERS',                  # attention backend requested from vLLM
    'TOKENIZERS_PARALLELISM': 'false',                     # turn off tokenizers' parallelism
    'PYTORCH_CUDA_ALLOC_CONF': 'max_split_size_mb:128',    # CUDA caching-allocator setting
})

In [None]:
# Training configuration for Kaggle T4 (16GB)
# Keys are dotted override paths; each entry is rendered as `key=value` and
# handed to the trainer on the command line by the next cell.
import shlex

TRAIN_CONFIG = {
    # --- data ---
    'data.train_files': 'example_data/train.parquet',
    'data.val_files': 'example_data/test.parquet',
    'data.train_batch_size': 2,
    'data.val_batch_size': 2,
    'data.max_prompt_length': 1024,
    'data.max_response_length': 512,
    # --- actor / policy model ---
    'actor_rollout_ref.model.path': 'models/Qwen2.5-Coder-3B-Instruct',
    'actor_rollout_ref.model.enable_gradient_checkpointing': True,
    'actor_rollout_ref.actor.ppo_mini_batch_size': 2,
    'actor_rollout_ref.actor.ppo_micro_batch_size': 1,
    'actor_rollout_ref.actor.fsdp_config.param_offload': True,
    'actor_rollout_ref.actor.fsdp_config.grad_offload': True,
    'actor_rollout_ref.actor.fsdp_config.optimizer_offload': True,
    'actor_rollout_ref.actor.optim.lr': '1e-6',
    'actor_rollout_ref.actor.use_kl_loss': True,
    'actor_rollout_ref.actor.kl_loss_coef': 0.001,
    'actor_rollout_ref.actor.kl_loss_type': 'low_var_kl',
    # --- rollout (generation) ---
    'actor_rollout_ref.rollout.name': 'vllm',
    'actor_rollout_ref.rollout.tensor_model_parallel_size': 1,
    'actor_rollout_ref.rollout.gpu_memory_utilization': 0.3,
    'actor_rollout_ref.rollout.n': 4,
    'actor_rollout_ref.rollout.temperature': 1.0,
    'actor_rollout_ref.rollout.log_prob_micro_batch_size': 8,
    # --- reference model ---
    'actor_rollout_ref.ref.fsdp_config.param_offload': True,
    'actor_rollout_ref.ref.log_prob_micro_batch_size': 8,
    # --- algorithm ---
    'algorithm.adv_estimator': 'grpo',
    'algorithm.kl_ctrl.kl_coef': 0.001,
    # --- trainer ---
    'trainer.n_gpus_per_node': 1,
    'trainer.nnodes': 1,
    'trainer.total_epochs': 1,
    'trainer.save_freq': 50,
    'trainer.test_freq': 25,
    'trainer.critic_warmup': 0,
    'trainer.logger': "['console']",
    'trainer.project_name': 'SQL-R1-Kaggle',
    'trainer.experiment_name': '3B-T4-GRPO',
    'trainer.default_local_dir': 'logs/kaggle_run',
}

# Every override is shell-quoted because the resulting string is interpolated
# into a `!` shell command: values containing quotes, brackets or spaces (such
# as the logger list) then reach the trainer exactly as written here instead
# of depending on how the shell happens to treat them.
cmd_args = ' '.join(shlex.quote(f"{k}={v}") for k, v in TRAIN_CONFIG.items())
print("Config ready. Run next cell to start training.")

In [None]:
# Run training
train_cmd = f"python -m verl.trainer.main_ppo {cmd_args}"
print("Starting RL training with GRPO...")
!{train_cmd}

## 6. Test Reward Function

In [None]:
from verl.utils.reward_score.synsql import extract_solution, validate_response_structure

# Hand-written model completion: a <think> section followed by an <answer>
# section that wraps the SQL in a fenced code block.
test_response = """<|im_start|>assistant
<think>Analyzing query for employees table.</think>
<answer>
```sql
SELECT * FROM employees
```
</answer>"""

# extract_solution returns three values; only the first two are displayed.
extraction = extract_solution(test_response)
answer, think, processed = extraction

for label, value in (("Answer", answer), ("Think", think)):
    print(f"{label}: {value}")

In [None]:
# Check training logs: print the number of files in every folder below the
# run's output directory, or a short notice when nothing has been written yet.
import os

log_dir = 'logs/kaggle_run'

if not os.path.exists(log_dir):
    print("No logs yet.")
else:
    for folder, _subfolders, filenames in os.walk(log_dir):
        print(f"{folder}: {len(filenames)} files")

In [None]:
# Final GPU status check at the end of the notebook.
!nvidia-smi