# FlowGRPO Tutorial - Colab Setup

This notebook sets up and runs the FlowGRPO tutorial in Google Colab.

## Step 1: Install Dependencies

In [None]:
# Install required packages
!pip install torch>=2.0.0 numpy>=1.24.0 matplotlib>=3.7.0 tqdm>=4.65.0 pillow>=10.0.0 scipy>=1.10.0

## Step 2: Setup Directory Structure

In [None]:
import os
from pathlib import Path

# Make sure the dataset and output folders exist under tutorial/.
# exist_ok=True keeps the cell safe to re-run.
tutorial_root = Path("tutorial")
for subdir in ("dataset", "outputs"):
    os.makedirs(tutorial_root / subdir, exist_ok=True)

print("Directories created!")

## Step 3: Upload Code Files

**Option A: If you have the code in a GitHub repo**, clone it:
```python
!git clone https://github.com/your-username/minimal-rl.git
%cd minimal-rl
```

**Option B: Upload files manually** - Upload the `tutorial/` folder via Colab's file menu (the Files panel in the left sidebar), then run the cell below to verify that the files are in place.

In [None]:
# If uploading manually, use this cell to verify that the code files exist.
import sys
from pathlib import Path

# Add the current working directory to the import path
sys.path.insert(0, str(Path.cwd()))

# Step 2 always creates `tutorial/`, so the directory merely existing proves
# nothing. Look for actual Python sources instead.
tutorial_dir = Path("tutorial")
py_files = sorted(tutorial_dir.rglob("*.py")) if tutorial_dir.is_dir() else []

if py_files:
    print("✓ Tutorial directory found")
    print(f"  Files: {py_files}")
elif tutorial_dir.is_dir():
    print("✗ Tutorial directory exists but contains no .py files. Please upload the code files.")
else:
    print("✗ Tutorial directory not found. Please upload the code files.")

## Step 4: Generate Dataset

In [None]:
import sys
from pathlib import Path

# Add the current working directory to the import path so the
# `tutorial` package import below resolves.
sys.path.insert(0, str(Path.cwd()))

from tutorial.dataset.generate_dataset import generate_dataset

# Generate dataset
generate_dataset()

# Verify the expected training file is present, and say so explicitly when it
# is not, instead of finishing silently.
train_file = Path("tutorial/dataset/train.txt")
if train_file.exists():
    print("✓ Dataset generated successfully!")
    with open(train_file, "r") as f:
        # Count lines lazily rather than materialising the whole file as a list
        print(f"  Training prompts: {sum(1 for _ in f)}")
else:
    print(f"✗ Expected {train_file} was not created. Check where generate_dataset() writes its output.")

## Step 5: Run Training

In [None]:
import random
import sys
from pathlib import Path

import numpy as np
import torch

# Add the current working directory to the import path so the
# `tutorial` package imports below resolve.
sys.path.insert(0, str(Path.cwd()))

from tutorial.dataset.dataset import PromptDataset
from tutorial.dataset.generate_dataset import generate_dataset
from tutorial.models.toy_flow_model import create_toy_model
from tutorial.rewards.simple_reward import SimpleReward
from tutorial.training.trainer import FlowGRPOTrainer

# Tunable run settings
SEED = 42
NUM_EPOCHS = 50

# Seed the Python, NumPy and PyTorch RNGs so repeated runs are as repeatable
# as possible (GPU kernels may still introduce nondeterminism).
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Setup
dataset_dir = Path("tutorial/dataset")
output_dir = Path("tutorial/outputs")
# Create the output directory here so this cell does not depend on Step 2
# having been run from the same working directory.
output_dir.mkdir(parents=True, exist_ok=True)

# Generate dataset if needed
if not (dataset_dir / "train.txt").exists():
    print("Generating dataset...")
    generate_dataset()

# Load datasets
print("Loading datasets...")
train_dataset = PromptDataset(dataset_dir, split="train")
test_dataset = PromptDataset(dataset_dir, split="test")

print(f"Train samples: {len(train_dataset)}")
print(f"Test samples: {len(test_dataset)}")

# Create model
print("Creating model...")
model, prompt_encoder = create_toy_model(
    signal_dim=64,
    prompt_dim=32,
    hidden_dim=128,
    vocab_size=20,
)

# Create reward function
reward_fn = SimpleReward()

# Training config
# Use CUDA if available, otherwise CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

config = {
    "batch_size": 4,
    "num_samples_per_prompt": 4,
    "num_steps": 20,
    "eval_num_steps": 20,
    "learning_rate": 1e-3,
    "clip_range": 1e-4,
    "beta": 0.0,
    "device": device,
    "output_dir": str(output_dir),
    "eval_freq": 5,
    "max_grad_norm": 1.0,
}

# Create trainer
trainer = FlowGRPOTrainer(
    model=model,
    prompt_encoder=prompt_encoder,
    reward_fn=reward_fn,
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    config=config,
)

# Train
print("Starting training...")
trainer.train(num_epochs=NUM_EPOCHS)

print(f"\nTraining complete! Check outputs in {output_dir}")

## Step 6: View Results

In [None]:
from IPython.display import Image, display
from pathlib import Path

# Same location that the training cell passes to the trainer as "output_dir"
output_dir = Path("tutorial/outputs")

# Render training_curves.png inline, but only when the file is present
curves_png = output_dir / "training_curves.png"
if curves_png.exists():
    display(Image(str(curves_png)))

# Print the name of every entry in the output directory, in sorted order
print("Output files:")
entry_names = [entry.name for entry in sorted(output_dir.glob("*"))]
for entry_name in entry_names:
    print(f"  - {entry_name}")