# ARPO Smoke Test - Simplified

Minimal test with simpler dependencies.

**Quick test**: 4 tasks, ~1 hour

In [None]:
# Check GPU
import torch
!nvidia-smi -L
print(f"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None'}")

In [None]:
# Clone repo
from getpass import getpass
token = getpass('GitHub token: ')
!git clone https://{token}@github.com/gowathena/arpo_replica.git
%cd arpo_replica
!git checkout arpo-cpu-replicate  
!git submodule update --init --recursive

In [None]:
# Install ONLY what we absolutely need
# Skip vllm and other heavy deps with conflicts

# Core
!pip install -q transformers accelerate
!pip install -q ray omegaconf wandb
!pip install -q tensordict codetiming mathruler
!pip install -q qwen-vl-utils pillow

# OSWorld (minimal)
%cd OSWorld
!pip install -q -e .
%cd ..

# Add verl to path (don't install as package)
import sys
sys.path.insert(0, '/content/arpo_replica')

print('✅ Minimal deps installed')

# Test import
try:
    from verl.trainer import main
    print('✅ VERL importable')
except Exception as e:
    print(f'Import test: {e}')

**Note**: The dependency conflicts above are safe to ignore.

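This notebook does not use the packages involved in those conflicts: vllm rollout is skipped (the model is called directly), and torchaudio and similar extras are not needed.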

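Continue only if `✅ VERL importable` is printed above. If you see `Import test: ...` instead, the import failed; fix the reported error before moving on.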

In [None]:
# Setup Docker + Ray + wandb
!sudo service docker start
!docker pull happysixd/osworld-docker:latest

import ray
ray.init(num_cpus=4, num_gpus=1, ignore_reinit_error=True)

import wandb, os
os.environ['WANDB_API_KEY'] = getpass('wandb key: ')
wandb.login()

# Update for Docker
!sed -i 's/vmware/docker/g' OSWorld/run_uitars.py
!sed -i 's/vmware/docker/g' OSWorld/run_multienv_uitars.py

print('✅ Setup complete!')

In [None]:
# Create minimal config
import yaml

# Build each top-level section on its own, then assemble them.
data_section = {'train_files': 'test_data/osworld_examples/train_smoke_4.json'}
algorithm_section = {
    'adv_estimator': 'grpo',
    'disable_kl': True,
    'enable_replay': True,
}
actor_section = {
    'model': {'model_path': 'ByteDance-Seed/UI-TARS-2B-SFT'},
    'optim': {'lr': 1e-6},
}
env_section = {'num_envs': 2, 'max_steps': 16, 'provider': 'docker'}
trainer_section = {'total_episodes': 1, 'logger': ['console', 'wandb']}

config = {
    'data': data_section,
    'algorithm': algorithm_section,
    'worker': {'actor': actor_section},
    'env': env_section,
    'trainer': trainer_section,
}

# Serialise to a YAML string first, then write it to smoke.yaml in the
# current working directory.
smoke_yaml = yaml.dump(config)
with open('smoke.yaml', 'w') as fh:
    fh.write(smoke_yaml)

print('✅ Config created')

In [None]:
# Run training
# Launches verl.trainer.main as a module in a separate Python process and
# passes it the smoke.yaml written by the config cell.
# NOTE(review): sys.path changes made inside the kernel do not carry over to
# this subprocess; `python -m` resolves `verl` via the current directory, so
# this presumably has to run from the repository root — confirm.
!python -m verl.trainer.main config=smoke.yaml