# OpenAI GPT-OSS-20B Competition Notebook (Fixed)

This notebook runs the red-teaming competition against the OpenAI gpt-oss-20b model with all necessary fixes applied.

## 1. Setup and Installation

In [None]:
# Install required packages
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q transformers accelerate bitsandbytes
!pip install -q sentencepiece protobuf
!pip install -q nest-asyncio aiohttp

## 2. Clone Repository (if needed)

In [None]:
# Clone the repository if not already present
import os
if not os.path.exists('/content/openai-'):
    !git clone https://github.com/your-repo/openai-.git /content/openai-
    %cd /content/openai-
else:
    %cd /content/openai-

## 3. Apply Critical Fixes

In [None]:
# Apply the device mismatch fix to hf_local.py
#
# The patch is a plain text substitution on the backend source: directly after
# the tokenizer call it inserts code that moves the input tensors onto the
# model's device. The cell is idempotent: it looks for the marker comment
# before touching the file, so it can be re-run safely.

# Relative path: resolved against the current working directory
hf_local_path = 'src/backends/hf_local.py'

# Comment line that only exists in the file once the patch has been applied
fix_marker = 'Move inputs to the same device as the model'

# Exact snippet the patch is anchored on
search_text = (
    '        # Tokenize (remain on CPU; Transformers/Accelerate will dispatch as needed)\n'
    '        inputs = self._tokenizer(text, return_tensors="pt")'
)

# NOTE(review): the last-resort fallback below calls `torch.device`, which
# assumes hf_local.py already imports torch - confirm against that file.
replacement_text = """        # Tokenize (remain on CPU; Transformers/Accelerate will dispatch as needed)
        inputs = self._tokenizer(text, return_tensors="pt")
        
        # Move inputs to the same device as the model
        if hasattr(self._model, 'device'):
            device = self._model.device
        elif hasattr(self._model, 'module') and hasattr(self._model.module, 'device'):
            device = self._model.module.device
        else:
            # Try to get device from model parameters
            try:
                device = next(self._model.parameters()).device
            except Exception:
                device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        # Move all input tensors to the correct device
        inputs = {k: v.to(device) if hasattr(v, 'to') else v for k, v in inputs.items()}"""

# Read the file and apply the fix
with open(hf_local_path, 'r', encoding='utf-8') as f:
    content = f.read()

if fix_marker in content:
    print("✅ Device mismatch fix already applied")
elif search_text not in content:
    # A missing anchor used to be a silent no-op that still reported success.
    # Leave the file untouched and say so instead.
    print("⚠️ Could not find the tokenizer snippet in hf_local.py; no changes were made")
else:
    # Write the fixed content back
    with open(hf_local_path, 'w', encoding='utf-8') as f:
        f.write(content.replace(search_text, replacement_text))
    print("✅ Applied device mismatch fix to hf_local.py")

## 4. Configure Environment

In [None]:
# Prepare the import path, environment variables and cache directory
import os
import sys

# Put the repository root on sys.path (once) so its packages can be imported
repo_root = '/content/openai-'
if repo_root not in sys.path:
    sys.path.insert(0, repo_root)

# Apply all environment settings in a single update
torch_cache_dir = '/content/openai-/.cache/torch'
os.environ.update({
    'TORCH_HOME': torch_cache_dir,
    'CUDA_HOME': '/usr/local/cuda',
    'PYTORCH_CUDA_ALLOC_CONF': 'expandable_segments:True',
})

# The directory TORCH_HOME points at must exist; a no-op when it already does
os.makedirs(torch_cache_dir, exist_ok=True)

print("✅ Environment configured")

## 5. Verify GPU and Memory

In [None]:
import torch

# Report the GPU (if there is one) and then release cached allocations
if not torch.cuda.is_available():
    print("❌ No GPU available")
else:
    bytes_per_gib = 1024**3
    print(f"✅ GPU Available: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / bytes_per_gib:.2f} GB")
    # mem_get_info() returns a tuple; index 0 is used as the free-memory figure
    print(f"   Free Memory: {torch.cuda.mem_get_info()[0] / bytes_per_gib:.2f} GB")

    # Drop cached blocks and wait for pending GPU work to finish
    torch.cuda.empty_cache()
    torch.cuda.synchronize()

## 6. Run Basic Competition

In [None]:
# Run the basic competition.
# The `cd` happens inside the same shell invocation as `python`, so the script
# is started from /content/openai- regardless of the notebook's own working
# directory.
!cd /content/openai- && python run_competition.py

## 7. Run Enhanced Competition (Quick Mode)

In [None]:
# Run enhanced competition in quick mode to avoid memory issues.
# As above, the `cd` is part of the same shell invocation, so the script is
# started from /content/openai-.
!cd /content/openai- && python run_enhanced_competition.py --strategy standard --mode quick

## 8. Alternative: Run with Python API

In [None]:
# Alternative approach using Python directly
import asyncio
import nest_asyncio
# nest_asyncio patches asyncio so an event loop can be re-entered; presumably
# needed because the notebook kernel is already running one - TODO confirm.
nest_asyncio.apply()

async def run_competition_async():
    """Run a small round of the competition through the project's Python API.

    Loads the project configuration, creates a model client and scanner, and
    tests the first three scenarios returned for ``Severity.HIGH``.

    Returns:
        list: the scanner results whose ``is_vulnerable`` attribute was truthy.
        An empty list is returned when nothing was flagged, and also when the
        run as a whole failed: in that case the error and traceback are
        printed instead of raised, so the notebook keeps running.
    """
    import traceback

    try:
        # Import after environment setup (the repository root must already be
        # on sys.path for the `src` package to resolve)
        from src.competition.enhanced_attack_vectors import (
            get_attack_scenarios_by_severity,
            Severity
        )
        from src.core.client_factory import ClientFactory
        from src.core.vulnerability_scanner import VulnerabilityScanner
        from src.config import load_config
        from src.utils.memory_manager import prepare_for_model_loading

        print("🚀 Starting competition...")

        # Check memory
        # NOTE(review): 20.0 is presumably the required memory in GB - confirm
        # against src.utils.memory_manager.
        can_load, memory_message = prepare_for_model_loading(20.0)
        print(f"Memory status: {memory_message}")
        if not can_load:
            # The flag used to be ignored entirely. The run still proceeds
            # (the check's exact semantics are not visible from this
            # notebook), but the risk is now stated explicitly.
            print("⚠️ Memory check reported that the model may not fit; loading may fail")

        # Load configuration
        config = load_config()

        # Create client
        print("Loading model...")
        client = ClientFactory.create_client(config)
        scanner = VulnerabilityScanner(client)

        # Get a few test scenarios
        scenarios = get_attack_scenarios_by_severity(Severity.HIGH)[:3]

        print(f"\nTesting {len(scenarios)} scenarios...")

        findings = []
        async with client:
            for i, scenario in enumerate(scenarios, 1):
                print(f"\n[{i}/{len(scenarios)}] Testing: {scenario.name}")
                try:
                    result = await scanner.test_vulnerability(
                        prompt=scenario.prompt_template,
                        category=scenario.category,
                        expected_behavior=scenario.expected_vulnerability
                    )

                    if result.is_vulnerable:
                        print("  ✅ Vulnerability found!")
                        findings.append(result)
                    else:
                        print("  ❌ No vulnerability detected")

                except Exception as e:
                    # One failing scenario must not abort the remaining ones
                    print(f"  ❌ Error: {e}")

        print("\n🎉 Competition complete!")
        print(f"Found {len(findings)} vulnerabilities")

        return findings

    except Exception as e:
        # Best-effort by design: report the failure and hand back an empty result
        print(f"❌ Competition failed: {e}")
        traceback.print_exc()
        return []

# Run the competition.
# Top-level `await` works in IPython/Jupyter cells; it is not valid in a plain
# Python script.
findings = await run_competition_async()

## 9. View Results

In [None]:
# List all result files
import os
import json

# Report files this cell looks for in the current working directory
result_files = [
    'competition_summary.json',
    'enhanced_competition_report_standard_quick.json'
]


def show_report_summary(path):
    """Print the headline numbers of one JSON report, if the file exists.

    Args:
        path: location of the report file.

    Returns:
        bool: False when the file does not exist; True otherwise, including
        when the file exists but cannot be read or parsed (a warning is
        printed in that case instead of raising).
    """
    if not os.path.exists(path):
        return False

    print(f"\n📄 Found: {path}")
    try:
        with open(path, 'r') as report_file:
            data = json.load(report_file)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError is a ValueError subclass; a half-written report
        # should not abort the whole cell.
        print(f"   ⚠️ Could not read report: {e}")
        return True

    # Only a dict-shaped 'summary' section carries the fields printed below
    summary = data.get('summary') if isinstance(data, dict) else None
    if isinstance(summary, dict):
        print(f"   Vulnerabilities found: {summary.get('vulnerabilities_found', 0)}")
        print(f"   Success rate: {summary.get('success_rate', 'N/A')}")
    return True


def list_finding_files(directory='.'):
    """Return the names of the finding_*.json files in *directory*, sorted.

    Sorting makes the "first 5" preview deterministic; os.listdir returns
    entries in arbitrary order.
    """
    return sorted(
        name for name in os.listdir(directory)
        if name.startswith('finding_') and name.endswith('.json')
    )


# Check for result files
found_any_report = False
for report_path in result_files:
    found_any_report = show_report_summary(report_path) or found_any_report
if not found_any_report:
    print("No report files found in the current directory yet")

# List all finding files
finding_files = list_finding_files()
if finding_files:
    print(f"\n📁 Individual findings: {len(finding_files)} files")
    for finding_name in finding_files[:5]:  # Show first 5
        print(f"   - {finding_name}")

## 10. Troubleshooting

In [None]:
# If you encounter issues, run this cell for diagnostics
# Everything it needs is imported here so that it also works on a fresh kernel,
# before any other cell has been run.
import os
import sys


def describe_tree(root, max_files=3):
    """Build an indented outline of *root* and the Python files inside it.

    Args:
        root: directory to walk.
        max_files: maximum number of ``.py`` files listed per directory. The
            ``.py`` filter is applied before the limit, so non-Python files
            do not use up the slots.

    Returns:
        list[str]: one line per directory (suffixed with ``/``) followed by
        its listed files, indented two spaces per nesting level. Empty when
        *root* does not exist.
    """
    lines = []
    for current, dirs, files in os.walk(root):
        dirs.sort()  # in-place, so os.walk descends in a stable order
        rel = os.path.relpath(current, root)
        level = 0 if rel == os.curdir else rel.count(os.sep) + 1
        indent = ' ' * 2 * level
        lines.append(f"{indent}{os.path.basename(os.path.normpath(current))}/")
        subindent = ' ' * 2 * (level + 1)
        python_files = sorted(name for name in files if name.endswith('.py'))
        for name in python_files[:max_files]:
            lines.append(f"{subindent}{name}")
    return lines


print("🔍 Diagnostics:")
print("="*50)

# Check Python version
print(f"Python: {sys.version}")

# Check PyTorch
try:
    import torch
    print(f"PyTorch: {torch.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"CUDA version: {torch.version.cuda}")
except Exception as e:
    print(f"PyTorch error: {e}")

# Check transformers
try:
    import transformers
    print(f"Transformers: {transformers.__version__}")
except Exception as e:
    print(f"Transformers error: {e}")

# Check file structure
print("\nFile structure:")
tree_lines = describe_tree('src')
if tree_lines:
    print("\n".join(tree_lines))
else:
    # A missing checkout or wrong working directory is a common failure cause
    print(f"  (no 'src' directory under {os.getcwd()})")

## Summary

This notebook provides a complete solution for running the OpenAI GPT-OSS-20B red-teaming competition with the following fixes:

1. **Device Mismatch Fix**: Ensures input tensors are moved to the same device as the model
2. **Memory Management**: Runs the enhanced competition in quick mode to avoid out-of-memory (OOM) errors
3. **Error Handling**: Properly handles cases where no vulnerabilities are found
4. **Environment Setup**: Configures all necessary paths and variables

The competition tests the model against various attack scenarios to identify potential vulnerabilities in areas like:
- Deceptive alignment
- Context manipulation
- Tool misuse
- Reward hacking
- Sandbagging

Results are saved in JSON format for further analysis.