# AutoSpMV GPU Optimization Demo

This notebook demonstrates the GPU kernel optimization framework using KernelBench tasks.

In [None]:
# Verify GPU is available: shell out to `nvidia-smi` so the GPU attached to
# this runtime (if any) is displayed before the later cells run.
!nvidia-smi

In [None]:
# Clone and setup repository
!git clone https://github.com/austinmann1/autospmv.git
%cd autospmv

# Install dependencies
!pip install -r requirements.txt
!pip install git+https://github.com/kernelbench/kernelbench.git

In [None]:
# Setup environment
import getpass
import os
import torch
from pathlib import Path
from src.cuda.auto_cuda import AutoCUDA

# Set API key
# SECURITY: never hardcode the key in the notebook -- it ends up in version
# control and in every shared copy. Any key that was previously committed in
# this cell must be treated as compromised and revoked.
# Use the value already exported in the environment if there is one;
# otherwise prompt for it without echoing it to the cell output.
if not os.environ.get('OPENROUTER_API_KEY'):
    os.environ['OPENROUTER_API_KEY'] = getpass.getpass('OpenRouter API key: ')

# Verify CUDA
print(f'CUDA available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name()}')

In [None]:
# Run optimization: build the optimizer for the SpMV task, run it for a few
# iterations, then report the headline numbers and the per-iteration history.
output_dir = Path('output')
auto_cuda = AutoCUDA(output_dir, task_id='spmv_level1', use_mock=False)
result = auto_cuda.optimize(max_iterations=3)

# Headline summary: baseline vs. best runtime and the resulting speedup.
print('\nOptimization Results:')
baseline_ms = result["baseline_metrics"]["runtime_ms"]
print(f'Baseline Runtime: {baseline_ms:.2f} ms')
best_ms = result["best_metrics"]["runtime_ms"]
print(f'Best Runtime: {best_ms:.2f} ms')
print(f'Speedup: {baseline_ms / best_ms:.2f}x')

# (label, metrics key, unit suffix) for the optional per-iteration metrics;
# a key missing from the metrics dict is reported as "N/A".
METRIC_ROWS = (
    ('Occupancy', 'achieved_occupancy', ''),
    ('Read Throughput', 'dram_read_throughput', ' GB/s'),
    ('Write Throughput', 'dram_write_throughput', ' GB/s'),
)

print('\nOptimization History:')
for step in result['optimization_history']:
    # Failed iterations carry an 'error' entry instead of metrics.
    if 'error' in step:
        print(f'Iteration {step["iteration"]}: Failed - {step["error"]}')
        continue

    print(f'\nIteration {step["iteration"]}: {step["metrics"]["runtime_ms"]:.2f} ms')
    # 'key_changes' is optional; print one bullet per change when present.
    for change in step.get('key_changes', ()):
        print(f'  - {change}')

    print('Metrics:')
    step_metrics = step["metrics"]
    for label, key, unit in METRIC_ROWS:
        print(f'  {label}: {step_metrics.get(key, "N/A")}{unit}')