# SurrealEngine Zero-Copy Accelerator Benchmark

This notebook benchmarks the performance of the new Rust-based Zero-Copy Accelerator compared to the standard Object-Document Mapper (ODM) approach.

## Objectives
1. Ensure the database holds a dataset of `NUM_RECORDS` records (50,000 by default; adjust for your system).
2. Measure read performance using the standard ODM query `BenchmarkUser.objects.all()`.
3. Measure read performance using `BenchmarkUser.objects.to_arrow()` with the Rust Accelerator.
4. Compare results.

In [1]:
import asyncio
import time
import random
import string
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from surrealengine import Document, StringField, IntField, create_connection, RawSurrealConnection
import pyarrow as pa

# Constants
# Every value can be overridden through an environment variable so the notebook
# runs unchanged against a different SurrealDB instance; the defaults are the
# values that were previously hard-coded.
DB_URL = os.environ.get("SURREAL_DB_URL", "ws://db:8000/rpc")
NAMESPACE = os.environ.get("SURREAL_NAMESPACE", "test")
DATABASE = os.environ.get("SURREAL_DATABASE", "benchmark")
# Number of records the benchmark collection is expected to hold.
NUM_RECORDS = int(os.environ.get("BENCHMARK_NUM_RECORDS", "50000"))  # Adjust based on your system

ModuleNotFoundError: No module named 'pandas'

## Define Model

In [None]:
class BenchmarkUser(Document):
    """Document model for the records read by the benchmark.

    Declares two string fields (``name``, ``email``) and one integer field
    (``age``); ``Meta.collection`` names the collection ``benchmark_users``.
    """
    name = StringField()
    email = StringField()
    age = IntField()
    
    class Meta:
        # Collection name this model is declared against.
        collection = "benchmark_users"

## Data Generation

In [None]:
async def setup_data(generate: bool = False):
    """Check that the benchmark collection holds at least ``NUM_RECORDS`` rows.

    Connects to ``DB_URL``, selects ``NAMESPACE``/``DATABASE`` and prints the
    current ``BenchmarkUser`` count. When rows are missing it either reports
    the shortfall (default) or, if ``generate`` is true, inserts the missing
    rows one at a time. The connection is closed on every exit path.

    Args:
        generate: When True, create the missing records sequentially via
            ``BenchmarkUser(...).save()``. There is no bulk insert here, so
            this is slow for large datasets and therefore opt-in.
    """
    conn = create_connection(DB_URL)
    await conn.connect()
    try:
        await conn.use(NAMESPACE, DATABASE)

        count = await BenchmarkUser.objects.count()
        print(f"Current record count: {count}")

        if count >= NUM_RECORDS:
            print("Data sufficient.")
            return

        missing = NUM_RECORDS - count
        if not generate:
            # Do not claim to generate data when nothing is being inserted.
            print(
                f"Missing {missing} records. Load them with a bulk tool or call "
                "setup_data(generate=True) to insert them one-by-one."
            )
            return

        print(f"Generating {missing} records...")
        print("Warning: Generating large data one-by-one is slow. Please ensure DB has data or allow time.")
        # Continue numbering from the existing count so new names do not
        # repeat the ones a previous partial run would have produced.
        for i in range(count, NUM_RECORDS):
            user = BenchmarkUser(
                name=f"User{i}",
                email=f"user{i}@example.com",
                age=i % 100,  # keep synthetic ages in a plausible range
            )
            await user.save()
    finally:
        # Release the connection even if use()/count()/save() raised.
        await conn.close()

# await setup_data()

## Benchmark Execution

In [None]:
results = []

async def run_benchmark():
    """Time a full read of ``BenchmarkUser`` via the ODM and via ``to_arrow()``.

    Each measurement is printed and appended to the module-level ``results``
    list as a dict with the keys ``Method``, ``Time`` (seconds) and
    ``Records``.
    """
    print("Starting Benchmark...")

    # 1. Standard ODM
    conn = create_connection(DB_URL)
    await conn.connect()
    try:
        await conn.use(NAMESPACE, DATABASE)

        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be skewed by wall-clock adjustments.
        start = time.perf_counter()
        users = await BenchmarkUser.objects.all()
        duration = time.perf_counter() - start
        count = len(users)
        print(f"Standard ODM: {duration:.4f}s ({count} records)")
        results.append({"Method": "Standard ODM", "Time": duration, "Records": count})
    finally:
        # Close the connection even when the query fails.
        await conn.close()

    # 2. Rust Accelerator (Zero-Copy)
    async with RawSurrealConnection(DB_URL, namespace=NAMESPACE, database=DATABASE) as raw_conn:
        # The raw connection is attached directly to the query set object.
        # NOTE(review): per the original author's notes, regular QuerySet
        # methods expect a `.client` attribute that RawSurrealConnection lacks,
        # so only `.to_arrow()` (which looks for `query_arrow`) is expected to
        # work while the swap is in place - confirm against the library.
        qs = BenchmarkUser.objects
        previous_connection = getattr(qs, "connection", None)
        qs.connection = raw_conn
        try:
            start = time.perf_counter()
            table = await qs.to_arrow()
            duration = time.perf_counter() - start
            count = len(table)
            print(f"Zero-Copy Arrow: {duration:.4f}s ({count} records)")
            results.append({"Method": "Zero-Copy Arrow", "Time": duration, "Records": count})
        finally:
            # Undo the swap so the soon-to-be-closed raw connection does not
            # stay attached to the query set after this block exits.
            qs.connection = previous_connection

await run_benchmark()

## Visualization

In [None]:
# Tabulate the collected timings (one row per benchmarked method).
df = pd.DataFrame(results)
print(df)

# Draw the comparison chart only when at least one measurement was recorded.
if not df.empty:
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(data=df, x="Method", y="Time", ax=ax)
    ax.set_title("SurrealEngine Read Performance: ODM vs Zero-Copy")
    ax.set_ylabel("Time (seconds)")
    plt.show()