In [2]:
import pardox as px
import random
import uuid
from datetime import datetime, timedelta
import time

# ==========================================
# 1. CONFIGURATION & CONSTANTS
# ==========================================
# Number of synthetic records to generate. Increase to 1M+ for stress testing.
NUM_ROWS = 10_000

# Categorical value pools sampled when building each record.
# The order of ASSET_TYPES matters: the generation loop pairs it positionally
# with a list of sampling weights.
ASSET_TYPES = [
    "ML Model",
    "AI Agent",
    "MCP Server",
    "ML Dataset",
]
PROVIDERS = [
    "OpenAI",
    "Anthropic",
    "Azure OpenAI",
    "AWS Bedrock",
    "HuggingFace Hub",
    "Internal",
    "Google Vertex AI",
]
TEAMS = [
    "Customer Support",
    "Sales Ops",
    "Product Engineering",
    "Data Science Core",
    "Legal Tech",
    "Marketing",
    "Fraud Detection",
]
LIFECYCLE_STAGES = [
    "Experimentation",
    "Development",
    "Staging",
    "Production",
    "Retired",
]
RISK_LEVELS = [
    "Low",
    "Medium",
    "High",
    "Critical",
]
HOSTING_TYPES = [
    "Internal Private Cloud",
    "External SaaS",
    "Hybrid",
]

def generate_date(start_year=2024, end_year=2026):
    """Return a random calendar day formatted as ``YYYY-MM-DD``.

    The day is picked with a single ``random.randint`` draw from the range
    January 1 of ``start_year`` through December 31 of ``end_year``, both
    endpoints included.
    """
    first_day = datetime(start_year, 1, 1)
    # Whole days between the first and last allowed date; randint is
    # inclusive on both ends, so an offset equal to span_days yields Dec 31.
    span_days = (datetime(end_year, 12, 31) - first_day).days
    offset = timedelta(days=random.randint(0, span_days))
    picked = first_day + offset
    return picked.strftime("%Y-%m-%d")

# ==========================================
# 2. DATA GENERATION (Pure Python)
# ==========================================
print(f"🚀 Generating {NUM_ROWS} AI Asset records in memory...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can be skewed by clock
# adjustments while the benchmark runs.
start_gen = time.perf_counter()

# One dict per synthetic asset; every dict carries the same 12 keys.
data = []

for _ in range(NUM_ROWS):
    # Generate base values. The weights are positional with ASSET_TYPES, so
    # the first asset type is drawn about half of the time.
    asset_type = random.choices(ASSET_TYPES, weights=[50, 20, 10, 20], k=1)[0]
    provider = random.choice(PROVIDERS)
    risk = random.choice(RISK_LEVELS)
    monthly_cost = round(random.uniform(50.0, 5000.0), 2)
    compliance_check = random.choice([True, False])

    # Business logic (health score) is derived here, while the row is being
    # built, so no second pass over the data is needed.
    #   - Critical risk AND failed compliance  -> "Critical Breach"
    #   - High risk OR monthly cost above 4000 -> "Risk Watch"
    #   - anything else                        -> "Healthy"
    # NOTE(review): a Critical-risk asset that passed compliance falls through
    # to the cost check only, so it can end up "Healthy" — confirm intended.
    health_status = "Healthy"
    if risk == 'Critical' and not compliance_check:
        health_status = "Critical Breach"
    elif risk == 'High' or monthly_cost > 4000:
        health_status = "Risk Watch"

    # Build the record
    row = {
        "asset_id": str(uuid.uuid4()),
        "asset_name": f"{asset_type} - {random.randint(1000, 9999)}",
        "asset_type": asset_type,
        "provider": provider,
        "owner_team": random.choice(TEAMS),
        "lifecycle_stage": random.choice(LIFECYCLE_STAGES),
        "risk_level": risk,
        "hosting_type": random.choice(HOSTING_TYPES),
        "last_audit_date": generate_date(),
        "monthly_cost": monthly_cost,
        "compliance_passed": compliance_check,
        "health_status": health_status,  # Calculated field
    }
    data.append(row)

end_gen = time.perf_counter()
print(f"✅ Data generated in {end_gen - start_gen:.4f} seconds.")

# ==========================================
# 3. THE LITMUS TEST: PARDOX INGESTION
# ==========================================
print("\n⚡ Ingesting data into PardoX Engine (Rust Kernel)...")
# perf_counter() is the clock meant for benchmarking short intervals; unlike
# time.time() it is monotonic and unaffected by system clock adjustments.
start_ingest = time.perf_counter()

# THE MAGIC HAPPENS HERE:
# Python serializes -> Rust receives Bytes -> Arrow infers Types -> HyperBlock created -> Append to Manager
# (pipeline as described by the original author; it is not visible from this file)
df = px.DataFrame(data)

end_ingest = time.perf_counter()
print(f"✅ DataFrame created in {end_ingest - start_ingest:.4f} seconds.")

# ==========================================
# 4. VERIFICATION & EXPORT
# ==========================================
# NOTE(review): a recorded run of this cell printed (1024, 12) here even though
# NUM_ROWS was 10000 — confirm whether df.shape reports every ingested row.
print(f"\n📊 DataFrame Dimensions: {df.shape}")
print("🔍 Preview (Head):")
print(df.head(5))

# Write the frame to 'ai_asset_inventory.csv' (a relative path).
print("\n💾 Exporting to CSV...")
df.to_csv("ai_asset_inventory.csv")
print("✅ File 'ai_asset_inventory.csv' generated successfully.")

🚀 Generating 10000 AI Asset records in memory...
✅ Data generated in 0.2885 seconds.

⚡ Ingesting data into PardoX Engine (Rust Kernel)...
✅ DataFrame created in 0.0727 seconds.

📊 DataFrame Dimensions: (1024, 12)
🔍 Preview (Head):
+---+--------------------------------------+-------------------+------------+-------------------+---------------+------------------------+-----------------+-----------------+--------------+------------------+-------------+------------+
| # |               asset_id               |    asset_name     | asset_type | compliance_passed | health_status |      hosting_type      | last_audit_date | lifecycle_stage | monthly_cost |    owner_team    |  provider   | risk_level |
+---+--------------------------------------+-------------------+------------+-------------------+---------------+------------------------+-----------------+-----------------+--------------+------------------+-------------+------------+
| 0 | 45bdf9b6-3751-4554-854e-7f0785f9f1d7 | 