# Root Cause Analysis: Benchmark Validation Failures

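This notebook analyzes validation failures from recent benchmark runs (nominally the last 48 hours).
Data is sourced from `benchmarks/analysis_cache.db`; the load query applies no time filter, so the analysis window is whatever the cache currently holds.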

In [None]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import json

# Location of the cached failure database, relative to this notebook.
DB_PATH = "../benchmarks/analysis_cache.db"

# Load every row of the `failures` table, newest first. Later cells rely on
# this ordering to pick the most recent run from the first row.
query = """
SELECT 
    run_id, 
    timestamp, 
    benchmark_name, 
    suite, 
    generator, 
    error_type, 
    raw_error
FROM failures
ORDER BY timestamp DESC
"""

# The connection is only needed for this single read, so close it as soon as
# the frame is loaded. Note that `with sqlite3.connect(...)` only manages the
# transaction and does NOT close the connection, hence the explicit try/finally.
conn = sqlite3.connect(DB_PATH)
try:
    df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Clean up Error Type (stored as JSON string)
def clean_error_type(x):
    """Return the primary error label from a JSON-encoded ``error_type`` value.

    Parameters
    ----------
    x : str or None
        Raw ``error_type`` cell, expected to hold a JSON list of labels.

    Returns
    -------
    object
        * ``"Unknown"`` when ``x`` is falsy (``None``, ``""``), or when it
          decodes to an empty list or an empty string.
        * The first element when ``x`` decodes to a non-empty JSON list.
        * The decoded string when ``x`` is a JSON-encoded scalar string.
        * ``x`` unchanged when it is not valid JSON or decodes to any other
          JSON type (object, number, ...).
    """
    if not x:
        return "Unknown"

    try:
        parsed = json.loads(x)
    except (TypeError, ValueError):
        # Not JSON (json.JSONDecodeError is a ValueError) or not a str/bytes
        # value at all: keep the raw value as the label.
        return x

    if isinstance(parsed, list):
        return parsed[0] if parsed else "Unknown"
    if isinstance(parsed, str):
        # Indexing a decoded string with [0] would yield only its first
        # character, so return the whole label instead.
        return parsed or "Unknown"
    return x

# Derive one primary error label per failure row from the raw error_type cell.
df['primary_error'] = df['error_type'].map(clean_error_type)

print(f"Loaded {df.shape[0]} failure records.")
df.head()

## Global Error Distribution (Last 48 Hours)

In [None]:
# Count how often each primary error label occurs across all loaded failures.
error_counts = df['primary_error'].value_counts()

# Guard the plot like the later cells do: with no counts there is nothing
# meaningful to draw.
if error_counts.empty:
    print("No failure records to plot.")
else:
    fig, ax = plt.subplots(figsize=(12, 6))
    error_counts.plot(kind='bar', color='skyblue', ax=ax)
    ax.set_title('Global Distribution of Failure Root Causes')
    ax.set_xlabel('Error Category')
    ax.set_ylabel('Count')
    # Tilt the category labels so long error names stay readable.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()
    plt.show()

## Latest Experiment Deep Dive
Analyzing the most recent benchmark run.

In [None]:
# Identify Latest Run
if df.empty:
    print("No failures found in the database.")
else:
    # The load query sorts by timestamp descending, so the first row belongs
    # to the most recent run.
    latest_run_id = df['run_id'].iat[0]
    latest_df = df.loc[df['run_id'] == latest_run_id]

    print(f"Latest Run ID: {latest_run_id}")
    print(f"Failures in this run: {len(latest_df)}")

    # Share of each primary error label within the latest run.
    latest_counts = latest_df['primary_error'].value_counts()

    fig, ax = plt.subplots(figsize=(8, 8))
    latest_counts.plot.pie(ax=ax, autopct='%1.1f%%', startangle=140, cmap='Pastel1')
    ax.set_title(f'Error Modes for Run: {latest_run_id}')
    ax.set_ylabel('')  # drop the default series-name label next to the pie
    plt.show()

### Sample Errors from Latest Run

In [None]:
# Show up to 15 failures from the latest run with long error text visible.
if not df.empty:
    # Widen the column display only for this table. A global pd.set_option
    # would persist in the kernel and change how earlier cells render when
    # they are re-run.
    with pd.option_context('display.max_colwidth', 300):
        display(latest_df[['benchmark_name', 'generator', 'primary_error', 'raw_error']].head(15))
else:
    print("No data available.")