# Forensic Root Cause Analysis Report (Granular)

This report provides a hierarchical view of benchmark failures from the **last 10 runs**, analyzed by Gemini 2.0. 
The analysis uses a **Granular Taxonomy** to distinguish between Retrieval Failures (Bad Query vs. Shallow) and Reasoning Failures (Ignored Context vs. Fabrication).

In [None]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import json
from IPython.display import display, HTML, Markdown

# Connect to Database
# The path is relative, so it resolves against the notebook's working directory.
# NOTE(review): sqlite3.connect() creates an empty database file when the path
# does not exist; in that case the failure surfaces later, at the query.
DB_PATH = "benchmarks/analysis_cache.db"
# NOTE(review): the connection is not closed anywhere in the visible cells.
conn = sqlite3.connect(DB_PATH)

# Load Data
# Selects every row of `failures` that already has a root-cause label
# (llm_root_cause IS NOT NULL), newest run_id first, then ordered by
# generator, suite, benchmark_name and attempt_number.
# NOTE(review): the query applies no run-count limit itself - confirm that the
# cache DB is what restricts the data to the runs this report is meant to cover.
query = """
SELECT 
    run_id, 
    generator, 
    suite, 
    benchmark_name, 
    attempt_number, 
    llm_root_cause,
    llm_analysis
FROM failures
WHERE llm_root_cause IS NOT NULL
ORDER BY run_id DESC, generator, suite, benchmark_name, attempt_number
"""
# One DataFrame row per selected failure record; columns match the SELECT list.
df = pd.read_sql_query(query, conn)

# Parse JSON forensics
def parse_forensics(row):
    """Extract the forensic fields from one failure row's ``llm_analysis`` JSON.

    Args:
        row: Mapping-like record (a DataFrame row when used with
            ``df.apply(..., axis=1)``) holding the JSON text under the
            ``'llm_analysis'`` key.

    Returns:
        pd.Series with three entries:

        * ``narrative`` - the ``explanation`` value, ``'N/A'`` if absent.
        * ``citations`` - the ``evidence`` entries (falling back to
          ``citations`` when ``evidence`` is absent) joined with ``" | "``.
          Non-string entries are stringified, a single non-list value is
          used as-is, and ``null`` yields an empty string.
        * ``tool_audit`` - ``str()`` of the ``tool_audit`` value, ``''`` if
          absent.

        When the payload is missing, is not valid JSON, or does not decode
        to a JSON object, the placeholder record
        ``('Parse Error', 'N/A', 'N/A')`` is returned instead.
    """
    try:
        data = json.loads(row['llm_analysis'])
    except (KeyError, IndexError, TypeError, ValueError):
        # Missing column, NULL/non-text payload, or malformed JSON
        # (json.JSONDecodeError is a ValueError subclass).
        data = None

    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, number, ...) has no fields.
        return pd.Series({'narrative': 'Parse Error', 'citations': 'N/A', 'tool_audit': 'N/A'})

    evidence = data.get('evidence', data.get('citations', []))
    if evidence is None:
        citations = ''
    elif isinstance(evidence, (list, tuple)):
        # str() each entry so structured citations (dicts, numbers) do not
        # make the whole record unparseable.
        citations = " | ".join(str(item) for item in evidence)
    else:
        # A lone value (typically one string): joining it directly would
        # split a string into individual characters.
        citations = str(evidence)

    return pd.Series({
        'narrative': data.get('explanation', 'N/A'),
        'citations': citations,
        'tool_audit': str(data.get('tool_audit', '')),
    })

# Run the parser once per row (axis=1); the Series it returns are collected
# into a DataFrame with the columns narrative / citations / tool_audit.
forensics = df.apply(parse_forensics, axis=1)
# Place the parsed columns side by side with the original query columns.
df = pd.concat([df, forensics], axis=1)

# Number of failure rows loaded, i.e. rows that carry a root-cause label.
print(f"Total failures analyzed: {len(df)}")

## Global Failure Trends

In [None]:
if df.empty:
    # Nothing was loaded from the cache, so there is nothing to chart.
    print("No analyzed failures found.")
else:
    # One bar per distinct root-cause label, counted over every loaded failure.
    plt.figure(figsize=(14, 8))
    df['llm_root_cause'].value_counts().plot.bar(color='#2ca02c')
    plt.title('Global Root Cause Distribution (Granular)')
    plt.ylabel('Count')
    plt.grid(axis='y', alpha=0.3)
    # Slant the category labels so long root-cause names stay readable.
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

## Analysis by Benchmark Suite
Breakdown of root causes for each test suite.

In [None]:
if df.empty:
    print("No data available.")
else:
    # Suites are visited in order of first appearance in the loaded data.
    suites = df['suite'].unique()
    for suite in suites:
        suite_df = df[df['suite'] == suite]
        display(Markdown(f"### Suite: `{suite}`"))

        # Horizontal bar chart of root-cause counts for this suite only.
        plt.figure(figsize=(10, 5))
        suite_df['llm_root_cause'].value_counts().plot.barh(color='salmon')
        plt.title(f'Root Causes: {suite}')
        plt.xlabel('Count')
        plt.tight_layout()
        plt.show()

        # Sample table: the first five rows of this suite in load order.
        display(Markdown(f"**Top Failure Examples ({suite}):**"))
        display(suite_df[['generator', 'benchmark_name', 'llm_root_cause', 'narrative']].head(5))
        display(Markdown("---"))