# Retrieval Dataset Analysis Report

## 1. Methodology
This report analyzes the empirical relevance of each candidate document using Monte Carlo causal inference.

### Algorithm
1. Randomized Trials (N=40)
2. Blind Solving
3. Impact Scoring (Delta P)


In [1]:
import yaml
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import glob
import os
import json
from pathlib import Path
import sys
sys.path.append('..')
from tools.retrieval_dataset_generation.lib import RetrievalDataset, RetrievalCase

# Echo the working directory: the data-loading cells use relative paths,
# so which files get picked up depends on where the kernel was started.
print(f"Current Working Directory: {os.getcwd()}")

# Notebook-wide plotting and table display defaults.
sns.set_theme(style="whitegrid")
plt.rcParams.update({'figure.figsize': (10, 6)})
pd.options.display.max_colwidth = 100

Current Working Directory: /Users/ivanmkc/Documents/code/agent-generator


In [2]:
# Load Logs
# Find the run logs, pick the most recently written one, and split its YAML
# event stream into completed trials and convergence checks.
trial_events = []
convergence_events = []
trials_df = pd.DataFrame()

# Look next to the parent directory first, then fall back to the working directory.
log_files = glob.glob("../logs/*.yaml")
if not log_files:
    log_files = glob.glob("logs/*.yaml")

if log_files:
    # Use modification time: ctime is the metadata-change time on Unix and the
    # creation time on Windows, so it does not reliably identify the newest log.
    latest_log = max(log_files, key=os.path.getmtime)
    print(f"Loading logs from: {latest_log}")
    with open(latest_log, 'r', encoding='utf-8') as f:
        events = list(yaml.safe_load_all(f))
    # Only mapping documents can carry an 'event' key; empty documents (None)
    # and any scalar/list documents are skipped instead of raising on .get().
    trial_events = [
        e for e in events
        if isinstance(e, dict) and e.get('event') == 'trial_complete'
    ]
    trials_df = pd.DataFrame(trial_events)
    convergence_events = [
        e for e in events
        if isinstance(e, dict) and e.get('event') == 'convergence_check'
    ]
    print(f"Loaded {len(trials_df)} trials.")
else:
    print("No logs found.")

Loading logs from: logs/validation_run_2026-01-25_23-56-48.yaml


Loaded 139 trials.


In [3]:
# Load the verified retrieval dataset and flatten it into one row per
# (case, candidate) pair carrying the candidate's delta_p and n_in statistics.
dataset_path = Path("../retrieval_dataset_verified.yaml")
if not dataset_path.exists():
    dataset_path = Path("retrieval_dataset_verified.yaml")

# Reset every name this cell publishes before branching, so a run without the
# dataset file leaves them defined (and empty) rather than missing or stale.
dataset = None
cases = []
records = []
convergence_traces = []

if dataset_path.exists():
    print(f"Loading metadata from: {dataset_path}")
    with open(dataset_path, 'r', encoding='utf-8') as f:
        dataset = RetrievalDataset.model_validate(yaml.safe_load(f))
    print(f"Loaded {len(dataset.cases)} cases.")

    cases = dataset.cases
    for case in cases:
        # Convergence traces are optional per case; collect the ones present.
        if 'convergence_trace' in case.metadata:
            convergence_traces.append({'case_id': case.id, 'trace': case.metadata['convergence_trace']})
        for ctx in case.candidates:
            meta = ctx.metadata
            records.append({
                'case_id': case.id,
                'fqn': ctx.fqn,
                'source_type': ctx.context_type,
                'delta_p': meta.delta_p,
                'n_in': meta.n_in
            })
else:
    print("No dataset found.")

# An empty record list yields an empty frame, which the report cell checks for.
stats_df = pd.DataFrame(records)

Loading metadata from: retrieval_dataset_verified.yaml
Loaded 1 cases.


In [4]:
# Per-case report: for every case, print its query and zero-context success
# rate, then tabulate the candidates whose delta_p lies beyond the threshold
# in either direction.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 100)

# Single source for the cut-off used by both the filters and the headings.
DELTA_P_THRESHOLD = 0.1
display_cols = ['fqn', 'delta_p', 'source_type', 'n_in']

if not stats_df.empty:
    # Index the cases once instead of rescanning dataset.cases for each group.
    # setdefault keeps the first case seen for an id, like a first-match scan.
    cases_by_id = {}
    for c in dataset.cases:
        cases_by_id.setdefault(c.id, c)

    cases_groups = stats_df.groupby('case_id')
    for case_id, group in cases_groups:
        print()
        print(f"# Case: {case_id}")
        case_obj = cases_by_id.get(case_id)
        if case_obj is not None:
            print(f"**Query:** {case_obj.query}")
            print(f"**Zero-Context Success:** {case_obj.metadata.get('zero_context_success_rate', 'N/A')}")

        # Highest delta_p first, so both tables read from strongest positive
        # impact down to strongest negative impact.
        sorted_group = group.sort_values('delta_p', ascending=False)
        relevant = sorted_group[sorted_group['delta_p'] > DELTA_P_THRESHOLD]
        toxic = sorted_group[sorted_group['delta_p'] < -DELTA_P_THRESHOLD]

        if not relevant.empty:
            print()
            print(f"### ✅ Relevant Documents (Delta P > {DELTA_P_THRESHOLD})")
            print(relevant[display_cols].to_markdown(index=False))
        else:
            print()
            print("*No highly relevant documents found.*")

        if not toxic.empty:
            print()
            print(f"### ❌ Toxic Documents (Delta P < -{DELTA_P_THRESHOLD})")
            print(toxic[display_cols].to_markdown(index=False))


# Case: configure_adk_features_mc:which_is_the_correct_import_statement_for_the_runn
**Query:** Which is the correct import statement for the `Runner` class?
**Zero-Context Success:** 0.0

*No highly relevant documents found.*
