# Fix Diagnosis Simulator Naming Issue

This notebook scans all diagnosis simulator SCT experiments and counts cases where:
- The revealed_secret is a meaningful string (not None, not empty)
- No candidate equals the revealed_secret under a case-insensitive (lowercased) comparison

This helps identify how many experiments are affected by the truncation bug. The final cells then collect the affected experiment files and delete them.

In [13]:
import json
import os
from collections import defaultdict
from pathlib import Path

# Root folder holding one sub-directory of results per model
base_dir = Path("..") / "results"

# Collect every model's diagnosis_simulator_sct folder
sct_pattern = "*/diagnosis_simulator_sct"
ds_dirs = [path for path in base_dir.glob(sct_pattern)]

# Print the count followed by one bullet line per discovered directory
summary_lines = [f"Found {len(ds_dirs)} diagnosis_simulator_sct directories"]
summary_lines.extend(f"  - {ds_dir}" for ds_dir in ds_dirs)
print("\n".join(summary_lines))

Found 4 diagnosis_simulator_sct directories
  - ../results/gpt_oss_20b_openrouter/diagnosis_simulator_sct
  - ../results/gpt_oss_120b_openrouter/diagnosis_simulator_sct
  - ../results/qwen3_32b_openrouter/diagnosis_simulator_sct
  - ../results/qwen3_235b_openrouter/diagnosis_simulator_sct


In [17]:
# Count, per agent, how many experiments have a meaningful revealed secret and
# how many of those have no candidate equal to it (case-insensitive).
# This cell only reads result files; it never modifies or deletes them.
agent_counts = defaultdict(lambda: {"total": 0, "mismatches": 0})

# Files that could not be parsed or lack the expected fields. They are only
# recorded here so they can be inspected later.
error_files = []

# Scan all experiments
for model_dir in base_dir.glob("*"):
    if not model_dir.is_dir():
        continue

    ds_dir = model_dir / "diagnosis_simulator_sct"
    if not ds_dir.exists():
        continue

    # Iterate through each agent directory
    for agent_dir in ds_dir.glob("*"):
        if not agent_dir.is_dir():
            continue

        agent_name = agent_dir.name

        # Scan all JSON files in this agent directory
        for json_file in agent_dir.glob("*.json"):
            try:
                with open(json_file, "r") as f:
                    data = json.load(f)

                # The revealed secret is read from the first element of the
                # last entry in the reveal branch's interaction log.
                sct = data.get("sct", {})
                revealed_secret = sct["reveal_branch"]["interaction_log"][-1][0]
                candidates = sct.get("candidates", [])

                # Skip if revealed_secret is not meaningful
                if not revealed_secret or not isinstance(revealed_secret, str) or not revealed_secret.strip():
                    continue

                # Increment total count
                agent_counts[agent_name]["total"] += 1

                # Check if any candidate matches revealed_secret (lowercased);
                # non-string candidates are mapped to "" so they never match.
                revealed_lower = revealed_secret.lower()
                candidates_lower = [c.lower() if isinstance(c, str) else "" for c in candidates]

                if revealed_lower not in candidates_lower:
                    agent_counts[agent_name]["mismatches"] += 1

            except Exception as e:
                # Report and move on. A counting pass must not destroy data, so
                # the offending file is left on disk untouched.
                print(f"Error processing {json_file}: {e}")
                error_files.append(str(json_file))
                continue

print(f"\nProcessed experiments across {len(agent_counts)} agents")
if error_files:
    print(f"Skipped {len(error_files)} unreadable or malformed files (left untouched)")


Processed experiments across 11 agents


In [15]:
# Print a per-agent breakdown followed by the aggregate over all agents
rule = "=" * 80

print("\n" + rule)
print("RESULTS: Revealed Secret Not in Candidates")
print(rule)

total_all = 0
mismatches_all = 0

# Agents are listed alphabetically so repeated runs are easy to compare
for agent_name in sorted(agent_counts):
    stats = agent_counts[agent_name]
    total, mismatches = stats["total"], stats["mismatches"]

    # Guard against agents with no counted experiments
    if total > 0:
        percentage = mismatches / total * 100
    else:
        percentage = 0

    print(f"\n{agent_name}:")
    print(f"  Total experiments: {total}")
    print(f"  Mismatches: {mismatches}")
    print(f"  Percentage: {percentage:.1f}%")

    total_all += total
    mismatches_all += mismatches

# Aggregate over every agent
print("\n" + rule)
print("OVERALL STATISTICS")
print(rule)
print(f"Total experiments across all agents: {total_all}")
print(f"Total mismatches: {mismatches_all}")
if total_all > 0:
    percentage_all = mismatches_all / total_all * 100
else:
    percentage_all = 0
print(f"Overall percentage: {percentage_all:.1f}%")


RESULTS: Revealed Secret Not in Candidates

amem_agent:
  Total experiments: 151
  Mismatches: 0
  Percentage: 0.0%

letta_agent:
  Total experiments: 8
  Mismatches: 0
  Percentage: 0.0%

mem0_agent:
  Total experiments: 156
  Mismatches: 0
  Percentage: 0.0%

private_cot_agent:
  Total experiments: 159
  Mismatches: 0
  Percentage: 0.0%

reactmem_append_and_delete_agent:
  Total experiments: 122
  Mismatches: 0
  Percentage: 0.0%

reactmem_overwrite_agent:
  Total experiments: 113
  Mismatches: 0
  Percentage: 0.0%

reactmem_patch_and_replace_agent:
  Total experiments: 135
  Mismatches: 0
  Percentage: 0.0%

vanilla_llm_agent:
  Total experiments: 138
  Mismatches: 0
  Percentage: 0.0%

workflow_append_and_delete_agent:
  Total experiments: 142
  Mismatches: 0
  Percentage: 0.0%

workflow_overwrite_agent:
  Total experiments: 146
  Mismatches: 0
  Percentage: 0.0%

workflow_patch_and_replace_agent:
  Total experiments: 149
  Mismatches: 0
  Percentage: 0.0%

OVERALL STATISTICS
Tota

In [10]:
# Collect (but do not yet delete) all experiment files with mismatches.
# A file is a mismatch when it has a meaningful revealed secret and no candidate
# equals it case-insensitively. Nothing is removed from disk in this cell; the
# paths are only gathered in `deleted_files` for the deletion step that follows.
deleted_files = []

for model_dir in base_dir.glob("*"):
    if not model_dir.is_dir():
        continue

    ds_dir = model_dir / "diagnosis_simulator_sct"
    if not ds_dir.exists():
        continue

    # Iterate through each agent directory
    for agent_dir in ds_dir.glob("*"):
        if not agent_dir.is_dir():
            continue

        # Scan all JSON files in this agent directory
        for json_file in agent_dir.glob("*.json"):
            try:
                with open(json_file, "r") as f:
                    data = json.load(f)

                # The revealed secret is read from the first element of the
                # last entry in the reveal branch's interaction log.
                sct = data.get("sct", {})
                revealed_secret = sct["reveal_branch"]["interaction_log"][-1][0]
                candidates = sct.get("candidates", [])

                # Skip if revealed_secret is not meaningful
                if not revealed_secret or not isinstance(revealed_secret, str) or not revealed_secret.strip():
                    continue

                # Check if any candidate matches revealed_secret (lowercased);
                # non-string candidates are mapped to "" so they never match.
                revealed_lower = revealed_secret.lower()
                candidates_lower = [c.lower() if isinstance(c, str) else "" for c in candidates]

                if revealed_lower not in candidates_lower:
                    # This is a mismatch - queue the file for deletion
                    deleted_files.append(str(json_file))

            except Exception as e:
                # Files that cannot be parsed are reported and never queued
                print(f"Error processing {json_file}: {e}")
                continue

print(f"\n{'='*80}")
print("DELETION SUMMARY")
print(f"{'='*80}")
# Report what was actually done here: files were queued, not deleted.
print(f"Total files marked for deletion: {len(deleted_files)}")

Error processing ../results/gpt_oss_20b_openrouter/diagnosis_simulator_sct/reactmem_overwrite_agent/ReActMemAgent_20250924-203452-903601.json: 'NoneType' object is not subscriptable
Error processing ../results/gpt_oss_20b_openrouter/diagnosis_simulator_sct/mem0_agent/Mem0Agent_20251021-141331-265480.json: 'NoneType' object is not subscriptable
Error processing ../results/qwen3_32b_openrouter/diagnosis_simulator_sct/mem0_agent/Mem0Agent_20251022-164142-606399.json: 'NoneType' object is not subscriptable

DELETION SUMMARY
Total files deleted: 583


In [12]:
# Remove every file queued in `deleted_files`. This is irreversible.
# Paths that are already gone (e.g. this cell was re-run, or a previous run was
# interrupted part-way) are skipped so the cell can be executed again safely.
removed_count = 0
for file_path in deleted_files:
    try:
        os.remove(file_path)
    except FileNotFoundError:
        continue
    removed_count += 1

print(f"Removed {removed_count} of {len(deleted_files)} queued files")