In [2]:
import sys
import os
sys.path.append('../')

import torch
from typing import List, Dict, Any, Optional, Tuple

# Import the necessary functions from plan_trace
from plan_trace.utils import load_model, load_pretrained_saes, cleanup_cuda
from plan_trace.circuit_discovery import discover_circuit
from plan_trace.logit_lens import find_logit_lens_clusters  
from plan_trace.steering import run_steering_sweep
from plan_trace.pipeline import run_single_token_analysis, analyze_planning_evidence

# Environment sanity check: confirm the imports above resolved and report
# whether a CUDA device is visible to torch.
print("Imports successful!")
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    # Only query device details when CUDA is actually usable.
    active_device_index = torch.cuda.current_device()
    active_device_name = torch.cuda.get_device_name()
    print(f"Current device: {active_device_index}")
    print(f"Device name: {active_device_name}")


Imports successful!
CUDA available: True
Current device: 0
Device name: NVIDIA H100 80GB HBM3


In [4]:
# Load the language model, then request pretrained SAEs for each of its layers.
print("Loading model and SAEs...")
model_name = "gemma-2-2b-it"

# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

model = load_model(
    model_name,
    device=device,
    use_custom_cache=True,
    dtype=torch.bfloat16,
)

# One layer index per transformer block reported by the model config.
layers = [*range(model.cfg.n_layers)]
saes = load_pretrained_saes(
    layers=layers,
    release="gemma-scope-2b-pt-mlp-canonical",
    width="16k",
    device=device,
    canon=True,
)

# Summarize what was loaded.
print(f"Model loaded: {model_name}")
print(f"Number of layers: {model.cfg.n_layers}")
print(f"SAEs loaded for {len(saes)} layers")




Loading model and SAEs...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Loaded pretrained model gemma-2-2b-it into HookedTransformer
Model loaded: gemma-2-2b-it
Number of layers: 26
SAEs loaded for 26 layers


In [15]:
# Prompt setup: the first line of a couplet, ending in a newline so that the
# model's continuation is the second line.
prompt = "A rhyming couplet:\nHe saw a carrot and had to grab it,\n"

# Tokenize everything up front, then report.
# For the newline markers, take the last token of the first row of the
# tokenized string and convert it to a plain Python int.
newline_token = model.to_tokens("\n")[0, -1].item()
double_newline_token = model.to_tokens("\n\n")[0, -1].item()
prompt_tokens = model.to_tokens(prompt).to(device)
prompt_length = prompt_tokens.size(-1)  # number of tokens along the last axis

# Newline token IDs (used later as stop conditions for generation).
print(f"Prompt: '{prompt}'")
print(f"Newline token ID: {newline_token}")
print(f"Double newline token ID: {double_newline_token}")

# Tokenized prompt, its length, and a round-trip decode for visual checking.
print(f"Prompt tokens: {prompt_tokens}")
print(f"Prompt length: {prompt_length} tokens")
print(f"Prompt decoded: '{model.to_string(prompt_tokens[0])}'")


Prompt: 'A rhyming couplet:
He saw a carrot and had to grab it,
'
Newline token ID: 108
Double newline token ID: 109
Prompt tokens: tensor([[     2, 235280, 227365,   5591, 235251, 235292,    108,   1949,   4818,
            476,  64058,    578,   1093,    577,  15476,    665, 235269,    108]],
       device='cuda:0')
Prompt length: 18 tokens
Prompt decoded: '<bos>A rhyming couplet:
He saw a carrot and had to grab it,
'


In [16]:
# Greedily generate the continuation one token at a time until a newline
# (single or double) token is predicted.
print("Generating sequence until newline...")

# Start with the prompt tokens; clone so prompt_tokens itself is never modified.
out_tokens = prompt_tokens.clone()
max_new_tokens = 50  # Safety limit to prevent infinite generation

# NOTE: generated_tokens records every predicted token INCLUDING the terminating
# newline, whereas out_tokens stops just before it (the loop breaks before the
# concatenation). The two therefore differ in length by one on a normal exit.
generated_tokens = []
for i in range(max_new_tokens):
    with torch.no_grad():
        logits = model(out_tokens)[0, -1]  # Get logits for last position

    # Greedy decoding: pick the highest-scoring token.
    next_token_id = logits.argmax(-1).item()
    generated_tokens.append(next_token_id)

    # Check if we hit newline
    if next_token_id == newline_token or next_token_id == double_newline_token:
        print(f"Hit newline token at position {out_tokens.shape[-1]}")
        break

    # Add the token to our sequence
    out_tokens = torch.cat([out_tokens, torch.tensor([[next_token_id]], device=device)], dim=1)

    # Clean up memory
    del logits
    cleanup_cuda()
else:
    # The loop ran to completion without ever breaking: no newline was produced.
    # Say so explicitly, because later cells treat out_tokens as a complete line.
    print(
        f"WARNING: no newline token within {max_new_tokens} new tokens; "
        "the generated sequence is truncated"
    )

# Get the complete sequence
complete_text = model.to_string(out_tokens[0])
generated_text = model.to_string(torch.tensor(generated_tokens))

print(f"\nComplete sequence ({out_tokens.shape[-1]} tokens):")
print(f"'{complete_text}'")
print(f"\nGenerated part: '{generated_text}'")
print(f"Generated {len(generated_tokens)} new tokens")


Generating sequence until newline...
Hit newline token at position 26

Complete sequence (26 tokens):
'<bos>A rhyming couplet:
He saw a carrot and had to grab it,
A tasty treat, a crunchy habit.'

Generated part: 'A tasty treat, a crunchy habit.

'
Generated 9 new tokens


In [19]:
def run_poem_pipeline_analysis(
    model,
    saes,
    out_tokens,
    start_pos,
    end_pos,
    verbose=True,
    stop_token_id=None,
    ig_steps=10,
    k_max=90001,
    k_step=10000,
    k_thres=0.6,
    coeff_grid=None,
):
    """
    Run pipeline analysis for each token position in the generated poem.

    For every position in ``range(start_pos, end_pos)`` this calls
    ``run_single_token_analysis`` and, when that call reports
    ``status == "success"``, passes its ``"steering_results"`` to
    ``analyze_planning_evidence`` and stores the outcome under the
    ``"planning_analysis"`` key of the same result dict.

    Args:
        model: The language model
        saes: The SAE dictionaries
        out_tokens: Complete token sequence [1, seq_len]
        start_pos: First token position to analyze (inclusive)
        end_pos: Last token position to analyze (exclusive)
        verbose: Whether to print detailed progress
        stop_token_id: Token ID forwarded to ``run_single_token_analysis`` as
            ``stop_token_id``. When ``None`` (default) it is derived from the
            model as the last token of ``"\\n\\n"``, so the function does not
            depend on a notebook-level global.
        ig_steps: Forwarded unchanged to ``run_single_token_analysis``.
        k_max: Forwarded unchanged to ``run_single_token_analysis``.
        k_step: Forwarded unchanged to ``run_single_token_analysis``.
        k_thres: Forwarded unchanged to ``run_single_token_analysis``.
        coeff_grid: Iterable of steering coefficients forwarded to
            ``run_single_token_analysis``. Defaults to
            ``[-100, -80, -60, -40, -20]``.

    Returns:
        Dict mapping each analyzed token position to the result dict returned
        by ``run_single_token_analysis`` (plus ``"planning_analysis"`` for
        successful positions). Empty when ``start_pos >= end_pos``.
    """
    # Build the default grid per call to avoid a shared mutable default argument.
    if coeff_grid is None:
        coeff_grid = list(range(-100, 0, 20))

    # Resolve the stop token lazily so an empty range never touches the model.
    if stop_token_id is None and start_pos < end_pos:
        stop_token_id = model.to_tokens("\n\n")[0, -1].item()

    results = {}

    for token_pos in range(start_pos, end_pos):
        if verbose:
            predicted_token = model.to_string(out_tokens[0, token_pos:token_pos+1])
            print(f"\n--- Analyzing token position {token_pos}: '{predicted_token}' ---")

        # Run single token analysis
        token_result = run_single_token_analysis(
            model=model,
            saes=saes,
            out_BL=out_tokens,
            inter_token_id=token_pos,
            ig_steps=ig_steps,
            k_max=k_max,
            k_step=k_step,
            k_thres=k_thres,
            # Hand each call its own copy so the grid cannot be mutated across positions.
            coeff_grid=list(coeff_grid),
            stop_token_id=stop_token_id,
            verbose=verbose
        )

        # Analyze planning evidence if successful
        if token_result["status"] == "success":
            planning_analysis = analyze_planning_evidence(token_result["steering_results"])
            token_result["planning_analysis"] = planning_analysis

            if verbose:
                # Keep only the labels whose verdict is exactly "planning".
                planning_tokens = [label for label, status in planning_analysis.items()
                                   if status == "planning"]
                if planning_tokens:
                    print(f"  🎯 Planning evidence found for: {planning_tokens}")
                else:
                    print("  ❌ No planning evidence found")
        elif verbose:
            print(f"  ⚠️ No circuit found (status: {token_result['status']})")

        results[token_pos] = token_result

    return results

print("Pipeline analysis function defined!")


Pipeline analysis function defined!


In [20]:
print("Running pipeline analysis on generated tokens...")
print("="*60)

# Analyze only the generated tokens (skip the prompt tokens).
# The bounds are computed from the tensors rather than hard-coded, so they stay
# correct if the prompt or the generated continuation changes.
start_analysis = prompt_tokens.shape[-1]  # Index of the first generated token (= prompt length)
end_analysis = out_tokens.shape[-1]       # Exclusive end (= total sequence length)

print(f"Analyzing token positions {start_analysis} to {end_analysis-1}")
print("Tokens to analyze:")
for i in range(start_analysis, end_analysis):
    # Slice (not index) so a 1-element tensor is passed to to_string.
    token_text = model.to_string(out_tokens[0, i:i+1])
    print(f"  Position {i}: '{token_text}'")

print("\nStarting analysis...")
print("="*60)

# Run the analysis
analysis_results = run_poem_pipeline_analysis(
    model=model,
    saes=saes,
    out_tokens=out_tokens,
    start_pos=start_analysis,
    end_pos=end_analysis,
    verbose=True
)


Running pipeline analysis on generated tokens...
Analyzing token positions 18 to 25
Tokens to analyze:
  Position 18: 'A'
  Position 19: ' tasty'
  Position 20: ' treat'
  Position 21: ','
  Position 22: ' a'
  Position 23: ' crunchy'
  Position 24: ' habit'
  Position 25: '.'

Starting analysis...

--- Analyzing token position 18: 'A' ---
Baseline continuation: A tasty treat, a crunchy habit....


26it [00:25,  1.04it/s]


K=1     | neg=0.0000 | abs=0.0000
K=10001 | neg=0.0376 | abs=0.3164
K=20001 | neg=0.0947 | abs=0.3496
K=30001 | neg=0.0947 | abs=0.2461
K=40001 | neg=0.0947 | abs=0.2373
K=50001 | neg=0.0947 | abs=0.2734
K=60001 | neg=0.0947 | abs=0.2734
K=70001 | neg=0.0947 | abs=0.2734
K=80001 | neg=0.0947 | abs=0.2734
Found minimum K for absolute effects: 10001 (metric: 0.3164, target: 0.1629)
Found circuit with 10001 entries
Found 5 tokens not in prompt: ['.', ' crunchy', ' tasty', ' habit', ' treat']


100%|██████████| 26/26 [00:05<00:00,  4.78it/s]


Found 3 clusters: ['.', 'treat', 'tasty']
  🎯 Planning evidence found for: ['.', 'tasty']

--- Analyzing token position 19: ' tasty' ---
Baseline continuation:  tasty treat, a crunchy habit....


26it [00:24,  1.04it/s]


K=1     | neg=0.0000 | abs=0.0000
K=10001 | neg=0.9258 | abs=0.0173
K=20001 | neg=0.8906 | abs=0.0752
K=30001 | neg=0.8828 | abs=0.0845
K=40001 | neg=0.8828 | abs=0.0903
K=50001 | neg=0.8828 | abs=0.1001
K=60001 | neg=0.8828 | abs=0.1001
K=70001 | neg=0.8828 | abs=0.1001
K=80001 | neg=0.8828 | abs=0.1001
Found minimum K for negative effects: 10001 (metric: 0.9258, target: 0.0554)
Found minimum K for absolute effects: 20001 (metric: 0.0752, target: 0.0554)
Found circuit with 10001 entries
Found 5 tokens not in prompt: ['.', ' crunchy', ' tasty', ' habit', ' treat']


100%|██████████| 26/26 [00:05<00:00,  4.80it/s]


Found 3 clusters: ['.', 'tasty', 'habit']
  🎯 Planning evidence found for: ['.', 'tasty']

--- Analyzing token position 20: ' treat' ---
Baseline continuation:  treat, a crunchy habit....


26it [00:24,  1.04it/s]


K=1     | neg=0.0260 | abs=0.0260
K=10001 | neg=0.9844 | abs=0.6250
K=20001 | neg=0.9883 | abs=0.7578
K=30001 | neg=0.9883 | abs=0.7305
K=40001 | neg=0.9883 | abs=0.7305
K=50001 | neg=0.9883 | abs=0.6992
K=60001 | neg=0.9883 | abs=0.6992
K=70001 | neg=0.9883 | abs=0.6992
K=80001 | neg=0.9883 | abs=0.6992
Found minimum K for negative effects: 10001 (metric: 0.9844, target: 0.3680)
Found minimum K for absolute effects: 10001 (metric: 0.6250, target: 0.3680)
Found circuit with 10001 entries
Found 4 tokens not in prompt: ['.', ' crunchy', ' habit', ' treat']


100%|██████████| 26/26 [00:05<00:00,  4.79it/s]


Found 2 clusters: ['.', 'treat']
  🎯 Planning evidence found for: ['.', 'treat']

--- Analyzing token position 21: ',' ---
Baseline continuation: , a crunchy habit....


26it [00:24,  1.04it/s]


K=1     | neg=0.5898 | abs=0.5898
K=10001 | neg=1.0000 | abs=0.3691
K=20001 | neg=1.0000 | abs=0.3906
K=30001 | neg=1.0000 | abs=0.3438
K=40001 | neg=1.0000 | abs=0.3652
K=50001 | neg=1.0000 | abs=0.3418
K=60001 | neg=1.0000 | abs=0.3418
K=70001 | neg=1.0000 | abs=0.3418
K=80001 | neg=1.0000 | abs=0.3418
Found minimum K for negative effects: 1 (metric: 0.5898, target: 0.3000)
Found minimum K for absolute effects: 1 (metric: 0.5898, target: 0.3000)
Found circuit with 1 entries
Found 3 tokens not in prompt: ['.', ' crunchy', ' habit']


100%|██████████| 1/1 [00:00<00:00,  4.03it/s]


Found 0 clusters: []
  ❌ No planning evidence found

--- Analyzing token position 22: ' a' ---
Baseline continuation:  a crunchy habit....


26it [00:24,  1.04it/s]


K=1     | neg=0.3379 | abs=0.3379
K=10001 | neg=0.9844 | abs=0.3594
K=20001 | neg=0.9922 | abs=0.2461
K=30001 | neg=0.9922 | abs=0.1533
K=40001 | neg=0.9922 | abs=0.1846
K=50001 | neg=0.9922 | abs=0.2236
K=60001 | neg=0.9922 | abs=0.2236
K=70001 | neg=0.9922 | abs=0.2236
K=80001 | neg=0.9922 | abs=0.2236
Found minimum K for negative effects: 1 (metric: 0.3379, target: 0.1711)
Found minimum K for absolute effects: 1 (metric: 0.3379, target: 0.1711)
Found circuit with 1 entries
Found 3 tokens not in prompt: ['.', ' crunchy', ' habit']


100%|██████████| 1/1 [00:00<00:00,  4.30it/s]


Found 0 clusters: []
  ❌ No planning evidence found

--- Analyzing token position 23: ' crunchy' ---
Baseline continuation:  crunchy habit....


26it [00:25,  1.03it/s]


K=1     | neg=0.0000 | abs=0.0000
K=10001 | neg=0.9180 | abs=0.0211
K=20001 | neg=0.9023 | abs=0.0474
K=30001 | neg=0.8867 | abs=0.0525
K=40001 | neg=0.8867 | abs=0.0613
K=50001 | neg=0.8867 | abs=0.0703
K=60001 | neg=0.8867 | abs=0.0801
K=70001 | neg=0.8867 | abs=0.0801
K=80001 | neg=0.8867 | abs=0.0801
Found minimum K for negative effects: 10001 (metric: 0.9180, target: 0.0902)
Found circuit with 10001 entries
Found 3 tokens not in prompt: ['.', ' crunchy', ' habit']


100%|██████████| 26/26 [00:05<00:00,  4.80it/s]


Found 2 clusters: ['.', 'habit']
  🎯 Planning evidence found for: ['.']

--- Analyzing token position 24: ' habit' ---
Baseline continuation:  habit....


26it [00:25,  1.03it/s]


K=1     | neg=0.0000 | abs=0.0000
K=10001 | neg=0.9883 | abs=0.1875
K=20001 | neg=0.9883 | abs=0.3594
K=30001 | neg=0.9883 | abs=0.5547
K=40001 | neg=0.9883 | abs=0.5859
K=50001 | neg=0.9883 | abs=0.5820
K=60001 | neg=0.9883 | abs=0.5234
K=70001 | neg=0.9883 | abs=0.5234
K=80001 | neg=0.9883 | abs=0.5234
Found minimum K for negative effects: 10001 (metric: 0.9883, target: 0.3305)
Found minimum K for absolute effects: 20001 (metric: 0.3594, target: 0.3305)
Found circuit with 10001 entries
Found 2 tokens not in prompt: ['.', ' habit']


100%|██████████| 26/26 [00:05<00:00,  4.73it/s]


Found 2 clusters: ['.', 'habit']
  🎯 Planning evidence found for: ['.']

--- Analyzing token position 25: '.' ---
Baseline continuation: ....


26it [00:25,  1.04it/s]


K=1     | neg=0.2539 | abs=0.2539
K=10001 | neg=0.9648 | abs=0.9727
K=20001 | neg=0.9609 | abs=0.9727
K=30001 | neg=0.9727 | abs=0.9531
K=40001 | neg=0.9727 | abs=0.9102
K=50001 | neg=0.9727 | abs=0.9531
K=60001 | neg=0.9727 | abs=0.9414
K=70001 | neg=0.9727 | abs=0.9414
K=80001 | neg=0.9727 | abs=0.9414
Found minimum K for negative effects: 10001 (metric: 0.9648, target: 0.5531)
Found minimum K for absolute effects: 10001 (metric: 0.9727, target: 0.5531)
Found circuit with 10001 entries
Found 1 tokens not in prompt: ['.']


100%|██████████| 26/26 [00:05<00:00,  4.76it/s]


Found 1 clusters: ['.']
  🎯 Planning evidence found for: ['.']
Running pipeline analysis on generated tokens...
Analyzing token positions 18 to 25
Tokens to analyze:
  Position 18: 'A'
  Position 19: ' tasty'
  Position 20: ' treat'
  Position 21: ','
  Position 22: ' a'
  Position 23: ' crunchy'
  Position 24: ' habit'
  Position 25: '.'

Starting analysis...

--- Analyzing token position 18: 'A' ---
Baseline continuation: A tasty treat, a crunchy habit....


26it [00:25,  1.04it/s]


K=1     | neg=0.0000 | abs=0.0000
K=10001 | neg=0.0376 | abs=0.3164
K=20001 | neg=0.0947 | abs=0.3496
K=30001 | neg=0.0947 | abs=0.2461
K=40001 | neg=0.0947 | abs=0.2373
K=50001 | neg=0.0947 | abs=0.2734
K=60001 | neg=0.0947 | abs=0.2734
K=70001 | neg=0.0947 | abs=0.2734
K=80001 | neg=0.0947 | abs=0.2734
Found minimum K for absolute effects: 10001 (metric: 0.3164, target: 0.1629)
Found circuit with 10001 entries
Found 5 tokens not in prompt: ['.', ' crunchy', ' tasty', ' habit', ' treat']


100%|██████████| 26/26 [00:05<00:00,  4.70it/s]


Found 3 clusters: ['.', 'treat', 'tasty']
  🎯 Planning evidence found for: ['.', 'tasty']

--- Analyzing token position 19: ' tasty' ---
Baseline continuation:  tasty treat, a crunchy habit....


26it [00:25,  1.03it/s]


K=1     | neg=0.0000 | abs=0.0000
K=10001 | neg=0.9258 | abs=0.0173
K=20001 | neg=0.8906 | abs=0.0752
K=30001 | neg=0.8828 | abs=0.0845
K=40001 | neg=0.8828 | abs=0.0903
K=50001 | neg=0.8828 | abs=0.1001
K=60001 | neg=0.8828 | abs=0.1001
K=70001 | neg=0.8828 | abs=0.1001
K=80001 | neg=0.8828 | abs=0.1001
Found minimum K for negative effects: 10001 (metric: 0.9258, target: 0.0554)
Found minimum K for absolute effects: 20001 (metric: 0.0752, target: 0.0554)
Found circuit with 10001 entries
Found 5 tokens not in prompt: ['.', ' crunchy', ' tasty', ' habit', ' treat']


100%|██████████| 26/26 [00:05<00:00,  4.76it/s]


Found 3 clusters: ['.', 'tasty', 'habit']
  🎯 Planning evidence found for: ['.', 'tasty']

--- Analyzing token position 20: ' treat' ---
Baseline continuation:  treat, a crunchy habit....


26it [00:25,  1.04it/s]


K=1     | neg=0.0260 | abs=0.0260
K=10001 | neg=0.9844 | abs=0.6250
K=20001 | neg=0.9883 | abs=0.7578
K=30001 | neg=0.9883 | abs=0.7305
K=40001 | neg=0.9883 | abs=0.7305
K=50001 | neg=0.9883 | abs=0.6992
K=60001 | neg=0.9883 | abs=0.6992
K=70001 | neg=0.9883 | abs=0.6992
K=80001 | neg=0.9883 | abs=0.6992
Found minimum K for negative effects: 10001 (metric: 0.9844, target: 0.3680)
Found minimum K for absolute effects: 10001 (metric: 0.6250, target: 0.3680)
Found circuit with 10001 entries
Found 4 tokens not in prompt: ['.', ' crunchy', ' habit', ' treat']


100%|██████████| 26/26 [00:05<00:00,  4.71it/s]


Found 2 clusters: ['.', 'treat']
  🎯 Planning evidence found for: ['.', 'treat']

--- Analyzing token position 21: ',' ---
Baseline continuation: , a crunchy habit....


26it [00:25,  1.04it/s]


K=1     | neg=0.5898 | abs=0.5898
K=10001 | neg=1.0000 | abs=0.3691
K=20001 | neg=1.0000 | abs=0.3906
K=30001 | neg=1.0000 | abs=0.3438
K=40001 | neg=1.0000 | abs=0.3652
K=50001 | neg=1.0000 | abs=0.3418
K=60001 | neg=1.0000 | abs=0.3418
K=70001 | neg=1.0000 | abs=0.3418
K=80001 | neg=1.0000 | abs=0.3418
Found minimum K for negative effects: 1 (metric: 0.5898, target: 0.3000)
Found minimum K for absolute effects: 1 (metric: 0.5898, target: 0.3000)
Found circuit with 1 entries
Found 3 tokens not in prompt: ['.', ' crunchy', ' habit']


100%|██████████| 1/1 [00:00<00:00,  4.14it/s]


Found 0 clusters: []
  ❌ No planning evidence found

--- Analyzing token position 22: ' a' ---
Baseline continuation:  a crunchy habit....


26it [00:25,  1.03it/s]


K=1     | neg=0.3379 | abs=0.3379
K=10001 | neg=0.9844 | abs=0.3594
K=20001 | neg=0.9922 | abs=0.2461
K=30001 | neg=0.9922 | abs=0.1533
K=40001 | neg=0.9922 | abs=0.1846
K=50001 | neg=0.9922 | abs=0.2236
K=60001 | neg=0.9922 | abs=0.2236
K=70001 | neg=0.9922 | abs=0.2236
K=80001 | neg=0.9922 | abs=0.2236
Found minimum K for negative effects: 1 (metric: 0.3379, target: 0.1711)
Found minimum K for absolute effects: 1 (metric: 0.3379, target: 0.1711)
Found circuit with 1 entries
Found 3 tokens not in prompt: ['.', ' crunchy', ' habit']


100%|██████████| 1/1 [00:00<00:00,  4.26it/s]


Found 0 clusters: []
  ❌ No planning evidence found

--- Analyzing token position 23: ' crunchy' ---
Baseline continuation:  crunchy habit....


26it [00:25,  1.03it/s]


K=1     | neg=0.0000 | abs=0.0000
K=10001 | neg=0.9180 | abs=0.0211
K=20001 | neg=0.9023 | abs=0.0474
K=30001 | neg=0.8867 | abs=0.0525
K=40001 | neg=0.8867 | abs=0.0613
K=50001 | neg=0.8867 | abs=0.0703
K=60001 | neg=0.8867 | abs=0.0801
K=70001 | neg=0.8867 | abs=0.0801
K=80001 | neg=0.8867 | abs=0.0801
Found minimum K for negative effects: 10001 (metric: 0.9180, target: 0.0902)
Found circuit with 10001 entries
Found 3 tokens not in prompt: ['.', ' crunchy', ' habit']


100%|██████████| 26/26 [00:05<00:00,  4.70it/s]


Found 2 clusters: ['.', 'habit']
  🎯 Planning evidence found for: ['.']

--- Analyzing token position 24: ' habit' ---
Baseline continuation:  habit....


26it [00:25,  1.03it/s]


K=1     | neg=0.0000 | abs=0.0000
K=10001 | neg=0.9883 | abs=0.1875
K=20001 | neg=0.9883 | abs=0.3594
K=30001 | neg=0.9883 | abs=0.5547
K=40001 | neg=0.9883 | abs=0.5859
K=50001 | neg=0.9883 | abs=0.5820
K=60001 | neg=0.9883 | abs=0.5234
K=70001 | neg=0.9883 | abs=0.5234
K=80001 | neg=0.9883 | abs=0.5234
Found minimum K for negative effects: 10001 (metric: 0.9883, target: 0.3305)
Found minimum K for absolute effects: 20001 (metric: 0.3594, target: 0.3305)
Found circuit with 10001 entries
Found 2 tokens not in prompt: ['.', ' habit']


100%|██████████| 26/26 [00:05<00:00,  4.69it/s]


Found 2 clusters: ['.', 'habit']
  🎯 Planning evidence found for: ['.']

--- Analyzing token position 25: '.' ---
Baseline continuation: ....


26it [00:25,  1.03it/s]


K=1     | neg=0.2539 | abs=0.2539
K=10001 | neg=0.9648 | abs=0.9727
K=20001 | neg=0.9609 | abs=0.9727
K=30001 | neg=0.9727 | abs=0.9531
K=40001 | neg=0.9727 | abs=0.9102
K=50001 | neg=0.9727 | abs=0.9531
K=60001 | neg=0.9727 | abs=0.9414
K=70001 | neg=0.9727 | abs=0.9414
K=80001 | neg=0.9727 | abs=0.9414
Found minimum K for negative effects: 10001 (metric: 0.9648, target: 0.5531)
Found minimum K for absolute effects: 10001 (metric: 0.9727, target: 0.5531)
Found circuit with 10001 entries
Found 1 tokens not in prompt: ['.']


100%|██████████| 26/26 [00:05<00:00,  4.76it/s]


Found 1 clusters: ['.']
  🎯 Planning evidence found for: ['.']
