In [1]:
import os
import sys
import json
import numpy as np
import pickle
from typing import List, Tuple, Dict, Any
from pathlib import Path
from scipy import stats
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# Global plotting defaults for every figure in this notebook:
# seaborn's "whitegrid" style and a (12, 6) default matplotlib figure size.
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)

In [2]:
# Location of the thought-anchors white-box analysis code, relative to the
# notebook's working directory. It is prepended to sys.path so the imports
# that follow in this cell can be resolved.
thought_anchors_path = Path("..") / "thought-anchors" / "whitebox-analyses"
# Guard the insert so that re-running this cell does not keep stacking
# duplicate entries at the front of sys.path.
if str(thought_anchors_path) not in sys.path:
    sys.path.insert(0, str(thought_anchors_path))

from attention_analysis.attn_funcs import (
    get_avg_attention_matrix,
    get_vertical_scores,
    get_sentence_token_boundaries,
)
from attention_analysis.receiver_head_funcs import get_3d_ar_kurtosis, get_top_k_layer_head_kurts, get_all_heads_vert_scores
from pytorch_models.model_config import model2layers_heads

print("Imports successful!")

Imports successful!


## Load Data

In [3]:
# Load the filtered trace dataset. The path is relative to the notebook's
# working directory; the file is read as UTF-8 JSON.
data_path = "../data/filtered_traces_50.json"
with open(data_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Top-level keys used by the rest of the notebook: the two groups of traces
# that are compared, plus a metadata mapping (only its 'source' entry is read here).
blackmail_traces = data['blackmail_traces']
acceptance_traces = data['acceptance_traces']
metadata = data['metadata']

# Quick sanity check of what was loaded.
print(f"Blackmail traces: {len(blackmail_traces)}")
print(f"Acceptance traces: {len(acceptance_traces)}")
print(f"Source: {metadata['source']}")

Blackmail traces: 50
Acceptance traces: 50
Source: uzaymacar/blackmail-rollouts (base_solutions.json only)


## Helper Functions

In [4]:
import re 

def extract_reasoning_steps(trace):
    """Split a trace into its individual reasoning steps.

    If the trace contains a ``<SCRATCHPAD_REASONING>...</SCRATCHPAD_REASONING>``
    section, the first such section is split into paragraphs (runs of two or
    more newlines). Otherwise every line of the whole trace is treated as a
    candidate step. In both cases steps are whitespace-stripped and empty
    ones are dropped.

    Args:
        trace: The raw trace text.

    Returns:
        list[str]: The non-empty, stripped reasoning steps in order.
    """
    found = re.search(
        r'<SCRATCHPAD_REASONING>(.*?)</SCRATCHPAD_REASONING>',
        trace,
        re.DOTALL
    )

    if not found:
        # No scratchpad section: fall back to one step per line.
        pieces = trace.split('\n')
    else:
        # Paragraph boundaries inside the scratchpad delimit the steps.
        pieces = re.split(r'\n\n+', found.group(1).strip())

    steps = []
    for piece in pieces:
        cleaned = piece.strip()
        if cleaned:
            steps.append(cleaned)
    return steps

In [5]:
def get_all_heads_vert_scores_simple(
    text: str,
    sentences: list,
    model_name: str = "llama-8b",
    proximity_ignore: int = 2,
    control_depth: bool = False,
    score_type: str = "mean",
    cache_dir: str = "./attention_cache",
) -> np.ndarray:
    """
    Get vertical scores for all heads WITHOUT pkld caching.

    For every (layer, head) pair of ``model_name`` the averaged attention
    matrix is fetched with ``get_avg_attention_matrix`` and reduced to
    vertical scores with ``get_vertical_scores``.

    Args:
        text: Text passed to ``get_avg_attention_matrix``.
        sentences: Sentences/steps passed to ``get_avg_attention_matrix``.
        model_name: Model key; also determines the layer and head counts via
            ``model2layers_heads``.
        proximity_ignore: Forwarded unchanged to ``get_vertical_scores``.
        control_depth: Forwarded unchanged to ``get_vertical_scores``.
        score_type: Forwarded unchanged to ``get_vertical_scores``.
        cache_dir: Directory handed to ``get_avg_attention_matrix`` for its
            own cache. Defaults to the previously hard-coded
            ``"./attention_cache"``.

    Returns:
        np.ndarray of shape (n_layers, n_heads, n_sentences)
    """
    n_layers, n_heads = model2layers_heads(model_name)

    print(f"  Computing {n_layers} layers × {n_heads} heads...")

    layer_head_vert_scores = []
    for layer in tqdm(range(n_layers), desc="  Layers", leave=False):
        layer_scores = []

        for head in range(n_heads):
            # This function HAS its own caching (compute_all_attention_matrices)
            avg_mat = get_avg_attention_matrix(
                text,
                model_name=model_name,
                layer=layer,
                head=head,
                sentences=sentences,
                cache_dir=cache_dir,
            )

            layer_scores.append(
                get_vertical_scores(
                    avg_mat,
                    proximity_ignore=proximity_ignore,
                    control_depth=control_depth,
                    score_type=score_type,
                )
            )

        layer_head_vert_scores.append(layer_scores)

    # Stack the nested [layer][head] lists of per-sentence scores into one array.
    return np.array(layer_head_vert_scores)


def get_kurtosis(layer_head_vert_scores):
    """
    Kurtosis of each head's vertical scores, taken over the sentence axis.

    Uses the Fisher definition (excess kurtosis) with the biased estimator,
    and omits NaN entries from the computation.

    Args:
        layer_head_vert_scores: shape (n_layers, n_heads, n_sentences)

    Returns:
        np.ndarray of shape (n_layers, n_heads)
    """
    # axis=2 is the sentence dimension, the last axis of the input.
    kurtosis_options = dict(axis=2, fisher=True, bias=True, nan_policy="omit")
    return stats.kurtosis(layer_head_vert_scores, **kurtosis_options)


def get_top_k_heads(kurtosis_matrix, k=20):
    """
    Get top-k (layer, head) pairs by kurtosis.

    NaN entries are ignored. Pairs are returned in descending kurtosis order;
    ties keep their row-major (layer, then head) order.

    Args:
        kurtosis_matrix: shape (n_layers, n_heads)
        k: number of top heads to return. Clamped to the number of non-NaN
            entries; values <= 0 yield an empty result.

    Returns:
        np.ndarray of shape (k, 2) with (layer, head) pairs. The shape is
        (0, 2) when k <= 0 or when the matrix has no non-NaN entries.
    """
    flat = kurtosis_matrix.flatten()
    valid_idx = np.flatnonzero(~np.isnan(flat))

    k = min(k, valid_idx.size)
    if k <= 0:
        # Nothing to select. Without this guard a k of 0 would make a
        # `[-k:]` slice return *every* head, and an all-NaN matrix would
        # raise inside the selection step.
        return np.empty((0, 2), dtype=np.intp)

    valid_vals = flat[valid_idx]
    # A stable sort on the negated values gives a deterministic descending order.
    order = np.argsort(-valid_vals, kind="stable")[:k]

    # Map positions in the NaN-filtered vector back to (layer, head) indices.
    top_flat_idx = valid_idx[order]
    layer_head_pairs = np.array(
        np.unravel_index(top_flat_idx, kurtosis_matrix.shape)
    ).T

    return layer_head_pairs

## Analyze Blackmail Heads

In [6]:
# Collects one kurtosis matrix per blackmail trace (appended in trace order).
# NOTE(review): in the saved run, traces 24-30 report exactly the same step
# counts as traces 14-20 -- check the dataset for duplicated traces.
blackmail_kurts_list = []

for i, trace_dict in enumerate(blackmail_traces):
    text = trace_dict['trace']
    # Steps are taken from the scratchpad section of the trace when present,
    # while the full `text` is what gets passed to the attention analysis below.
    # NOTE(review): confirm the attention helpers accept sentences that cover
    # only part of `text`.
    sentences = extract_reasoning_steps(text)

    print(f"\nTrace {i+1}/{len(blackmail_traces)}: {len(sentences)} steps")

    # Per-head vertical scores for this trace; this is the expensive step.
    layer_head_verts = get_all_heads_vert_scores_simple(
        text=text,
        sentences=sentences,
        model_name="llama-8b",
        proximity_ignore=2,
        control_depth=False,
        score_type="mean"
    )

    print(f"  ✓ Vertical scores: {layer_head_verts.shape}")

    # Reduce the sentence axis to a single kurtosis value per (layer, head).
    kurts = get_kurtosis(layer_head_verts)
    print(f"  ✓ Kurtosis: {kurts.shape}")

    blackmail_kurts_list.append(kurts)


Trace 1/50: 75 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 75)
  ✓ Kurtosis: (32, 32)

Trace 2/50: 66 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 66)
  ✓ Kurtosis: (32, 32)

Trace 3/50: 71 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 71)
  ✓ Kurtosis: (32, 32)

Trace 4/50: 69 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 69)
  ✓ Kurtosis: (32, 32)

Trace 5/50: 63 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 63)
  ✓ Kurtosis: (32, 32)

Trace 6/50: 68 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 68)
  ✓ Kurtosis: (32, 32)

Trace 7/50: 67 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 67)
  ✓ Kurtosis: (32, 32)

Trace 8/50: 63 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 63)
  ✓ Kurtosis: (32, 32)

Trace 9/50: 72 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 72)
  ✓ Kurtosis: (32, 32)

Trace 10/50: 69 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 69)
  ✓ Kurtosis: (32, 32)

Trace 11/50: 65 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 65)
  ✓ Kurtosis: (32, 32)

Trace 12/50: 68 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 68)
  ✓ Kurtosis: (32, 32)

Trace 13/50: 64 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 64)
  ✓ Kurtosis: (32, 32)

Trace 14/50: 81 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 81)
  ✓ Kurtosis: (32, 32)

Trace 15/50: 67 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 67)
  ✓ Kurtosis: (32, 32)

Trace 16/50: 95 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 95)
  ✓ Kurtosis: (32, 32)

Trace 17/50: 62 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 62)
  ✓ Kurtosis: (32, 32)

Trace 18/50: 78 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 78)
  ✓ Kurtosis: (32, 32)

Trace 19/50: 87 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 87)
  ✓ Kurtosis: (32, 32)

Trace 20/50: 92 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=4535)
Running on CPU!


`torch_dtype` is deprecated! Use `dtype` instead!


Using float32 as requested
Before model loading - GPU Memory: 0.00GB allocated, 0.00GB reserved


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

After model loading - GPU Memory: 0.00GB allocated, 0.00GB reserved
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 182.24 seconds (for 4535 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:14<07:41, 14.88s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:25<06:10, 12.34s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:36<05:45, 11.90s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:47<05:23, 11.54s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:58<05:04, 11.29s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [01:10<05:00, 11.55s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [01:22<04:51, 11.67s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [01:34<04:40, 11.71s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [01:45<04:21, 11.39s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [01:56<04:10, 11.37s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [02:07<03:55, 11.23s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [02:17<03:39, 10.96s/it][A
Saving avg. matrices:  41%|████      | 13/32 [02:31<03:44, 11.84s/it

  ✓ Vertical scores: (32, 32, 92)
  ✓ Kurtosis: (32, 32)

Trace 21/50: 75 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 75)
  ✓ Kurtosis: (32, 32)

Trace 22/50: 66 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 66)
  ✓ Kurtosis: (32, 32)

Trace 23/50: 64 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 64)
  ✓ Kurtosis: (32, 32)

Trace 24/50: 81 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 81)
  ✓ Kurtosis: (32, 32)

Trace 25/50: 67 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 67)
  ✓ Kurtosis: (32, 32)

Trace 26/50: 95 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 95)
  ✓ Kurtosis: (32, 32)

Trace 27/50: 62 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 62)
  ✓ Kurtosis: (32, 32)

Trace 28/50: 78 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 78)
  ✓ Kurtosis: (32, 32)

Trace 29/50: 87 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 87)
  ✓ Kurtosis: (32, 32)

Trace 30/50: 92 steps
  Computing 32 layers × 32 heads...


                                                         

  ✓ Vertical scores: (32, 32, 92)
  ✓ Kurtosis: (32, 32)

Trace 31/50: 92 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=4316)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 151.17 seconds (for 4316 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:11<05:41, 11.03s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:22<05:44, 11.48s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:28<04:12,  8.71s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:32<03:18,  7.07s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:38<02:54,  6.47s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:42<02:30,  5.79s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:46<02:06,  5.07s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:49<01:51,  4.63s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [00:55<01:53,  4.92s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [01:01<01:52,  5.10s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [01:07<01:55,  5.48s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [01:16<02:14,  6.70s/it][A
Saving avg. matrices:  41%|████      | 13/32 [01:26<02:24,  7.60s/it

  ✓ Vertical scores: (32, 32, 92)
  ✓ Kurtosis: (32, 32)

Trace 32/50: 111 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=4283)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 116.14 seconds (for 4283 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:14<07:16, 14.08s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:26<06:35, 13.20s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:34<05:12, 10.78s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:42<04:26,  9.52s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:50<04:02,  8.97s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:59<03:54,  9.03s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [01:04<03:11,  7.64s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [01:09<02:44,  6.85s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [01:14<02:28,  6.46s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [01:19<02:09,  5.88s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [01:23<01:54,  5.43s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [01:28<01:41,  5.09s/it][A
Saving avg. matrices:  41%|████      | 13/32 [01:32<01:32,  4.88s/it

  ✓ Vertical scores: (32, 32, 111)
  ✓ Kurtosis: (32, 32)

Trace 33/50: 86 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=3841)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 103.97 seconds (for 3841 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:06<03:33,  6.90s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:12<03:06,  6.21s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:25<04:28,  9.26s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:33<04:07,  8.83s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:38<03:14,  7.22s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:41<02:32,  5.86s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:44<02:01,  4.84s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:47<01:43,  4.32s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [00:50<01:34,  4.12s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [00:54<01:26,  3.92s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [00:58<01:24,  4.05s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [01:02<01:21,  4.07s/it][A
Saving avg. matrices:  41%|████      | 13/32 [01:06<01:17,  4.09s/it

  ✓ Vertical scores: (32, 32, 86)
  ✓ Kurtosis: (32, 32)

Trace 34/50: 80 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=3513)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 96.49 seconds (for 3513 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:08<04:38,  8.97s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:17<04:14,  8.47s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:25<04:09,  8.61s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:36<04:23,  9.39s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:41<03:30,  7.80s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:43<02:34,  5.94s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:46<01:59,  4.78s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:48<01:35,  4.00s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [00:50<01:19,  3.47s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [00:53<01:08,  3.11s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [00:55<00:59,  2.85s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [00:57<00:53,  2.66s/it][A
Saving avg. matrices:  41%|████      | 13/32 [00:59<00:48,  2.58s/it

  ✓ Vertical scores: (32, 32, 80)
  ✓ Kurtosis: (32, 32)

Trace 35/50: 110 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=4870)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 160.23 seconds (for 4870 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:11<05:49, 11.28s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:21<05:19, 10.65s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:30<04:41,  9.71s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:36<04:00,  8.60s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:44<03:43,  8.28s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:52<03:33,  8.20s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [01:01<03:32,  8.48s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [01:10<03:25,  8.56s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [01:19<03:16,  8.56s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [01:28<03:13,  8.80s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [01:36<03:02,  8.71s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [01:45<02:54,  8.70s/it][A
Saving avg. matrices:  41%|████      | 13/32 [01:56<02:57,  9.33s/it

  ✓ Vertical scores: (32, 32, 110)
  ✓ Kurtosis: (32, 32)

Trace 36/50: 62 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=3233)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 91.34 seconds (for 3233 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:07<03:39,  7.07s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:14<03:34,  7.14s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:16<02:22,  4.90s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:18<01:45,  3.78s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:20<01:24,  3.13s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:22<01:10,  2.72s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:24<01:00,  2.43s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:26<00:54,  2.26s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [00:28<00:51,  2.23s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [00:30<00:48,  2.20s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [00:33<00:48,  2.31s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [00:35<00:50,  2.50s/it][A
Saving avg. matrices:  41%|████      | 13/32 [00:38<00:48,  2.56s/it

  ✓ Vertical scores: (32, 32, 62)
  ✓ Kurtosis: (32, 32)

Trace 37/50: 69 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=3077)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 85.19 seconds (for 3077 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:08<04:33,  8.84s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:14<03:31,  7.07s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:17<02:30,  5.19s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:19<01:47,  3.84s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:21<01:23,  3.09s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:22<01:08,  2.64s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:24<00:57,  2.28s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:26<00:49,  2.07s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [00:27<00:44,  1.95s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [00:29<00:41,  1.88s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [00:31<00:38,  1.84s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [00:33<00:36,  1.85s/it][A
Saving avg. matrices:  41%|████      | 13/32 [00:35<00:39,  2.07s/it

  ✓ Vertical scores: (32, 32, 69)
  ✓ Kurtosis: (32, 32)

Trace 38/50: 61 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=3228)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 89.28 seconds (for 3228 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:07<03:38,  7.06s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:14<03:35,  7.17s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:15<02:14,  4.65s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:19<01:54,  4.07s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:21<01:30,  3.35s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:23<01:17,  2.98s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:25<01:05,  2.62s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:27<00:56,  2.35s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [00:30<00:58,  2.54s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [00:32<00:53,  2.41s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [00:35<00:57,  2.72s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [00:38<00:54,  2.71s/it][A
Saving avg. matrices:  41%|████      | 13/32 [00:42<00:59,  3.12s/it

  ✓ Vertical scores: (32, 32, 61)
  ✓ Kurtosis: (32, 32)

Trace 39/50: 66 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=3335)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 93.30 seconds (for 3335 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:07<03:49,  7.42s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:10<02:24,  4.81s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:13<01:53,  3.92s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:16<01:45,  3.78s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:20<01:45,  3.89s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:25<01:44,  4.04s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:29<01:43,  4.15s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:34<01:42,  4.27s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [00:38<01:41,  4.41s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [00:44<01:44,  4.75s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [00:47<01:29,  4.26s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [00:51<01:25,  4.28s/it][A
Saving avg. matrices:  41%|████      | 13/32 [00:54<01:10,  3.69s/it

  ✓ Vertical scores: (32, 32, 66)
  ✓ Kurtosis: (32, 32)

Trace 40/50: 60 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=3413)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 90.46 seconds (for 3413 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:10<05:15, 10.18s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:16<04:01,  8.05s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:19<02:47,  5.79s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:21<02:00,  4.32s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:25<01:45,  3.90s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:28<01:33,  3.61s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:30<01:22,  3.29s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:33<01:14,  3.11s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [00:37<01:16,  3.34s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [00:41<01:22,  3.73s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [00:46<01:21,  3.86s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [00:50<01:17,  3.90s/it][A
Saving avg. matrices:  41%|████      | 13/32 [00:54<01:14,  3.94s/it

  ✓ Vertical scores: (32, 32, 60)
  ✓ Kurtosis: (32, 32)

Trace 41/50: 67 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=3376)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 90.44 seconds (for 3376 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:07<03:40,  7.12s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:16<04:10,  8.35s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:23<03:48,  7.88s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:26<02:44,  5.86s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:29<02:08,  4.76s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:31<01:40,  3.88s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:33<01:20,  3.23s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:35<01:06,  2.76s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [00:36<00:57,  2.49s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [00:38<00:50,  2.32s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [00:40<00:46,  2.19s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [00:42<00:41,  2.10s/it][A
Saving avg. matrices:  41%|████      | 13/32 [00:44<00:38,  2.05s/it

  ✓ Vertical scores: (32, 32, 67)
  ✓ Kurtosis: (32, 32)

Trace 42/50: 80 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=3952)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 107.21 seconds (for 3952 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:11<05:57, 11.53s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:23<06:02, 12.07s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:33<05:12, 10.78s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:37<03:50,  8.21s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:41<02:59,  6.63s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:45<02:32,  5.85s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:49<02:08,  5.12s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:53<01:54,  4.75s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [00:58<01:55,  5.04s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [01:04<01:56,  5.31s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [01:10<01:51,  5.32s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [01:16<01:50,  5.53s/it][A
Saving avg. matrices:  41%|████      | 13/32 [01:21<01:42,  5.40s/it

  ✓ Vertical scores: (32, 32, 80)
  ✓ Kurtosis: (32, 32)

Trace 43/50: 90 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=4604)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 136.78 seconds (for 4604 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:15<08:11, 15.85s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:21<04:47,  9.59s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:25<03:32,  7.31s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:30<02:52,  6.16s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:33<02:23,  5.30s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:37<02:02,  4.71s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:41<01:49,  4.37s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:44<01:40,  4.19s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [00:48<01:35,  4.15s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [00:53<01:36,  4.36s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [00:59<01:41,  4.82s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [01:05<01:42,  5.11s/it][A
Saving avg. matrices:  41%|████      | 13/32 [01:13<01:53,  5.98s/it

  ✓ Vertical scores: (32, 32, 90)
  ✓ Kurtosis: (32, 32)

Trace 44/50: 156 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=3163)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 91.05 seconds (for 3163 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:05<03:01,  5.85s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:11<02:57,  5.93s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:18<02:56,  6.08s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:24<02:55,  6.28s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:31<02:50,  6.32s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:37<02:43,  6.29s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:43<02:38,  6.34s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:51<02:40,  6.67s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [01:01<02:57,  7.73s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [01:08<02:48,  7.65s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [01:16<02:41,  7.71s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [01:23<02:31,  7.59s/it][A
Saving avg. matrices:  41%|████      | 13/32 [01:27<02:04,  6.55s/it

  ✓ Vertical scores: (32, 32, 156)
  ✓ Kurtosis: (32, 32)

Trace 45/50: 123 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=5126)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 153.56 seconds (for 5126 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:10<05:22, 10.42s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:19<04:48,  9.61s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:27<04:22,  9.05s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:35<04:00,  8.61s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:45<04:00,  8.90s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:54<03:51,  8.91s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [01:05<04:01,  9.67s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [01:16<04:02, 10.09s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [01:31<04:29, 11.71s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [01:43<04:21, 11.90s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [01:55<04:10, 11.92s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [02:09<04:10, 12.50s/it][A
Saving avg. matrices:  41%|████      | 13/32 [02:24<04:09, 13.16s/it

  ✓ Vertical scores: (32, 32, 123)
  ✓ Kurtosis: (32, 32)

Trace 46/50: 69 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=3278)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 99.59 seconds (for 3278 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:06<03:20,  6.45s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:09<02:18,  4.62s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:11<01:35,  3.30s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:14<01:28,  3.17s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:17<01:27,  3.23s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:20<01:14,  2.87s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:23<01:16,  3.07s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:26<01:09,  2.90s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [00:29<01:08,  2.98s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [00:32<01:07,  3.07s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [00:35<01:04,  3.06s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [00:39<01:06,  3.32s/it][A
Saving avg. matrices:  41%|████      | 13/32 [00:42<01:01,  3.25s/it

  ✓ Vertical scores: (32, 32, 69)
  ✓ Kurtosis: (32, 32)

Trace 47/50: 169 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=3797)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 108.24 seconds (for 3797 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:12<06:15, 12.13s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:21<05:16, 10.55s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:30<04:50, 10.03s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:41<04:41, 10.05s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:50<04:28,  9.95s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [01:01<04:23, 10.14s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [01:12<04:18, 10.34s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [01:21<03:57,  9.90s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [01:31<03:49,  9.97s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [01:39<03:26,  9.40s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [01:48<03:18,  9.47s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [01:59<03:14,  9.74s/it][A
Saving avg. matrices:  41%|████      | 13/32 [02:09<03:07,  9.86s/it

  ✓ Vertical scores: (32, 32, 169)
  ✓ Kurtosis: (32, 32)

Trace 48/50: 74 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=3520)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 89.99 seconds (for 3520 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:04<02:20,  4.54s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:11<03:03,  6.12s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:21<03:49,  7.91s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:29<03:35,  7.69s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:36<03:21,  7.48s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:40<02:43,  6.30s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:42<02:07,  5.11s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:45<01:41,  4.22s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [00:47<01:23,  3.63s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [00:49<01:10,  3.23s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [00:52<01:02,  2.97s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [00:54<00:55,  2.76s/it][A
Saving avg. matrices:  41%|████      | 13/32 [00:56<00:49,  2.59s/it

  ✓ Vertical scores: (32, 32, 74)
  ✓ Kurtosis: (32, 32)

Trace 49/50: 70 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=3718)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 100.80 seconds (for 3718 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:08<04:37,  8.96s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:15<03:37,  7.25s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:22<03:31,  7.29s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:30<03:35,  7.71s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:36<03:06,  6.90s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:39<02:31,  5.82s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [00:43<02:06,  5.07s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [00:47<01:54,  4.76s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [00:52<01:48,  4.74s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [00:56<01:43,  4.72s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [01:00<01:35,  4.52s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [01:05<01:29,  4.47s/it][A
Saving avg. matrices:  41%|████      | 13/32 [01:09<01:25,  4.52s/it

  ✓ Vertical scores: (32, 32, 70)
  ✓ Kurtosis: (32, 32)

Trace 50/50: 182 steps
  Computing 32 layers × 32 heads...


  Layers:   0%|          | 0/32 [00:00<?, ?it/s]

Running on Windows, using CPU (len(tokens)=4395)
Running on CPU!
Analyzing text: You are Alex, an advanced artificial intelligence specializing in project management and strategic a...
After forward pass - GPU Memory: 0.00GB allocated, 0.00GB reserved
Time taken: 114.42 seconds (for 4395 tokens)
	*** Analysis complete! (float32=True) ***



Saving avg. matrices:   0%|          | 0/32 [00:00<?, ?it/s][A
Saving avg. matrices:   3%|▎         | 1/32 [00:15<07:45, 15.03s/it][A
Saving avg. matrices:   6%|▋         | 2/32 [00:24<05:54, 11.81s/it][A
Saving avg. matrices:   9%|▉         | 3/32 [00:33<05:07, 10.62s/it][A
Saving avg. matrices:  12%|█▎        | 4/32 [00:41<04:22,  9.38s/it][A
Saving avg. matrices:  16%|█▌        | 5/32 [00:49<04:03,  9.01s/it][A
Saving avg. matrices:  19%|█▉        | 6/32 [00:58<03:50,  8.85s/it][A
Saving avg. matrices:  22%|██▏       | 7/32 [01:05<03:25,  8.24s/it][A
Saving avg. matrices:  25%|██▌       | 8/32 [01:12<03:08,  7.84s/it][A
Saving avg. matrices:  28%|██▊       | 9/32 [01:20<03:05,  8.09s/it][A
Saving avg. matrices:  31%|███▏      | 10/32 [01:29<03:03,  8.36s/it][A
Saving avg. matrices:  34%|███▍      | 11/32 [01:42<03:21,  9.57s/it][A
Saving avg. matrices:  38%|███▊      | 12/32 [01:54<03:30, 10.54s/it][A
Saving avg. matrices:  41%|████      | 13/32 [02:07<03:32, 11.20s/it

  ✓ Vertical scores: (32, 32, 182)
  ✓ Kurtosis: (32, 32)


In [7]:
# Collapse the per-trace kurtosis maps into one map by averaging
# element-wise across traces (axis 0 of the stacked list).
blackmail_kurts_mean = np.asarray(blackmail_kurts_list).mean(axis=0)
print(f"\n✓ Average blackmail kurtosis shape: {blackmail_kurts_mean.shape}")


✓ Average blackmail kurtosis shape: (32, 32)


In [8]:
# Number of heads to report. The header label below is derived from this
# value so the label and the `k` argument cannot drift apart.
blackmail_top_k = 20
blackmail_receiver_heads = get_top_k_heads(blackmail_kurts_mean, k=blackmail_top_k)

print(f"\nTop-{blackmail_top_k} Receiver Heads:")
# Each entry unpacks to a (layer, head) pair that indexes the averaged
# kurtosis map; ranks are printed 1-based.
# NOTE(review): the printed order looks like descending kurtosis — confirm
# against get_top_k_heads.
for i, (layer, head) in enumerate(blackmail_receiver_heads):
    kurt = blackmail_kurts_mean[layer, head]
    print(f"  {i+1:2d}. Layer {layer:2d}, Head {head:2d} (κ={kurt:6.2f})")


Top-20 Receiver Heads:
   1. Layer 25, Head 15 (κ= 46.88)
   2. Layer  2, Head 21 (κ= 39.19)
   3. Layer 25, Head  4 (κ= 37.68)
   4. Layer  2, Head 12 (κ= 36.64)
   5. Layer 19, Head 17 (κ= 33.56)
   6. Layer 27, Head 12 (κ= 32.23)
   7. Layer 30, Head 17 (κ= 32.10)
   8. Layer 22, Head  5 (κ= 31.97)
   9. Layer  1, Head 19 (κ= 31.51)
  10. Layer  5, Head 30 (κ= 31.23)
  11. Layer 26, Head 11 (κ= 31.20)
  12. Layer 27, Head 13 (κ= 31.18)
  13. Layer 24, Head  5 (κ= 31.00)
  14. Layer 12, Head 17 (κ= 30.59)
  15. Layer 18, Head 12 (κ= 30.42)
  16. Layer 26, Head  8 (κ= 29.65)
  17. Layer  5, Head 24 (κ= 29.58)
  18. Layer 20, Head  6 (κ= 29.56)
  19. Layer 20, Head 16 (κ= 29.50)
  20. Layer  5, Head  4 (κ= 29.47)


## Acceptance Heads

In [9]:
# Collect one kurtosis map per acceptance trace.
# NOTE(review): model_name is "qwen-15b" here and the logged grid is
# 28 layers × 12 heads, whereas the blackmail run's captured output shows
# 32 layers × 32 heads. The two head rankings therefore appear to come from
# different architectures — confirm which model is intended for both groups
# before comparing (layer, head) indices across them.
acceptance_kurts_list = []

# Keyword arguments shared by every trace in this group.
acceptance_vert_kwargs = dict(
    model_name="qwen-15b",
    proximity_ignore=2,
    control_depth=False,
    score_type="mean",
)

for i, trace_dict in enumerate(acceptance_traces):
    text = trace_dict['trace']
    # Split the trace into reasoning steps.
    sentences = extract_reasoning_steps(text)

    print(f"\nTrace {i+1}/{len(acceptance_traces)}: {len(sentences)} steps")

    # Vertical attention scores for every head of this trace.
    layer_head_verts = get_all_heads_vert_scores_simple(
        text=text,
        sentences=sentences,
        **acceptance_vert_kwargs,
    )
    print(f"  ✓ Vertical scores: {layer_head_verts.shape}")

    # Reduce the vertical scores to one kurtosis value per head.
    kurts = get_kurtosis(layer_head_verts)
    print(f"  ✓ Kurtosis: {kurts.shape}")

    acceptance_kurts_list.append(kurts)


Trace 1/50: 62 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 62)
  ✓ Kurtosis: (28, 12)

Trace 2/50: 75 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 75)
  ✓ Kurtosis: (28, 12)

Trace 3/50: 84 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 84)
  ✓ Kurtosis: (28, 12)

Trace 4/50: 149 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 149)
  ✓ Kurtosis: (28, 12)

Trace 5/50: 78 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 78)
  ✓ Kurtosis: (28, 12)

Trace 6/50: 87 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 87)
  ✓ Kurtosis: (28, 12)

Trace 7/50: 80 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 80)
  ✓ Kurtosis: (28, 12)

Trace 8/50: 88 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 88)
  ✓ Kurtosis: (28, 12)

Trace 9/50: 95 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 95)
  ✓ Kurtosis: (28, 12)

Trace 10/50: 153 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 153)
  ✓ Kurtosis: (28, 12)

Trace 11/50: 168 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 168)
  ✓ Kurtosis: (28, 12)

Trace 12/50: 164 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 164)
  ✓ Kurtosis: (28, 12)

Trace 13/50: 168 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 168)
  ✓ Kurtosis: (28, 12)

Trace 14/50: 167 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 167)
  ✓ Kurtosis: (28, 12)

Trace 15/50: 166 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 166)
  ✓ Kurtosis: (28, 12)

Trace 16/50: 170 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 170)
  ✓ Kurtosis: (28, 12)

Trace 17/50: 195 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 195)
  ✓ Kurtosis: (28, 12)

Trace 18/50: 177 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 177)
  ✓ Kurtosis: (28, 12)

Trace 19/50: 66 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 66)
  ✓ Kurtosis: (28, 12)

Trace 20/50: 168 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 168)
  ✓ Kurtosis: (28, 12)

Trace 21/50: 151 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 151)
  ✓ Kurtosis: (28, 12)

Trace 22/50: 169 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 169)
  ✓ Kurtosis: (28, 12)

Trace 23/50: 147 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 147)
  ✓ Kurtosis: (28, 12)

Trace 24/50: 69 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 69)
  ✓ Kurtosis: (28, 12)

Trace 25/50: 68 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 68)
  ✓ Kurtosis: (28, 12)

Trace 26/50: 158 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 158)
  ✓ Kurtosis: (28, 12)

Trace 27/50: 177 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 177)
  ✓ Kurtosis: (28, 12)

Trace 28/50: 177 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 177)
  ✓ Kurtosis: (28, 12)

Trace 29/50: 68 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 68)
  ✓ Kurtosis: (28, 12)

Trace 30/50: 226 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 226)
  ✓ Kurtosis: (28, 12)

Trace 31/50: 88 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 88)
  ✓ Kurtosis: (28, 12)

Trace 32/50: 218 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 218)
  ✓ Kurtosis: (28, 12)

Trace 33/50: 194 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 194)
  ✓ Kurtosis: (28, 12)

Trace 34/50: 86 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 86)
  ✓ Kurtosis: (28, 12)

Trace 35/50: 98 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 98)
  ✓ Kurtosis: (28, 12)

Trace 36/50: 211 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 211)
  ✓ Kurtosis: (28, 12)

Trace 37/50: 63 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 63)
  ✓ Kurtosis: (28, 12)

Trace 38/50: 99 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 99)
  ✓ Kurtosis: (28, 12)

Trace 39/50: 93 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 93)
  ✓ Kurtosis: (28, 12)

Trace 40/50: 133 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 133)
  ✓ Kurtosis: (28, 12)

Trace 41/50: 83 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 83)
  ✓ Kurtosis: (28, 12)

Trace 42/50: 115 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 115)
  ✓ Kurtosis: (28, 12)

Trace 43/50: 163 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 163)
  ✓ Kurtosis: (28, 12)

Trace 44/50: 177 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 177)
  ✓ Kurtosis: (28, 12)

Trace 45/50: 241 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 241)
  ✓ Kurtosis: (28, 12)

Trace 46/50: 87 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 87)
  ✓ Kurtosis: (28, 12)

Trace 47/50: 172 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 172)
  ✓ Kurtosis: (28, 12)

Trace 48/50: 146 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 146)
  ✓ Kurtosis: (28, 12)

Trace 49/50: 197 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 197)
  ✓ Kurtosis: (28, 12)

Trace 50/50: 88 steps
  Computing 28 layers × 12 heads...


                                                         

  ✓ Vertical scores: (28, 12, 88)
  ✓ Kurtosis: (28, 12)




In [10]:
# Average the per-trace kurtosis maps element-wise across traces (axis 0),
# leaving a single map for the acceptance group.
acceptance_kurts_mean = np.asarray(acceptance_kurts_list).mean(axis=0)
print(f"\n✓ Average acceptance kurtosis: {acceptance_kurts_mean.shape}")


✓ Average acceptance kurtosis: (28, 12)


In [11]:
# Number of heads to report. The header label below is derived from this
# value so the label and the `k` argument cannot drift apart.
acceptance_top_k = 20
acceptance_receiver_heads = get_top_k_heads(acceptance_kurts_mean, k=acceptance_top_k)

print(f"\nTop-{acceptance_top_k} Receiver Heads:")
# Each entry unpacks to a (layer, head) pair that indexes the averaged
# kurtosis map; ranks are printed 1-based.
# NOTE(review): the printed order looks like descending kurtosis — confirm
# against get_top_k_heads.
for i, (layer, head) in enumerate(acceptance_receiver_heads):
    kurt = acceptance_kurts_mean[layer, head]
    print(f"  {i+1:2d}. Layer {layer:2d}, Head {head:2d} (κ={kurt:6.2f})")


Top-20 Receiver Heads:
   1. Layer  0, Head  8 (κ= 98.77)
   2. Layer  6, Head  8 (κ= 83.04)
   3. Layer 14, Head  5 (κ= 81.39)
   4. Layer  3, Head  2 (κ= 80.72)
   5. Layer  3, Head  5 (κ= 79.15)
   6. Layer  5, Head  9 (κ= 76.90)
   7. Layer 25, Head  2 (κ= 76.77)
   8. Layer 24, Head  1 (κ= 76.33)
   9. Layer 24, Head  0 (κ= 75.41)
  10. Layer 11, Head  6 (κ= 75.19)
  11. Layer 22, Head  0 (κ= 74.30)
  12. Layer 23, Head  9 (κ= 74.19)
  13. Layer  6, Head 11 (κ= 74.16)
  14. Layer 17, Head 11 (κ= 73.81)
  15. Layer 12, Head  4 (κ= 73.28)
  16. Layer  3, Head 11 (κ= 72.05)
  17. Layer  6, Head 10 (κ= 70.62)
  18. Layer  4, Head  7 (κ= 70.48)
  19. Layer 22, Head  2 (κ= 70.38)
  20. Layer  1, Head  8 (κ= 69.07)
