In [1]:
import sqlite3
from pathlib import Path
from collections import defaultdict

# Open the autointerp SQLite database through a file: URI so it can be marked read-only
autointerp_path = Path("~/autointerp/r1-logic/autointerp.db").expanduser()
autointerp_uri = "file:" + autointerp_path.as_posix() + "?mode=ro"
conn = sqlite3.connect(autointerp_uri, uri=True)  # uri=True makes sqlite3 interpret the file: URI

# convenience function for running queries
def run_query(query: str, conn: sqlite3.Connection, params=()) -> list:
    """Run ``query`` on ``conn`` and return every result row as a dict.

    Args:
        query: SQL text to execute. May contain ``?`` or ``:name`` placeholders.
        conn: Open SQLite connection to execute the query on.
        params: Optional values for the placeholders in ``query`` (a sequence
            for ``?`` placeholders, a dict for ``:name`` placeholders). Binding
            values this way avoids formatting them into the SQL text.

    Returns:
        A list with one ``{column_name: value}`` dict per row, in the order the
        rows were returned. Empty if the query matched no rows or the statement
        does not produce a result set.
    """
    res = conn.execute(query, params)

    # cursor.description is None for statements that return no result set
    # (e.g. CREATE, INSERT, ATTACH); there are no columns to name in that case.
    if res.description is None:
        return []

    column_names = [desc[0] for desc in res.description]
    data = [dict(zip(column_names, row)) for row in res.fetchall()]
    return data

In [2]:
# Peek at a single row to see which columns the autointerp table has
print('Autointerp database example row:')
example = run_query("SELECT * FROM autointerp limit 1", conn)[0]

for column in example:
    print(f'{column}: {example[column]}')

Autointerp database example row:
feature_id: 0
label: Cycles in graphs and algorithms
seqs: []
indices: []
quality: 0.9
interestingness: 0.6
model_name: claude-3-7-sonnet-latest
prompt_version: v0-dev


In [3]:
# List 10 (feature_id, label) pairs from the autointerp table
query = "SELECT feature_id, label FROM autointerp limit 10"
results = run_query(query, conn)

for feature_row in results:
    feature_id, label = feature_row["feature_id"], feature_row["label"]
    print(f'{feature_id}: {label}')

0: Cycles in graphs and algorithms
1: Sorting collections and processing sorted data
3: Competitive programming input specification
5: Algorithm optimization
6: searching for a specific instance or solution
7: Prepositions indicating origin or source
8: Initializing variables to zero in programming
9: Mathematical calculation and numerical reasoning
10: Recognizing computational complexity limitations
11: Once a solution is found, transition to the next step


In [4]:
# attach the tokens db (read-only, like the main database)
tokens_path = Path("~/autointerp/r1-logic/tokens.db").expanduser()
# The filename is bound as a parameter rather than interpolated into the SQL,
# so a path containing a quote character cannot break the statement.
# A file: URI is used because the mode=ro given when opening the main database
# applies to that file only; SQLite accepts URIs in ATTACH here because `conn`
# was opened with uri=True. mode=ro also makes a wrong path fail instead of
# letting SQLite create a new, empty database file.
conn.execute("ATTACH DATABASE ? AS tokens", ("file:" + tokens_path.as_posix() + "?mode=ro",))

<sqlite3.Cursor at 0x7600b8d703c0>

In [5]:
# Peek at a single row to see which columns the tokens table has
print('Tokens database example row:')
example = run_query("SELECT * FROM tokens.tokens limit 1", conn)[0]

for column in example:
    print(f'{column}: {example[column]}')

Tokens database example row:
sequence_id: 0
token_idx: 0
token_int: 0
token_text: <｜begin▁of▁sentence｜>


In [6]:
# attach the SAE latent activations db (read-only, like the main database)
feature_activations_path = Path("~/autointerp/r1-logic/feature_activations.db").expanduser()
# The filename is bound as a parameter rather than interpolated into the SQL,
# so a path containing a quote character cannot break the statement.
# A file: URI is used because the mode=ro given when opening the main database
# applies to that file only; SQLite accepts URIs in ATTACH here because `conn`
# was opened with uri=True. mode=ro also makes a wrong path fail instead of
# letting SQLite create a new, empty database file.
conn.execute(
    "ATTACH DATABASE ? AS feature_activations",
    ("file:" + feature_activations_path.as_posix() + "?mode=ro",),
)

<sqlite3.Cursor at 0x7600b8d70cc0>

In [7]:
# Peek at a single row to see which columns the activations table has
print('SAE latent activations example row:')
example = run_query("SELECT * FROM feature_activations.activations limit 1", conn)[0]

for column in example:
    print(f'{column}: {example[column]}')

SAE latent activations example row:
feature_id: 0
sequence_id: 15
token_idx: 36
strength: 0.2251070737838745


In [8]:
# Here, we will join tables to get the top k activations for a given feature, then print the subsequences around those activations

TOP_K_SEQ = 10        # number of strongest activations to show
FEATURE_ID = 764      # feature to inspect
SEQUENCE_WINDOW = 10  # context size: SEQUENCE_WINDOW // 2 tokens on each side of the peak token

# top_acts: the TOP_K_SEQ strongest activations of FEATURE_ID, with the feature's label.
# subsequences: every token within SEQUENCE_WINDOW // 2 positions of each of those
# activations, one row per (activation, token) pair.
query = f"""
with subsequences as (
    with top_acts as (
        SELECT
            autointerp.feature_id,
            autointerp.label,
            acts.sequence_id,
            acts.token_idx,
            acts.strength
        FROM
            autointerp
        JOIN
            feature_activations.activations acts ON autointerp.feature_id = acts.feature_id
        WHERE
            autointerp.feature_id = {FEATURE_ID}
        ORDER BY
            acts.strength DESC
        LIMIT {TOP_K_SEQ}
    )

    SELECT
        top_acts.feature_id,
        top_acts.label,
        top_acts.sequence_id,
        top_acts.token_idx as top_act_token_idx,
        top_acts.strength,
        tokens.token_idx as token_idx,
        tokens.token_text
    FROM
        tokens.tokens as tokens
    join
        top_acts on tokens.sequence_id = top_acts.sequence_id
    where
        abs(tokens.token_idx - top_acts.token_idx) <= {SEQUENCE_WINDOW//2}
)

SELECT
    *
FROM
    subsequences
"""

results = run_query(query, conn)

# feature_id -> {"label": str, "subsequences": {(sequence_id, top_idx): window}}
# Windows are keyed by (sequence_id, top_idx) rather than sequence_id alone:
# two top activations can fall in the same sequence, and keying by sequence_id
# would merge their token windows and drop the second activation's peak.
subsequences = {}

for row in results:
    feature_entry = subsequences.setdefault(
        row["feature_id"],
        {"label": row["label"], "subsequences": {}},
    )
    window_key = (row["sequence_id"], row["top_act_token_idx"])
    window = feature_entry["subsequences"].setdefault(
        window_key,
        {
            "top_idx": row["top_act_token_idx"],
            "top_strength": row["strength"],
            "tokens": {},
        },
    )
    window["tokens"][row["token_idx"]] = row["token_text"]


for feat_id, feat_data in subsequences.items():
    print(f'{feat_id}: {feat_data["label"]}')

    # The outer SELECT has no ORDER BY, so sort here to list the strongest
    # activation first regardless of the order SQLite returned the rows in.
    windows = sorted(
        feat_data["subsequences"].values(),
        key=lambda w: w["top_strength"],
        reverse=True,
    )

    for seq_data in windows:
        top_idx = seq_data["top_idx"]
        top_token = seq_data["tokens"][top_idx]
        top_strength = seq_data["top_strength"]

        ordered_tokens = sorted(seq_data["tokens"].items(), key=lambda x: x[0])

        before_top = [text for idx, text in ordered_tokens if idx < top_idx]
        after_top = [text for idx, text in ordered_tokens if idx > top_idx]

        # mark the peak token with << >> and repr() the text so newlines stay on one line
        subsequence = repr(''.join(before_top + ["<<" + top_token + ">>"] + after_top))

        print(f'  {top_strength} : {subsequence}')

764: Problem-solving breakthrough confirmation
  1.4216057062149048 : ' votes >= this threshold.\n\n<<Yes>>. So the steps for'
  1.3952926397323608 : ' global_max_abs.\n\n<<Yes>>! Because in all cases'
  1.3868962526321411 : 'prev+1 papers.\n\n<<Yes>>, that makes sense.'
  1.3851138353347778 : ' one in that subset.\n\n<<Yes>>, that makes sense.'
  1.3729127645492554 : "'s the next one.\n\n<<Yes>>, that makes sense."
  1.3646918535232544 : ' the split is done.\n\n<<Yes>>. So the split into'
  1.3638285398483276 : ') are processed first.\n\n<<Yes>>, this makes sense.'
  1.3534859418869019 : " fit within the grid.\n\n<<Yes>>, that's the key"
  1.3534499406814575 : ' maximum of these values.\n\n<<Yes>>. Because for the split'
  1.3437250852584839 : ' in this subgraph.\n\n<<Yes>>, this seems manageable.'
