In [1]:
import duckdb
from pathlib import Path

# Open the autointerp database through DuckDB in read-only mode.
autointerp_path = Path("/mnt/polished-lake/autointerp/r1-logic/autointerp.db").expanduser()
ddb_conn = duckdb.connect(database=autointerp_path, read_only=True)
# Ask the SQLite scanner to expose every column as VARCHAR (SQLite type compatibility).
ddb_conn.execute("SET sqlite_all_varchar=true")

# convenience function for running queries
def run_query(query: str, ddb_conn: duckdb.DuckDBPyConnection, params=None) -> list:
    """Run a SQL query and return its rows as a list of dictionaries.

    Args:
        query: SQL text to execute. May contain prepared-statement
            placeholders (e.g. ``?``) when ``params`` is supplied.
        ddb_conn: Open DuckDB connection to execute the query on.
        params: Optional placeholder values, forwarded unchanged as the
            second argument of ``ddb_conn.execute``. When omitted, the query
            is executed exactly as written.

    Returns:
        One ``{column_name: value}`` dict per result row, in result order.
        An empty list when the statement produced no result set.

    Note:
        Rows are built with ``dict(zip(...))``, so if two result columns
        share a name only the last one is kept.
    """
    if params is None:
        res = ddb_conn.execute(query)
    else:
        res = ddb_conn.execute(query, params)

    # DB-API style cursors report `description = None` when there is no
    # result set; return no rows instead of failing while iterating None.
    if res.description is None:
        return []

    # The first entry of each description tuple is the column name.
    column_names = [desc[0] for desc in res.description]
    return [dict(zip(column_names, row)) for row in res.fetchall()]

In [2]:
# Peek at a single autointerp row and list each column next to its value.
print('Autointerp database example row:')
example = run_query("SELECT * FROM autointerp limit 1", ddb_conn)[0]

for col_name in example:
    col_value = example[col_name]
    print(f'{col_name}: {col_value}')

Autointerp database example row:


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

feature_id: 0
label: Cycles in graphs and algorithms
seqs: []
indices: []
quality: 0.9
interestingness: 0.6
model_name: claude-3-7-sonnet-latest
prompt_version: v0-dev


In [3]:
# Show ten (feature_id, label) pairs from the autointerp table.
query = "SELECT feature_id, label FROM autointerp limit 10"
results = run_query(query=query, ddb_conn=ddb_conn)

for row in results:
    print(f'{row["feature_id"]}: {row["label"]}')

0: Cycles in graphs and algorithms
1: Sorting collections and processing sorted data
3: Competitive programming input specification
5: Algorithm optimization
6: searching for a specific instance or solution
7: Prepositions indicating origin or source
8: Initializing variables to zero in programming
9: Mathematical calculation and numerical reasoning
10: Recognizing computational complexity limitations
11: Once a solution is found, transition to the next step


In [4]:
# Attach the tokens db under the explicit alias `tokens` (its table is queried as tokens.tokens).
# IF NOT EXISTS keeps this cell re-runnable: a second plain ATTACH fails because the alias
# is already in use. READ_ONLY matches the read-only main connection.
ddb_conn.execute(
    "ATTACH IF NOT EXISTS '/mnt/polished-lake/autointerp/r1-logic/tokens.db' AS tokens (READ_ONLY)"
)

<duckdb.duckdb.DuckDBPyConnection at 0x7c6b10693db0>

In [6]:
# Peek at a single row of the attached tokens table, one column per line.
print('Tokens database example row:')
example = run_query("SELECT * FROM tokens.tokens limit 1", ddb_conn)[0]

for col_name in example:
    col_value = example[col_name]
    print(f'{col_name}: {col_value}')

Tokens database example row:


In [7]:
# Attach the SAE latent activations db under the explicit alias `feature_activations`
# (its table is queried as feature_activations.activations).
# IF NOT EXISTS keeps this cell re-runnable: a second plain ATTACH fails because the alias
# is already in use. READ_ONLY matches the read-only main connection.
ddb_conn.execute(
    "ATTACH IF NOT EXISTS '/mnt/polished-lake/autointerp/r1-logic/feature_activations.db' "
    "AS feature_activations (READ_ONLY)"
)

<duckdb.duckdb.DuckDBPyConnection at 0x7bfc33739a70>

In [None]:
# Peek at a single row of the attached activations table, one column per line.
print('SAE latent activations example row:')
example = run_query("SELECT * FROM feature_activations.activations limit 1", ddb_conn)[0]

for col_name in example:
    col_value = example[col_name]
    print(f'{col_name}: {col_value}')

SAE latent activations example row:


In [None]:
# Strongest activations of one SAE feature, with the token each one fired on.
FEATURE_ID = 11  # feature to inspect
TOP_K = 10       # number of activations to show

# The connection sets sqlite_all_varchar=true, so columns read from SQLite files
# arrive as VARCHAR. Cast explicitly: ordering a VARCHAR `strength` would be
# lexicographic ('9.5' sorts above '10.2'), and the feature_id filter should not
# depend on implicit VARCHAR/integer coercion. TRY_CAST yields NULL instead of
# raising on a malformed value; NULLS LAST keeps such rows out of the top results.
query = f"""
SELECT
    autointerp.feature_id,
    autointerp.label,
    acts.strength,
    tokens.decoded_token
FROM
    autointerp
JOIN
    feature_activations.activations acts ON autointerp.feature_id = acts.feature_id
JOIN
    tokens.tokens tokens ON acts.token_idx = tokens.token_idx
WHERE
    TRY_CAST(autointerp.feature_id AS BIGINT) = {int(FEATURE_ID)}
ORDER BY
    TRY_CAST(acts.strength AS DOUBLE) DESC NULLS LAST
LIMIT {int(TOP_K)}
"""
results = run_query(query, ddb_conn)

if results:
    # Every row carries the same feature_id/label, so print the header once.
    print(f'{results[0]["feature_id"]}: {results[0]["label"]}')
    for row in results:
        print(f'{row["decoded_token"]}: {row["strength"]}')
else:
    # Avoid an IndexError on results[0] when the feature has no activations.
    print(f'No activations found for feature {FEATURE_ID}')