#Set-up

In [1]:
from google.colab import drive
drive.mount('/content/drive')

!pip install transformers

Mounted at /content/drive


In [2]:
# set seeds
import random
import numpy as np
import torch

def set_seed(seed):
  random.seed(seed)
  np.random.seed(seed)
  torch.manual_seed(seed)
  torch.cuda.manual_seed(seed)

set_seed(42)

# Load NT model

In [3]:
"loading smallest nucleotide transformer (50m params)"


from transformers import AutoTokenizer, AutoModelForMaskedLM
import torch

num_params = 50 ## default 50

# Import the tokenizer and the model
tokenizer_nt = AutoTokenizer.from_pretrained(f"InstaDeepAI/nucleotide-transformer-v2-{num_params}m-multi-species", trust_remote_code=True)
model_nt = AutoModelForMaskedLM.from_pretrained(f"InstaDeepAI/nucleotide-transformer-v2-{num_params}m-multi-species", trust_remote_code=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/28.7k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/101 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

esm_config.py:   0%|          | 0.00/14.9k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/InstaDeepAI/nucleotide-transformer-v2-50m-multi-species:
- esm_config.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_esm.py:   0%|          | 0.00/58.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/InstaDeepAI/nucleotide-transformer-v2-50m-multi-species:
- modeling_esm.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/224M [00:00<?, ?B/s]

# Load and preprocess addgene dataset

In [4]:
import pandas as pd


# Constants
TEST_DATA_PATH = '/content/drive/MyDrive/NOO_paper/Datasets/WorldWide/BLAST_geac_ext_169k_val_random.csv'
TRAIN_DATA_PATH = '/content/drive/MyDrive/NOO_paper/Datasets/WorldWide/BLAST_geac_ext_169k_train_random.csv'
INFREQUENT_THRESHOLD = 10

def split_test_data(test_data):
    """Split test data into input and target variables."""
    y_test = test_data['nations']
    x_test = test_data[['sequence']]
    return x_test, y_test

def replace_infrequent_labels(labels, threshold=INFREQUENT_THRESHOLD):
    """Identify and replace infrequent labels."""
    label_counts = labels.value_counts()
    infrequent_labels = label_counts[label_counts < threshold].index
    return labels.replace(infrequent_labels, 'infrequent')

def map_labels_to_integers(labels):
    """Map labels to integers."""
    unique_labels = labels.unique()
    return {label: int(i) for i, label in enumerate(unique_labels)}

def without_US(data):
    """Filter out rows where the nation is 'UNITED STATES'."""
    data_wo_US = data[data['nations'] != 'UNITED STATES']
    data_wo_US.reset_index(drop=True, inplace=True)

    data_w_US = data[data['nations'] == 'UNITED STATES']
    data_w_US.reset_index(drop=True, inplace=True)
    return data_wo_US, data_w_US

def US_vs_them(labels):
    """Categorize labels into 'UNITED STATES' and 'NON US'."""
    return labels.apply(lambda x: x if x == 'UNITED STATES' else 'NON US')

def pad_sequence(seq, length, pad_char='N'):
    """Pad sequences to the specified length with the given character."""
    return seq.ljust(length, pad_char)[:length]

# Load data
train_data = pd.read_csv(TRAIN_DATA_PATH)
test_data = pd.read_csv(TEST_DATA_PATH)

print(f'test_data shape: {test_data.shape}')

# Remove US
# train_data, train_data_US = without_US(train_data)
# test_data, test_data_US = without_US(test_data)

print(f'test_data shape: {test_data.shape}')

# Split data
x_train, y_train = train_data[['sequence']], train_data['nations']
x_test, y_test = split_test_data(test_data)

print(f'test_data shape: {y_test.shape}')
print(f'x_train shape: {x_train.shape}')
print(f'y_train shape: {y_train.shape}')

# Combine labels from train and test datasets
processed_labels = pd.concat([y_train, y_test], axis=0, ignore_index=True)
label_to_int = map_labels_to_integers(processed_labels)


# map labels to integers
y_train = y_train.map(label_to_int)
y_test = y_test.map(label_to_int)

print(f'y_test shape: {y_test.shape}')


# reset indices before concat
x_train.reset_index(drop=True, inplace=True)
y_train.reset_index(drop=True, inplace=True)
x_test.reset_index(drop=True, inplace=True)
y_test.reset_index(drop=True, inplace=True)

df_train = pd.concat([x_train, y_train], axis=1)
df_val = pd.concat([x_test, y_test], axis=1)

print(f'test_data shape: {test_data.shape}')


# Filter out sequences shorter than min_length and clean them
min_length = 0
df_train = df_train[df_train['sequence'].str.len() > min_length]
df_val = df_val[df_val['sequence'].str.len() > min_length]

print(f'test_data shape: {test_data.shape}')


# Ensure indices are reset correctly
df_train.reset_index(drop=True, inplace=True)
df_val.reset_index(drop=True, inplace=True)

# Display the split data
print("Train Data Shape:", df_train.shape)
print("Validation Data Shape:", df_val.shape)


test_data shape: (15551, 4)
test_data shape: (15551, 4)
test_data shape: (15551,)
x_train shape: (93306, 1)
y_train shape: (93306,)
y_test shape: (15551,)
test_data shape: (15551, 4)
test_data shape: (15551, 4)
Train Data Shape: (93306, 2)
Validation Data Shape: (15551, 2)


# Set-up & Load SAE

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

cfg = {
    "seed": 49,
    "batch_size": 4096*6,
    "buffer_mult": 384,
    "lr": 5e-5,
    "num_tokens": tokenizer_nt.vocab_size,
    "d_model": 512,
    "l1_coeff": 1e-1,
    "beta1": 0.9,
    "beta2": 0.999,
    "dict_mult": 8, # hidden_d = d_model * dict_mult
    "seq_len": 512,
    "d_mlp": 512,
    "enc_dtype":"fp32",
    "remove_rare_dir": False,
    "total_training_steps": 10000,
    "lr_warm_up_steps": 1000,
    "device": "cuda"
}
cfg["model_batch_size"] = 64
cfg["buffer_size"] = cfg["batch_size"] * cfg["buffer_mult"]
cfg["buffer_batches"] = cfg["buffer_size"] // cfg["seq_len"]

DTYPES = {"fp32": torch.float32, "fp16": torch.float16, "bf16": torch.bfloat16}

class AutoEncoder(nn.Module):
    def __init__(self, cfg):
        super().__init__()
        # HP-choices
        d_hidden = cfg["d_mlp"] * cfg["dict_mult"]
        d_mlp = cfg["d_mlp"]
        self.l0_coeff = cfg.get("l0_coeff", 5)
        self.threshold = cfg.get("activation_threshold", 0.3)
        # Temperature for sigmoid approximation
        self.temperature = cfg.get("temperature", 1.0)
        dtype = DTYPES[cfg["enc_dtype"]]
        torch.manual_seed(cfg["seed"])

        self.W_enc = nn.Parameter(torch.nn.init.kaiming_uniform_(torch.empty(d_mlp, d_hidden, dtype=dtype)))
        self.W_dec = nn.Parameter(torch.nn.init.kaiming_uniform_(torch.empty(d_hidden, d_mlp, dtype=dtype)))
        self.b_enc = nn.Parameter(torch.zeros(d_hidden, dtype=dtype))
        self.b_dec = nn.Parameter(torch.zeros(d_mlp, dtype=dtype))
        self.W_dec.data[:] = self.W_dec / self.W_dec.norm(dim=-1, keepdim=True)

        self.d_hidden = d_hidden
        self.to("cuda") if torch.cuda.is_available() else self.to("cpu")

    def get_continuous_l0(self, x):
        """
        Compute continuous relaxation of L0 norm using sigmoid
        This provides useful gradients unlike the discrete L0
        """
        # Shifted sigmoid to approximate step function
        return torch.sigmoid((x.abs() - self.threshold) / self.temperature)

    def forward(self, x):
        # encoding and decoding of input vec
        x_cent = x - self.b_dec
        pre_acts = x_cent @ self.W_enc + self.b_enc
        acts = F.relu(pre_acts)

        # Compute continuous L0 approximation before thresholding
        l0_proxy = self.get_continuous_l0(acts)

        # Apply hard threshold for forward pass --- This is actually jumprelu (I think!)
        acts_sparse = (acts.abs() > self.threshold).float() * acts
        x_reconstruct = acts_sparse @ self.W_dec + self.b_dec

        # L2 Loss (Reconstruction Loss)
        l2_loss = F.mse_loss(x_reconstruct.float(), x.float(), reduction='none')
        l2_loss = l2_loss.sum(-1)
        l2_loss = l2_loss.mean()

        # Normalized MSE for reporting
        nmse = torch.norm(x - x_reconstruct, p=2) / torch.norm(x, p=2)

        # Continuous L0 loss (using sigmoid approximation)
        l0_loss = l0_proxy.sum(dim=1).mean()

        # Total Loss: reconstruction + sparsity
        loss = l2_loss + self.l0_coeff * l0_loss

        # For monitoring: true L0 count (not used in optimization)
        true_l0 = (acts_sparse.float().abs() > 0).float().sum(dim=1).mean()

        # For monitoring: L1 loss
        l1_loss = acts_sparse.float().abs().sum(-1).mean()

        return loss, x_reconstruct, acts_sparse, l2_loss, nmse, l1_loss, true_l0

    @torch.no_grad()
    def remove_parallel_component_of_grads(self):
        W_dec_normed = self.W_dec / self.W_dec.norm(dim=-1, keepdim=True)
        W_dec_grad_proj = (self.W_dec.grad * W_dec_normed).sum(-1, keepdim=True) * W_dec_normed
        self.W_dec.grad -= W_dec_grad_proj



sae_model = AutoEncoder(cfg)
sae_res = AutoEncoder(cfg)
sae_l10 = AutoEncoder(cfg)


## Load already-trained SAE

In [6]:
weights_path = "/content/drive/MyDrive/SAEs_for_Genomics/Weights/nt50m_sae_+40mtokens.pt"
state_dict = torch.load(weights_path, weights_only=True)
sae_model.load_state_dict(state_dict)

<All keys matched successfully>

# Using trained SAE to interpret the NuclTrans

In [7]:
## load custom functions from utils.py

import sys
sys.path.append('//content/drive/MyDrive/SAEs_for_Genomics')

import importlib
import utils
importlib.reload(utils)

<module 'utils' from '//content/drive/MyDrive/SAEs_for_Genomics/utils.py'>

## Analysing Rare Features (copied & adapted)

In [None]:
val_seqs = df_val['sequence'].tolist()
val_tokens = tokenizer_nt(val_seqs, max_length=512, padding='max_length', truncation=True, return_tensors="pt")

For each feature we can get the frequency at which it's non-zero (per token, averaged across a bunch of batches), and plot a histogram

In [None]:
@torch.no_grad()
def get_freqs(num_batches=20, local_encoder=None):
    if local_encoder is None:
        local_encoder = encoder

    # initialise frequency counters to 0 for all hidden neurons
    act_freq_scores = torch.zeros(4096, dtype=torch.float32).cuda()
    total = 0

    for i in range(num_batches):
        # prepare batch of tokens to input to the model
        tokens = val_tokens[i*cfg['model_batch_size']:(i+1)*cfg['model_batch_size']]

        # run model on batch of tokens
        #_, cache = model.run_with_cache(tokens, stop_at_layer=1, names_filter=utils.get_act_name("post", 0))
        mlp_act = utils.get_layer_activations(model_nt, tokens['input_ids'].cuda(), tokens['attention_mask'].cuda())
        mlp_act = mlp_act[0] # unnest

        # extract mlp activations and reshape for SAE
        mlp_act = mlp_act.reshape(-1, d_mlp)

        # normalise using same approach as for traning (optional)



        # input the acts into an SAE, get the SAEs hidden acts
        loss, x_reconstruct, hidden, l2_loss, nmse, l1_loss, true_l0 = local_encoder(mlp_act) ## acts is the second/third output -> 1/2
        act_freq_scores += (hidden > 0).sum(0) # increase counter if act > 0
        total+=hidden.shape[0]

    act_freq_scores /= total # turn counts into frequencies

    # calc and print number of never activated SAE units
    num_dead = (act_freq_scores==0).float().mean()
    print("Num dead", num_dead)

    return act_freq_scores


In [None]:
d_model = cfg["d_model"]
d_mlp = cfg["d_mlp"]
model = model_nt.cuda()

sae_model.cuda()
sae_model.eval()

freqs = get_freqs(num_batches = 20,
                  local_encoder = sae_model) # what % of time is a hidden unit activated > 0?

NameError: name 'val_tokens' is not defined

In [None]:
# how many sae units are rarely activated?

rare_T = 1e-4 #

print(f'Of {d_model*cfg["dict_mult"]} hidden SAE units, {sum(freqs < rare_T).item()} are very rarely activated')	#


# how many features are dense i.e. activate very often?

often_T = 0.3
print(f'Of {d_model*cfg["dict_mult"]} hidden SAE units, {sum(freqs > often_T).item()} are activated very often')	#

In [None]:
import plotly.express as px

# Add 1e-9 so that dead features show up as log_freq -9
log_freq = (freqs + 10**-9).log10()
log_freq = log_freq.cpu().detach().numpy()

px.histogram(log_freq, title="Log Frequency of Features", histnorm='percent')

In [None]:
import numpy as np

# Get indices where freqs is not 0
mask = freqs != 0
indices = torch.where(mask)[0]
print(indices)

# Save indices to a file
np.save('non_rare_feature_indices.npy', indices.cpu().numpy())

In [None]:
encoder = sae_model # just renaming for simplicity


# Q: why encoder (as opposed to decoder) matrix?

is_rare = freqs < 1e-4 # get bool mask
rare_enc = encoder.W_enc[:, is_rare] # get cols from enc matrix
rare_mean = rare_enc.mean(-1) # average these cols

# cosine similarity of rare features to average rare feature and plot
cosine_sim = rare_mean @ encoder.W_enc / rare_mean.norm() / encoder.W_enc.norm(dim=0)

# move to cpu
cosine_sim = cosine_sim.cpu().detach().numpy()
is_rare = is_rare.cpu().detach().numpy()

px.histogram(cosine_sim,
             title="Cosine Sim with Average Rare Feature",
             color=is_rare,
             labels={"color": "is_rare", "count": "percent", "value": "cosine_sim"},
             marginal="box", histnorm="percent", barmode='overlay')

## Loading test-sequence w annotations

In [8]:
import pandas as pd
import torch
from transformers import AutoTokenizer

def load_and_process_annotations(file_path):
    """Load CSV and add 'valseq_' prefix to seq_id column if not already present."""
    df = pd.read_csv(file_path)
    df['seq_id'] = df['seq_id'].astype(str)
    # Add 'valseq_' prefix only if it's not already there
    df['seq_id'] = df['seq_id'].apply(lambda x: x if x.startswith('valseq_') else f'valseq_{x}')
    return df

def extract_and_tokenize_sequences(df_annotations, df_val, tokenizer_nt):
    """Extract sequence IDs, get corresponding sequences, and tokenize them."""
    # Extract and sort sequence IDs
    seq_ids = list(set(df_annotations['seq_id']))
    # More robust parsing of sequence IDs
    parsed_ids = []
    for seq_id in seq_ids:
        try:
            if 'valseq_' in seq_id:
                parsed_ids.append(int(seq_id.split('valseq_')[1]))
            else:
                parsed_ids.append(int(seq_id))
        except ValueError:
            print(f"Warning: Could not parse seq_id: {seq_id}")
            continue

    seq_ids = sorted(parsed_ids)

    # Get and tokenize sequences
    sequences = df_val['sequence'].iloc[seq_ids].tolist()
    tokens = tokenizer_nt(
        sequences,
        max_length=512,
        padding='max_length',
        truncation=True,
        return_tensors="pt"
    )

    return tokens, seq_ids

# File paths
base_path = '/content/drive/MyDrive/SAEs_for_Genomics'
files = {
    's0': f'{base_path}/ann_of_1000_seqs_set0.csv',
    's1': f'{base_path}/ann_of_1000_seqs_set1.csv',
    's2': f'{base_path}/ann_of_1000_seqs_set2.csv',
}

# Process all files
dfs = {key: load_and_process_annotations(path) for key, path in files.items()}

# Extract and tokenize sequences for each dataset
results = {
    key: extract_and_tokenize_sequences(df, df_val, tokenizer_nt)
    for key, df in dfs.items()
}

# Unpack results if needed
tokens_s0, seq_ids_s0 = results['s0']
tokens_s1, seq_ids_s1 = results['s1']
tokens_s2, seq_ids_s2 = results['s2']

In [None]:
## SANITY CHECKs

for _ in range(100):

    # check that sequencs are not identical at the same poistion
    N = np.random.randint(0, len(seq_ids_s1))
    assert not torch.equal(tokens_s1['input_ids'][N], tokens_s2['input_ids'][N])
    assert not torch.equal(tokens_s1['input_ids'][N], tokens_s0['input_ids'][N])
    assert not torch.equal(tokens_s2['input_ids'][N], tokens_s0['input_ids'][N])

# look at overlap between seq_ids
assert len(set(seq_ids_s1).intersection(set(seq_ids_s2))) == 0
assert len(set(seq_ids_s1).intersection(set(seq_ids_s0))) == 0
assert len(set(seq_ids_s2).intersection(set(seq_ids_s0))) == 0



## From tokenised sequences create df of each token with annotation

### skip for N >= 1000

In [None]:
# Create a table that lists each token in the sequences alongside its annotation(s)

for i in ['s0', 's1', 's2']:

    if i == 's0':
        tokens = tokens_s0['input_ids']
        seq_ids = seq_ids_s0

    elif i == 's1':
        tokens = tokens_s1['input_ids']
        seq_ids = seq_ids_s1
    elif i == 's2':
        tokens = tokens_s2['input_ids']
        seq_ids = seq_ids_s2

    token_df = utils.make_token_df_new(
                          tokens = tokens.squeeze(),
                          tokenizer = tokenizer_nt,
                          df_annotated = dfs[i],
                          seq_ids = seq_ids,
                          len_prefix = 6, ## choice: what should these be?
                          len_suffix = 6,
                          nucleotides_per_token = 6, # particular to this model
                          descriptor_col = 'Feature' # values: Feature, Type, Description
    )
    token_df

    # save token_df
    token_df.to_csv(f'/content/drive/MyDrive/SAEs_for_Genomics/Annotated_seqs/token_df_1k_s{i}.csv', index=False)

NameError: name 'tokens_s0' is not defined

### and load directly

In [9]:
# load token_df for >= 1000 seqs
token_df_1k_s1 = pd.read_csv('/content/drive/MyDrive/SAEs_for_Genomics/Annotated_seqs/token_df_1k_ss1.csv')
token_df_1k_s2 = pd.read_csv('/content/drive/MyDrive/SAEs_for_Genomics/Annotated_seqs/token_df_1k_ss2.csv')
token_df_1k_s0 = pd.read_csv('/content/drive/MyDrive/SAEs_for_Genomics/Annotated_seqs/token_df_1k_ss0.csv')


### Running SAE

Let's go and investigate a non rare feature.

We start by getting the SAE activations for (all) token in our dataset

In [10]:
d_model = cfg["d_model"]
d_mlp = cfg["d_mlp"]
num_layer = 11
batch_size = 128

tokens = tokens_s2 #@param options:

# Calculate batch information
total_tokens = tokens['input_ids'].shape[0] * tokens['input_ids'].shape[1]
num_batches = (total_tokens + batch_size - 1) // batch_size

all_latents = []

# Ensure models are in eval mode
sae_model.eval()
model_nt.eval()

for i in range(num_batches):
   start_idx = i * batch_size
   end_idx = min((i + 1) * batch_size, total_tokens)

   # Reshape tokens for current batch
   batch_input_ids = tokens['input_ids'][start_idx:end_idx].cuda()
   batch_attention_mask = tokens['attention_mask'][start_idx:end_idx].cuda()

   with torch.no_grad():
         # Get MLP activations
          mlp_act = utils.get_layer_activations(model_nt.cuda(),
                                                batch_input_ids,
                                                batch_attention_mask,
                                                layer_N=num_layer)
          mlp_act = mlp_act[0].reshape(-1, d_mlp)

          # Forward pass through SAE
          loss, x_reconstruct, latents, l2_loss, nmse, l1_loss, true_l0 = sae_model(mlp_act)
          all_latents.append(latents)

# Combine results, move to cpu before
all_latents = [x.cpu() for x in all_latents]
combined_latents = torch.cat(all_latents, dim=0).cpu()
torch.cuda.empty_cache()

In [12]:
latent_id = 946 # or set particular int value in range 0, 4095

# we avoid modifying token_df directly as its very time-consuming to reload if we mess it up
token_df_copy = token_df_1k_s2.copy() #

# get the activation value for the N-th unit in the SAE for each input in batch
hidden_act_feature_id = combined_latents[:, latent_id] # N = feature_id

# add this to the dataframe
token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()

# sort to show the most activating tokens on top, add colours
token_df_copy.sort_values(f"latent-{latent_id}-act", ascending=False).head(20
                                                                           ).style.background_gradient("coolwarm")

Unnamed: 0,seq_id,token_pos,tokens,context,token_annotations,context_annotations,e-value annotation,percentage match,latent-946-act
114502,3720,326,CGACTA,CCCCAGGGCCGTACGCACCCTCGCCGCCGCGTTCGC |CGACTA| CCCCGCCACGCGCCACACCGTCGATCCGGACCGCCA,['PuroR'],['PuroR'],[0.],[99.334],11.779472
500043,15254,331,CGACTA,CCCCAGGGCCGTACGCACCCTCGCCGCCGCGTTCGC |CGACTA| CCCCGCCACGCGCCACACCGTCGATCCGGACCGCCA,['PuroR'],['PuroR'],[0.],[99.334],11.647073
156998,4959,326,CGACTA,CCCCAGGGCCGTACGCACCCTCGCCGCCGCGTTCGC |CGACTA| CCCCGCCACGCGCCACACCGTCGATCCGGACCGCCA,['PuroR'],['PuroR'],[0.],[99.334],11.561013
24390,633,326,CGACTA,CCCCAGGGCCGTACGCACCCTCGCCGCCGCGTTCGC |CGACTA| CCCCGCCACGCGCCACACCGTCGATCCGGACCGCCA,['PuroR'],['PuroR'],[0.],[99.334],11.458394
422726,12927,326,CGACTA,CCCCAGGGCCGTACGCACCCTCGCCGCCGCGTTCGC |CGACTA| CCCCGCCACGCGCCACACCGTCGATCCGGACCGCCA,['PuroR'],['PuroR'],[0.],[99.334],11.427544
215783,6676,231,CGACTA,CCCCAGGGCCGTACGCACCCTCGCCGCCGCGTTCGC |CGACTA| CCCCGCCACGCGCCACACCGTCGATCCGGACCGCCA,['PuroR'],['PuroR'],[0.],[99.334],11.115554
215787,6676,235,CGTCGA,CGCCGCGTTCGCCGACTACCCCGCCACGCGCCACAC |CGTCGA| TCCGGACCGCCACATCGAGCGGGTCACCGAGCTGCA,['PuroR'],['PuroR'],[0.],[99.334],11.106042
279783,8440,231,CGACTA,CCCCAGGGCCGTACGCACCCTCGCCGCCGCGTTCGC |CGACTA| CCCCGCCACGCGCCACACCGTCGATCCGGACCGCCA,['PuroR'],['PuroR'],[0.],[99.334],11.072762
279787,8440,235,CGTCGA,CGCCGCGTTCGCCGACTACCCCGCCACGCGCCACAC |CGTCGA| TCCGGACCGCCACATCGAGCGGGTCACCGAGCTGCA,['PuroR'],['PuroR'],[0.],[99.334],11.070346
157002,4959,330,CGTCGA,CGCCGCGTTCGCCGACTACCCCGCCACGCGCCACAC |CGTCGA| TCCGGACCGCCACATCGAGCGGGTCACCGAGCTGCA,['PuroR'],['PuroR'],[0.],[99.334],10.911417


We can now sort and display the top tokens that activate the hidden SAE unit


## Auto-searching monosemantic latents

1. Searching *functional* SAE Latents

In [None]:
def safe_get_annotations(ann_entry):
    if isinstance(ann_entry, str):
        try:
            return eval(ann_entry)
        except:
            return []
    return ann_entry  # already a list

N_latents = 4096
latent_dict = {}
# Then modify the analysis:
for latent_id in range(N_latents):
    hidden_act_feature_id = combined_latents[:, latent_id]
    token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()

    most_activating_tokens = token_df_copy.sort_values(f"latent-{latent_id}-act", ascending=False).head(20)

    # Skip if any activations are 0
    if (most_activating_tokens[f"latent-{latent_id}-act"] == 0).any():
        continue

    annotations = [safe_get_annotations(ann) for ann in most_activating_tokens['token_annotations']]

    if annotations:
        annotation_counts = {}
        for ann_list in annotations:
            for ann in ann_list:
                annotation_counts[ann] = annotation_counts.get(ann, 0) + 1

        common_annotations = {ann for ann, count in annotation_counts.items()
                            if count >= 10 and ann not in {'special token: <cls>', 'special token: <pad>'}}

        if common_annotations:
            latent_dict[latent_id] = common_annotations
            print(f"\nLatent {latent_id} appears to detect: {common_annotations}")
            print("Top 20 activating tokens and their annotations:")
            for _, row in most_activating_tokens.iterrows():
                print(f"Token: {row['tokens']}, Annotations: {safe_get_annotations(row['token_annotations'])}, "
                      f"Activation: {row[f'latent-{latent_id}-act']:.3f}")


Latent 52 appears to detect: {"5' LTR"}
Top 20 activating tokens and their annotations:
Token: GAATCG, Annotations: ["5' LTR"], Activation: 15.712
Token: GGACTC, Annotations: ["5' LTR"], Activation: 15.393
Token: GAATCG, Annotations: ["5' LTR"], Activation: 14.980
Token: ATCGTG, Annotations: ["5' LTR"], Activation: 14.475
Token: GGACTC, Annotations: ["5' LTR"], Activation: 14.333
Token: GAATCG, Annotations: ["5' LTR"], Activation: 14.265
Token: GACTCG, Annotations: ["5' LTR"], Activation: 14.118
Token: ATCCGA, Annotations: ["5' LTR"], Activation: 13.852
Token: AATCGT, Annotations: ["5' LTR"], Activation: 13.348
Token: AATCGT, Annotations: ["5' LTR"], Activation: 13.330
Token: CGAATC, Annotations: ["5' LTR"], Activation: 13.137
Token: GAATCG, Annotations: ["5' LTR (truncated)", 'mir-1937'], Activation: 12.841
Token: GCTGAT, Annotations: ["5' LTR"], Activation: 12.808
Token: GCATCC, Annotations: ["5' LTR"], Activation: 12.794
Token: CTCTTG, Annotations: ["5' LTR"], Activation: 12.584
To

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 130 appears to detect: {'AmpR promoter'}
Top 20 activating tokens and their annotations:
Token: GAACCC, Annotations: ['AmpR promoter'], Activation: 10.737
Token: GAACCC, Annotations: ['AmpR promoter'], Activation: 10.506
Token: GAACCC, Annotations: ['AmpR promoter'], Activation: 10.505
Token: TAACCC, Annotations: ['luxA'], Activation: 10.343
Token: AACCCT, Annotations: ['MEN1'], Activation: 10.328
Token: GAACCC, Annotations: ['AmpR promoter'], Activation: 10.316
Token: AACCCT, Annotations: ['V5 tag'], Activation: 10.310
Token: GAACCG, Annotations: ['OCS terminator'], Activation: 10.048
Token: GAACCC, Annotations: ['AmpR promoter'], Activation: 10.017
Token: AACCCT, Annotations: ['AmpR promoter'], Activation: 10.000
Token: GAACCC, Annotations: ['AmpR promoter'], Activation: 9.981
Token: GAACCC, Annotations: ['AmpR promoter'], Activation: 9.980
Token: GAACCC, Annotations: ['AmpR promoter'], Activation: 9.944
Token: GAACCC, Annotations: ['AmpR promoter'], Activation: 9.930
Token: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 156 appears to detect: {'RDX'}
Top 20 activating tokens and their annotations:
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 30.568
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 18.774
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 17.089
Token: CTGGAT, Annotations: ['RDX'], Activation: 16.562
Token: CGTCGA, Annotations: ['RDX'], Activation: 16.145
Token: AATCAG, Annotations: ['RDX'], Activation: 14.493
Token: A, Annotations: [], Activation: 13.860
Token: GAGCAG, Annotations: ['RDX'], Activation: 13.851
Token: CTTGAA, Annotations: ['RDX'], Activation: 13.681
Token: AAGGAG, Annotations: ['RDX'], Activation: 13.557
Token: GAATTC, Annotations: ['RDX'], Activation: 13.288
Token: CGAAAA, Annotations: ['RDX'], Activation: 13.214
Token: TGGCAA, Annotations: ['RDX'], Activation: 13.123
Token: ATTAAA, Annotations: ['RDX'], Activation: 12.871
Token: GACCAG, Annotations: ['RDX'], Activation: 12.787
Token: CAAATT, Annotations: ['RDX']

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 180 appears to detect: {'SV40 promoter'}
Top 20 activating tokens and their annotations:
Token: CAGGCA, Annotations: ['SV40 promoter'], Activation: 10.203
Token: CAGGCA, Annotations: ['SV40 promoter'], Activation: 10.158
Token: CAGGCA, Annotations: ['SV40 promoter'], Activation: 9.907
Token: CAGGCA, Annotations: ['P/V'], Activation: 9.187
Token: GCAGGC, Annotations: ['SV40 promoter'], Activation: 9.118
Token: CAGGCA, Annotations: ['SV40 promoter'], Activation: 9.110
Token: GCAGGC, Annotations: ['SV40 promoter'], Activation: 9.004
Token: GCAGGC, Annotations: ['SV40 promoter'], Activation: 8.978
Token: GCAGGC, Annotations: ['SV40 promoter'], Activation: 8.909
Token: CAGGCA, Annotations: ['avrBs3'], Activation: 8.908
Token: GCAGGC, Annotations: ['SV40 promoter'], Activation: 8.673
Token: CAGGCA, Annotations: ['AmpR'], Activation: 8.559
Token: CAGGCA, Annotations: ['SV40 promoter'], Activation: 8.519
Token: CAGGCA, Annotations: [], Activation: 8.460
Token: CAGGCA, Annotations: [], 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 187 appears to detect: {'EGFP'}
Top 20 activating tokens and their annotations:
Token: CATTAT, Annotations: ['EGFP'], Activation: 35.795
Token: TATCAT, Annotations: ['EGFP'], Activation: 32.586
Token: TATCAT, Annotations: ['mEGFP'], Activation: 31.173
Token: TATCAT, Annotations: ['mEGFP'], Activation: 31.153
Token: TATCAT, Annotations: ['EGFP'], Activation: 31.076
Token: TATCAT, Annotations: ['EGFP'], Activation: 30.830
Token: TTCTAT, Annotations: ['WNT6'], Activation: 30.711
Token: CTACCT, Annotations: ['E2-Crimson'], Activation: 29.986
Token: CTACCT, Annotations: ['EGFP'], Activation: 29.955
Token: TATCAT, Annotations: ['EGFP'], Activation: 29.941
Token: CAAGCA, Annotations: ['EGFP'], Activation: 29.875
Token: CAAGCA, Annotations: ['EGFP'], Activation: 29.039
Token: CTACAA, Annotations: ['cEGFP'], Activation: 28.760
Token: CAAGCA, Annotations: ['mEGFP'], Activation: 28.622
Token: TATCAT, Annotations: ['mEGFP'], Activation: 28.587
Token: GCCCTG, Annotations: ['EGFP'], Activati

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 196 appears to detect: {'AmpR'}
Top 20 activating tokens and their annotations:
Token: GTGCTG, Annotations: ['bla(M)'], Activation: 14.634
Token: GTGCTG, Annotations: ['AmpR'], Activation: 14.497
Token: GTGCTG, Annotations: ['STAT1'], Activation: 14.107
Token: GTGCTG, Annotations: ['M13 gene III'], Activation: 14.095
Token: GTGCTG, Annotations: ['bla(M)'], Activation: 14.080
Token: GTGCTG, Annotations: ['PDHA1'], Activation: 13.950
Token: GTGCTG, Annotations: ['AmpR'], Activation: 13.938
Token: GTGCTG, Annotations: ['AmpR'], Activation: 13.831
Token: GTGCTG, Annotations: ['AmpR'], Activation: 13.771
Token: GTGCTG, Annotations: ['MBP'], Activation: 13.768
Token: GTGCTG, Annotations: ['fsaA'], Activation: 13.749
Token: GTGCTG, Annotations: ['AmpR'], Activation: 13.742
Token: GTGCTG, Annotations: ['AmpR'], Activation: 13.708
Token: GTGCTG, Annotations: ['AmpR'], Activation: 13.631
Token: GTGCTG, Annotations: ['AmpR'], Activation: 13.620
Token: GTGCTG, Annotations: ['parB'], Activa

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 231 appears to detect: {'CMV promoter', 'UL126'}
Top 20 activating tokens and their annotations:
Token: ACGGGG, Annotations: [], Activation: 14.267
Token: ACGGGG, Annotations: [], Activation: 14.210
Token: ACGGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 13.938
Token: ACGGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 13.934
Token: ACGGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 13.783
Token: ACGGGG, Annotations: [], Activation: 13.485
Token: ACGGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 13.483
Token: ACGGGG, Annotations: [], Activation: 13.429
Token: ACGGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 13.373
Token: ACGGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 13.366
Token: ACGGGG, Annotations: [], Activation: 13.324
Token: ACGGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 13.304
Token: ACGGGG, Annotations: [], Activation: 13.248
Token: ACGGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 13.2

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 243 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: TTATGC, Annotations: ['lac promoter', 'penA'], Activation: 10.734
Token: TTATGC, Annotations: ['apr'], Activation: 10.285
Token: TTATGC, Annotations: ['CMV enhancer'], Activation: 10.060
Token: TAAGGG, Annotations: ['f1 ori'], Activation: 9.790
Token: TAAGGG, Annotations: ['f1 ori'], Activation: 9.730
Token: TTATGC, Annotations: ['CMV enhancer'], Activation: 9.478
Token: TTATGC, Annotations: ['CMV enhancer'], Activation: 9.461
Token: TTATGC, Annotations: ['U58'], Activation: 9.423
Token: TTATGC, Annotations: ['CMV enhancer'], Activation: 9.416
Token: TTAGGC, Annotations: ['RSF1010 RepC'], Activation: 9.406
Token: TTATGC, Annotations: ['CMV enhancer'], Activation: 9.382
Token: TAGGGA, Annotations: ['CMV intron'], Activation: 9.365
Token: TAAGGG, Annotations: ['TcR'], Activation: 9.326
Token: TTATGC, Annotations: ['CMV enhancer'], Activation: 9.273
Token: TTATGC, Annotations: ['CMV enha

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 246 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: GCCGCA, Annotations: ['AmpR'], Activation: 10.567
Token: TTTAGG, Annotations: [], Activation: 10.534
Token: CCACCG, Annotations: ['ori', 'RNAI'], Activation: 10.170
Token: CTCTGA, Annotations: [], Activation: 10.154
Token: CCACCG, Annotations: ['ori', 'RNAI'], Activation: 10.133
Token: CCACCG, Annotations: ['ori', 'RNAI'], Activation: 9.874
Token: CACCAG, Annotations: [], Activation: 9.828
Token: CCACCG, Annotations: ['ori', 'RNAI'], Activation: 9.553
Token: GGATCT, Annotations: ['ori'], Activation: 9.372
Token: TTGAGG, Annotations: [], Activation: 9.288
Token: CCACCG, Annotations: ['ori', 'RNAI'], Activation: 9.237
Token: TTTCGG, Annotations: ['gag-pol'], Activation: 9.211
Token: CCACCG, Annotations: ['ori', 'RNAI'], Activation: 9.093
Token: CCACCG, Annotations: ['ori', 'RNAI'], Activation: 8.940
Token: CCACCA, Annotations: ['Prkn'], Activation: 8.894
Token: CGACGC, Annotations: ['ori'], Acti

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 276 appears to detect: {'AmpR'}
Top 20 activating tokens and their annotations:
Token: CTTACC, Annotations: ['AmpR'], Activation: 10.095
Token: CTTACC, Annotations: ['AmpR'], Activation: 10.047
Token: CTTACC, Annotations: ['AmpR'], Activation: 9.968
Token: CTTACC, Annotations: ['AmpR'], Activation: 9.803
Token: CTTACC, Annotations: ['AmpR'], Activation: 9.796
Token: CTTACC, Annotations: ['AmpR'], Activation: 9.573
Token: CTTACC, Annotations: ['AmpR'], Activation: 9.550
Token: CTTACC, Annotations: ['AmpR'], Activation: 9.509
Token: CTTACC, Annotations: ['AmpR'], Activation: 9.275
Token: CTTACC, Annotations: ['AmpR'], Activation: 9.102
Token: CTTACC, Annotations: ['ori'], Activation: 8.941
Token: CTTACC, Annotations: ['AmpR'], Activation: 8.925
Token: CTTACC, Annotations: ['ZFHD1 (DNA binding domain)'], Activation: 8.922
Token: CTTACC, Annotations: ['Sesn1'], Activation: 8.847
Token: CTTACC, Annotations: ['FCGR1A'], Activation: 8.776
Token: CTTACC, Annotations: ['ori'], Activatio

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 278 appears to detect: {'f1 ori'}
Top 20 activating tokens and their annotations:
Token: CGCTAC, Annotations: ['f1 ori'], Activation: 11.768
Token: CGCTAC, Annotations: ['f1 ori'], Activation: 11.465
Token: CGCTAC, Annotations: ['f1 ori'], Activation: 11.396
Token: CGCTAC, Annotations: ['f1 ori'], Activation: 11.259
Token: CGCTAC, Annotations: ['f1 ori'], Activation: 11.254
Token: CGCTAC, Annotations: ['f1 ori'], Activation: 10.824
Token: CGCTAC, Annotations: ['f1 ori'], Activation: 10.792
Token: CGCTAC, Annotations: ['f1 ori'], Activation: 10.686
Token: CGCTAC, Annotations: ['f1 ori'], Activation: 10.612
Token: CGCTAC, Annotations: ['ori', 'RNAI'], Activation: 10.591
Token: CGCTAC, Annotations: ['f1 ori'], Activation: 10.590
Token: CGCTAC, Annotations: ['f1 ori'], Activation: 10.498
Token: CGCTAC, Annotations: ['ori', 'RNAI'], Activation: 10.481
Token: CGCTAC, Annotations: ['ori', 'RNAI'], Activation: 10.391
Token: CGCTAC, Annotations: ['ori', 'RNAI'], Activation: 10.381
Token

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 299 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: AATAAT, Annotations: ['CMV enhancer'], Activation: 25.203
Token: AATAAT, Annotations: ['CMV enhancer'], Activation: 24.981
Token: AATAAT, Annotations: ['CMV enhancer'], Activation: 24.633
Token: AATAAT, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 24.541
Token: AATAAT, Annotations: ['CMV enhancer'], Activation: 24.522
Token: AATAAT, Annotations: ['CMV enhancer'], Activation: 24.486
Token: AATAAT, Annotations: ['CMV enhancer'], Activation: 24.462
Token: AATAAT, Annotations: ['CMV enhancer'], Activation: 24.332
Token: AATAAT, Annotations: ['CMV enhancer'], Activation: 24.157
Token: AATAAT, Annotations: ['CMV enhancer'], Activation: 24.089
Token: AATAAT, Annotations: ['CMV enhancer'], Activation: 24.069
Token: AATAAT, Annotations: ['CMV enhancer'], Activation: 24.026
Token: AATAAT, Annotations: ['CMV enhancer'], Activation: 24.011
Token: AATAAT, Annotations: ['CMV enha

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 328 appears to detect: {'LYS2'}
Top 20 activating tokens and their annotations:
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 18.248
Token: CGTTTA, Annotations: ['(3-F)Tyr-EGFP'], Activation: 16.338
Token: ATTCAC, Annotations: ['LYS2'], Activation: 12.474
Token: TTTGTA, Annotations: ['LYS2'], Activation: 11.913
Token: TAATTG, Annotations: ['LYS2'], Activation: 11.395
Token: AATTTG, Annotations: ['LYS2'], Activation: 11.104
Token: GCCCGG, Annotations: ['AmpR'], Activation: 10.581
Token: TTCCTT, Annotations: ['LYS2'], Activation: 9.976
Token: AACCTG, Annotations: ['LYS2'], Activation: 9.680
Token: GGTCTG, Annotations: ['KanR', 'kanMX'], Activation: 9.598
Token: TTGGAG, Annotations: ['LYS2'], Activation: 9.558
Token: CGCCTA, Annotations: ['LYS2'], Activation: 9.547
Token: GCTTTT, Annotations: ['AmpR'], Activation: 9.484
Token: GAGAGG, Annotations: ['LYS2'], Activation: 9.420
Token: GGTACG, Annotations: ['LYS2'], Activation: 9.395
Token: CACCTG, Annotations: ['H2

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 335 appears to detect: {'hGH poly(A) signal'}
Top 20 activating tokens and their annotations:
Token: TTCTCC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 21.116
Token: ATTCTC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 19.264
Token: TTCTTC, Annotations: ['C9orf85'], Activation: 15.845
Token: CCTTGG, Annotations: ['hGH poly(A) signal'], Activation: 5.069
Token: ACCCAC, Annotations: ['hGH poly(A) signal'], Activation: 3.940
Token: CTTGGC, Annotations: ['hGH poly(A) signal'], Activation: 3.827
Token: ATTTGA, Annotations: [], Activation: 3.659
Token: GTCCTC, Annotations: [], Activation: 3.603
Token: ACCACT, Annotations: ['hGH poly(A) signal'], Activation: 2.084
Token: TATTAG, Annotations: [], Activation: 1.942
Token: TTTGGT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 1.848
Token: AAGCGA, Annotations: ['C9orf85'], Activation: 1.774
Token: TGATCT, Annotations: ['hGH poly(A) signal'], Activation: 1.565
Token: TGCCAT, Annotations: [], Ac

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 362 appears to detect: {'CMV intron', 'T7 promoter'}
Top 20 activating tokens and their annotations:
Token: CTATAG, Annotations: ['T7 promoter', 'CMV intron'], Activation: 13.259
Token: CTATAA, Annotations: [], Activation: 11.617
Token: CTATAG, Annotations: [], Activation: 11.512
Token: CTATAG, Annotations: ['T7 promoter', 'CMV intron'], Activation: 11.353
Token: CTATAG, Annotations: ['T7 promoter', 'CMV intron'], Activation: 11.294
Token: CTATAA, Annotations: ['ori'], Activation: 11.284
Token: CTATAG, Annotations: ['T7 promoter', 'CMV intron'], Activation: 11.276
Token: CTATAG, Annotations: ['T7 promoter', 'CMV intron'], Activation: 11.274
Token: CTATAG, Annotations: ['T7 promoter', 'CMV intron'], Activation: 11.255
Token: CTATAG, Annotations: ['UL126', 'CMV intron'], Activation: 11.242
Token: CTATAG, Annotations: ['HSPA14'], Activation: 11.181
Token: CTATAA, Annotations: ['ori'], Activation: 11.151
Token: CTATAG, Annotations: ['T7 promoter', 'CMV intron'], Activation: 11.067


  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 380 appears to detect: {'C9orf85'}
Top 20 activating tokens and their annotations:
Token: TGGGAT, Annotations: ['hGH poly(A) signal'], Activation: 16.334
Token: GGGATT, Annotations: ['C8orf17'], Activation: 14.145
Token: TGGGTT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 12.053
Token: CTGGGA, Annotations: ['hGH poly(A) signal'], Activation: 9.247
Token: GGGTTC, Annotations: ['C9orf85'], Activation: 8.661
Token: GGGTTC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 5.329
Token: AGCTGG, Annotations: ['C9orf85'], Activation: 4.793
Token: TGGGAC, Annotations: [], Activation: 3.886
Token: CTAGAA, Annotations: ['CaMKII promoter'], Activation: 3.664
Token: GACTAC, Annotations: ['C9orf85'], Activation: 3.472
Token: GATTCC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 3.258
Token: GGATTC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 2.144
Token: GGCAAT, Annotations: ['EF-1α core promoter'], Activation: 1.921
Token: GGCAAT, Ann

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 416 appears to detect: {'dCas9'}
Top 20 activating tokens and their annotations:
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 21.682
Token: TTTAGA, Annotations: ['dCas9'], Activation: 20.420
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 18.965
Token: TTTATA, Annotations: ['dCas9'], Activation: 17.645
Token: TTTAGC, Annotations: ['dCas9'], Activation: 17.550
Token: CAAACA, Annotations: ['dCas9'], Activation: 17.174
Token: GTTTGT, Annotations: ['dCas9'], Activation: 16.908
Token: GCATTA, Annotations: ['dCas9'], Activation: 16.894
Token: TGTTAT, Annotations: ['dCas9'], Activation: 16.141
Token: GGGTTA, Annotations: ['leu1'], Activation: 15.946
Token: TTTTGA, Annotations: ['dCas9'], Activation: 15.941
Token: ATTTTC, Annotations: ['dCas9'], Activation: 15.540
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 15.473
Token: ATATAA, Annotations: ['dCas9'], Activation: 15.463
Token: TGAGAT, Annotations: ['dCas9'], Activation: 15.396


  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 429 appears to detect: {'AmpR'}
Top 20 activating tokens and their annotations:
Token: ATAGTT, Annotations: ['AmpR'], Activation: 11.528
Token: ATAGTT, Annotations: ['RPOL_BPT7'], Activation: 11.376
Token: ATAGTT, Annotations: ['AmpR'], Activation: 11.217
Token: ATAGTT, Annotations: ['AmpR'], Activation: 10.969
Token: ATAGTT, Annotations: ['bom'], Activation: 10.953
Token: ATAGTT, Annotations: ['AmpR'], Activation: 10.899
Token: ATAGTT, Annotations: ['AmpR'], Activation: 10.768
Token: ATAGTT, Annotations: ['bom'], Activation: 10.295
Token: ATAGTT, Annotations: ['AmpR'], Activation: 10.253
Token: ATAGTT, Annotations: ['AmpR'], Activation: 10.180
Token: ATAGTT, Annotations: ['AmpR'], Activation: 10.156
Token: ATAGTT, Annotations: ['CYC1 terminator'], Activation: 10.138
Token: ATAGTT, Annotations: ['CYC1 terminator'], Activation: 10.135
Token: ATAGTT, Annotations: ['AmpR'], Activation: 10.106
Token: ATAGTT, Annotations: ['AmpR'], Activation: 9.976
Token: ATAGTT, Annotations: ['Amp

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 452 appears to detect: {'RRE'}
Top 20 activating tokens and their annotations:
Token: GCAGAA, Annotations: ['bacterial terminator'], Activation: 11.548
Token: GCAGAA, Annotations: ['HELZ'], Activation: 11.467
Token: GCAGAA, Annotations: ['RRE', 'RRE', 'env'], Activation: 11.335
Token: GCAGAA, Annotations: ['ori'], Activation: 11.299
Token: GCAGAA, Annotations: ['ori'], Activation: 11.107
Token: GCAGAA, Annotations: ['RRE', 'RRE', 'env'], Activation: 11.072
Token: GCAGAA, Annotations: ['TY3B-G'], Activation: 10.991
Token: GCAGAA, Annotations: ['RRE', 'RRE', 'env'], Activation: 10.971
Token: GCAGAA, Annotations: [], Activation: 10.956
Token: GCAGAA, Annotations: ['ITSN1'], Activation: 10.938
Token: GCAGAA, Annotations: ['RRE', 'RRE', 'env'], Activation: 10.886
Token: GCAGAA, Annotations: ['RDX'], Activation: 10.885
Token: GCAGAA, Annotations: ['env'], Activation: 10.884
Token: GCAGAA, Annotations: ['ori'], Activation: 10.875
Token: GCAGAA, Annotations: ['lacZ'], Activation: 10.77

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 454 appears to detect: {'CMV enhancer', 'CMV promoter'}
Top 20 activating tokens and their annotations:
Token: AGGGAC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 10.043
Token: AGGGAC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.948
Token: AGGGAC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.872
Token: AGGGAC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.864
Token: AGGGAC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.836
Token: AGGGAC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.790
Token: AGGGAC, Annotations: ['loxP'], Activation: 9.615
Token: AGGGAC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.598
Token: TGGGAC, Annotations: ['CMV enhancer'], Activation: 9.573
Token: AGGGAC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.518
Token: AGGGAC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.502
Token: AGGGAC, Annotations: ['CMV enhancer', 'C

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 469 appears to detect: {'CMV enhancer', 'CMV promoter'}
Top 20 activating tokens and their annotations:
Token: CCATTG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.472
Token: CCATTG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.466
Token: CCATTG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.353
Token: CCATTG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.207
Token: CCATTG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.175
Token: CCATTG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.154
Token: CCATTG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.128
Token: CCATTG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.095
Token: CCATTG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.039
Token: CCATTG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.033
Token: CCATTG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.0

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 475 appears to detect: {'KanR', 'kanMX'}
Top 20 activating tokens and their annotations:
Token: AGAATA, Annotations: ['TOPBP1'], Activation: 12.273
Token: GAATGA, Annotations: ['KanR', 'kanMX'], Activation: 10.842
Token: CAGAAT, Annotations: [], Activation: 10.682
Token: AAGAAT, Annotations: ['cat', 'PDK intron'], Activation: 10.583
Token: GGGAAT, Annotations: ['Dendra2'], Activation: 10.516
Token: AGAATA, Annotations: ['KanR', 'kanMX'], Activation: 10.460
Token: AGAATA, Annotations: ['KanR', 'kanMX'], Activation: 10.419
Token: GAATGA, Annotations: ['kanMX', 'KanR'], Activation: 10.409
Token: GAATGA, Annotations: ['kanMX', 'KanR'], Activation: 10.376
Token: AGAATA, Annotations: ['KanR', 'kanMX'], Activation: 10.224
Token: GAATGA, Annotations: ['KanR', 'kanMX'], Activation: 10.154
Token: GAATGA, Annotations: ['KanR', 'kanMX'], Activation: 10.147
Token: AGAATA, Annotations: ['luxB'], Activation: 10.141
Token: GAATGA, Annotations: ['KanR', 'kanMX'], Activation: 10.119
Token: GAATG

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 540 appears to detect: {'env'}
Top 20 activating tokens and their annotations:
Token: GAGAGA, Annotations: ['Ac5 promoter'], Activation: 17.506
Token: AGAGAG, Annotations: ['env'], Activation: 17.429
Token: GAGAGA, Annotations: ['env'], Activation: 17.215
Token: GAGAGA, Annotations: ['Ac5 promoter'], Activation: 16.924
Token: TCTCTC, Annotations: ['T7 promoter', 'CMV intron'], Activation: 16.399
Token: GAGAGA, Annotations: ['env'], Activation: 16.110
Token: GAGAGA, Annotations: ['env'], Activation: 16.088
Token: AGAGAG, Annotations: ['env'], Activation: 15.586
Token: GAGAGA, Annotations: ['env'], Activation: 15.442
Token: TCTCTC, Annotations: [], Activation: 14.108
Token: GAGAGA, Annotations: ['env'], Activation: 14.051
Token: GAGACA, Annotations: ['env'], Activation: 13.975
Token: GAGAGA, Annotations: [], Activation: 13.969
Token: GAGACA, Annotations: ['env'], Activation: 13.755
Token: CAGAGA, Annotations: ['env'], Activation: 13.539
Token: GAGAGA, Annotations: ['env'], Activa

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 541 appears to detect: {'C9orf85'}
Top 20 activating tokens and their annotations:
Token: TCTGTC, Annotations: ['C9orf85'], Activation: 17.763
Token: TCTCAC, Annotations: ['C9orf85'], Activation: 13.380
Token: TTCACC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 11.629
Token: TTCACC, Annotations: [], Activation: 10.493
Token: GCCCAG, Annotations: ['C9orf85'], Activation: 8.129
Token: TCACCA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 7.930
Token: CTCCTT, Annotations: [], Activation: 4.918
Token: TTTGAG, Annotations: ['C9orf85'], Activation: 2.500
Token: AACCAA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 2.346
Token: TCAAAA, Annotations: [], Activation: 2.175
Token: AACCCC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 1.901
Token: CAGGGA, Annotations: ['hGH poly(A) signal'], Activation: 1.772
Token: ATTGGG, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 1.605
Token: GAACCA, Annotations: ['hGH poly(A) si

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 654 appears to detect: {"5' LTR"}
Top 20 activating tokens and their annotations:
Token: AATAAA, Annotations: ["5' LTR"], Activation: 14.736
Token: ATAAAA, Annotations: ["5' LTR"], Activation: 14.203
Token: CAATAA, Annotations: ["5' LTR"], Activation: 13.887
Token: ATCGTG, Annotations: ["5' LTR"], Activation: 13.877
Token: TCGCCC, Annotations: ["5' LTR"], Activation: 13.773
Token: GAGCCC, Annotations: ["5' LTR"], Activation: 13.337
Token: CCAATC, Annotations: ['LTR'], Activation: 13.240
Token: GATTGA, Annotations: ["5' LTR"], Activation: 13.237
Token: TAAAAG, Annotations: ["5' LTR"], Activation: 13.121
Token: CCTCCG, Annotations: ["5' LTR"], Activation: 12.992
Token: AGAGCC, Annotations: ["5' LTR"], Activation: 12.909
Token: CGCCCG, Annotations: ["5' LTR"], Activation: 12.833
Token: ATCGTG, Annotations: ["5' LTR"], Activation: 12.820
Token: CCAATC, Annotations: ['LTR'], Activation: 12.756
Token: TCGCCC, Annotations: ["5' LTR"], Activation: 12.749
Token: AGTCCT, Annotations: ["5

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 703 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: AGCTGG, Annotations: ['ori'], Activation: 10.748
Token: AGCTGG, Annotations: ['ori'], Activation: 10.648
Token: AGCTGG, Annotations: [], Activation: 10.589
Token: AGCTGG, Annotations: ['ori'], Activation: 10.285
Token: AGCTGG, Annotations: ['ori'], Activation: 10.273
Token: AGCTGG, Annotations: ['EGR1'], Activation: 10.270
Token: AGCTGG, Annotations: ['ori'], Activation: 10.222
Token: AGCTGG, Annotations: ['ori'], Activation: 10.207
Token: AGCTGG, Annotations: ['ori'], Activation: 10.198
Token: AGCTGG, Annotations: ['MFN1'], Activation: 10.129
Token: AGCTGG, Annotations: ['CUP1 promoter'], Activation: 10.094
Token: AGCTGG, Annotations: ['ori'], Activation: 10.039
Token: AGCTGG, Annotations: ['Cidec'], Activation: 9.981
Token: AGCTGG, Annotations: ['ori'], Activation: 9.950
Token: AGCTGG, Annotations: ['ori'], Activation: 9.881
Token: AGCTGG, Annotations: ['ori'], Activation: 9.866
Token: AGCTG

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 727 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: GACTTG, Annotations: ['ori'], Activation: 10.655
Token: GACTTG, Annotations: ['ori'], Activation: 10.501
Token: GACTTG, Annotations: ['ori'], Activation: 10.405
Token: GACTTG, Annotations: ['ori'], Activation: 10.352
Token: GACTTG, Annotations: ['ori'], Activation: 10.333
Token: GACTTG, Annotations: ['ori'], Activation: 10.293
Token: GACTTG, Annotations: ['ori'], Activation: 10.275
Token: GACTTG, Annotations: ['ori'], Activation: 10.096
Token: GACTTG, Annotations: ['ori'], Activation: 10.050
Token: AACTTG, Annotations: ['gRNA scaffold', 'tracrRNA'], Activation: 10.046
Token: AACTTG, Annotations: ['gRNA scaffold', 'tracrRNA'], Activation: 10.038
Token: GACTTG, Annotations: ['ori'], Activation: 10.033
Token: AACTTG, Annotations: ['tracrRNA', 'tracrRNA'], Activation: 10.020
Token: GACTTG, Annotations: ['ori'], Activation: 9.974
Token: GACTTG, Annotations: ['ori'], Activation: 9.930
Token: GACTTG,

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 729 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: CAACGA, Annotations: ['CMV enhancer'], Activation: 19.346
Token: CAACGA, Annotations: ['CMV enhancer'], Activation: 19.304
Token: CAACGA, Annotations: ['CMV enhancer'], Activation: 19.004
Token: CAACGA, Annotations: ['CMV enhancer'], Activation: 18.956
Token: CAACGA, Annotations: ['CMV enhancer'], Activation: 18.888
Token: CAACGA, Annotations: ['CMV enhancer'], Activation: 18.870
Token: CAACGA, Annotations: ['CMV enhancer'], Activation: 18.817
Token: CAACGA, Annotations: ['CMV enhancer'], Activation: 18.812
Token: CAACGA, Annotations: ['CMV enhancer'], Activation: 18.708
Token: CAACGA, Annotations: ['CMV enhancer'], Activation: 18.664
Token: CAACGA, Annotations: ['CMV enhancer'], Activation: 18.573
Token: CAACGA, Annotations: ['CMV enhancer'], Activation: 18.571
Token: CAACGA, Annotations: ['CMV enhancer'], Activation: 18.450
Token: CAACGA, Annotations: ['CMV enhancer'], Activation: 1

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 772 appears to detect: {'AmpR promoter'}
Top 20 activating tokens and their annotations:
Token: ACAATA, Annotations: ['AmpR promoter'], Activation: 8.672
Token: ACAATA, Annotations: ['AmpR promoter'], Activation: 8.293
Token: ACAATA, Annotations: ['bGH poly(A) signal'], Activation: 8.196
Token: ACAATA, Annotations: ['AmpR promoter'], Activation: 8.186
Token: ACAATA, Annotations: ['bGH poly(A) signal'], Activation: 8.164
Token: ACAATA, Annotations: ['bGH poly(A) signal'], Activation: 8.131
Token: ACAATA, Annotations: [], Activation: 7.974
Token: ACAATA, Annotations: ['AmpR promoter'], Activation: 7.971
Token: ACAATA, Annotations: ['AmpR promoter'], Activation: 7.900
Token: ACAATA, Annotations: ['AmpR promoter'], Activation: 7.899
Token: ACAATA, Annotations: ['AmpR promoter'], Activation: 7.881
Token: ACAATA, Annotations: ['AmpR promoter'], Activation: 7.803
Token: ACAATA, Annotations: ['AmpR promoter'], Activation: 7.752
Token: ACAATA, Annotations: ['AmpR promoter'], Activation:

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 806 appears to detect: {'SV40 promoter'}
Top 20 activating tokens and their annotations:
Token: ATGCAA, Annotations: ['SV40 promoter'], Activation: 13.998
Token: ATGCAA, Annotations: ['SV40 promoter'], Activation: 13.803
Token: ATGCAA, Annotations: ['SV40 promoter'], Activation: 13.504
Token: ATGCAA, Annotations: ['SV40 promoter'], Activation: 13.380
Token: ATGCAA, Annotations: ['SV40 promoter'], Activation: 13.166
Token: ATGCAA, Annotations: ['SV40 promoter'], Activation: 13.034
Token: ATGCAA, Annotations: ['SV40 promoter'], Activation: 12.690
Token: TATGCA, Annotations: ['SV40 promoter'], Activation: 12.685
Token: TATGCA, Annotations: ['SV40 promoter'], Activation: 12.681
Token: TATGCA, Annotations: ['SV40 promoter'], Activation: 12.577
Token: ATGCAA, Annotations: ['SV40 promoter'], Activation: 12.463
Token: ATGCAA, Annotations: ['SV40 promoter'], Activation: 12.391
Token: TATGCA, Annotations: ['SV40 promoter'], Activation: 12.364
Token: TATGCA, Annotations: ['SV40 promoter']

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 808 appears to detect: {"5' LTR"}
Top 20 activating tokens and their annotations:
Token: TAAAGC, Annotations: ["5' LTR"], Activation: 32.437
Token: TAAAGC, Annotations: ["5' LTR"], Activation: 31.762
Token: TAAAGC, Annotations: ["5' LTR"], Activation: 31.026
Token: AAGCCT, Annotations: ["5' LTR"], Activation: 30.720
Token: ATAAAG, Annotations: ["5' LTR"], Activation: 30.212
Token: AAAGCC, Annotations: ["5' LTR"], Activation: 30.081
Token: AAAGCC, Annotations: ["5' LTR"], Activation: 29.663
Token: AAGCCT, Annotations: ["5' LTR (truncated)"], Activation: 27.719
Token: AATAAA, Annotations: ['LTR'], Activation: 27.289
Token: AAGCCT, Annotations: ["5' LTR"], Activation: 27.199
Token: AATAAA, Annotations: ['LTR'], Activation: 27.095
Token: TAAAGC, Annotations: ["5' LTR (truncated)"], Activation: 27.016
Token: CAATAA, Annotations: ['LTR'], Activation: 26.866
Token: AAGCCT, Annotations: ["5' LTR"], Activation: 26.703
Token: CAATAA, Annotations: ['LTR'], Activation: 25.481
Token: AATAAA

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 823 appears to detect: {'SV40 promoter', 'SV40 ori', 'SELP_SV40'}
Top 20 activating tokens and their annotations:
Token: GCAGAG, Annotations: ['SV40 ori', 'SV40 promoter', 'SELP_SV40'], Activation: 11.130
Token: AGCAGA, Annotations: ['ori', 'RNAI'], Activation: 10.961
Token: AGCAGA, Annotations: ['ori', 'RNAI'], Activation: 10.896
Token: AGCAGA, Annotations: ['ori', 'RNAI'], Activation: 10.834
Token: GCAGAG, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 10.778
Token: GCAGAG, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 10.744
Token: GCAGAG, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 10.726
Token: GCAGAG, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 10.487
Token: GCAGAG, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 10.422
Token: GCAGAG, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 10.387
Token: AGCAGA, Annotations: ['ori', 'RNAI'], Acti

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 829 appears to detect: {'capTEV™'}
Top 20 activating tokens and their annotations:
Token: CATCAT, Annotations: ['capTEV™'], Activation: 19.956
Token: CATCAT, Annotations: ['capTEV™'], Activation: 19.369
Token: CCACCA, Annotations: ['9xHis'], Activation: 18.926
Token: CATCAT, Annotations: ['capTEV™'], Activation: 18.620
Token: TCATCA, Annotations: ['capTEV™'], Activation: 18.384
Token: ATCATC, Annotations: ['GCaMP6f'], Activation: 18.376
Token: ATCATC, Annotations: ['GCaMP6m'], Activation: 18.329
Token: CATCAT, Annotations: ['capTEV™'], Activation: 17.684
Token: CATCAT, Annotations: ['capTEV™'], Activation: 17.652
Token: AGAAGA, Annotations: [], Activation: 17.171
Token: ATCATC, Annotations: ['capTEV™'], Activation: 17.125
Token: GCAGCA, Annotations: ['KA2'], Activation: 17.121
Token: GCAGCA, Annotations: [], Activation: 17.042
Token: AGAAGA, Annotations: [], Activation: 17.008
Token: CATCAT, Annotations: ['capTEV™'], Activation: 16.851
Token: GCAGCA, Annotations: ['KA2'], Activ

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 847 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 25.070
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 24.788
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 24.385
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 24.266
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 24.234
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 24.131
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 24.073
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 24.062
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 23.930
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 23.907
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 23.878
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 23.849
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 23.670
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 2

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 877 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: GGCAGT, Annotations: ['CMV enhancer'], Activation: 19.225
Token: GGCAGT, Annotations: ['CMV enhancer'], Activation: 19.192
Token: GGCAGT, Annotations: ['CMV enhancer'], Activation: 19.112
Token: GGCAGT, Annotations: ['CMV enhancer'], Activation: 19.088
Token: GGCAGT, Annotations: ['CMV enhancer'], Activation: 19.046
Token: GGCAGT, Annotations: ['CMV enhancer'], Activation: 18.924
Token: GGCAGT, Annotations: ['CMV enhancer'], Activation: 18.871
Token: GGCAGT, Annotations: ['CMV enhancer'], Activation: 18.821
Token: GGCAGT, Annotations: ['CMV enhancer'], Activation: 18.779
Token: GGCAGT, Annotations: ['CMV enhancer'], Activation: 18.684
Token: GGCAGT, Annotations: ['CMV enhancer'], Activation: 18.623
Token: GGCAGT, Annotations: ['CMV enhancer'], Activation: 18.612
Token: GGCAGT, Annotations: ['CMV enhancer'], Activation: 18.611
Token: GGCAGT, Annotations: ['CMV enhancer'], Activation: 1

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 887 appears to detect: {'C9orf85'}
Top 20 activating tokens and their annotations:
Token: ACCCAG, Annotations: [], Activation: 13.423
Token: AAGATT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 12.924
Token: TGTGTC, Annotations: ['C16orf89'], Activation: 11.693
Token: AACCCC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 11.663
Token: TGAAAC, Annotations: ['C16orf89'], Activation: 11.567
Token: GAACCC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 10.442
Token: AACCCT, Annotations: ['C16orf89'], Activation: 8.878
Token: GTCTCT, Annotations: ['C16orf89'], Activation: 8.130
Token: TGAGCC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 7.517
Token: GTGCCA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 7.168
Token: GATTCC, Annotations: ['tRNA'], Activation: 6.848
Token: CGCAAC, Annotations: ['C16orf89'], Activation: 6.433
Token: ATGCCT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 6.086
Token: AATT

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 890 appears to detect: {'AmpR'}
Top 20 activating tokens and their annotations:
Token: CGCCGG, Annotations: ['lacZ'], Activation: 12.561
Token: CGCCGG, Annotations: ['lacZ'], Activation: 12.308
Token: CGCCGG, Annotations: ['FZD10'], Activation: 12.051
Token: CGCCGG, Annotations: ['mEmerald'], Activation: 11.857
Token: CGCCGG, Annotations: ['mGreenLantern'], Activation: 11.841
Token: CGCCGG, Annotations: ['AmpR'], Activation: 11.666
Token: CGCCGG, Annotations: ['EGFP'], Activation: 11.665
Token: CGCCGG, Annotations: ['AmpR'], Activation: 11.646
Token: CGCCGG, Annotations: ['AmpR'], Activation: 11.622
Token: CGCCGG, Annotations: ['AmpR'], Activation: 11.581
Token: CGCCGG, Annotations: ['EGFP'], Activation: 11.414
Token: CGCCGG, Annotations: ['AmpR'], Activation: 11.394
Token: CGCCGG, Annotations: ['TcR'], Activation: 11.380
Token: CGCCGG, Annotations: ['AmpR'], Activation: 11.380
Token: CGCCGG, Annotations: ['EGFP'], Activation: 11.269
Token: CGCCGG, Annotations: ['AmpR'], Activa

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 895 appears to detect: {'ori', 'RNAI'}
Top 20 activating tokens and their annotations:
Token: TTTGTT, Annotations: ['ori', 'RNAI'], Activation: 14.829
Token: TTTGTT, Annotations: ['TY3B-I'], Activation: 13.357
Token: TTTGTT, Annotations: ['ori', 'RNAI'], Activation: 13.019
Token: TTTGTT, Annotations: ['ori', 'RNAI'], Activation: 12.984
Token: TTTGTT, Annotations: ['ori', 'RNAI'], Activation: 12.961
Token: TTTGTT, Annotations: ['ori', 'RNAI'], Activation: 12.833
Token: TTTGTT, Annotations: ['ori', 'RNAI'], Activation: 12.793
Token: TTTGTT, Annotations: ['ori', 'RNAI'], Activation: 12.751
Token: TTTGTT, Annotations: ['ori', 'RNAI'], Activation: 12.633
Token: TTTGTT, Annotations: [], Activation: 12.411
Token: TTTGTT, Annotations: ['MFN1'], Activation: 12.399
Token: TTTGTT, Annotations: ['ori', 'RNAI'], Activation: 12.388
Token: TTTGTT, Annotations: ['ori', 'RNAI'], Activation: 12.328
Token: TTTGTT, Annotations: ['luxA'], Activation: 12.323
Token: TTTGTT, Annotations: ['ori', 'RNAI

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 908 appears to detect: {'chimeric intron'}
Top 20 activating tokens and their annotations:
Token: CGTGCG, Annotations: ['chimeric intron'], Activation: 14.393
Token: GCCGGG, Annotations: ['chimeric intron'], Activation: 14.164
Token: CCGGGG, Annotations: ['chimeric intron'], Activation: 13.910
Token: CGCGGG, Annotations: ['chimeric intron'], Activation: 13.904
Token: CCGGGG, Annotations: ['chimeric intron'], Activation: 13.770
Token: GCGGCG, Annotations: ['chicken β-actin promoter'], Activation: 13.635
Token: GCGGCG, Annotations: ['chimeric intron', 'chicken β-actin promoter'], Activation: 13.579
Token: CTGCGG, Annotations: ['chimeric intron'], Activation: 13.457
Token: CGAGGG, Annotations: ['chicken β-actin promoter'], Activation: 13.447
Token: CGTGCG, Annotations: ['chimeric intron'], Activation: 13.436
Token: GGCGAG, Annotations: ['chicken β-actin promoter'], Activation: 13.428
Token: GCGCGG, Annotations: ['chimeric intron'], Activation: 13.411
Token: CGGGCG, Annotations: ['

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 929 appears to detect: {'ori', 'RNAI'}
Top 20 activating tokens and their annotations:
Token: CGCAAC, Annotations: ['C16orf89'], Activation: 14.350
Token: CTCGAG, Annotations: ['9xHis'], Activation: 10.277
Token: AAAAAC, Annotations: ['ori', 'RNAI'], Activation: 10.256
Token: CTCGAG, Annotations: ['9xHis'], Activation: 9.771
Token: AAAAAC, Annotations: ['ori', 'RNAI'], Activation: 9.474
Token: AAAAAC, Annotations: ['ori', 'RNAI'], Activation: 9.104
Token: GAGCAC, Annotations: ['9xHis'], Activation: 8.934
Token: AAAAAC, Annotations: ['ori', 'RNAI'], Activation: 8.862
Token: AAAAAC, Annotations: ['ori', 'RNAI'], Activation: 8.738
Token: AAAAAC, Annotations: ['ori', 'RNAI'], Activation: 8.606
Token: AAAAAC, Annotations: ['ori', 'RNAI'], Activation: 8.579
Token: AAAAAC, Annotations: ['ori', 'RNAI'], Activation: 8.393
Token: AAAAAC, Annotations: ['ori', 'RNAI'], Activation: 8.252
Token: AAAAAC, Annotations: ['ori', 'RNAI'], Activation: 8.099
Token: AAAAAC, Annotations: ['ori', 'RNAI

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 946 appears to detect: {'PuroR'}
Top 20 activating tokens and their annotations:
Token: CGACTA, Annotations: ['PuroR'], Activation: 11.779
Token: CGACTA, Annotations: ['PuroR'], Activation: 11.647
Token: CGACTA, Annotations: ['PuroR'], Activation: 11.561
Token: CGACTA, Annotations: ['PuroR'], Activation: 11.458
Token: CGACTA, Annotations: ['PuroR'], Activation: 11.428
Token: CGACTA, Annotations: ['PuroR'], Activation: 11.116
Token: CGTCGA, Annotations: ['PuroR'], Activation: 11.106
Token: CGACTA, Annotations: ['PuroR'], Activation: 11.073
Token: CGTCGA, Annotations: ['PuroR'], Activation: 11.070
Token: CGTCGA, Annotations: ['PuroR'], Activation: 10.911
Token: CGTCGA, Annotations: ['PuroR'], Activation: 10.855
Token: CGTCGA, Annotations: ['PuroR'], Activation: 10.847
Token: CGAGTA, Annotations: ['PuroR'], Activation: 10.781
Token: CGAGTA, Annotations: ['PuroR'], Activation: 10.769
Token: CGTCGA, Annotations: ['PuroR'], Activation: 10.753
Token: CACGCG, Annotations: ['PuroR'], Ac

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 954 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: GTGCAC, Annotations: ['ori'], Activation: 12.229
Token: GTGCAC, Annotations: ['ori'], Activation: 12.187
Token: GTGCAC, Annotations: ['ori'], Activation: 12.164
Token: GTGCAC, Annotations: ['ori'], Activation: 12.149
Token: GTGCAC, Annotations: ['ori'], Activation: 12.077
Token: GTGCAC, Annotations: ['ori'], Activation: 12.060
Token: GTGCAC, Annotations: ['ori'], Activation: 11.999
Token: GTGCAC, Annotations: ['ori'], Activation: 11.991
Token: GTGCAC, Annotations: ['ori'], Activation: 11.617
Token: GTGCAC, Annotations: ['ori'], Activation: 11.543
Token: GTGCAC, Annotations: ['WNT2B'], Activation: 11.291
Token: GTGCAC, Annotations: ['ori'], Activation: 11.239
Token: GTGCAC, Annotations: ['ori'], Activation: 10.998
Token: GTGCAC, Annotations: ['ori'], Activation: 10.975
Token: GTGCAC, Annotations: ['AmpR'], Activation: 10.925
Token: GTGCAC, Annotations: ['ori'], Activation: 10.870
Token: GTGCAC,

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 957 appears to detect: {'SmR'}
Top 20 activating tokens and their annotations:
Token: ACATCA, Annotations: ['SmR'], Activation: 22.688
Token: ACATCA, Annotations: ['SmR'], Activation: 22.467
Token: ACATCA, Annotations: ['SmR'], Activation: 20.774
Token: ACATCA, Annotations: ['SmR'], Activation: 20.001
Token: ACATCA, Annotations: ['SmR'], Activation: 19.966
Token: ACATCA, Annotations: ['SmR'], Activation: 19.956
Token: ACATCA, Annotations: ['SmR'], Activation: 19.288
Token: AAGTCA, Annotations: ['SmR'], Activation: 13.904
Token: AAGTCA, Annotations: ['SmR'], Activation: 13.840
Token: AAGTCA, Annotations: ['SmR'], Activation: 13.463
Token: AAGTCA, Annotations: ['SmR'], Activation: 12.465
Token: AAGTCA, Annotations: ['SmR'], Activation: 12.459
Token: AAGTCA, Annotations: ['SmR'], Activation: 12.177
Token: AAGTCA, Annotations: ['SmR'], Activation: 12.158
Token: ACATTC, Annotations: [], Activation: 11.247
Token: TTCCGT, Annotations: ['SmR'], Activation: 9.860
Token: TTCCGT, Annotati

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 962 appears to detect: {'SV40 promoter', 'SV40 ori', 'SELP_SV40'}
Top 20 activating tokens and their annotations:
Token: TTTATT, Annotations: ['chicken β-actin promoter'], Activation: 16.153
Token: TTTATT, Annotations: ['chicken β-actin promoter'], Activation: 14.968
Token: TTTATT, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 14.879
Token: TTATTT, Annotations: ['chicken β-actin promoter'], Activation: 14.496
Token: TTTATT, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 14.401
Token: TTTATT, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 14.249
Token: TTTATT, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 14.248
Token: TTTATT, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 14.179
Token: TTATTT, Annotations: ['chicken β-actin promoter'], Activation: 14.172
Token: TTTATT, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 14.154
Token: TTTATT, Annotatio

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 967 appears to detect: {'CMV enhancer', 'CMV IE94 promoter'}
Top 20 activating tokens and their annotations:
Token: CGTCAA, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 18.459
Token: CGTCAA, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 18.458
Token: CGTCAA, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 18.292
Token: CGTCAA, Annotations: ['CMV promoter'], Activation: 18.216
Token: CGTCAA, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 18.182
Token: CGTCAA, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 18.149
Token: CGTCAA, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 18.019
Token: CGTCAA, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 18.018
Token: CGTCAA, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 17.992
Token: CGTCAA, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 17.942
Token: CGTCAA, Annotations: ['CMV enhancer',

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 990 appears to detect: {'AmpR promoter'}
Top 20 activating tokens and their annotations:
Token: AAATGT, Annotations: ['AmpR promoter'], Activation: 11.098
Token: AAATGT, Annotations: ['AmpR promoter'], Activation: 10.891
Token: AAATGT, Annotations: ['AmpR promoter'], Activation: 10.880
Token: AATGTG, Annotations: ['SV40 promoter'], Activation: 10.851
Token: AAATGT, Annotations: ['AmpR promoter'], Activation: 10.849
Token: AAATGT, Annotations: ['AmpR promoter'], Activation: 10.726
Token: AAATGT, Annotations: ['AmpR promoter'], Activation: 10.647
Token: AAATGT, Annotations: ['AmpR promoter'], Activation: 10.576
Token: AAATGT, Annotations: [], Activation: 10.469
Token: AAATGT, Annotations: ['AmpR promoter'], Activation: 10.380
Token: AATGTG, Annotations: ['AmpR promoter'], Activation: 10.246
Token: AAATGT, Annotations: ['AmpR promoter'], Activation: 10.227
Token: AAATGT, Annotations: ['AmpR promoter'], Activation: 10.183
Token: AAATGT, Annotations: ['AmpR promoter'], Activation: 1

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1001 appears to detect: {'f1 ori'}
Top 20 activating tokens and their annotations:
Token: AAAACT, Annotations: ['f1 ori'], Activation: 10.720
Token: AAAACT, Annotations: ['f1 ori'], Activation: 10.208
Token: AAAACT, Annotations: ['f1 ori'], Activation: 9.628
Token: AAAACT, Annotations: ['f1 ori'], Activation: 9.375
Token: AAAACT, Annotations: ['f1 ori'], Activation: 9.319
Token: AAAACT, Annotations: ['f1 ori'], Activation: 8.933
Token: AAAACT, Annotations: ['f1 ori'], Activation: 8.666
Token: AAAACT, Annotations: [], Activation: 8.586
Token: AAAACT, Annotations: ['f1 ori'], Activation: 8.568
Token: AAAACT, Annotations: ['f1 ori'], Activation: 8.565
Token: AAAACT, Annotations: ['f1 ori'], Activation: 8.563
Token: AAAACT, Annotations: ['f1 ori'], Activation: 8.514
Token: AAAACT, Annotations: ['f1 ori'], Activation: 8.508
Token: AAAACT, Annotations: ['f1 ori'], Activation: 8.499
Token: AAAACT, Annotations: ['f1 ori'], Activation: 8.348
Token: AAAACT, Annotations: ['f1 ori'], Activ

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1025 appears to detect: {'CMV enhancer', 'CMV IE94 promoter'}
Top 20 activating tokens and their annotations:
Token: CCCTAT, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 12.123
Token: CCCTAT, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 11.934
Token: CCCTAT, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 11.901
Token: CCCTAT, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 11.797
Token: CCCTAT, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 11.498
Token: CCCTAT, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 11.491
Token: CCCTAT, Annotations: ['B4GALT1'], Activation: 11.452
Token: CCCTAT, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 11.099
Token: CCCTAT, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 11.013
Token: CCCTAT, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 11.008
Token: CCCTAT, Annotations: ['CMV enhancer', 'CM

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1041 appears to detect: {'C9orf85', 'hGH poly(A) signal'}
Top 20 activating tokens and their annotations:
Token: TTCACC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 13.323
Token: TTCACC, Annotations: [], Activation: 11.681
Token: TCACCA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 9.163
Token: AACCAA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 7.843
Token: TCTCAC, Annotations: ['C9orf85'], Activation: 6.595
Token: TTTGAG, Annotations: ['C9orf85'], Activation: 2.952
Token: GTTTTG, Annotations: [], Activation: 1.782
Token: GCTGGA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 1.682
Token: GCAATT, Annotations: ['hGH poly(A) signal'], Activation: 1.500
Token: ACAGAG, Annotations: ['C9orf85'], Activation: 1.197
Token: TCGATC, Annotations: ['C8orf17'], Activation: 1.183
Token: GGGTCT, Annotations: ['hGH poly(A) signal'], Activation: 1.053
Token: GCACTT, Annotations: ['C16orf89'], Activation: 0.797
Token: CCTGCG, Annotatio

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1062 appears to detect: {'SV40 promoter'}
Top 20 activating tokens and their annotations:
Token: GGAAAG, Annotations: ['SV40 promoter'], Activation: 13.175
Token: GGAAAG, Annotations: ['SV40 promoter'], Activation: 12.832
Token: GGAAAG, Annotations: ['SV40 promoter'], Activation: 12.300
Token: GGAAAG, Annotations: ['SV40 promoter'], Activation: 12.092
Token: GGAAAG, Annotations: ['SV40 promoter'], Activation: 12.007
Token: GGAAAG, Annotations: ['avrBs3'], Activation: 11.968
Token: GGAAAG, Annotations: ['SV40 promoter'], Activation: 11.924
Token: GGAAAG, Annotations: ['SV40 promoter'], Activation: 11.851
Token: GGAAAG, Annotations: [], Activation: 11.764
Token: GGAAAG, Annotations: [], Activation: 11.636
Token: GGAAAG, Annotations: ['SV40 promoter'], Activation: 11.565
Token: GGAAAG, Annotations: ['avrXa10'], Activation: 11.403
Token: GGAAAG, Annotations: ['SV40 promoter'], Activation: 11.260
Token: GGAAAG, Annotations: ['SV40 promoter'], Activation: 11.229
Token: GGAAAG, Annota

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1136 appears to detect: {'AmpR promoter'}
Top 20 activating tokens and their annotations:
Token: TACAAT, Annotations: ['Ubc4'], Activation: 11.826
Token: GCGGAA, Annotations: ['AmpR promoter'], Activation: 11.139
Token: TCTAGG, Annotations: ['IRES'], Activation: 10.981
Token: GCGGAA, Annotations: ['AmpR promoter'], Activation: 10.851
Token: GCGGAA, Annotations: ['AmpR promoter'], Activation: 10.812
Token: GCGGAA, Annotations: ['AmpR promoter'], Activation: 10.803
Token: GCGGAA, Annotations: ['AmpR promoter'], Activation: 10.799
Token: GCGGAA, Annotations: ['AmpR promoter'], Activation: 10.705
Token: N, Annotations: [], Activation: 10.555
Token: CGGGCG, Annotations: ['UbC promoter'], Activation: 10.528
Token: TTACTG, Annotations: ['Cdh3'], Activation: 10.338
Token: GCGGAA, Annotations: ['AmpR promoter'], Activation: 10.272
Token: AGGGTG, Annotations: ['LTR'], Activation: 10.229
Token: GTTCAG, Annotations: ['ori'], Activation: 10.222
Token: <cls>, Annotations: ['special token: <c

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1144 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: GATTAT, Annotations: ['CMV enhancer'], Activation: 13.222
Token: GATTAT, Annotations: ['CMV enhancer'], Activation: 13.173
Token: GATTAT, Annotations: ['CMV enhancer'], Activation: 12.949
Token: GATTAT, Annotations: ['CMV enhancer'], Activation: 12.463
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 12.446
Token: GATTAT, Annotations: ['CMV enhancer'], Activation: 12.411
Token: GATTAT, Annotations: ['CMV enhancer'], Activation: 12.377
Token: GATTAT, Annotations: ['CMV enhancer'], Activation: 12.367
Token: GATTAT, Annotations: ['CMV enhancer'], Activation: 12.158
Token: GATTAT, Annotations: ['CMV enhancer'], Activation: 12.126
Token: ATTGAC, Annotations: ['CMV enhancer'], Activation: 11.984
Token: TGACTA, Annotations: ['CMV enhancer'], Activation: 11.843
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 11.656
Token: GATTAT, Annotations: ['CMV enhancer']

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1182 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 22.039
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 21.855
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 21.733
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 21.545
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 21.485
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 21.423
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 21.372
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 21.362
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 21.327
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 21.257
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 21.223
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 21.166
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 21.165
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1239 appears to detect: {'chicken β-actin promoter'}
Top 20 activating tokens and their annotations:
Token: TATTTA, Annotations: ['chicken β-actin promoter'], Activation: 22.277
Token: TATTTA, Annotations: [], Activation: 22.216
Token: TTATTT, Annotations: ['chicken β-actin promoter'], Activation: 22.109
Token: TATTTT, Annotations: ['chicken β-actin promoter'], Activation: 21.918
Token: ATTTAT, Annotations: ['chicken β-actin promoter'], Activation: 21.723
Token: TATTTA, Annotations: ['chicken β-actin promoter'], Activation: 21.042
Token: TTTATT, Annotations: ['chicken β-actin promoter'], Activation: 20.656
Token: TATTTT, Annotations: ['chicken β-actin promoter'], Activation: 20.309
Token: ATTTTT, Annotations: ['chicken β-actin promoter'], Activation: 20.090
Token: TATTTA, Annotations: ['chicken β-actin promoter'], Activation: 19.838
Token: TTATTT, Annotations: ['chicken β-actin promoter'], Activation: 19.700
Token: ATTTTT, Annotations: ['chicken β-actin promoter'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1240 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: TGACCT, Annotations: ['CMV enhancer'], Activation: 18.946
Token: TGACCT, Annotations: ['CMV enhancer'], Activation: 18.934
Token: TGACCT, Annotations: ['CMV enhancer'], Activation: 18.797
Token: TGACCT, Annotations: ['CMV enhancer'], Activation: 18.627
Token: TGACCT, Annotations: ['CMV enhancer'], Activation: 18.573
Token: TGACCT, Annotations: ['CMV enhancer'], Activation: 18.571
Token: TGACCT, Annotations: ['CMV enhancer'], Activation: 18.552
Token: TGACCT, Annotations: ['CMV enhancer'], Activation: 18.545
Token: TGACCT, Annotations: ['CMV enhancer'], Activation: 18.392
Token: TGACCT, Annotations: ['CMV enhancer'], Activation: 18.249
Token: TGACCT, Annotations: ['CMV enhancer'], Activation: 18.248
Token: TGACCT, Annotations: ['CMV enhancer'], Activation: 18.192
Token: TGACCT, Annotations: ['CMV enhancer'], Activation: 18.148
Token: TGACCT, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1251 appears to detect: {'CMV enhancer', 'CMV promoter'}
Top 20 activating tokens and their annotations:
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 12.326
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 12.283
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 12.238
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 12.156
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 11.861
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 11.849
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 11.791
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 11.789
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 11.776
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 11.685
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 11.628
Token

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1264 appears to detect: {'env'}
Top 20 activating tokens and their annotations:
Token: GTGGAG, Annotations: ['env'], Activation: 11.742
Token: TGGTGC, Annotations: ['RRE', 'env'], Activation: 11.182
Token: GTGGAG, Annotations: ['env'], Activation: 11.165
Token: TGGTGC, Annotations: ['RRE', 'env'], Activation: 11.074
Token: TGGTGC, Annotations: ['RRE', 'env'], Activation: 11.036
Token: TGGTGC, Annotations: ['RRE', 'env'], Activation: 10.785
Token: AGACAT, Annotations: ['cPPT/CTS', 'gag-pol'], Activation: 10.750
Token: TGGTGC, Annotations: ['RRE', 'env'], Activation: 10.738
Token: AGACAT, Annotations: ['cPPT/CTS', 'gag-pol'], Activation: 10.684
Token: GTGCAG, Annotations: ['RRE', 'env'], Activation: 10.558
Token: CAACAG, Annotations: ['cPPT/CTS', 'gag-pol'], Activation: 10.341
Token: GTGCAG, Annotations: ['RRE', 'env'], Activation: 10.330
Token: ACATAC, Annotations: ['cPPT/CTS', 'gag-pol'], Activation: 10.316
Token: ACAAAC, Annotations: ['cPPT/CTS', 'gag-pol'], Activation: 10.304

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1265 appears to detect: {'SV40 promoter'}
Top 20 activating tokens and their annotations:
Token: CTCCCC, Annotations: ['SV40 promoter'], Activation: 16.402
Token: CTCCCC, Annotations: ['SV40 promoter'], Activation: 15.336
Token: CTCCCC, Annotations: ['SV40 promoter'], Activation: 15.070
Token: CTCCCC, Annotations: ['SV40 promoter'], Activation: 14.913
Token: CTCCCC, Annotations: ['SV40 promoter'], Activation: 14.321
Token: CTCCCC, Annotations: ['SV40 promoter'], Activation: 14.207
Token: CTCCCC, Annotations: ['SV40 promoter'], Activation: 13.264
Token: CTCCCC, Annotations: ['SV40 promoter'], Activation: 13.170
Token: CTCCCC, Annotations: ['SV40 promoter'], Activation: 13.086
Token: CTCCCC, Annotations: ['SV40 promoter'], Activation: 12.724
Token: CTCCCC, Annotations: ['SV40 promoter'], Activation: 12.312
Token: GCTCCC, Annotations: ['SV40 promoter'], Activation: 11.634
Token: GCTCCC, Annotations: ['SV40 promoter'], Activation: 11.560
Token: CTCCCC, Annotations: ['SV40 promoter'

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1279 appears to detect: {'f1 ori'}
Top 20 activating tokens and their annotations:
Token: GTGTAG, Annotations: ['f1 ori'], Activation: 11.447
Token: TAGCGG, Annotations: ['f1 ori'], Activation: 10.667
Token: TAGCGG, Annotations: ['f1 ori'], Activation: 10.442
Token: GGGCGC, Annotations: ['f1 ori'], Activation: 10.380
Token: GGGCGC, Annotations: ['f1 ori'], Activation: 10.315
Token: GGGCGC, Annotations: ['f1 ori'], Activation: 10.203
Token: GGTCAC, Annotations: ['f1 ori'], Activation: 10.103
Token: GTGTAG, Annotations: ['f1 ori'], Activation: 10.099
Token: CTACAC, Annotations: ['f1 ori'], Activation: 9.982
Token: TAGCGG, Annotations: ['f1 ori'], Activation: 9.977
Token: TGTAGC, Annotations: ['f1 ori'], Activation: 9.890
Token: CGCCCT, Annotations: ['f1 ori'], Activation: 9.851
Token: GGGCGC, Annotations: ['f1 ori'], Activation: 9.748
Token: CAAGTG, Annotations: ['f1 ori'], Activation: 9.742
Token: GTAGCG, Annotations: ['f1 ori'], Activation: 9.722
Token: CAAGTG, Annotations: ['f

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1336 appears to detect: {'chimeric intron'}
Top 20 activating tokens and their annotations:
Token: TAATTA, Annotations: ['chimeric intron'], Activation: 12.772
Token: GGACGG, Annotations: ['chimeric intron'], Activation: 9.721
Token: CCCTTC, Annotations: ['chimeric intron'], Activation: 8.858
Token: GCGGGA, Annotations: ['chimeric intron'], Activation: 7.677
Token: GCGGGA, Annotations: ['chimeric intron'], Activation: 7.661
Token: CGGGCG, Annotations: ['chimeric intron'], Activation: 7.528
Token: AAAGGC, Annotations: [], Activation: 7.379
Token: TTTGTG, Annotations: ['chimeric intron'], Activation: 7.344
Token: TCCTCC, Annotations: ['chimeric intron'], Activation: 7.270
Token: GCGGGA, Annotations: ['chimeric intron'], Activation: 7.257
Token: GCCTTC, Annotations: ['rrnB T2 terminator'], Activation: 7.212
Token: GCGCTT, Annotations: ['chimeric intron'], Activation: 7.031
Token: CGGGAC, Annotations: ['chimeric intron'], Activation: 7.030
Token: AAAGGC, Annotations: [], Activation

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1366 appears to detect: {'CMV promoter', 'UL126'}
Top 20 activating tokens and their annotations:
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 14.747
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 14.289
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 14.288
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 14.282
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 13.957
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 13.937
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 13.894
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 13.793
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 13.730
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 13.581
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 13.566
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 13.561
Token:

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1400 appears to detect: {'Cas9(N)'}
Top 20 activating tokens and their annotations:
Token: CTGCTG, Annotations: ['Cas9(N)'], Activation: 12.179
Token: AAAGTG, Annotations: ['Cas9(N)'], Activation: 11.887
Token: CACATG, Annotations: ['Cas9(N)'], Activation: 11.114
Token: GTGTAT, Annotations: ['Cas9(N)'], Activation: 10.892
Token: CTGCCC, Annotations: ['Cas9(N)'], Activation: 10.828
Token: GCCGCC, Annotations: ['Cas9(N)'], Activation: 10.516
Token: TCTATG, Annotations: ['Cas9(N)'], Activation: 10.481
Token: AAGGAC, Annotations: ['Cas9(N)'], Activation: 10.401
Token: CTGACC, Annotations: ['Cas9(N)'], Activation: 10.188
Token: GAGGGA, Annotations: ['Cas9(N)'], Activation: 10.170
Token: AACGAG, Annotations: ['Cas9(N)'], Activation: 10.106
Token: ACCGAC, Annotations: ['Cas9(N)'], Activation: 9.951
Token: AGCCAG, Annotations: ['Cas9(N)'], Activation: 9.864
Token: GACCAG, Annotations: ['Cas9(N)'], Activation: 9.839
Token: TTCAAG, Annotations: ['Cas9(N)'], Activation: 9.820
Token: CCCCT

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1405 appears to detect: {'SV40 promoter'}
Top 20 activating tokens and their annotations:
Token: GAAAGT, Annotations: ['SV40 promoter'], Activation: 9.598
Token: GAAAGT, Annotations: ['SV40 promoter'], Activation: 9.429
Token: GAAAGT, Annotations: ['SV40 promoter'], Activation: 9.113
Token: GAAAGT, Annotations: ['SV40 promoter'], Activation: 9.090
Token: GAAAGT, Annotations: ['SV40 promoter'], Activation: 8.952
Token: GAAAGT, Annotations: ['SV40 promoter'], Activation: 8.764
Token: GAAAGT, Annotations: ['SV40 promoter'], Activation: 8.709
Token: GAAAGT, Annotations: ['SV40 promoter'], Activation: 8.518
Token: GAAAGT, Annotations: ['SV40 promoter'], Activation: 8.234
Token: GAAAGT, Annotations: ['SV40 promoter'], Activation: 8.032
Token: GAAAGT, Annotations: ['SV40 promoter'], Activation: 8.016
Token: GAAAGT, Annotations: ['SV40 promoter'], Activation: 7.278
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 5.591
Token: GAAAGT, Annotations: ['SV40 promoter'], Acti

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1411 appears to detect: {'SV40 promoter'}
Top 20 activating tokens and their annotations:
Token: ATTAAT, Annotations: ['penA'], Activation: 11.679
Token: AATTAG, Annotations: ['SV40 promoter'], Activation: 10.168
Token: AATTAG, Annotations: ['SV40 promoter'], Activation: 10.136
Token: AATTAG, Annotations: ['SV40 promoter'], Activation: 10.103
Token: AATTAG, Annotations: ['SV40 promoter'], Activation: 10.056
Token: AATTAG, Annotations: ['SV40 promoter'], Activation: 9.944
Token: AATTAG, Annotations: ['SV40 promoter'], Activation: 9.931
Token: AATTAG, Annotations: ['SV40 promoter'], Activation: 9.916
Token: AATTAG, Annotations: ['SV40 promoter'], Activation: 9.896
Token: AATTAG, Annotations: ['SV40 promoter'], Activation: 9.822
Token: ATTAAT, Annotations: ['penA'], Activation: 9.714
Token: AATTAG, Annotations: ['SV40 promoter'], Activation: 9.691
Token: AATTAG, Annotations: ['SV40 promoter'], Activation: 9.660
Token: ATTAAT, Annotations: ['U6 promoter'], Activation: 9.660
Token: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1413 appears to detect: {'CMV enhancer', 'CMV promoter'}
Top 20 activating tokens and their annotations:
Token: ACTTTC, Annotations: ['CMV promoter', 'UL126'], Activation: 14.760
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 14.520
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 14.408
Token: ACTTTC, Annotations: ['CMV enhancer'], Activation: 14.388
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 14.350
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 14.278
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 14.252
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 14.231
Token: ACTTTC, Annotations: ['CMV promoter', 'UL126'], Activation: 14.199
Token: CTTTCC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 14.196
Token: ACTTTC, Annotations: ['CMV promoter', 'UL126'], Activation: 14.148
Token: ACTTTC, Annotations: ['CMV

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1445 appears to detect: {'CMV promoter', 'UL126'}
Top 20 activating tokens and their annotations:
Token: TGGGAG, Annotations: ['CMV promoter', 'UL126'], Activation: 21.549
Token: TGGGAG, Annotations: ['CMV promoter', 'UL126'], Activation: 21.344
Token: TGGGAG, Annotations: ['CMV promoter', 'UL126'], Activation: 21.186
Token: TGGGAG, Annotations: ['CMV promoter', 'UL126'], Activation: 21.183
Token: TGGGAG, Annotations: ['CMV promoter', 'UL126'], Activation: 21.135
Token: TGGGAG, Annotations: ['CMV promoter', 'UL126'], Activation: 21.002
Token: TGGGAG, Annotations: ['CMV promoter', 'UL126'], Activation: 20.996
Token: TGGGAG, Annotations: ['CMV promoter', 'UL126'], Activation: 20.987
Token: TGGGAG, Annotations: ['CMV promoter', 'UL126'], Activation: 20.943
Token: TGGGAG, Annotations: ['CMV promoter', 'UL126'], Activation: 20.870
Token: TGGGAG, Annotations: ['CMV promoter', 'UL126'], Activation: 20.848
Token: TGGGAG, Annotations: ['CMV promoter', 'UL126'], Activation: 20.803
Token:

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1453 appears to detect: {'AmpR'}
Top 20 activating tokens and their annotations:
Token: ATTCAG, Annotations: ['lacI'], Activation: 11.528
Token: ATTCAG, Annotations: ['THLA_STAAC'], Activation: 11.167
Token: ATTCAG, Annotations: ['lacI'], Activation: 11.138
Token: ATTCAG, Annotations: ['AmpR'], Activation: 10.863
Token: ATTCAG, Annotations: ['AmpR'], Activation: 10.834
Token: GTTCAG, Annotations: ['parB'], Activation: 10.820
Token: ATTCAG, Annotations: ['AmpR'], Activation: 10.805
Token: ATTCAG, Annotations: ['AmpR'], Activation: 10.742
Token: ATTCAG, Annotations: ['AmpR'], Activation: 10.713
Token: ATTCAG, Annotations: ['AmpR'], Activation: 10.643
Token: ATTCAG, Annotations: ['AmpR'], Activation: 10.549
Token: ATTCAG, Annotations: ['AmpR'], Activation: 10.546
Token: ATTCAG, Annotations: ['AmpR'], Activation: 10.533
Token: ATTCAA, Annotations: ['AmpR'], Activation: 10.470
Token: ATTCAG, Annotations: ['AmpR'], Activation: 10.421
Token: ATTCAG, Annotations: ['AmpR'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1475 appears to detect: {'CMV promoter', 'UL126'}
Top 20 activating tokens and their annotations:
Token: TTTGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.563
Token: TTTGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.500
Token: TTTGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.481
Token: TTTGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.373
Token: TTTGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.191
Token: TTTGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.150
Token: AGTTTG, Annotations: ['CMV promoter', 'UL126'], Activation: 11.867
Token: TTTGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 11.860
Token: TTTGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 11.810
Token: TTTGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 11.787
Token: TTTGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 11.719
Token: TTTGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 11.678
Token:

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1480 appears to detect: {'C9orf85'}
Top 20 activating tokens and their annotations:
Token: CCAAGT, Annotations: ['C9orf85'], Activation: 12.315
Token: AGCTGG, Annotations: ['C9orf85'], Activation: 12.146
Token: GACTAC, Annotations: ['C9orf85'], Activation: 11.250
Token: CCGAGT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 9.529
Token: CCCGAG, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 8.866
Token: AGTGCA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 6.419
Token: AGCTGG, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 5.981
Token: GCTGGA, Annotations: ['C9orf85'], Activation: 5.018
Token: GTGCAG, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 4.450
Token: GCTGGA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 4.054
Token: AGTGCT, Annotations: ['C8orf17'], Activation: 3.432
Token: CTAGAG, Annotations: [], Activation: 2.881
Token: CGGAAG, Annotations: [], Activation: 2.345
Token: AGAGGG, Annotatio

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1499 appears to detect: {'AmpR'}
Top 20 activating tokens and their annotations:
Token: AGAGTA, Annotations: ['AmpR'], Activation: 12.652
Token: AGAGTA, Annotations: ['AmpR'], Activation: 12.623
Token: AGAGTA, Annotations: ['AmpR'], Activation: 12.574
Token: AGAGTA, Annotations: ['AmpR'], Activation: 12.412
Token: AGAGTA, Annotations: ['AmpR'], Activation: 11.978
Token: AGAGTA, Annotations: ['AmpR'], Activation: 11.935
Token: AGAGTA, Annotations: ['AmpR'], Activation: 11.753
Token: AGAGTA, Annotations: ['AmpR'], Activation: 11.750
Token: AGAGTA, Annotations: ['AmpR'], Activation: 11.665
Token: AGAGTA, Annotations: ['AmpR'], Activation: 11.047
Token: AGAGTA, Annotations: ['AmpR'], Activation: 10.959
Token: AGAGTA, Annotations: ['AmpR'], Activation: 10.833
Token: AGAGTA, Annotations: ['AmpR'], Activation: 10.784
Token: AGAGTA, Annotations: ['AmpR'], Activation: 10.733
Token: AGAGTA, Annotations: ['AmpR'], Activation: 10.621
Token: AGAGTA, Annotations: ['AmpR'], Activation: 10.582

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1510 appears to detect: {'ori', 'RNAI'}
Top 20 activating tokens and their annotations:
Token: TGGCTT, Annotations: ['ori', 'RNAI'], Activation: 9.881
Token: TGGCTT, Annotations: ['LEU2'], Activation: 9.855
Token: TGGCTT, Annotations: ['U6 promoter'], Activation: 9.749
Token: TGGCTT, Annotations: ['ori', 'RNAI'], Activation: 9.718
Token: TGGCTT, Annotations: ['ori', 'RNAI'], Activation: 9.682
Token: TGGCTT, Annotations: ['ori', 'RNAI'], Activation: 9.581
Token: TGGCTT, Annotations: ['ori', 'RNAI'], Activation: 9.520
Token: TGGCTT, Annotations: ['LEU2'], Activation: 9.501
Token: TGGCTT, Annotations: ['ori', 'RNAI'], Activation: 9.460
Token: TGGCTT, Annotations: ['ori', 'RNAI'], Activation: 9.459
Token: TGGCTT, Annotations: ['ori', 'RNAI'], Activation: 9.407
Token: TGGCTT, Annotations: ['TpR'], Activation: 9.325
Token: TGGCTT, Annotations: ['ori'], Activation: 9.305
Token: TGGCTT, Annotations: ['U6 promoter'], Activation: 9.290
Token: TGGCTT, Annotations: ['ori', 'RNAI'], Activat

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1520 appears to detect: {'SmR'}
Top 20 activating tokens and their annotations:
Token: TTGTGC, Annotations: ['SmR'], Activation: 24.735
Token: TTGTGC, Annotations: ['SmR'], Activation: 24.663
Token: TTGTGC, Annotations: ['SmR'], Activation: 23.217
Token: TTGTGC, Annotations: ['SmR'], Activation: 22.906
Token: TTGTGC, Annotations: ['SmR'], Activation: 22.831
Token: TTGTGC, Annotations: ['SmR'], Activation: 22.372
Token: TTGTGC, Annotations: ['SmR'], Activation: 22.291
Token: TGTGCA, Annotations: ['SmR'], Activation: 21.296
Token: TGTGCA, Annotations: ['SmR'], Activation: 20.569
Token: TGTGCA, Annotations: ['SmR'], Activation: 20.412
Token: TGTGCA, Annotations: ['SmR'], Activation: 19.657
Token: CATTGT, Annotations: ['SmR'], Activation: 14.402
Token: CCATTG, Annotations: ['SmR'], Activation: 14.171
Token: CCATTG, Annotations: ['SmR'], Activation: 14.045
Token: TATCTA, Annotations: [], Activation: 13.844
Token: CATTGT, Annotations: ['SmR'], Activation: 13.555
Token: CCATTG, Annota

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1535 appears to detect: {'CMV enhancer', 'CMV promoter'}
Top 20 activating tokens and their annotations:
Token: GTCAAT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.954
Token: GTCAAT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.890
Token: GCGGAT, Annotations: ['UBA7'], Activation: 9.823
Token: GTCAAT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.517
Token: GTCAAT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.471
Token: GTCAAT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.396
Token: GTCAAT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.382
Token: GTCAAT, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 9.358
Token: GTCAAT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.325
Token: GTCAAT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.294
Token: GTCAAT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 9.276
Token: GTCAAT, Annotations

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1570 appears to detect: {'SmR'}
Top 20 activating tokens and their annotations:
Token: CGACAT, Annotations: ['SmR'], Activation: 12.241
Token: CGACAT, Annotations: ['SmR'], Activation: 11.173
Token: CGACAT, Annotations: ['SmR'], Activation: 10.728
Token: CGACAT, Annotations: ['SmR'], Activation: 10.159
Token: ACCCTC, Annotations: [], Activation: 8.087
Token: TGATCT, Annotations: ['SmR'], Activation: 6.491
Token: AGACCT, Annotations: [], Activation: 6.431
Token: TGATCT, Annotations: ['SmR'], Activation: 6.393
Token: CACGAT, Annotations: ['SmR'], Activation: 6.289
Token: AAATCA, Annotations: [], Activation: 6.284
Token: GTCACC, Annotations: [], Activation: 6.194
Token: TGCCTC, Annotations: [], Activation: 6.153
Token: CCAAAC, Annotations: [], Activation: 6.084
Token: TGATCT, Annotations: ['SmR'], Activation: 5.937
Token: TGATCT, Annotations: ['SmR'], Activation: 5.552
Token: AAAAAC, Annotations: [], Activation: 5.445
Token: TCGACA, Annotations: ['SmR'], Activation: 5.333
Token: T

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1589 appears to detect: {'SV40 promoter'}
Top 20 activating tokens and their annotations:
Token: TCTCAA, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 16.265
Token: TCTCAA, Annotations: ['SV40 promoter'], Activation: 16.032
Token: TCTCAA, Annotations: ['SV40 promoter'], Activation: 15.833
Token: TCTCAA, Annotations: ['SV40 promoter'], Activation: 15.463
Token: TCTCAA, Annotations: ['SV40 promoter'], Activation: 15.352
Token: TCTCAA, Annotations: ['SV40 promoter'], Activation: 15.120
Token: TCTCAA, Annotations: ['SV40 promoter'], Activation: 15.091
Token: TCTCAA, Annotations: ['SV40 promoter'], Activation: 15.001
Token: TCTCAA, Annotations: ['SV40 promoter'], Activation: 14.862
Token: TCTCAA, Annotations: ['SV40 promoter'], Activation: 14.856
Token: TCTCAA, Annotations: ['SV40 promoter'], Activation: 14.840
Token: TCTCAA, Annotations: ['SV40 promoter', 'SV40 ori'], Activation: 14.837
Token: TCTCAA, Annotations: ['SV40 promoter'], Activation: 14.800
Token: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1611 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: GTAAAC, Annotations: ['CMV enhancer'], Activation: 18.657
Token: GTAAAC, Annotations: ['CMV enhancer'], Activation: 18.153
Token: GTAAAC, Annotations: ['CMV enhancer'], Activation: 18.108
Token: GTAAAC, Annotations: ['CMV enhancer'], Activation: 17.707
Token: GTAAAC, Annotations: ['CMV enhancer'], Activation: 17.675
Token: GTAAAC, Annotations: ['CMV enhancer'], Activation: 17.639
Token: GTAAAC, Annotations: ['CMV enhancer'], Activation: 17.576
Token: GTAAAC, Annotations: ['CMV enhancer'], Activation: 17.573
Token: GTAAAC, Annotations: ['CMV enhancer'], Activation: 17.564
Token: GTAAAC, Annotations: ['CMV enhancer'], Activation: 17.457
Token: GTAAAC, Annotations: ['CMV enhancer'], Activation: 17.448
Token: GTAAAC, Annotations: ['CMV enhancer'], Activation: 17.249
Token: GTAAAC, Annotations: ['CMV enhancer'], Activation: 17.165
Token: GTAAAC, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1614 appears to detect: {'f1 ori'}
Top 20 activating tokens and their annotations:
Token: GGTTAC, Annotations: ['f1 ori'], Activation: 9.063
Token: GGTTAC, Annotations: ['f1 ori'], Activation: 7.544
Token: <pad>, Annotations: ['special token: <pad>'], Activation: 7.219
Token: GGTTAC, Annotations: ['f1 ori'], Activation: 5.516
Token: GGTTAC, Annotations: ['f1 ori'], Activation: 5.473
Token: <pad>, Annotations: ['special token: <pad>'], Activation: 5.076
Token: GGTTAC, Annotations: ['f1 ori'], Activation: 4.791
Token: GGTTAC, Annotations: ['queF'], Activation: 4.728
Token: <pad>, Annotations: ['special token: <pad>'], Activation: 4.653
Token: GGTTAC, Annotations: ['f1 ori'], Activation: 4.549
Token: GGTTAC, Annotations: ['chimeric intron'], Activation: 4.524
Token: GGTTAC, Annotations: ['f1 ori'], Activation: 4.378
Token: GGTTAC, Annotations: ['f1 ori'], Activation: 4.372
Token: GGTTAC, Annotations: ['f1 ori'], Activation: 4.357
Token: GGTTAC, Annotations: ['f1 ori'], Activation:

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1616 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: GCGCAG, Annotations: ['ori'], Activation: 15.820
Token: CGCAGA, Annotations: ['ori'], Activation: 15.739
Token: GCGCAG, Annotations: ['ori'], Activation: 15.572
Token: TGGAAA, Annotations: [], Activation: 15.566
Token: CGCAGA, Annotations: ['ori'], Activation: 14.758
Token: GCGCAG, Annotations: ['ori'], Activation: 14.293
Token: CGCAGA, Annotations: ['ori'], Activation: 14.276
Token: GCGCAG, Annotations: ['ori'], Activation: 14.199
Token: GCGCAG, Annotations: ['ori'], Activation: 14.170
Token: GCGCAG, Annotations: ['ori'], Activation: 14.110
Token: GCGCAG, Annotations: ['ori'], Activation: 14.015
Token: GCGCAG, Annotations: ['ori'], Activation: 13.928
Token: CGCAGA, Annotations: ['ori'], Activation: 13.883
Token: GCGCAG, Annotations: ['ori'], Activation: 13.832
Token: CGCAGA, Annotations: ['ori'], Activation: 13.703
Token: CGCAGA, Annotations: ['ori'], Activation: 13.696
Token: GCGCAG, Annota

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1666 appears to detect: {'CMV enhancer', 'CMV promoter'}
Top 20 activating tokens and their annotations:
Token: ACGTCA, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 18.659
Token: GACGTC, Annotations: ['CMV enhancer'], Activation: 18.400
Token: ACGTCA, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 18.317
Token: ACGTCA, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.949
Token: ACGTCA, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.827
Token: ACGTCA, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.798
Token: ACGTCA, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.765
Token: GACGTC, Annotations: ['CMV enhancer'], Activation: 17.691
Token: ACGTCA, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.668
Token: ACGTCA, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.660
Token: ACGTCA, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.559
Token: ACGTCA, Annotations: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1676 appears to detect: {'C9orf85'}
Top 20 activating tokens and their annotations:
Token: AGCCTC, Annotations: ['C9orf85'], Activation: 19.897
Token: AGCCTC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 11.752
Token: TGCCTC, Annotations: ['C9orf85'], Activation: 8.200
Token: CCTCCA, Annotations: ['C9orf85'], Activation: 7.936
Token: CCAAGT, Annotations: ['C9orf85'], Activation: 6.593
Token: ACCAGA, Annotations: [], Activation: 4.886
Token: GCTCTG, Annotations: ['C9orf85'], Activation: 4.371
Token: GTGAGC, Annotations: ['C8orf17'], Activation: 4.287
Token: AGCCAT, Annotations: [], Activation: 3.892
Token: CACAGT, Annotations: [], Activation: 2.995
Token: CAGCCT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 2.949
Token: CCTCCT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 2.835
Token: GGGTTC, Annotations: ['C9orf85'], Activation: 2.323
Token: ACTCAG, Annotations: [], Activation: 2.291
Token: CTGCCA, Annotations: ['C9orf85'], Activatio

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1702 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: AACTTA, Annotations: ['CMV enhancer'], Activation: 18.304
Token: AACTTA, Annotations: ['CMV enhancer'], Activation: 18.181
Token: AACTTA, Annotations: ['CMV enhancer'], Activation: 18.089
Token: AACTTA, Annotations: ['CMV enhancer'], Activation: 18.075
Token: AACTTA, Annotations: ['CMV enhancer'], Activation: 17.871
Token: AACTTA, Annotations: ['CMV enhancer'], Activation: 17.770
Token: AACTTA, Annotations: ['CMV enhancer'], Activation: 17.596
Token: AACTTA, Annotations: ['CMV enhancer'], Activation: 17.500
Token: AACTTA, Annotations: ['CMV enhancer'], Activation: 17.360
Token: AACTTA, Annotations: ['CMV enhancer'], Activation: 17.202
Token: AACTTA, Annotations: ['CMV enhancer'], Activation: 17.164
Token: AACTTA, Annotations: ['CMV enhancer'], Activation: 17.025
Token: AACTTA, Annotations: ['CMV enhancer'], Activation: 16.926
Token: AACTTA, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1737 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: GCTGAC, Annotations: ['RPOL_BPT7'], Activation: 12.746
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 12.419
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 12.322
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 12.233
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 12.213
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 12.166
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 12.145
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 12.127
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 12.105
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 12.093
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 12.061
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 12.060
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 11.983
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 11.

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1751 appears to detect: {'lacZα'}
Top 20 activating tokens and their annotations:
Token: AATGAC, Annotations: ['chimeric intron'], Activation: 12.601
Token: AACGAC, Annotations: ['lacZα'], Activation: 12.522
Token: AACGAC, Annotations: ['lacZα'], Activation: 12.354
Token: AACGAC, Annotations: ['lacZα'], Activation: 11.585
Token: AACGAC, Annotations: ['lacZα'], Activation: 11.380
Token: AACGAC, Annotations: ['lacZα'], Activation: 11.350
Token: AACGAC, Annotations: ['lacZα'], Activation: 11.347
Token: AACGAC, Annotations: ['lacZα'], Activation: 11.331
Token: AACGAC, Annotations: ['lacZα'], Activation: 11.221
Token: AACGAC, Annotations: ['lacZα'], Activation: 11.197
Token: AACGAC, Annotations: ['lacZα'], Activation: 11.043
Token: AATGAC, Annotations: ['chimeric intron'], Activation: 11.028
Token: AACGAC, Annotations: ['lacZα'], Activation: 10.996
Token: AACGAC, Annotations: ['lacZα'], Activation: 10.855
Token: AACGAC, Annotations: ['lacZα'], Activation: 10.824
Token: AACGAC, Annot

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1779 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: TCCTAC, Annotations: ['CMV enhancer'], Activation: 16.217
Token: TCCTAC, Annotations: ['CMV enhancer'], Activation: 15.940
Token: TCCTAC, Annotations: ['CMV enhancer'], Activation: 15.874
Token: TCCAAA, Annotations: ['CMV promoter', 'UL126'], Activation: 15.865
Token: TCCTAC, Annotations: ['CMV enhancer'], Activation: 15.763
Token: TCCTAC, Annotations: ['CMV enhancer'], Activation: 15.695
Token: TCCAAA, Annotations: ['CMV promoter', 'UL126'], Activation: 15.633
Token: TCCAAA, Annotations: ['CMV promoter', 'UL126'], Activation: 15.604
Token: TCCTAC, Annotations: ['CMV enhancer'], Activation: 15.602
Token: TCCTAC, Annotations: ['CMV enhancer'], Activation: 15.566
Token: TCCAAA, Annotations: ['CMV promoter', 'UL126'], Activation: 15.551
Token: TCCTAC, Annotations: ['CMV enhancer'], Activation: 15.481
Token: TCCTAC, Annotations: ['CMV enhancer'], Activation: 15.451
Token: TCCAAA, Annotat

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1784 appears to detect: {"5' LTR"}
Top 20 activating tokens and their annotations:
Token: CGCTGA, Annotations: ["5' LTR"], Activation: 15.753
Token: CGCTGA, Annotations: ["5' LTR"], Activation: 15.095
Token: TGGACT, Annotations: ["5' LTR"], Activation: 12.887
Token: CGCTGA, Annotations: ["5' LTR"], Activation: 12.718
Token: GCTGAT, Annotations: ["5' LTR"], Activation: 12.599
Token: GAATCG, Annotations: ["5' LTR"], Activation: 12.071
Token: TGGACT, Annotations: ["5' LTR"], Activation: 11.966
Token: CTGATC, Annotations: ["5' LTR"], Activation: 11.795
Token: GACTCG, Annotations: ["5' LTR"], Activation: 11.792
Token: GGACTC, Annotations: ["5' LTR"], Activation: 11.719
Token: CCTTGG, Annotations: ["5' LTR"], Activation: 11.450
Token: GAATCG, Annotations: ["5' LTR"], Activation: 11.333
Token: GCTGAT, Annotations: ["5' LTR"], Activation: 11.005
Token: TGGTCT, Annotations: ["5' LTR (truncated)", 'mir-1937'], Activation: 10.992
Token: GTGGAC, Annotations: ["5' LTR"], Activation: 10.707


  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1799 appears to detect: {'CMV enhancer', 'CMV promoter'}
Top 20 activating tokens and their annotations:
Token: ATTACC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 10.658
Token: ATTACC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 10.470
Token: ATTACC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 10.432
Token: ATTACC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 10.430
Token: TTACCG, Annotations: ['ori'], Activation: 10.424
Token: ATTACC, Annotations: ['chicken β-actin promoter', 'CMV enhancer'], Activation: 10.369
Token: TTACCA, Annotations: ['CMV promoter', 'CMV enhancer'], Activation: 10.344
Token: ATTACC, Annotations: ['CMV enhancer', 'chicken β-actin promoter'], Activation: 10.329
Token: TTACCG, Annotations: ['ori'], Activation: 10.271
Token: TTACCA, Annotations: ['CMV promoter', 'CMV enhancer'], Activation: 10.163
Token: ATTACC, Annotations: ['CMV enhancer'], Activation: 10.158
Token: TTACCA, Annotations: ['CMV enha

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1806 appears to detect: {'SmR'}
Top 20 activating tokens and their annotations:
Token: GACAAA, Annotations: ['SmR'], Activation: 18.103
Token: GACAAA, Annotations: ['SmR'], Activation: 17.482
Token: TGACAA, Annotations: ['SmR'], Activation: 17.217
Token: GACAAA, Annotations: ['SmR'], Activation: 17.200
Token: TGACAA, Annotations: ['SmR'], Activation: 17.090
Token: AAGCAA, Annotations: ['SmR'], Activation: 16.998
Token: AAGCAA, Annotations: ['SmR'], Activation: 16.839
Token: GACAAA, Annotations: ['SmR'], Activation: 16.767
Token: TGATCT, Annotations: ['SmR'], Activation: 16.602
Token: AGCAAG, Annotations: ['SmR'], Activation: 16.479
Token: GCCAGC, Annotations: ['SmR'], Activation: 16.279
Token: CTTGCT, Annotations: ['SmR'], Activation: 16.216
Token: TGATCT, Annotations: ['SmR'], Activation: 16.138
Token: TGGCTA, Annotations: ['SmR'], Activation: 16.073
Token: TGGCTA, Annotations: ['SmR'], Activation: 16.003
Token: AAGCAA, Annotations: ['SmR'], Activation: 16.002
Token: TGACAA, A

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1818 appears to detect: {'SV40 promoter', 'SV40 ori'}
Top 20 activating tokens and their annotations:
Token: CTAACT, Annotations: ['SV40 promoter', 'SV40 ori'], Activation: 12.654
Token: CTAACT, Annotations: ['SV40 promoter', 'SV40 ori'], Activation: 12.268
Token: CTAACT, Annotations: ['SV40 promoter', 'SV40 ori'], Activation: 12.132
Token: CTAACT, Annotations: ['SV40 promoter', 'SV40 ori'], Activation: 11.648
Token: CTAACT, Annotations: ['SV40 promoter', 'SV40 ori'], Activation: 11.432
Token: CTAACT, Annotations: ['SV40 promoter', 'SV40 ori'], Activation: 11.280
Token: CTAACT, Annotations: ['SV40 promoter', 'SV40 ori'], Activation: 11.268
Token: ACTCCG, Annotations: ['SV40 promoter', 'SV40 ori'], Activation: 11.217
Token: CTAACT, Annotations: ['SV40 promoter', 'SV40 ori'], Activation: 11.207
Token: CTAACT, Annotations: ['SV40 promoter', 'SV40 ori'], Activation: 11.186
Token: CTAACT, Annotations: ['SV40 promoter', 'SV40 ori'], Activation: 11.141
Token: ACTCCG, Annotations: ['SV

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1870 appears to detect: {'Dendra2'}
Top 20 activating tokens and their annotations:
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 34.517
Token: GCTGTA, Annotations: ['Dendra2'], Activation: 33.024
Token: CTCGAA, Annotations: ['Dendra2'], Activation: 31.793
Token: CTCCCA, Annotations: ['Dendra2'], Activation: 31.345
Token: CTCGGT, Annotations: ['Dendra2'], Activation: 31.268
Token: GCTGTC, Annotations: ['Dendra2'], Activation: 31.196
Token: CTTGAA, Annotations: ['Dendra2'], Activation: 31.183
Token: GGGGCC, Annotations: ['Dendra2'], Activation: 30.975
Token: CACGTG, Annotations: ['Dendra2'], Activation: 30.938
Token: CTCCTG, Annotations: ['Venus'], Activation: 30.791
Token: GAAGCA, Annotations: ['Venus'], Activation: 30.726
Token: CTTCAT, Annotations: ['Venus'], Activation: 30.658
Token: GCCCTC, Annotations: ['Dendra2'], Activation: 30.620
Token: GGGCAT, Annotations: ['Venus'], Activation: 30.591
Token: CTTGAA, Annotations: ['Dendra2'], Activation: 30.481
Toke

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1884 appears to detect: {'C9orf85'}
Top 20 activating tokens and their annotations:
Token: GCACAG, Annotations: ['C9orf85'], Activation: 12.176
Token: TAGCGT, Annotations: ['C9orf85'], Activation: 11.883
Token: AGGTGC, Annotations: ['C9orf85'], Activation: 5.648
Token: GTGGCA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 4.942
Token: GATCTC, Annotations: ['C9orf85'], Activation: 4.901
Token: GTGCAG, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 4.324
Token: CTGCCA, Annotations: ['C9orf85'], Activation: 4.256
Token: GGGTTC, Annotations: ['C9orf85'], Activation: 4.085
Token: TGGCAC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 3.437
Token: TGGAAG, Annotations: [], Activation: 2.553
Token: AGGCAT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 2.461
Token: GCATGA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 2.205
Token: AGTGCA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 2.083
Token: AATGGC, A

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 1891 appears to detect: {'ori', 'RNAI'}
Top 20 activating tokens and their annotations:
Token: AACAAA, Annotations: ['ori', 'RNAI'], Activation: 18.124
Token: AACAAA, Annotations: ['ori', 'RNAI'], Activation: 17.906
Token: AACAAA, Annotations: ['ori', 'RNAI'], Activation: 17.746
Token: AACAAA, Annotations: ['ori', 'RNAI'], Activation: 17.380
Token: AACAAA, Annotations: ['ori', 'RNAI'], Activation: 17.310
Token: AACAAA, Annotations: ['ori', 'RNAI'], Activation: 17.302
Token: AACAAA, Annotations: ['ori', 'RNAI'], Activation: 17.099
Token: ACAAAA, Annotations: ['ori', 'RNAI'], Activation: 17.082
Token: AACAAA, Annotations: ['ori', 'RNAI'], Activation: 17.077
Token: AACAAA, Annotations: ['ori', 'RNAI'], Activation: 16.998
Token: AACAAA, Annotations: ['ori', 'RNAI'], Activation: 16.963
Token: AACAAA, Annotations: ['ori', 'RNAI'], Activation: 16.749
Token: ACAAAA, Annotations: ['ori', 'RNAI'], Activation: 16.702
Token: AACAAA, Annotations: ['ori', 'RNAI'], Activation: 16.680
Token: A

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1914 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: CATGAC, Annotations: ['CMV enhancer'], Activation: 17.736
Token: CATGAC, Annotations: ['CMV enhancer'], Activation: 17.581
Token: CATGAC, Annotations: ['CMV enhancer'], Activation: 17.535
Token: CATGAC, Annotations: ['CMV enhancer'], Activation: 17.408
Token: CATGAC, Annotations: ['CMV enhancer'], Activation: 17.307
Token: CATGAC, Annotations: ['CMV enhancer'], Activation: 17.246
Token: CATGAC, Annotations: ['CMV enhancer'], Activation: 17.125
Token: CATGAC, Annotations: ['CMV enhancer'], Activation: 17.058
Token: CATGAC, Annotations: ['CMV enhancer'], Activation: 17.037
Token: CATGAC, Annotations: ['CMV enhancer'], Activation: 17.028
Token: CATGAC, Annotations: ['CMV enhancer'], Activation: 17.012
Token: CATGAC, Annotations: ['CMV enhancer'], Activation: 16.972
Token: CATGAC, Annotations: ['CMV enhancer'], Activation: 16.969
Token: CATGAC, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1941 appears to detect: {'SmR'}
Top 20 activating tokens and their annotations:
Token: ACGACG, Annotations: ['SmR'], Activation: 37.432
Token: ACGACG, Annotations: ['SmR'], Activation: 37.206
Token: ACGACG, Annotations: ['SmR'], Activation: 36.576
Token: ACGACG, Annotations: ['SmR'], Activation: 36.432
Token: ACGACG, Annotations: ['SmR'], Activation: 36.379
Token: CGACGA, Annotations: ['SmR'], Activation: 35.636
Token: ACGACG, Annotations: ['SmR'], Activation: 35.609
Token: CGACGA, Annotations: ['SmR'], Activation: 33.614
Token: ACGACG, Annotations: ['SmR'], Activation: 33.518
Token: CGACGA, Annotations: ['SmR'], Activation: 33.253
Token: CGACGA, Annotations: ['SmR'], Activation: 32.236
Token: TGTGGA, Annotations: [], Activation: 26.860
Token: TTTAGA, Annotations: ['dCas9'], Activation: 24.437
Token: TCGACG, Annotations: [], Activation: 24.261
Token: GACGAC, Annotations: ['aadA'], Activation: 23.154
Token: GATGAC, Annotations: ['SacB'], Activation: 19.881
Token: GGATGA, Annotat

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 1993 appears to detect: {'CMV enhancer', 'CMV promoter', 'UL126'}
Top 20 activating tokens and their annotations:
Token: TCAATG, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 18.307
Token: TCAATG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 18.251
Token: TCAATG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 18.228
Token: TCAATG, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 17.900
Token: TCAATG, Annotations: ['CMV IE94 promoter', 'CMV enhancer'], Activation: 17.847
Token: TCAATG, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 17.567
Token: TCAATG, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 17.534
Token: TCAATG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.306
Token: TCAATG, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 17.300
Token: TCAATG, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 17.247
Token: TCAATG, Ann

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2001 appears to detect: {'hGH poly(A) signal'}
Top 20 activating tokens and their annotations:
Token: CAGCCT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 25.081
Token: AGCCTC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 18.167
Token: AGCCTC, Annotations: ['C9orf85'], Activation: 18.039
Token: TCGGCC, Annotations: ['C8orf17'], Activation: 13.643
Token: TGCCTC, Annotations: ['C9orf85'], Activation: 10.192
Token: TGCCTC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 10.002
Token: CTTGGC, Annotations: ['hGH poly(A) signal'], Activation: 9.808
Token: CCTTGG, Annotations: ['hGH poly(A) signal'], Activation: 6.887
Token: CTGCCT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 6.060
Token: CCAACC, Annotations: [], Activation: 5.060
Token: GGCCTT, Annotations: [], Activation: 4.809
Token: ACCCAC, Annotations: ['hGH poly(A) signal'], Activation: 4.116
Token: CAGCCT, Annotations: [], Activation: 3.544
Token: CTCCCA, Annotations: ['

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2067 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 14.763
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 14.657
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 14.565
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 14.520
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 14.453
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 14.432
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 14.406
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 14.395
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 14.387
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 14.330
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 14.318
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 14.302
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 14.241
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2073 appears to detect: {'tRNA'}
Top 20 activating tokens and their annotations:
Token: TAGTGG, Annotations: ['tRNA'], Activation: 18.670
Token: TAGAAT, Annotations: ['tRNA'], Activation: 12.178
Token: GTGGTC, Annotations: ['tRNA'], Activation: 5.847
Token: AGTACC, Annotations: ['tRNA'], Activation: 3.471
Token: GACCGA, Annotations: ['tRNA'], Activation: 2.226
Token: ACCAGA, Annotations: [], Activation: 2.041
Token: CTCAGT, Annotations: ['NOS promoter'], Activation: 1.746
Token: GGCTCC, Annotations: ['NOS promoter'], Activation: 1.688
Token: <pad>, Annotations: ['special token: <pad>'], Activation: 1.586
Token: AACAAA, Annotations: ['tRNA'], Activation: 1.371
Token: GCAGTA, Annotations: ['5S rRNA'], Activation: 1.317
Token: CGGTAC, Annotations: ['tRNA'], Activation: 1.304
Token: ATTCAA, Annotations: [], Activation: 1.183
Token: GCGCGG, Annotations: ['5S rRNA'], Activation: 1.168
Token: CTGCCA, Annotations: ['tRNA'], Activation: 1.166
Token: GCTCAG, Annotations: ['SV40 promoter'

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2111 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: ATGGCC, Annotations: ['RHOT2'], Activation: 15.976
Token: ATGGCC, Annotations: ['SV40 promoter'], Activation: 15.236
Token: ATGGCC, Annotations: ['iRFP713'], Activation: 15.166
Token: ATGGCC, Annotations: ['RSF1010 RepC'], Activation: 14.841
Token: ATGGCC, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 14.529
Token: ATGGCC, Annotations: ['CMV enhancer'], Activation: 14.280
Token: ATGGCC, Annotations: ['CMV enhancer'], Activation: 14.218
Token: ATGGCC, Annotations: ['SLC30A5'], Activation: 14.154
Token: ATGGCC, Annotations: ['ERG'], Activation: 14.114
Token: ATGGCC, Annotations: ['CMV enhancer'], Activation: 14.090
Token: ATGGCC, Annotations: ['CMV enhancer'], Activation: 14.068
Token: ATGGCC, Annotations: ['ACTA2'], Activation: 14.050
Token: ATGGCC, Annotations: ['CMV enhancer'], Activation: 14.002
Token: ATGGCC, Annotations: [], Activation: 13.958
Token: ATGGCC, Ann

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2138 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: AAAAAA, Annotations: ['ori'], Activation: 21.489
Token: AAAAAA, Annotations: ['ori'], Activation: 20.578
Token: AAAAAA, Annotations: ['ori'], Activation: 20.051
Token: AAAAAA, Annotations: ['ori'], Activation: 19.951
Token: AAAAAA, Annotations: ['ori'], Activation: 19.784
Token: AAAAAA, Annotations: ['ori'], Activation: 19.758
Token: AAAAAA, Annotations: ['ori', 'RNAI'], Activation: 19.642
Token: AAAAAA, Annotations: ['ori'], Activation: 19.501
Token: AAAAAA, Annotations: ['ori', 'RNAI'], Activation: 19.447
Token: AAAAAA, Annotations: ['CloDF13 ori'], Activation: 19.336
Token: AAAAAA, Annotations: ['ori'], Activation: 19.253
Token: AAAAAA, Annotations: [], Activation: 19.190
Token: AAAAAA, Annotations: ['ori'], Activation: 19.115
Token: AAAAAA, Annotations: ['ori'], Activation: 19.050
Token: AAAAAA, Annotations: ['luciferase'], Activation: 19.030
Token: AAAAAA, Annotations: ['ori'], Activatio

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2168 appears to detect: {'C9orf85', 'hGH poly(A) signal'}
Top 20 activating tokens and their annotations:
Token: CCTCCT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 22.694
Token: CCTCCA, Annotations: ['C9orf85'], Activation: 18.623
Token: GGGTTC, Annotations: ['C9orf85'], Activation: 11.447
Token: TCTCCG, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 9.506
Token: CCTCCC, Annotations: ['hGH poly(A) signal'], Activation: 8.793
Token: GGGTTC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 8.585
Token: AGACCC, Annotations: ['tRNA'], Activation: 6.946
Token: GCTCTG, Annotations: ['C9orf85'], Activation: 4.514
Token: TGGCAC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 4.082
Token: GTGCAG, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 3.842
Token: GCTGGA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 2.591
Token: TGGGTT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 1.836
Token: GAACCA

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2177 appears to detect: {'AmpR'}
Top 20 activating tokens and their annotations:
Token: CGAAAA, Annotations: ['AmpR'], Activation: 11.424
Token: CGAAAA, Annotations: ['AmpR'], Activation: 10.247
Token: TTTGGT, Annotations: ['AmpR'], Activation: 10.225
Token: CGAAAA, Annotations: ['AmpR'], Activation: 10.020
Token: ATGCCA, Annotations: ['AmpR'], Activation: 9.585
Token: TTTGGT, Annotations: ['AmpR'], Activation: 9.569
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 9.409
Token: TCACGC, Annotations: ['AmpR'], Activation: 9.306
Token: TCACGC, Annotations: ['AmpR'], Activation: 9.259
Token: TACTCA, Annotations: ['AmpR'], Activation: 9.163
Token: TCACGC, Annotations: ['AmpR'], Activation: 9.152
Token: AGTTGC, Annotations: ['AmpR'], Activation: 9.133
Token: TCTAAT, Annotations: [], Activation: 8.884
Token: TTTGGT, Annotations: ['AmpR'], Activation: 8.765
Token: TTTGGT, Annotations: ['AmpR'], Activation: 8.681
Token: TTTGGT, Annotations: ['AmpR'], Activation: 8.623
To

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2229 appears to detect: {"5' LTR (truncated)"}
Top 20 activating tokens and their annotations:
Token: TGTGTG, Annotations: ['Crebbp'], Activation: 12.662
Token: TGTGTG, Annotations: ["5' LTR (truncated)", 'mir-TAR'], Activation: 12.094
Token: TGTGTG, Annotations: ["5' LTR (truncated)"], Activation: 11.350
Token: TGTGTG, Annotations: ["5' LTR (truncated)"], Activation: 11.097
Token: TGTGTG, Annotations: ["5' LTR (truncated)"], Activation: 11.070
Token: TGTGTG, Annotations: ["5' LTR (truncated)", 'HIV PBS'], Activation: 10.938
Token: TGTGTG, Annotations: ["5' LTR (truncated)"], Activation: 10.862
Token: TGTGTG, Annotations: ["5' LTR (truncated)", 'HIV PBS'], Activation: 10.823
Token: TGTGTG, Annotations: ['POLG_HCVGL'], Activation: 10.721
Token: TGTGTG, Annotations: [], Activation: 10.689
Token: TGTGTG, Annotations: [], Activation: 10.618
Token: TGTGTG, Annotations: ["5' LTR (truncated)"], Activation: 10.609
Token: TGTGTG, Annotations: ['EGFR'], Activation: 10.588
Token: TGTGTG, 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2283 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: TATGGG, Annotations: ['CMV enhancer'], Activation: 12.240
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 12.071
Token: TATGGG, Annotations: ['CMV enhancer'], Activation: 11.770
Token: TATGGG, Annotations: ['CMV enhancer'], Activation: 11.650
Token: TATGGG, Annotations: ['CMV enhancer'], Activation: 11.602
Token: TATGGG, Annotations: ['CMV enhancer'], Activation: 11.593
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 11.582
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 11.577
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 11.437
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 11.346
Token: TATGGG, Annotations: ['CMV enhancer'], Activation: 11.331
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 11.326
Token: TATGGG, Annotations: ['CMV enhancer'], Activation: 11.295
Token: CTTATG, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2293 appears to detect: {'hGH poly(A) signal'}
Top 20 activating tokens and their annotations:
Token: CCTGTA, Annotations: ['C16orf89'], Activation: 25.014
Token: CCTGTA, Annotations: ['hGH poly(A) signal'], Activation: 24.986
Token: TGCCTG, Annotations: ['C16orf89'], Activation: 13.564
Token: TAATCC, Annotations: ['C16orf89'], Activation: 12.136
Token: GGAATC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 11.381
Token: ATGCCT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 10.404
Token: ATCCCA, Annotations: ['hGH poly(A) signal'], Activation: 9.939
Token: GGCGCA, Annotations: ['C16orf89'], Activation: 9.143
Token: CTCACG, Annotations: ['C16orf89'], Activation: 7.169
Token: GCTTTA, Annotations: ["3' LTR (ΔU3)"], Activation: 7.154
Token: TTCACG, Annotations: ['hGH poly(A) signal'], Activation: 6.622
Token: TCATGC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 6.200
Token: CGGTGG, Annotations: [], Activation: 5.001
Token: ATTCCA, Annotatio

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2297 appears to detect: {'CMV promoter', 'UL126'}
Top 20 activating tokens and their annotations:
Token: TATATA, Annotations: ['CMV promoter', 'UL126'], Activation: 9.831
Token: CTATAT, Annotations: ['CMV promoter', 'UL126'], Activation: 9.816
Token: TATATA, Annotations: ['CMVd2 promoter', 'UL126'], Activation: 9.756
Token: CTATAT, Annotations: ['CMV promoter', 'UL126'], Activation: 9.603
Token: CTATAT, Annotations: ['CMV promoter', 'UL126'], Activation: 9.572
Token: TATATA, Annotations: ['CMV promoter', 'UL126'], Activation: 9.526
Token: CTATAT, Annotations: ['CMV promoter', "5' LTR (truncated)", 'UL126'], Activation: 9.521
Token: CTATAT, Annotations: ['CMV promoter', 'UL126'], Activation: 9.521
Token: TATATA, Annotations: ['CMV promoter', 'UL126'], Activation: 9.485
Token: CTATAT, Annotations: ['CMV promoter', 'UL126'], Activation: 9.432
Token: CTATAT, Annotations: ['CMV promoter', 'UL126'], Activation: 9.392
Token: TATATA, Annotations: ['CMV promoter', "5' LTR (truncated)", 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2309 appears to detect: {'avrXa10'}
Top 20 activating tokens and their annotations:
Token: TCGGTT, Annotations: ['avrXa10'], Activation: 12.426
Token: TATTCG, Annotations: ['avrXa10'], Activation: 12.329
Token: GCGCAT, Annotations: ['avrXa10'], Activation: 12.186
Token: TGGCTG, Annotations: ['avrXa10'], Activation: 12.106
Token: TACCTA, Annotations: ['avrXa10'], Activation: 11.999
Token: ATGGCT, Annotations: ['avrXa10'], Activation: 11.914
Token: GCTGCT, Annotations: ['avrXa10'], Activation: 11.911
Token: ACTTGA, Annotations: ['avrXa10'], Activation: 11.830
Token: GCTGTC, Annotations: ['avrXa10'], Activation: 11.794
Token: CATGCG, Annotations: ['avrXa10'], Activation: 11.706
Token: TGGTGG, Annotations: ['avrXa10'], Activation: 11.635
Token: CAAGAT, Annotations: ['avrXa10'], Activation: 11.625
Token: ATGGTG, Annotations: ['avrXa10'], Activation: 11.435
Token: GCACCG, Annotations: ['avrXa10'], Activation: 11.380
Token: TCGCAA, Annotations: ['avrXa10'], Activation: 11.370
Token: A

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2316 appears to detect: {'SV40 promoter'}
Top 20 activating tokens and their annotations:
Token: AGCTCC, Annotations: ['PDK intron', 'cat promoter'], Activation: 40.603
Token: TGCGTC, Annotations: [], Activation: 36.084
Token: GCATCC, Annotations: [], Activation: 31.244
Token: ACATTG, Annotations: [], Activation: 30.815
Token: <pad>, Annotations: ['special token: <pad>'], Activation: 30.801
Token: AGCAGG, Annotations: ['SV40 promoter'], Activation: 30.518
Token: AGCAGG, Annotations: ['SV40 promoter'], Activation: 30.445
Token: AGCAGG, Annotations: ['SV40 promoter'], Activation: 30.155
Token: AGCAGG, Annotations: ['SV40 promoter'], Activation: 30.054
Token: AGCAGG, Annotations: ['SV40 promoter'], Activation: 29.919
Token: <pad>, Annotations: ['special token: <pad>'], Activation: 29.176
Token: AGCAGG, Annotations: ['SV40 promoter'], Activation: 29.063
Token: AGCAGG, Annotations: ['SV40 promoter'], Activation: 29.045
Token: AGCAGG, Annotations: ['SV40 promoter'], Activation: 29.01

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2327 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: TTTTTC, Annotations: ['ori'], Activation: 10.251
Token: TTTTTC, Annotations: ['ori'], Activation: 9.901
Token: TTTTTC, Annotations: ['ori'], Activation: 9.662
Token: TTTTTC, Annotations: ['ori'], Activation: 9.435
Token: TTTTTC, Annotations: ['ori', 'RNAI'], Activation: 9.419
Token: TTTTTC, Annotations: ['ori'], Activation: 9.381
Token: TTTTTC, Annotations: [], Activation: 9.343
Token: TTTTTC, Annotations: ['ori'], Activation: 9.343
Token: TTTTTC, Annotations: ['ori'], Activation: 9.331
Token: TTTTTC, Annotations: ['ori'], Activation: 9.204
Token: TTTTTC, Annotations: ['ori', 'RNAI'], Activation: 9.124
Token: TTTTTC, Annotations: ['ori'], Activation: 9.016
Token: TTTTTC, Annotations: ['ori'], Activation: 8.986
Token: TTTTTC, Annotations: ['ori'], Activation: 8.943
Token: TTTTTC, Annotations: ['ori'], Activation: 8.922
Token: TTTTTC, Annotations: ['ori', 'RNAI'], Activation: 8.912
Token: TTTTT

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2332 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: CCTCTG, Annotations: [], Activation: 8.942
Token: CCTCTG, Annotations: ['ori'], Activation: 8.766
Token: CCTCTG, Annotations: ['ori'], Activation: 8.698
Token: CCTCTG, Annotations: ['ori'], Activation: 8.602
Token: CCTCTG, Annotations: ['ori'], Activation: 8.541
Token: CCTCTG, Annotations: ['ori'], Activation: 8.393
Token: CCTCTG, Annotations: ['ori'], Activation: 8.183
Token: CCTCTG, Annotations: ['ori'], Activation: 8.112
Token: CCTCTG, Annotations: ['ori'], Activation: 8.090
Token: CCTCTG, Annotations: ['ori'], Activation: 8.082
Token: CCTCTG, Annotations: ['ori'], Activation: 8.050
Token: CCTCTG, Annotations: ['ori'], Activation: 7.991
Token: CCTCTG, Annotations: ['ori'], Activation: 7.954
Token: CTCTGC, Annotations: ['ori'], Activation: 7.830
Token: CCTCTG, Annotations: ['ori'], Activation: 7.824
Token: CTCTGC, Annotations: ['ori'], Activation: 7.794
Token: CCTCTG, Annotations: ['ori'], 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2363 appears to detect: {'CMV promoter', 'UL126'}
Top 20 activating tokens and their annotations:
Token: CCAAAA, Annotations: ['CMV promoter', 'UL126'], Activation: 13.641
Token: CCAAAA, Annotations: ['CMV promoter', 'UL126'], Activation: 13.362
Token: CCAAAA, Annotations: ['CMV promoter', 'UL126'], Activation: 13.297
Token: CCAAAA, Annotations: ['CMV promoter', 'UL126'], Activation: 13.237
Token: CCAAAA, Annotations: ['UL126'], Activation: 12.843
Token: CCAAAA, Annotations: [], Activation: 12.634
Token: CCAAAA, Annotations: ['queF'], Activation: 12.622
Token: CCAAAA, Annotations: ['CMV promoter', 'UL126'], Activation: 12.503
Token: CCAAAA, Annotations: ['CMV promoter', 'UL126'], Activation: 12.363
Token: CCAAAA, Annotations: ['CMV promoter', 'UL126'], Activation: 12.361
Token: CCAAAA, Annotations: ['CMV promoter', 'UL126'], Activation: 12.345
Token: CCAAAA, Annotations: ['CMV promoter', 'UL126'], Activation: 12.318
Token: CCAAAA, Annotations: ['sacB'], Activation: 12.155
Token

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2381 appears to detect: {'tight TRE promoter'}
Top 20 activating tokens and their annotations:
Token: TGGCAT, Annotations: [], Activation: 13.450
Token: GGCATT, Annotations: [], Activation: 12.915
Token: TTACTC, Annotations: ['tight TRE promoter'], Activation: 12.754
Token: AGTCCG, Annotations: ['3xHA'], Activation: 12.445
Token: TCGAGT, Annotations: ['tight TRE promoter'], Activation: 12.300
Token: ATGGCA, Annotations: [], Activation: 12.240
Token: TCGAGT, Annotations: ['tight TRE promoter'], Activation: 12.019
Token: CGGCTC, Annotations: ['M13 gene III'], Activation: 11.998
Token: GAGTAC, Annotations: ['5X UAS'], Activation: 11.888
Token: TTACTC, Annotations: ['tight TRE promoter'], Activation: 11.878
Token: CGGCTC, Annotations: ['M13 gene III'], Activation: 11.666
Token: AGAGAA, Annotations: ['tight TRE promoter'], Activation: 11.619
Token: GTGATA, Annotations: ['tight TRE promoter'], Activation: 11.476
Token: CGGCTC, Annotations: ['M13 gene III'], Activation: 11.390
Token: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2382 appears to detect: {'MESV Ψ'}
Top 20 activating tokens and their annotations:
Token: TCTCTG, Annotations: ['MESV Ψ'], Activation: 15.460
Token: TGACTG, Annotations: [], Activation: 15.347
Token: CCGGCA, Annotations: ['MESV Ψ'], Activation: 14.454
Token: TCTTTG, Annotations: ['MESV Ψ'], Activation: 14.415
Token: TCTCTG, Annotations: ['MESV Ψ'], Activation: 14.359
Token: CTGTCT, Annotations: ['MESV Ψ'], Activation: 13.960
Token: ACTAGT, Annotations: ['MESV Ψ', 'Gammaretro CES'], Activation: 13.881
Token: TCTCTG, Annotations: ['MESV Ψ'], Activation: 13.852
Token: TCTTTG, Annotations: ['MESV Ψ'], Activation: 13.656
Token: TGCGTG, Annotations: ['MESV Ψ'], Activation: 13.543
Token: TGACTG, Annotations: [], Activation: 13.390
Token: TGACTG, Annotations: [], Activation: 13.367
Token: ATCTAA, Annotations: ['MESV Ψ'], Activation: 13.238
Token: GTCTTT, Annotations: ['MESV Ψ'], Activation: 13.203
Token: GCCTGC, Annotations: ['MESV Ψ', 'Gammaretro CES'], Activation: 13.047
Token: TGTCT

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2392 appears to detect: {'SmR'}
Top 20 activating tokens and their annotations:
Token: TCCGTG, Annotations: ['SmR'], Activation: 17.899
Token: CATCAT, Annotations: ['SmR'], Activation: 17.344
Token: TCCGTG, Annotations: ['SmR'], Activation: 17.212
Token: CCGCGC, Annotations: ['SmR'], Activation: 17.148
Token: ACATTC, Annotations: [], Activation: 17.124
Token: CATCAT, Annotations: ['SmR'], Activation: 16.763
Token: TTCCGT, Annotations: ['SmR'], Activation: 16.607
Token: CCGCTC, Annotations: [], Activation: 16.594
Token: CCGCGC, Annotations: ['SmR'], Activation: 16.299
Token: TGACTT, Annotations: [], Activation: 16.130
Token: TTCCGT, Annotations: ['SmR'], Activation: 16.012
Token: CCGCGC, Annotations: ['SmR'], Activation: 15.958
Token: CATTCA, Annotations: [], Activation: 15.924
Token: TTCCGT, Annotations: ['SmR'], Activation: 15.869
Token: ATCCAG, Annotations: ['SmR'], Activation: 15.843
Token: ACATCA, Annotations: ['SmR'], Activation: 15.838
Token: TTCCGT, Annotations: ['SmR'],

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2428 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: TGGACT, Annotations: ['cre'], Activation: 13.336
Token: GGACTT, Annotations: ['CMV enhancer'], Activation: 11.948
Token: GACTCT, Annotations: ['bom'], Activation: 11.891
Token: GGACTT, Annotations: ['CMV enhancer'], Activation: 11.873
Token: TGGACT, Annotations: [], Activation: 11.840
Token: GGACTT, Annotations: ['CMV enhancer'], Activation: 11.621
Token: GGACTT, Annotations: ['CMV enhancer'], Activation: 11.504
Token: GGACTT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 11.409
Token: GGACTT, Annotations: ['CMV enhancer'], Activation: 11.350
Token: GGACTT, Annotations: ['CMV promoter', 'UL126'], Activation: 11.327
Token: GGACTT, Annotations: ['CMV enhancer'], Activation: 11.228
Token: GGACTT, Annotations: ['Beta'], Activation: 11.208
Token: GGACTT, Annotations: ['CMV enhancer'], Activation: 11.207
Token: GGACTT, Annotations: ['CMV enhancer'], Activation: 11.162
Token: G

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2447 appears to detect: {'hGH poly(A) signal', 'C9orf85'}
Top 20 activating tokens and their annotations:
Token: GCCCAG, Annotations: ['C9orf85'], Activation: 8.176
Token: GCTGGA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 7.795
Token: AGCTGG, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 7.005
Token: GCTGGA, Annotations: ['C9orf85'], Activation: 5.204
Token: AACCAA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 4.440
Token: CTCAGC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 3.731
Token: CCAGGC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 3.270
Token: GCTCCC, Annotations: ['hGH poly(A) signal'], Activation: 3.066
Token: AGTATC, Annotations: [], Activation: 2.908
Token: GAACCA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 2.736
Token: GCTGGA, Annotations: [], Activation: 1.914
Token: ATGCCT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 1.826
Token: <pad>, Annotations: ['sp

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2514 appears to detect: {'SV40 poly(A) signal'}
Top 20 activating tokens and their annotations:
Token: AATGCT, Annotations: ['SV40 poly(A) signal'], Activation: 15.372
Token: AATGCT, Annotations: ['SV40 poly(A) signal'], Activation: 12.727
Token: AATGCT, Annotations: ['SV40 poly(A) signal'], Activation: 11.361
Token: ATGCCT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 9.811
Token: AAATGC, Annotations: ['SV40 poly(A) signal'], Activation: 9.260
Token: TGCTTT, Annotations: ['attL3'], Activation: 8.133
Token: ATGCTT, Annotations: ['SV40 poly(A) signal'], Activation: 8.077
Token: TGCCTG, Annotations: ['C16orf89'], Activation: 8.067
Token: TGCTTT, Annotations: ['attL1'], Activation: 7.668
Token: AAATGC, Annotations: ['SV40 poly(A) signal'], Activation: 7.548
Token: AATGCT, Annotations: ['SV40 poly(A) signal'], Activation: 7.378
Token: AATGCT, Annotations: ['SV40 poly(A) signal'], Activation: 7.059
Token: AAATGC, Annotations: ['SV40 poly(A) signal'], Activation: 6.969

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2553 appears to detect: {'CMV promoter', 'UL126'}
Top 20 activating tokens and their annotations:
Token: AATGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 14.459
Token: AATGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 14.427
Token: AATGGG, Annotations: ['CMV enhancer'], Activation: 14.369
Token: AATGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 14.355
Token: AACGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 14.295
Token: AATGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 14.169
Token: AACGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 14.149
Token: AATGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 14.020
Token: AACGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 13.992
Token: AATGGG, Annotations: ['CMV enhancer'], Activation: 13.906
Token: AACGGG, Annotations: ['CMV promoter', 'UL126'], Activation: 13.840
Token: AATGGG, Annotations: ['CMV enhancer'], Activation: 13.839
Token: AACGGG, Annotations: ['CMV

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2564 appears to detect: {'AmpR'}
Top 20 activating tokens and their annotations:
Token: TGGCTG, Annotations: ['neo'], Activation: 12.263
Token: TGGCTG, Annotations: ['lacI'], Activation: 12.213
Token: TGGCTG, Annotations: ['AmpR'], Activation: 12.044
Token: TGGCTG, Annotations: ['AmpR'], Activation: 11.957
Token: TGGCTG, Annotations: ['AmpR'], Activation: 11.945
Token: TGGCTG, Annotations: ['AmpR'], Activation: 11.882
Token: TGGCTG, Annotations: ['iRFP713'], Activation: 11.846
Token: TGGCTG, Annotations: ['AmpR'], Activation: 11.763
Token: TGGCTG, Annotations: ['lacI'], Activation: 11.753
Token: TGGCTG, Annotations: ['neo'], Activation: 11.650
Token: TGGCTG, Annotations: ['AmpR'], Activation: 11.637
Token: TGGCTG, Annotations: ['AmpR'], Activation: 11.584
Token: TGGCTG, Annotations: ['lacI'], Activation: 11.561
Token: TGGCTG, Annotations: ['AmpR'], Activation: 11.523
Token: TGGCTG, Annotations: ['AmpR'], Activation: 11.509
Token: TGGCTG, Annotations: ['AmpR'], Activation: 11.44

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2580 appears to detect: {'EGFP'}
Top 20 activating tokens and their annotations:
Token: GACCTA, Annotations: ['cEGFP'], Activation: 10.924
Token: GACCTA, Annotations: ['EGFP'], Activation: 10.810
Token: GACTTA, Annotations: ['superfolder GFP'], Activation: 10.578
Token: GACCTA, Annotations: ['EGFP'], Activation: 10.563
Token: CACCTA, Annotations: ['mEGFP'], Activation: 9.651
Token: GACCTA, Annotations: ['EGFP'], Activation: 9.608
Token: GACCTA, Annotations: ['EGFP'], Activation: 9.393
Token: CACCTA, Annotations: ['cEGFP'], Activation: 9.355
Token: CACCTA, Annotations: ['mEGFP'], Activation: 9.292
Token: CACCTA, Annotations: ['mTurquoise2'], Activation: 9.263
Token: GACCTA, Annotations: ['GJB1'], Activation: 9.179
Token: CACCTA, Annotations: ['EGFP'], Activation: 9.161
Token: CACCTA, Annotations: ['EGFP'], Activation: 9.160
Token: CACCTA, Annotations: ['SEAP'], Activation: 9.143
Token: CACCTA, Annotations: ['EGFP'], Activation: 9.135
Token: CACCTA, Annotations: ['EGFP'], Activat

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2581 appears to detect: {'SmR'}
Top 20 activating tokens and their annotations:
Token: ATCCAG, Annotations: ['SmR'], Activation: 18.669
Token: ATCCAG, Annotations: ['SmR'], Activation: 18.483
Token: ATCCAG, Annotations: ['SmR'], Activation: 17.550
Token: GCGAAC, Annotations: ['SmR'], Activation: 17.497
Token: GCGAAC, Annotations: ['SmR'], Activation: 17.186
Token: ATCCAG, Annotations: ['SmR'], Activation: 16.701
Token: ATCCAG, Annotations: ['SmR'], Activation: 16.084
Token: ATCCAG, Annotations: ['SmR'], Activation: 15.998
Token: ATCCAG, Annotations: ['SmR'], Activation: 15.321
Token: GCGAAC, Annotations: ['SmR'], Activation: 14.816
Token: CGAACT, Annotations: ['SmR'], Activation: 14.566
Token: GCGAAC, Annotations: ['SmR'], Activation: 14.116
Token: GCGAAC, Annotations: ['SmR'], Activation: 13.714
Token: GCGAAC, Annotations: ['SmR'], Activation: 13.586
Token: CTAAGC, Annotations: ['SmR'], Activation: 13.474
Token: CTAAGC, Annotations: ['SmR'], Activation: 13.119
Token: CGAACT, A

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2607 appears to detect: {'rrnB T1 terminator'}
Top 20 activating tokens and their annotations:
Token: GCCCAG, Annotations: ['rrnB T1 terminator'], Activation: 13.291
Token: GCCCAG, Annotations: ['rrnB T1 terminator'], Activation: 12.967
Token: GCCCAG, Annotations: ['rrnB T1 terminator'], Activation: 12.776
Token: GCCCAG, Annotations: ['rrnB T1 terminator'], Activation: 12.772
Token: GCCCAG, Annotations: ['rrnB T1 terminator'], Activation: 12.511
Token: GCCCAG, Annotations: ['rrnB T1 terminator'], Activation: 12.511
Token: GCCCAG, Annotations: ['rrnB T1 terminator'], Activation: 12.509
Token: GCCCAG, Annotations: ['rrnB T1 terminator'], Activation: 12.492
Token: GCCCAG, Annotations: ['rrnB T1 terminator'], Activation: 12.392
Token: GCCCAG, Annotations: ['rrnB T1 terminator'], Activation: 12.392
Token: GCCCAG, Annotations: ['rrnB T1 terminator'], Activation: 12.387
Token: GCCCAG, Annotations: ['rrnB T1 terminator'], Activation: 12.381
Token: GCCCAG, Annotations: ['PATZ1'], Activa

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2745 appears to detect: {'EF-1α promoter', 'EF-1α intron A'}
Top 20 activating tokens and their annotations:
Token: GTGGTG, Annotations: ['M13 gene III'], Activation: 9.370
Token: CTGGGG, Annotations: ['EF-1α intron A', 'EF-1α promoter'], Activation: 8.915
Token: CTGCTG, Annotations: ['EPM2A'], Activation: 8.665
Token: GGCCTG, Annotations: ['EF-1α intron A', 'EF-1α promoter'], Activation: 8.662
Token: CTGCTG, Annotations: ['AP3A_SARS2'], Activation: 8.600
Token: GGCCTG, Annotations: ['EF-1α intron A', 'EF-1α promoter'], Activation: 8.553
Token: GGCCTG, Annotations: ['EF-1α intron A', 'EF-1α promoter'], Activation: 8.483
Token: CTGGCC, Annotations: ['EF-1α intron A', 'EF-1α promoter'], Activation: 8.422
Token: TGGGCG, Annotations: ['EF-1α intron A', 'EF-1α promoter'], Activation: 8.384
Token: TGGGCG, Annotations: ['EF-1α intron A', 'EF-1α promoter'], Activation: 8.306
Token: GGGGCT, Annotations: [], Activation: 8.253
Token: CTGCTG, Annotations: ['AP3A_SARS2'], Activation: 8.234


  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2767 appears to detect: {'SV40 promoter', 'SV40 ori', 'SELP_SV40'}
Top 20 activating tokens and their annotations:
Token: TTTTTT, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 16.413
Token: TTTTTT, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 16.152
Token: TTTTTT, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 16.058
Token: TTTTTG, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 16.014
Token: TTTTTT, Annotations: [], Activation: 15.798
Token: TTTTTT, Annotations: ['TVMV protease'], Activation: 15.732
Token: TTTTTT, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 15.724
Token: TTTTTT, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 15.666
Token: TTTTTG, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 15.542
Token: TTTTTT, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 15.502
Token: TTTTTG, Annotations: ['SV

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2780 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: CCACTT, Annotations: ['CMV enhancer'], Activation: 14.451
Token: CCACTT, Annotations: ['CMV enhancer'], Activation: 14.363
Token: CCACTT, Annotations: ['CMV enhancer'], Activation: 14.121
Token: CCACTT, Annotations: ['CMV enhancer'], Activation: 13.913
Token: CCACTT, Annotations: ['CMV enhancer'], Activation: 13.828
Token: CCACTT, Annotations: ['CMV enhancer'], Activation: 13.769
Token: CCACTT, Annotations: ['CMV enhancer'], Activation: 13.608
Token: CCACTT, Annotations: ['CMV enhancer'], Activation: 13.417
Token: CCACTT, Annotations: ['CMV enhancer'], Activation: 13.340
Token: CCACTT, Annotations: ['CMV enhancer'], Activation: 13.332
Token: CCACTT, Annotations: ['CMV enhancer'], Activation: 13.298
Token: CCACTT, Annotations: ['CMV enhancer'], Activation: 13.270
Token: CCACTT, Annotations: ['CMV enhancer'], Activation: 13.239
Token: CCACTT, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2782 appears to detect: {'SV40 promoter'}
Top 20 activating tokens and their annotations:
Token: TGTGGA, Annotations: ['SV40 promoter'], Activation: 19.458
Token: TGTGGA, Annotations: ['SV40 promoter'], Activation: 19.290
Token: TGTGGA, Annotations: ['SV40 promoter'], Activation: 19.269
Token: TGTGGA, Annotations: ['SV40 promoter'], Activation: 19.114
Token: TGTGGA, Annotations: ['SV40 promoter'], Activation: 19.103
Token: TGTGGA, Annotations: ['SV40 promoter'], Activation: 18.382
Token: TGTGGA, Annotations: ['SV40 promoter'], Activation: 18.381
Token: TGTGGA, Annotations: ['SV40 promoter'], Activation: 18.373
Token: TGTGGA, Annotations: ['SV40 promoter'], Activation: 18.236
Token: TGTGGA, Annotations: ['SV40 promoter'], Activation: 18.204
Token: TGTGGA, Annotations: ['SV40 promoter'], Activation: 18.170
Token: TGTGGA, Annotations: ['SV40 promoter'], Activation: 18.122
Token: TGTGGA, Annotations: ['SV40 promoter'], Activation: 17.963
Token: TGTGGA, Annotations: ['SV40 promoter'

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2790 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: CGAACC, Annotations: ['ori'], Activation: 12.900
Token: CGAACC, Annotations: ['ori'], Activation: 12.763
Token: CGAACC, Annotations: ['ori'], Activation: 12.740
Token: CGAACC, Annotations: ['ori'], Activation: 12.670
Token: CGAACC, Annotations: ['ori'], Activation: 12.546
Token: CGAACC, Annotations: ['ori'], Activation: 12.534
Token: CGAACC, Annotations: ['ori'], Activation: 12.521
Token: CGAACC, Annotations: ['ori'], Activation: 12.492
Token: CGAACC, Annotations: ['ori'], Activation: 12.489
Token: CGAACC, Annotations: ['ori'], Activation: 12.407
Token: CGAACC, Annotations: ['ori'], Activation: 12.388
Token: CGAACC, Annotations: ['ori'], Activation: 12.111
Token: CGAACC, Annotations: ['ori'], Activation: 12.019
Token: CGAACC, Annotations: ['ori'], Activation: 12.011
Token: ACGAAC, Annotations: ['ori'], Activation: 11.753
Token: ACGAAC, Annotations: ['ori'], Activation: 11.678
Token: ACGAAC, A

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2800 appears to detect: {'ori', 'RNAI'}
Top 20 activating tokens and their annotations:
Token: CAAATA, Annotations: ['ori', 'RNAI'], Activation: 11.287
Token: CAAATA, Annotations: [], Activation: 11.282
Token: CAAATA, Annotations: ['luciferase'], Activation: 11.119
Token: CAAATA, Annotations: ['SV40 poly(A) signal'], Activation: 11.107
Token: CAAATA, Annotations: ['ori', 'RNAI'], Activation: 11.081
Token: CAAATA, Annotations: ['ori', 'RNAI'], Activation: 10.964
Token: CAAATA, Annotations: ['mRuby3'], Activation: 10.935
Token: CAAATA, Annotations: ['ori', 'RNAI'], Activation: 10.845
Token: CAAATA, Annotations: ['ori', 'RNAI'], Activation: 10.705
Token: CAAATA, Annotations: ['luxB'], Activation: 10.634
Token: CAAATA, Annotations: ['ori', 'RNAI'], Activation: 10.595
Token: CAAATA, Annotations: ['SV40 poly(A) signal'], Activation: 10.510
Token: CAAATA, Annotations: ['ori', 'RNAI'], Activation: 10.419
Token: CAAATA, Annotations: ['ori', 'RNAI'], Activation: 10.385
Token: CAAATA, Ann

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2807 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: GCCCGC, Annotations: ['CMV enhancer'], Activation: 15.127
Token: GCCCGC, Annotations: ['CMV enhancer'], Activation: 15.059
Token: GCCCGC, Annotations: ['CMV enhancer'], Activation: 15.023
Token: GCCCGC, Annotations: ['CMV enhancer'], Activation: 14.765
Token: GCCCGC, Annotations: ['CMV promoter'], Activation: 14.709
Token: GCCCGC, Annotations: ['CMV enhancer'], Activation: 14.644
Token: GCCCGC, Annotations: ['CMV enhancer'], Activation: 14.561
Token: GCCCGC, Annotations: ['CMV enhancer'], Activation: 14.485
Token: GCCCGC, Annotations: ['CMV enhancer'], Activation: 14.453
Token: GCCCGC, Annotations: ['CMV enhancer'], Activation: 14.400
Token: GCCCGC, Annotations: ['CMV enhancer'], Activation: 14.382
Token: GCCCGC, Annotations: ['CMV enhancer'], Activation: 14.360
Token: GCCCGC, Annotations: ['CMV enhancer'], Activation: 14.323
Token: GCCCGC, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2809 appears to detect: {'AmpR'}
Top 20 activating tokens and their annotations:
Token: GATAAC, Annotations: ['AmpR'], Activation: 12.947
Token: GATAAC, Annotations: ['AmpR'], Activation: 12.602
Token: AGATAA, Annotations: ['araC'], Activation: 12.591
Token: GATAAC, Annotations: ['AmpR'], Activation: 12.439
Token: GATAAC, Annotations: ['SAX_LITCT'], Activation: 12.286
Token: GATAAC, Annotations: ['AmpR'], Activation: 12.162
Token: GGATAA, Annotations: ['AmpR'], Activation: 12.053
Token: GATAAC, Annotations: ['araC'], Activation: 11.994
Token: GATAAC, Annotations: ['araC'], Activation: 11.971
Token: GATAAC, Annotations: ['AmpR'], Activation: 11.950
Token: GGATAA, Annotations: ['AmpR'], Activation: 11.919
Token: GATAAC, Annotations: ['spg'], Activation: 11.881
Token: GGATAA, Annotations: ['AmpR'], Activation: 11.858
Token: GGATAA, Annotations: ['ori'], Activation: 11.828
Token: GATAAC, Annotations: ['YO343_YEAST'], Activation: 11.807
Token: GGATAA, Annotations: ['AmpR'], Activati

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2822 appears to detect: {'MCS', 'UL126'}
Top 20 activating tokens and their annotations:
Token: TCAGAT, Annotations: ['MCS', 'UL126'], Activation: 12.091
Token: TCAGAT, Annotations: ['MCS', 'UL126'], Activation: 11.651
Token: TCAGAT, Annotations: ['UL126'], Activation: 11.479
Token: TCAGAT, Annotations: ['MCS', 'UL126'], Activation: 11.468
Token: TCAGAT, Annotations: ['MCS', 'UL126'], Activation: 11.353
Token: TCAGAT, Annotations: ['MCS', 'UL126'], Activation: 11.256
Token: TCAGAT, Annotations: ['MCS', 'UL126'], Activation: 11.100
Token: TCAGAT, Annotations: ['UL126'], Activation: 11.099
Token: TCAGAT, Annotations: ['UL126'], Activation: 11.010
Token: TCAGAT, Annotations: ['MCS', 'UL126'], Activation: 10.951
Token: TCAGAT, Annotations: ['MCS', 'UL126'], Activation: 10.861
Token: TCAGAT, Annotations: ['MCS', 'UL126'], Activation: 10.835
Token: TCAGAT, Annotations: ['UL126'], Activation: 10.718
Token: TCAGAT, Annotations: ['MCS', 'UL126'], Activation: 10.677
Token: TCAGAT, Annota

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2828 appears to detect: {'AmpR'}
Top 20 activating tokens and their annotations:
Token: GAGTCA, Annotations: ['AmpR'], Activation: 10.658
Token: GAGTCA, Annotations: ['AmpR'], Activation: 10.589
Token: GAGTCA, Annotations: ['AmpR'], Activation: 10.411
Token: GAGTCA, Annotations: ['AmpR'], Activation: 10.350
Token: CCAGTC, Annotations: [], Activation: 10.324
Token: GAGTCA, Annotations: ['AmpR'], Activation: 10.139
Token: GGAGTC, Annotations: ['AmpR'], Activation: 9.964
Token: TGAGTC, Annotations: ['T7 promoter', 'CMV intron'], Activation: 9.945
Token: GAGTCA, Annotations: ['AmpR'], Activation: 9.924
Token: GAGTCA, Annotations: ['AmpR'], Activation: 9.795
Token: GGAGTC, Annotations: ['AmpR'], Activation: 9.707
Token: GAGTCA, Annotations: ['AmpR'], Activation: 9.644
Token: GGAGTC, Annotations: ['AmpR'], Activation: 9.238
Token: GGAGTC, Annotations: ['AmpR'], Activation: 9.216
Token: GGAGTC, Annotations: ['bla(M)'], Activation: 9.152
Token: CAGTCA, Annotations: ['AmpR'], Activation

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2830 appears to detect: {'SAX_LITCT'}
Top 20 activating tokens and their annotations:
Token: ACCAAC, Annotations: ['SAX_LITCT'], Activation: 10.512
Token: ACCAAC, Annotations: ['VPE_VICSA'], Activation: 10.301
Token: GGTGAC, Annotations: ['SAX_LITCT'], Activation: 9.836
Token: GGCAAG, Annotations: ['mEGFP'], Activation: 9.639
Token: AACAAC, Annotations: ['SAX_LITCT'], Activation: 9.625
Token: GCTAGC, Annotations: ['SAX_LITCT'], Activation: 9.197
Token: ATCAAC, Annotations: ['SAX_LITCT'], Activation: 9.195
Token: AAACAG, Annotations: ['SAX_LITCT'], Activation: 9.141
Token: CCAGGT, Annotations: ['SAX_LITCT'], Activation: 8.965
Token: AGCATC, Annotations: ['ORF43'], Activation: 8.951
Token: GAACGT, Annotations: ['SAX_LITCT'], Activation: 8.943
Token: TAACGT, Annotations: ['lacI'], Activation: 8.943
Token: CTGGAT, Annotations: ['VPE_VICSA'], Activation: 8.905
Token: ACCTGG, Annotations: ['MBP'], Activation: 8.873
Token: TAACGC, Annotations: ['lacZ'], Activation: 8.820
Token: TGCCGA

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2836 appears to detect: {'AmpR promoter'}
Top 20 activating tokens and their annotations:
Token: ATGTGC, Annotations: ['AmpR promoter'], Activation: 11.375
Token: ATGTGC, Annotations: ['AmpR promoter'], Activation: 11.316
Token: ATGTGC, Annotations: ['AmpR promoter'], Activation: 10.998
Token: ATGTGC, Annotations: ['AmpR promoter'], Activation: 10.960
Token: ATGTGC, Annotations: ['AmpR promoter'], Activation: 10.809
Token: ATGTGC, Annotations: ['AmpR promoter'], Activation: 10.770
Token: ATGTGC, Annotations: ['AmpR promoter'], Activation: 10.681
Token: ATGTGC, Annotations: ['AmpR promoter'], Activation: 10.665
Token: ATGTGC, Annotations: ['AmpR promoter'], Activation: 10.656
Token: ATGTGC, Annotations: ['AmpR promoter'], Activation: 10.306
Token: ATGTGC, Annotations: ['AmpR promoter'], Activation: 10.245
Token: ATGTGC, Annotations: ['AmpR promoter'], Activation: 10.186
Token: ATGTGC, Annotations: ['FN1'], Activation: 10.053
Token: ATGTGC, Annotations: ['AmpR promoter'], Activat

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2845 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: GGTGGC, Annotations: ['ori'], Activation: 9.684
Token: GGTGGC, Annotations: ['ori'], Activation: 9.191
Token: TCTTGA, Annotations: ['ori'], Activation: 9.057
Token: GGTGGC, Annotations: ['ori'], Activation: 8.938
Token: ACACTA, Annotations: ['ori'], Activation: 8.829
Token: GGTGGC, Annotations: ['ori'], Activation: 8.723
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 8.632
Token: TCTTGA, Annotations: ['ori'], Activation: 8.577
Token: ATCGCT, Annotations: [], Activation: 8.542
Token: AGGATT, Annotations: ['ori'], Activation: 8.321
Token: TCTTGA, Annotations: ['ori'], Activation: 8.279
Token: CCATGG, Annotations: [], Activation: 8.230
Token: ACCACT, Annotations: [], Activation: 8.188
Token: GCGGTG, Annotations: ['ori', 'RNAI'], Activation: 8.158
Token: TCTTGA, Annotations: ['ori'], Activation: 8.085
Token: TACACT, Annotations: ['ori'], Activation: 8.062
Token: CGGCCG, Annotati

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2847 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: ACATAA, Annotations: ['CMV enhancer'], Activation: 12.340
Token: ACATAA, Annotations: ['CMV enhancer'], Activation: 12.318
Token: ACATAA, Annotations: ['CMV enhancer'], Activation: 12.076
Token: ACATAA, Annotations: ['CMV enhancer'], Activation: 11.957
Token: ACATAA, Annotations: ['CMV enhancer'], Activation: 11.697
Token: ACATAA, Annotations: ['CMV enhancer'], Activation: 11.469
Token: ACATAA, Annotations: ['CMV enhancer'], Activation: 11.469
Token: ACATAA, Annotations: ['CMV enhancer'], Activation: 11.453
Token: ACATAA, Annotations: ['CMV enhancer'], Activation: 11.302
Token: ACATAA, Annotations: ['CMV enhancer'], Activation: 11.058
Token: ACATAA, Annotations: ['DsRed2'], Activation: 11.024
Token: ACATAA, Annotations: ['K3'], Activation: 10.914
Token: ACATAA, Annotations: ['CMV enhancer'], Activation: 10.821
Token: ACATAA, Annotations: [], Activation: 10.726
Token: ACATAA, Annotati

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2851 appears to detect: {'VPS16'}
Top 20 activating tokens and their annotations:
Token: CTCTTG, Annotations: ['AmpR'], Activation: 59.245
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 57.218
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 52.403
Token: ACGGGA, Annotations: ['AmpR'], Activation: 52.331
Token: CTTACA, Annotations: ['VPS16'], Activation: 48.590
Token: TAGGCG, Annotations: ['VPS16'], Activation: 48.408
Token: AGACAG, Annotations: ['VPS16'], Activation: 47.439
Token: CCGCAT, Annotations: ['VPS16'], Activation: 46.647
Token: TCGGAT, Annotations: ['VPS16'], Activation: 46.411
Token: CTTTTC, Annotations: ['AmpR'], Activation: 45.519
Token: GTCTAG, Annotations: ['VPS16'], Activation: 44.742
Token: ACTTTT, Annotations: [], Activation: 44.216
Token: CTTCAG, Annotations: ['VPS16'], Activation: 43.748
Token: TAATAC, Annotations: ['AmpR'], Activation: 43.667
Token: CCGCTG, Annotations: ['VPS16'], Activation: 43.249
Token: <cls>, Annotatio

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2867 appears to detect: {'SV40 poly(A) signal'}
Top 20 activating tokens and their annotations:
Token: TGTTGT, Annotations: ['SV40 poly(A) signal'], Activation: 15.382
Token: TGTTGT, Annotations: ['SV40 poly(A) signal'], Activation: 15.068
Token: TGTTGT, Annotations: ['SV40 poly(A) signal'], Activation: 14.626
Token: TGTTGT, Annotations: ['SV40 poly(A) signal'], Activation: 14.291
Token: TGTTGT, Annotations: ['SV40 poly(A) signal'], Activation: 14.120
Token: TGTTGT, Annotations: ['SV40 poly(A) signal'], Activation: 14.096
Token: TGTTGT, Annotations: ['SV40 poly(A) signal'], Activation: 14.057
Token: TGTTGT, Annotations: ['SV40 poly(A) signal'], Activation: 14.056
Token: TGTTGT, Annotations: ['SV40 poly(A) signal'], Activation: 13.999
Token: TGTTGT, Annotations: ['SV40 poly(A) signal'], Activation: 13.722
Token: TGTTGT, Annotations: ['SV40 poly(A) signal'], Activation: 13.611
Token: TGTTGT, Annotations: ['SV40 poly(A) signal'], Activation: 13.359
Token: TGTTGT, Annotations: [], 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2872 appears to detect: {'RSGFP2'}
Top 20 activating tokens and their annotations:
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 67.690
Token: CGTTTA, Annotations: ['(3-F)Tyr-EGFP'], Activation: 65.065
Token: ACATCA, Annotations: ['Citrine'], Activation: 46.398
Token: TGTCGG, Annotations: ['RSGFP2'], Activation: 45.969
Token: GGTTCA, Annotations: ['RSGFP2'], Activation: 45.172
Token: CCAGGG, Annotations: ['RSGFP2'], Activation: 44.940
Token: GCACGG, Annotations: ['RSGFP2'], Activation: 43.853
Token: TGTGCC, Annotations: ['RSGFP2'], Activation: 43.626
Token: TATAGA, Annotations: ['RSGFP2'], Activation: 43.505
Token: GCTTCA, Annotations: ['Citrine'], Activation: 43.371
Token: CGGACT, Annotations: ['RSGFP2'], Activation: 43.249
Token: CGATGC, Annotations: ['RSGFP2'], Activation: 42.959
Token: AGCCTT, Annotations: ['RSGFP2'], Activation: 42.813
Token: TCTGCT, Annotations: ['RSGFP2'], Activation: 42.789
Token: CCAGCT, Annotations: ['RSGFP2'], Activation: 42.694
To

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2888 appears to detect: {'C9orf85', 'hGH poly(A) signal'}
Top 20 activating tokens and their annotations:
Token: CTGCCT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 18.217
Token: CTGCCA, Annotations: ['C9orf85'], Activation: 8.583
Token: CAGCCT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 7.113
Token: CCACTG, Annotations: ['hGH poly(A) signal'], Activation: 6.117
Token: GCATGA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 5.873
Token: TCACCA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 5.700
Token: TGGTCT, Annotations: ['hGH poly(A) signal'], Activation: 3.965
Token: TGCATG, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 3.298
Token: AGGTGC, Annotations: ['C9orf85'], Activation: 3.208
Token: TGCCAT, Annotations: [], Activation: 3.031
Token: GCTCAT, Annotations: [], Activation: 2.871
Token: TCTCCG, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 2.856
Token: ATGGTT, Annotations: [], Activatio

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2903 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: GGAGTA, Annotations: ['CMV enhancer'], Activation: 12.102
Token: GGAGTA, Annotations: ['CMV enhancer'], Activation: 11.992
Token: GGAGTA, Annotations: ['CMV enhancer'], Activation: 11.825
Token: GGAGTA, Annotations: ['CMV enhancer'], Activation: 11.825
Token: GGAGTA, Annotations: ['CMV enhancer'], Activation: 11.779
Token: GGAGTA, Annotations: ['CMV enhancer'], Activation: 11.708
Token: GGAGTA, Annotations: ['CMV enhancer'], Activation: 11.632
Token: GGAGTA, Annotations: ['CMV enhancer'], Activation: 11.373
Token: GGAGTA, Annotations: ['CMV enhancer'], Activation: 11.358
Token: TCAAGT, Annotations: ['CMV enhancer'], Activation: 11.229
Token: GGAGTA, Annotations: ['CMV enhancer'], Activation: 11.214
Token: GGAGTA, Annotations: ['CMV enhancer'], Activation: 11.166
Token: GGAGTA, Annotations: ['CMV enhancer'], Activation: 11.148
Token: GGAGTA, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2928 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: TGACGT, Annotations: ['CMV enhancer'], Activation: 15.346
Token: TGACGT, Annotations: ['CMV enhancer'], Activation: 15.135
Token: TGACGT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 15.061
Token: TGACGT, Annotations: ['CMV enhancer'], Activation: 14.918
Token: TGACGT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 14.748
Token: TGACGT, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 14.736
Token: TGACGT, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 14.600
Token: TGACGT, Annotations: ['CMV enhancer'], Activation: 14.563
Token: TGACGT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 14.518
Token: TGACGT, Annotations: ['CMV enhancer'], Activation: 14.491
Token: TGACGT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 14.473
Token: TGACGT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 14.4

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 2933 appears to detect: {'CMV enhancer', 'CMV promoter', 'UL126'}
Top 20 activating tokens and their annotations:
Token: CCCATT, Annotations: ['Rep101'], Activation: 13.359
Token: CCCATT, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 12.959
Token: CCCATT, Annotations: ['CMV enhancer'], Activation: 12.922
Token: CCCATT, Annotations: ['CMV enhancer'], Activation: 12.745
Token: CCCATT, Annotations: ['CMV enhancer'], Activation: 12.620
Token: CCCATT, Annotations: ['CMV enhancer'], Activation: 12.612
Token: CCCATT, Annotations: ['CMV enhancer'], Activation: 12.579
Token: CCCATT, Annotations: ['CMV enhancer'], Activation: 12.572
Token: CCCATT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.527
Token: CCCATT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.426
Token: CCCATT, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 12.370
Token: CCCATT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.331
Token: CCCATT, Annotations:

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 2987 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: ATACCT, Annotations: ['ori'], Activation: 12.756
Token: ATACCT, Annotations: ['ori'], Activation: 12.675
Token: ATACCT, Annotations: ['ori'], Activation: 12.495
Token: ATACCT, Annotations: ['ori'], Activation: 12.492
Token: ATACCT, Annotations: ['ori'], Activation: 12.388
Token: ATACCT, Annotations: ['ori'], Activation: 12.306
Token: TACCTG, Annotations: ['ori'], Activation: 12.169
Token: ATACCT, Annotations: ['ori'], Activation: 12.001
Token: TACCTG, Annotations: [], Activation: 11.980
Token: TACCTG, Annotations: ['ori'], Activation: 11.979
Token: TACCTG, Annotations: ['ori'], Activation: 11.932
Token: ATACCT, Annotations: ['ori'], Activation: 11.912
Token: ATACCT, Annotations: ['ori'], Activation: 11.856
Token: ATACCT, Annotations: ['ori'], Activation: 11.840
Token: TACCTG, Annotations: ['ori'], Activation: 11.803
Token: CTACCT, Annotations: ['ERG12'], Activation: 11.772
Token: TACCTG, Anno

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3003 appears to detect: {'hGH poly(A) signal'}
Top 20 activating tokens and their annotations:
Token: GCCCGG, Annotations: [], Activation: 14.165
Token: GCCTGG, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 10.254
Token: AGGGAG, Annotations: ['hGH poly(A) signal'], Activation: 9.997
Token: GCGTGT, Annotations: ['C16orf89'], Activation: 9.792
Token: GCTCCC, Annotations: ['hGH poly(A) signal'], Activation: 8.885
Token: CAGGCA, Annotations: [], Activation: 8.668
Token: TTCCCT, Annotations: ['hGH poly(A) signal'], Activation: 7.899
Token: CCAATT, Annotations: [], Activation: 5.728
Token: CAGTGG, Annotations: ['hGH poly(A) signal'], Activation: 5.664
Token: AGGCAT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 5.576
Token: CAGCAC, Annotations: [], Activation: 5.491
Token: TCCCTG, Annotations: ['hGH poly(A) signal'], Activation: 5.233
Token: CTCCCT, Annotations: ['hGH poly(A) signal'], Activation: 4.760
Token: AGCCCC, Annotations: [], Activation: 4.526
Tok

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3007 appears to detect: {'(3-F)Tyr-EGFP'}
Top 20 activating tokens and their annotations:
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 48.663
Token: TACTGT, Annotations: ['Blue102'], Activation: 27.938
Token: TGCTTG, Annotations: ['(3-F)Tyr-EGFP'], Activation: 23.639
Token: TACTCC, Annotations: ['(3-F)Tyr-EGFP'], Activation: 22.806
Token: TAGCCT, Annotations: ['EGFP'], Activation: 22.749
Token: TAGCCT, Annotations: ['EGFP'], Activation: 22.652
Token: CTCTGC, Annotations: ['Blue102'], Activation: 22.605
Token: TTCTTC, Annotations: ['(3-F)Tyr-EGFP'], Activation: 22.538
Token: TGCTGC, Annotations: ['(3-F)Tyr-EGFP'], Activation: 22.092
Token: GGGTAG, Annotations: ['(3-F)Tyr-EGFP'], Activation: 21.934
Token: TTCTGC, Annotations: ['EGFP'], Activation: 21.667
Token: ATGATA, Annotations: ['(3-F)Tyr-EGFP'], Activation: 21.451
Token: ACCTTG, Annotations: ['(3-F)Tyr-EGFP'], Activation: 21.353
Token: TAGTCC, Annotations: ['Blue102'], Activation: 21.272
Token: AAGTTC, An

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3020 appears to detect: {'C9orf85', 'hGH poly(A) signal'}
Top 20 activating tokens and their annotations:
Token: AACCAA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 16.011
Token: GAACCA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 15.155
Token: ATTGGG, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 14.641
Token: TATTGG, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 13.439
Token: TCTGTC, Annotations: ['C9orf85'], Activation: 13.342
Token: GGGTCT, Annotations: ['hGH poly(A) signal'], Activation: 12.407
Token: GCTGGA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 12.200
Token: GGGGTC, Annotations: ['hGH poly(A) signal'], Activation: 12.165
Token: GGAAGA, Annotations: ['hGH poly(A) signal'], Activation: 10.196
Token: CAGGGA, Annotations: ['hGH poly(A) signal'], Activation: 9.933
Token: AGCTGG, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 9.074
Token: CCTGCG, Annotations: ['hGH poly(A) signal'], 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3030 appears to detect: {'SmR'}
Top 20 activating tokens and their annotations:
Token: CGACGA, Annotations: ['SmR'], Activation: 22.537
Token: CGACGA, Annotations: ['SmR'], Activation: 22.438
Token: TGTGGA, Annotations: [], Activation: 22.401
Token: CGACGA, Annotations: ['SmR'], Activation: 22.076
Token: CGACGA, Annotations: ['SmR'], Activation: 21.796
Token: ACGACG, Annotations: ['SmR'], Activation: 20.929
Token: ACGACG, Annotations: ['SmR'], Activation: 20.801
Token: ACGACG, Annotations: ['SmR'], Activation: 20.639
Token: ACGACG, Annotations: ['SmR'], Activation: 20.504
Token: TCGACG, Annotations: [], Activation: 20.376
Token: ACGACG, Annotations: ['SmR'], Activation: 18.840
Token: ACGACG, Annotations: ['SmR'], Activation: 18.781
Token: ACGACG, Annotations: ['SmR'], Activation: 18.473
Token: GATGAC, Annotations: ['SacB'], Activation: 18.086
Token: TTTAGA, Annotations: ['dCas9'], Activation: 17.716
Token: GACGAC, Annotations: ['aadA'], Activation: 16.729
Token: CGATGA, Annotat

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3036 appears to detect: {'f1 ori'}
Top 20 activating tokens and their annotations:
Token: GCCTTA, Annotations: [], Activation: 15.122
Token: CCTTCC, Annotations: ['f1 ori'], Activation: 14.242
Token: CTTTCT, Annotations: ['f1 ori'], Activation: 12.893
Token: CCTTCC, Annotations: ['f1 ori'], Activation: 12.291
Token: CTTCCC, Annotations: ['f1 ori'], Activation: 12.281
Token: CCTTCC, Annotations: ['f1 ori'], Activation: 12.259
Token: CCTTCC, Annotations: ['f1 ori'], Activation: 12.241
Token: CTTCCC, Annotations: ['f1 ori'], Activation: 12.167
Token: TTTCTT, Annotations: ['f1 ori'], Activation: 12.128
Token: TTTCTT, Annotations: ['f1 ori'], Activation: 11.992
Token: CTTCCC, Annotations: ['f1 ori'], Activation: 11.914
Token: CTTTCT, Annotations: ['M13 ori'], Activation: 11.784
Token: TTTCTT, Annotations: ['f1 ori'], Activation: 11.767
Token: GCTTTC, Annotations: ['f1 ori'], Activation: 11.749
Token: CCCTTC, Annotations: ['f1 ori'], Activation: 11.743
Token: CGCTTT, Annotations: ['f

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3038 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: ATAGCT, Annotations: ['ori'], Activation: 12.724
Token: ATAGCT, Annotations: ['ori'], Activation: 12.625
Token: ATAGCT, Annotations: ['ori'], Activation: 12.599
Token: ATAGCT, Annotations: ['ori'], Activation: 12.473
Token: ATAGCT, Annotations: ['ori'], Activation: 12.426
Token: ATAGCT, Annotations: ['ori'], Activation: 12.423
Token: ATAGCT, Annotations: [], Activation: 12.207
Token: ATAGCT, Annotations: ['ori'], Activation: 12.195
Token: ATAGCT, Annotations: ['ori'], Activation: 12.170
Token: ATAGCT, Annotations: ['ori'], Activation: 12.160
Token: ATAGCT, Annotations: ['ori'], Activation: 12.108
Token: ATAGCT, Annotations: ['ori'], Activation: 12.081
Token: ATAGCT, Annotations: ['ori'], Activation: 12.053
Token: ATAGCT, Annotations: ['ori'], Activation: 11.995
Token: ATAGCT, Annotations: ['ori'], Activation: 11.884
Token: ATAGCT, Annotations: ['ori'], Activation: 11.766
Token: ATAGCT, Annota

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3042 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 13.638
Token: CATTGA, Annotations: ['CMV promoter', 'UL126'], Activation: 12.841
Token: CATTGA, Annotations: ['SV40 poly(A) signal', 'VP1_SV40'], Activation: 12.595
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 12.534
Token: CATTGG, Annotations: ['pseT'], Activation: 12.471
Token: CATTGG, Annotations: ['Thbs2'], Activation: 12.071
Token: CATTGA, Annotations: ['CMV promoter', 'UL126'], Activation: 11.919
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 11.862
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 11.839
Token: CATTGA, Annotations: ['CMV IE94 promoter', 'CMV enhancer'], Activation: 11.725
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 11.680
Token: GATTGG, Annotations: ['ApmR'], Activation: 11.577
Token: CATTGA, Annotations: ['CMV enhancer'], Activation: 11.503
Token: CATTGA, Annotati

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3062 appears to detect: {'tight TRE promoter'}
Top 20 activating tokens and their annotations:
Token: TTTACT, Annotations: ['tight TRE promoter'], Activation: 12.155
Token: TTTACT, Annotations: ['tight TRE promoter'], Activation: 11.471
Token: TTACTC, Annotations: ['tight TRE promoter'], Activation: 10.902
Token: TTACTC, Annotations: ['tight TRE promoter'], Activation: 10.776
Token: TTACTC, Annotations: ['tight TRE promoter'], Activation: 10.599
Token: TTTACT, Annotations: ['tight TRE promoter'], Activation: 10.591
Token: TTTACT, Annotations: ['tight TRE promoter'], Activation: 10.187
Token: TTACTC, Annotations: ['tight TRE promoter'], Activation: 10.004
Token: TTACTC, Annotations: ['tight TRE promoter'], Activation: 9.810
Token: TTACTC, Annotations: ['tight TRE promoter'], Activation: 9.552
Token: GTTTAC, Annotations: [], Activation: 6.305
Token: AGTTTA, Annotations: [], Activation: 5.990
Token: AGTTTA, Annotations: [], Activation: 5.982
Token: TTTACT, Annotations: [], Activat

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3063 appears to detect: {'KanR', 'kanMX'}
Top 20 activating tokens and their annotations:
Token: AACATG, Annotations: ['KanR', 'kanMX'], Activation: 14.630
Token: AACATG, Annotations: ['kanMX', 'KanR'], Activation: 13.917
Token: AACATG, Annotations: ['KanR', 'kanMX'], Activation: 13.896
Token: AACATG, Annotations: ['kanMX', 'KanR'], Activation: 13.879
Token: AACATG, Annotations: ['HSPA14'], Activation: 13.586
Token: AACATG, Annotations: ['KanR', 'kanMX'], Activation: 13.485
Token: AACATG, Annotations: ['KanR', 'kanMX'], Activation: 13.438
Token: AACATG, Annotations: ['KanR', 'kanMX'], Activation: 13.369
Token: AACATG, Annotations: [], Activation: 13.329
Token: AACATG, Annotations: ['KanR', 'kanMX'], Activation: 13.232
Token: AACATG, Annotations: ['KanR', 'kanMX'], Activation: 13.171
Token: AACATG, Annotations: ['rop'], Activation: 13.147
Token: AACATG, Annotations: ['KanR', 'kanMX'], Activation: 13.109
Token: AACATG, Annotations: ['PDHA1'], Activation: 13.092
Token: AACATG, Ann

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3130 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: TGACCG, Annotations: ['CMV enhancer'], Activation: 14.589
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 14.418
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 14.303
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 14.139
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 14.052
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 13.992
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 13.988
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 13.915
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 13.872
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 13.848
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 13.808
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 13.772
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 13.719
Token: GCTGAC, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3175 appears to detect: {'f1 ori'}
Top 20 activating tokens and their annotations:
Token: TCTTAA, Annotations: [], Activation: 12.157
Token: ATTAAG, Annotations: ['f1 ori'], Activation: 10.895
Token: ATTAAG, Annotations: ['f1 ori'], Activation: 10.683
Token: ATTAAG, Annotations: ['f1 ori'], Activation: 10.385
Token: ATTAAG, Annotations: ['f1 ori'], Activation: 10.377
Token: ATTAAG, Annotations: ['f1 ori'], Activation: 10.349
Token: ATTAAG, Annotations: ['f1 ori'], Activation: 10.248
Token: ATTAAG, Annotations: ['f1 ori'], Activation: 10.232
Token: AATTAA, Annotations: ['CAP binding site', 'penA'], Activation: 10.079
Token: ATTAAG, Annotations: ['HELZ'], Activation: 10.058
Token: ATTAAG, Annotations: ['rrnB T1 terminator'], Activation: 10.006
Token: ATTAAG, Annotations: ['f1 ori'], Activation: 9.985
Token: ATTAAG, Annotations: ['f1 ori'], Activation: 9.855
Token: TTAAGC, Annotations: ['f1 ori'], Activation: 9.742
Token: ATTAAG, Annotations: ['f1 ori'], Activation: 9.730
Token: A

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3220 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: CTCCCT, Annotations: ['ori'], Activation: 12.752
Token: CTCCCT, Annotations: ['ori'], Activation: 12.680
Token: CTCCCT, Annotations: ['ori'], Activation: 12.142
Token: CTCCCT, Annotations: ['ori'], Activation: 12.125
Token: CTCCCT, Annotations: ['ori'], Activation: 12.120
Token: CTCCCT, Annotations: ['f1 ori'], Activation: 12.040
Token: CTCCCT, Annotations: ['ori'], Activation: 11.894
Token: CTCCCT, Annotations: ['ori'], Activation: 11.753
Token: CTCCCT, Annotations: ['ori'], Activation: 11.717
Token: CTCCCT, Annotations: ['ori'], Activation: 11.687
Token: CTCCCT, Annotations: ['ori'], Activation: 11.642
Token: CTCCCT, Annotations: ['ori'], Activation: 11.580
Token: CTCCCT, Annotations: ['f1 ori'], Activation: 11.579
Token: TCCCTT, Annotations: ['f1 ori'], Activation: 11.561
Token: CTCCCT, Annotations: ['f1 ori'], Activation: 11.552
Token: CTCCCT, Annotations: ['ori'], Activation: 11.486
Toke

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3228 appears to detect: {'C9orf85'}
Top 20 activating tokens and their annotations:
Token: AGCCTC, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 17.766
Token: AGCCTC, Annotations: ['C9orf85'], Activation: 17.517
Token: CTGCCA, Annotations: ['C9orf85'], Activation: 16.912
Token: CCTCCC, Annotations: ['hGH poly(A) signal'], Activation: 14.756
Token: CGGTGG, Annotations: [], Activation: 14.599
Token: CAGCCT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 14.541
Token: GCATGA, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 13.281
Token: GGGTTC, Annotations: ['C9orf85'], Activation: 12.994
Token: GCCCAG, Annotations: ['C9orf85'], Activation: 12.032
Token: CCAAGT, Annotations: ['C9orf85'], Activation: 11.658
Token: CCGAGT, Annotations: ['hGH poly(A) signal', 'C9orf85'], Activation: 11.608
Token: CTCCCA, Annotations: ['hGH poly(A) signal'], Activation: 11.408
Token: <pad>, Annotations: ['special token: <pad>'], Activation: 10.974
Token: CAGCCT, 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3237 appears to detect: {'AmpR'}
Top 20 activating tokens and their annotations:
Token: AGCTAG, Annotations: ['AmpR'], Activation: 12.877
Token: AGCTAG, Annotations: ['AmpR'], Activation: 12.864
Token: AGCTAG, Annotations: ['AmpR'], Activation: 12.860
Token: AGCTAG, Annotations: ['AmpR'], Activation: 12.567
Token: AGCTAG, Annotations: ['AmpR'], Activation: 12.369
Token: AGCTAG, Annotations: ['AmpR'], Activation: 12.311
Token: AGCTAG, Annotations: ['AmpR'], Activation: 12.172
Token: AGCTAG, Annotations: ['AmpR'], Activation: 12.145
Token: AGCTAG, Annotations: ['AmpR'], Activation: 12.116
Token: AGCTAG, Annotations: ['AmpR'], Activation: 12.063
Token: AGCTAG, Annotations: ['AmpR'], Activation: 12.032
Token: AGCTAG, Annotations: ['gRNA scaffold'], Activation: 11.866
Token: AGCTAG, Annotations: ['T7 promoter', 'CMV intron'], Activation: 11.497
Token: AGCTAG, Annotations: [], Activation: 11.450
Token: AGCTAG, Annotations: ['gRNA scaffold', 'tracrRNA'], Activation: 11.441
Token: AGCT

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3272 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: TGACGT, Annotations: ['EYFP'], Activation: 12.634
Token: TGACGT, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 12.570
Token: TGACGT, Annotations: ['CMV enhancer'], Activation: 12.364
Token: GACGTC, Annotations: ['CMV enhancer'], Activation: 12.362
Token: TTGACG, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 12.357
Token: GACGTC, Annotations: ['hphMX6', 'HygR'], Activation: 12.263
Token: TGACGT, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 12.151
Token: TGACGT, Annotations: ['CMV enhancer'], Activation: 12.102
Token: TGACGT, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 12.068
Token: GACGTC, Annotations: ['CMV enhancer'], Activation: 12.067
Token: GACGTC, Annotations: ['CMV enhancer'], Activation: 11.952
Token: GACGTC, Annotations: ['CMV enhancer'], Activation: 11.947
Token: GACGTC, Annotations: [

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3276 appears to detect: {'EGFP'}
Top 20 activating tokens and their annotations:
Token: CCTGAG, Annotations: ['EGFP'], Activation: 27.080
Token: CCTGAG, Annotations: ['cEGFP'], Activation: 24.765
Token: CCTGAG, Annotations: ['mGreenLantern'], Activation: 23.503
Token: CCTGAG, Annotations: ['EGFP'], Activation: 23.105
Token: CCTGAG, Annotations: ['mEmerald'], Activation: 22.455
Token: CCTGAG, Annotations: ['EGFP'], Activation: 20.646
Token: CGAGGG, Annotations: ['E2-Crimson'], Activation: 20.433
Token: CGTCCA, Annotations: ['mGreenLantern'], Activation: 20.410
Token: CAACAG, Annotations: ['mEGFP'], Activation: 19.682
Token: CGTGGA, Annotations: ['E2-Crimson'], Activation: 19.487
Token: CCTGAG, Annotations: ['EGFP'], Activation: 19.096
Token: CAACAG, Annotations: ['mEmerald'], Activation: 18.798
Token: CAACAG, Annotations: ['EGFP'], Activation: 18.764
Token: CCAGCA, Annotations: ['EGFP'], Activation: 18.688
Token: CTTTAG, Annotations: ['mTurquoise2'], Activation: 18.439
Token: CA

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3325 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: TTGGCA, Annotations: ['CMV enhancer'], Activation: 16.187
Token: TTGGCA, Annotations: ['CMV enhancer'], Activation: 15.897
Token: TTGGCA, Annotations: ['CMV enhancer'], Activation: 15.770
Token: TTGGCA, Annotations: ['CMV enhancer'], Activation: 15.657
Token: TTGGCA, Annotations: ['CMV enhancer'], Activation: 15.541
Token: TTGGCA, Annotations: ['CMV enhancer'], Activation: 15.537
Token: TTGGCA, Annotations: ['CMV enhancer'], Activation: 15.535
Token: TTGGCA, Annotations: ['CMV enhancer'], Activation: 15.489
Token: TTGGCA, Annotations: ['CMV enhancer'], Activation: 15.414
Token: TTGGCA, Annotations: ['CMV enhancer'], Activation: 15.410
Token: TTGGCA, Annotations: ['CMV enhancer'], Activation: 15.396
Token: TTGGCA, Annotations: ['CMV enhancer'], Activation: 15.379
Token: TTGGCA, Annotations: ['CMV enhancer'], Activation: 15.358
Token: TTGGCA, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3332 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: GGCATT, Annotations: ['CMV enhancer'], Activation: 7.902
Token: <pad>, Annotations: ['special token: <pad>'], Activation: 7.847
Token: <pad>, Annotations: ['special token: <pad>'], Activation: 7.738
Token: GGCATT, Annotations: ['CMV enhancer'], Activation: 7.541
Token: GGCATT, Annotations: ['CMV enhancer'], Activation: 7.444
Token: GGCATT, Annotations: ['CMV enhancer'], Activation: 7.371
Token: GGCATT, Annotations: ['CMV enhancer'], Activation: 7.283
Token: GGCATT, Annotations: ['CMV enhancer'], Activation: 7.282
Token: GGCATT, Annotations: ['CMV enhancer'], Activation: 7.220
Token: GGCATT, Annotations: ['CMV enhancer'], Activation: 7.214
Token: GGCATT, Annotations: ['CMV enhancer'], Activation: 7.199
Token: GGCATT, Annotations: ['CMV enhancer'], Activation: 7.103
Token: GGCATT, Annotations: ['CMV enhancer'], Activation: 7.045
Token: GGCATT, Annotations: ['CMV enhancer'], Activation:

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3355 appears to detect: {'f1 ori'}
Top 20 activating tokens and their annotations:
Token: TGGTTA, Annotations: ['f1 ori'], Activation: 12.309
Token: TGGTTA, Annotations: ['f1 ori'], Activation: 11.943
Token: TGGTTA, Annotations: ['f1 ori'], Activation: 11.894
Token: TGGTTA, Annotations: ['f1 ori'], Activation: 11.834
Token: TGGTTA, Annotations: ['f1 ori'], Activation: 11.657
Token: TGGTTA, Annotations: ['f1 ori'], Activation: 11.648
Token: TGGTTA, Annotations: ['f1 ori'], Activation: 11.545
Token: TGGTTA, Annotations: ['f1 ori'], Activation: 11.365
Token: TGGTTA, Annotations: ['f1 ori'], Activation: 11.302
Token: TGGTTA, Annotations: ['f1 ori'], Activation: 11.283
Token: TGGTTA, Annotations: ['f1 ori'], Activation: 10.980
Token: TGGTTA, Annotations: ['f1 ori'], Activation: 10.898
Token: TGGTTA, Annotations: ['f1 ori'], Activation: 10.756
Token: TGGTTA, Annotations: ['f1 ori'], Activation: 10.706
Token: TGGTTA, Annotations: [], Activation: 10.567
Token: GTGGTT, Annotations: ['f1

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3370 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: AGTACA, Annotations: ['CMV enhancer'], Activation: 13.347
Token: AGTACA, Annotations: ['CMV IE94 promoter'], Activation: 12.026
Token: AGTACA, Annotations: ['CMV promoter'], Activation: 11.898
Token: AGTACA, Annotations: ['CMV enhancer'], Activation: 11.833
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 11.797
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 11.738
Token: AGTACA, Annotations: ['CMV enhancer'], Activation: 11.565
Token: AGTACA, Annotations: ['CMV enhancer'], Activation: 11.560
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 11.555
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 11.522
Token: AGTACA, Annotations: ['CMV enhancer'], Activation: 11.512
Token: AGTACA, Annotations: ['CMV enhancer'], Activation: 11.465
Token: GTACAT, Annotations: ['CMV enhancer'], Activation: 11.453
Token: GTACAT, Annotations: ['CMV enhancer'], Activat

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3378 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: AGGATC, Annotations: ['ori'], Activation: 12.937
Token: AGGATC, Annotations: ['ori'], Activation: 12.837
Token: AGGATC, Annotations: [], Activation: 12.645
Token: AGGATC, Annotations: ['ori'], Activation: 12.627
Token: AGGATC, Annotations: ['ori'], Activation: 12.561
Token: AGGATC, Annotations: ['ori'], Activation: 12.383
Token: AGGATG, Annotations: ['Dlg4'], Activation: 12.369
Token: AGGATC, Annotations: ['ori'], Activation: 12.363
Token: AGGATC, Annotations: [], Activation: 12.261
Token: AGGATC, Annotations: [], Activation: 12.178
Token: AGGATC, Annotations: ['ori'], Activation: 12.163
Token: AGGATC, Annotations: ['ori'], Activation: 12.134
Token: AGGATT, Annotations: ['KanR', 'kanMX'], Activation: 11.982
Token: AGGATC, Annotations: ['ori'], Activation: 11.978
Token: AGGATC, Annotations: ['ori'], Activation: 11.964
Token: AGGATC, Annotations: [], Activation: 11.899
Token: AGGATC, Annotation

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3394 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: CCGACC, Annotations: ['ori'], Activation: 11.762
Token: CCGACC, Annotations: ['RNAI'], Activation: 11.750
Token: CCGACC, Annotations: ['RSF ori'], Activation: 11.518
Token: CCGACC, Annotations: ['ori'], Activation: 11.501
Token: CCGACC, Annotations: ['ori'], Activation: 11.491
Token: CCGACC, Annotations: ['ori'], Activation: 11.457
Token: CCGACC, Annotations: ['ori'], Activation: 11.364
Token: CCGACC, Annotations: ['p15A ori'], Activation: 11.277
Token: CCGACC, Annotations: ['cas9'], Activation: 11.255
Token: CCGACC, Annotations: ['ori'], Activation: 11.166
Token: CCGACC, Annotations: ['ori'], Activation: 11.062
Token: CCGACC, Annotations: ['EGFP'], Activation: 11.062
Token: CCGACC, Annotations: ['ori'], Activation: 10.955
Token: CCGACC, Annotations: ['ori'], Activation: 10.911
Token: CCGACC, Annotations: ['ori'], Activation: 10.890
Token: CCCGAC, Annotations: [], Activation: 10.883
Token: CC

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3432 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: GGGTGG, Annotations: ['CMV enhancer'], Activation: 16.855
Token: GGGTGG, Annotations: ['CMV enhancer'], Activation: 16.803
Token: GGGTGG, Annotations: ['CMV enhancer'], Activation: 16.748
Token: GGGTGG, Annotations: ['CMV enhancer'], Activation: 16.594
Token: GGGTGG, Annotations: ['CMV enhancer'], Activation: 16.550
Token: GGGTGG, Annotations: ['CMV enhancer'], Activation: 16.496
Token: GGGTGG, Annotations: ['CMV enhancer'], Activation: 16.384
Token: GGGTGG, Annotations: ['CMV enhancer'], Activation: 16.303
Token: GGGTGG, Annotations: ['CMV enhancer'], Activation: 16.264
Token: GGGTGG, Annotations: ['CMV enhancer'], Activation: 16.248
Token: GGGTGG, Annotations: ['CMV enhancer'], Activation: 16.174
Token: GGGTGG, Annotations: ['CMV enhancer'], Activation: 16.140
Token: GGGTGG, Annotations: ['CMV enhancer'], Activation: 16.088
Token: GGGTGG, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3442 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: CTGACC, Annotations: ['avrBs3'], Activation: 12.187
Token: CTGACC, Annotations: ['CMV enhancer'], Activation: 11.892
Token: CTGACC, Annotations: ['CMV enhancer'], Activation: 11.611
Token: CTGACC, Annotations: ['CMV enhancer'], Activation: 11.555
Token: CTGACC, Annotations: ['CMV enhancer'], Activation: 11.358
Token: CTGACC, Annotations: ['CMV enhancer'], Activation: 11.306
Token: CTGACC, Annotations: ['avrBs3'], Activation: 11.269
Token: CTGACC, Annotations: ['CMV enhancer'], Activation: 10.866
Token: CTGACC, Annotations: ['CMV enhancer'], Activation: 10.734
Token: CTGACC, Annotations: ['CMV enhancer'], Activation: 10.734
Token: CTGACC, Annotations: ['CMV enhancer'], Activation: 10.727
Token: CTGACC, Annotations: ['CMV enhancer'], Activation: 10.686
Token: CTGACC, Annotations: ['CMV enhancer'], Activation: 10.612
Token: CTGACC, Annotations: ['CMV enhancer'], Activation: 10.543
Token

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3450 appears to detect: {'SV40 promoter'}
Top 20 activating tokens and their annotations:
Token: TTAGTC, Annotations: ['SV40 promoter'], Activation: 15.778
Token: TTAGTC, Annotations: ['SV40 promoter'], Activation: 15.705
Token: TTAGTC, Annotations: ['SV40 promoter'], Activation: 15.610
Token: TTAGTC, Annotations: ['SV40 promoter'], Activation: 15.111
Token: TTAGTC, Annotations: ['SV40 promoter'], Activation: 15.025
Token: TTAGTC, Annotations: ['SV40 promoter', 'SV40 ori', 'SELP_SV40'], Activation: 14.983
Token: TTAGTC, Annotations: ['SV40 promoter'], Activation: 14.928
Token: TTAGTC, Annotations: ['SV40 promoter'], Activation: 14.598
Token: TTAGTC, Annotations: ['SV40 promoter', 'SV40 ori'], Activation: 14.503
Token: TTAGTC, Annotations: ['SV40 promoter'], Activation: 14.486
Token: TTAGTC, Annotations: ['SV40 promoter'], Activation: 14.463
Token: TTAGTC, Annotations: ['SV40 promoter'], Activation: 14.372
Token: TTAGTC, Annotations: ['SV40 promoter'], Activation: 14.257
Token: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3534 appears to detect: {'CMV promoter', 'UL126'}
Top 20 activating tokens and their annotations:
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 13.224
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.459
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.409
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.381
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.093
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.026
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.018
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 11.870
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 11.846
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 11.805
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 11.759
Token: GGAGTT, Annotations: ['CMV promoter', 'UL126'], Activation: 11.756
Token:

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3579 appears to detect: {'CMV promoter', 'UL126'}
Top 20 activating tokens and their annotations:
Token: TCAACG, Annotations: ['CMV promoter', 'UL126'], Activation: 11.466
Token: TCAACG, Annotations: ['CMV promoter', 'UL126'], Activation: 11.357
Token: TCAACG, Annotations: ['UL126'], Activation: 11.044
Token: TCAACG, Annotations: ['CMV promoter', 'UL126'], Activation: 11.037
Token: TCAACG, Annotations: ['CMV promoter', 'UL126'], Activation: 10.890
Token: TCAACG, Annotations: ['CMV promoter', 'UL126'], Activation: 10.886
Token: TCAACG, Annotations: ['CMV promoter', 'UL126'], Activation: 10.872
Token: TCAACG, Annotations: ['CMV promoter', 'UL126'], Activation: 10.854
Token: TCAACG, Annotations: ['CMV promoter', 'UL126'], Activation: 10.775
Token: TCAACG, Annotations: ['CMV promoter', 'UL126'], Activation: 10.764
Token: TCAACG, Annotations: ['CMV promoter', 'UL126'], Activation: 10.758
Token: TCAACG, Annotations: ['CMV promoter', 'UL126'], Activation: 10.751
Token: TCAACG, Annotat

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3586 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: TTCTCC, Annotations: ['NeoR/KanR'], Activation: 14.762
Token: TTCTTC, Annotations: ['LEU2'], Activation: 14.680
Token: TTCTTC, Annotations: ['kanMX', 'KanR'], Activation: 14.602
Token: TTCTTC, Annotations: ['ori'], Activation: 14.506
Token: TTCTTC, Annotations: ['ori'], Activation: 14.496
Token: TTCTTC, Annotations: ['ori'], Activation: 14.396
Token: TTCTTC, Annotations: ['ori'], Activation: 14.251
Token: TTCTTC, Annotations: ['ori'], Activation: 14.240
Token: TTCTTC, Annotations: ['ori'], Activation: 14.202
Token: TCCTCC, Annotations: ['DHH1'], Activation: 14.196
Token: TTCTCC, Annotations: ['neo'], Activation: 14.188
Token: TTCTTC, Annotations: ['ori'], Activation: 14.104
Token: TTCTCC, Annotations: ['SmR'], Activation: 13.974
Token: TTCTCC, Annotations: ['chimeric intron'], Activation: 13.892
Token: TTCTTC, Annotations: ['AmpR'], Activation: 13.868
Token: TTCTTC, Annotations: ['ori'], Acti

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3601 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: CCTTAT, Annotations: ['CMV enhancer'], Activation: 19.204
Token: CCTTAT, Annotations: ['CMV enhancer'], Activation: 18.463
Token: CCTTAT, Annotations: ['CMV enhancer'], Activation: 17.700
Token: CCTTAT, Annotations: ['CMV enhancer'], Activation: 17.585
Token: CCTTAT, Annotations: ['CMV enhancer'], Activation: 17.363
Token: CCTTAT, Annotations: ['CMV enhancer'], Activation: 17.341
Token: CCTTAT, Annotations: ['CMV enhancer'], Activation: 17.309
Token: CCTTAT, Annotations: ['CMV enhancer'], Activation: 16.551
Token: CCTTAT, Annotations: ['CMV enhancer'], Activation: 15.858
Token: CCTTAT, Annotations: ['CMV enhancer'], Activation: 15.739
Token: CCTTAC, Annotations: ['CMV enhancer'], Activation: 14.468
Token: CCTTAT, Annotations: ['CMV promoter'], Activation: 13.400
Token: CCTTAT, Annotations: ['CMV enhancer'], Activation: 12.156
Token: CCTTAT, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3610 appears to detect: {'ori', 'RNAI'}
Top 20 activating tokens and their annotations:
Token: GAGCTA, Annotations: ['ori', 'RNAI'], Activation: 13.041
Token: GAGCTA, Annotations: ['ori', 'RNAI'], Activation: 12.836
Token: GAGCTA, Annotations: ['ori', 'RNAI'], Activation: 12.664
Token: GAGCTA, Annotations: ['ori', 'RNAI'], Activation: 12.657
Token: GAGCTA, Annotations: ['ori', 'RNAI'], Activation: 12.571
Token: GAGCTA, Annotations: ['ori', 'RNAI'], Activation: 12.512
Token: GAGCTA, Annotations: ['ori', 'RNAI'], Activation: 12.482
Token: GAGCTA, Annotations: ['ori', 'RNAI'], Activation: 12.347
Token: GAGCTA, Annotations: ['ori', 'RNAI'], Activation: 12.234
Token: GAGCTA, Annotations: ['ori', 'RNAI'], Activation: 12.201
Token: GAGCTA, Annotations: ['ori', 'RNAI'], Activation: 12.160
Token: GAGCTA, Annotations: ['ori', 'RNAI'], Activation: 12.130
Token: GAGCTA, Annotations: ['ori', 'RNAI'], Activation: 12.107
Token: GAGCTA, Annotations: ['ori'], Activation: 11.468
Token: GAGCTA, A

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3641 appears to detect: {'EGFP'}
Top 20 activating tokens and their annotations:
Token: TTCTAT, Annotations: ['WNT6'], Activation: 53.885
Token: CATTAT, Annotations: ['EGFP'], Activation: 53.287
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 48.148
Token: AATGCA, Annotations: ['mPlum'], Activation: 47.369
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 46.867
Token: TATCAT, Annotations: ['EGFP'], Activation: 44.742
Token: CCTGGG, Annotations: ['EGFP'], Activation: 43.635
Token: CTACCT, Annotations: ['EGFP'], Activation: 43.372
Token: TATCAT, Annotations: ['EGFP'], Activation: 43.325
Token: TATCAT, Annotations: ['EGFP'], Activation: 42.742
Token: TATCAT, Annotations: ['mEGFP'], Activation: 42.560
Token: CTACCA, Annotations: ['EGFP'], Activation: 42.518
Token: CTACCA, Annotations: ['mEGFP'], Activation: 42.339
Token: CTCCCT, Annotations: ['mPlum'], Activation: 42.330
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 42.199
Token: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3659 appears to detect: {'SmR'}
Top 20 activating tokens and their annotations:
Token: CTTGCT, Annotations: ['SmR'], Activation: 18.332
Token: CTTGCT, Annotations: ['SmR'], Activation: 17.593
Token: CTTGCT, Annotations: ['SmR'], Activation: 16.826
Token: CTTGCT, Annotations: ['SmR'], Activation: 16.172
Token: GACAAA, Annotations: ['SmR'], Activation: 13.376
Token: GACAAA, Annotations: ['SmR'], Activation: 13.298
Token: GACAAA, Annotations: ['SmR'], Activation: 12.365
Token: GACAAA, Annotations: ['SmR'], Activation: 12.059
Token: AGATCT, Annotations: [], Activation: 8.318
Token: GGCTAT, Annotations: ['SmR'], Activation: 8.296
Token: AGCAAG, Annotations: ['SmR'], Activation: 7.934
Token: ACCAGA, Annotations: [], Activation: 7.866
Token: TCTTGC, Annotations: ['SmR'], Activation: 7.844
Token: TCTTGC, Annotations: ['SmR'], Activation: 7.707
Token: TGTGTT, Annotations: [], Activation: 7.625
Token: AGTATC, Annotations: [], Activation: 7.588
Token: AGATAC, Annotations: [], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3662 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: CCCATT, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 15.703
Token: CCCATT, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 15.473
Token: CCCATT, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 15.275
Token: CCCATT, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 15.265
Token: CCCATT, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 15.234
Token: CCCATT, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 15.137
Token: CCCATT, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 14.935
Token: CCCATT, Annotations: ['CMV enhancer'], Activation: 14.688
Token: CCCATT, Annotations: ['CMV enhancer'], Activation: 14.441
Token: CCCATT, Annotations: ['CMV enhancer'], Activation: 14.410
Token: CCCATT, Annotations: ['CMV enhancer'], Activation: 14.405
Token: CCCATT

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3670 appears to detect: {'AmpR'}
Top 20 activating tokens and their annotations:
Token: ATTCAG, Annotations: ['AmpR'], Activation: 12.047
Token: ATTCAG, Annotations: ['AmpR'], Activation: 12.001
Token: ATTCAG, Annotations: ['lacZα'], Activation: 11.999
Token: ATTCAG, Annotations: ['AmpR'], Activation: 11.993
Token: ATTCAG, Annotations: ['AmpR'], Activation: 11.944
Token: ATTCAG, Annotations: ['KanR', 'kanMX'], Activation: 11.877
Token: ATTCAG, Annotations: ['lacZα'], Activation: 11.861
Token: ATTCAG, Annotations: ['AmpR'], Activation: 11.817
Token: ATTCAG, Annotations: ['AmpR'], Activation: 11.797
Token: ATTCAG, Annotations: ['lacZα'], Activation: 11.745
Token: ATTCAG, Annotations: ['lacZα'], Activation: 11.723
Token: ATTCAG, Annotations: ['IBMP_CAMVB'], Activation: 11.681
Token: ATTCAG, Annotations: ['AmpR'], Activation: 11.673
Token: ATTCAG, Annotations: ['lacZα'], Activation: 11.662
Token: ATTCAG, Annotations: ['araBAD promoter'], Activation: 11.632
Token: ATTCAG, Annotation

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3748 appears to detect: {'CMV enhancer', 'CMV promoter', 'UL126'}
Top 20 activating tokens and their annotations:
Token: GTCCCC, Annotations: ['SV40 promoter'], Activation: 9.376
Token: GTCTCC, Annotations: ['OCRL'], Activation: 9.204
Token: GTCTCC, Annotations: ['DPP4'], Activation: 8.804
Token: GTCTCC, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 8.797
Token: GTCTCC, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 8.518
Token: TGATGC, Annotations: ['KanR', 'kanMX'], Activation: 8.517
Token: GTCTCC, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 8.474
Token: GTCTCC, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 8.464
Token: GATCGC, Annotations: ['unc-119'], Activation: 8.362
Token: GTCTCC, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 8.330
Token: GTCTTC, Annotations: [], Activation: 8.274
Token: CAGTCT, Annotations: ['rrnB T1 terminator'], Activation: 8.240
Token: GTCTC

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3774 appears to detect: {'EGFP'}
Top 20 activating tokens and their annotations:
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 51.810
Token: CTATAT, Annotations: ['EBFP2'], Activation: 48.778
Token: <cls>, Annotations: ['special token: <cls>'], Activation: 47.041
Token: CTTCTT, Annotations: ['cp173Venus'], Activation: 46.893
Token: GGAGGA, Annotations: ['EGFP'], Activation: 46.353
Token: CTTCTT, Annotations: ['Citrine'], Activation: 45.191
Token: CGAGCT, Annotations: ['EGFP'], Activation: 45.081
Token: GGAGGA, Annotations: ['EGFP'], Activation: 44.535
Token: CGAGCT, Annotations: ['EGFP'], Activation: 44.286
Token: CTTCTT, Annotations: ['EGFP'], Activation: 43.431
Token: GGAGGA, Annotations: ['EGFP'], Activation: 42.519
Token: CTTCAA, Annotations: ['EGFP'], Activation: 42.158
Token: CTTCAA, Annotations: ['EGFP'], Activation: 41.902
Token: CTTTAG, Annotations: ['mTurquoise2'], Activation: 41.482
Token: <pad>, Annotations: ['special token: <pad>'], Activation: 4

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3803 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: ACTTTC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 12.454
Token: ACTCGC, Annotations: ['hph', 'hphMX6'], Activation: 12.328
Token: ACTTTC, Annotations: ['CMV enhancer'], Activation: 12.239
Token: ACTCGC, Annotations: ['araC'], Activation: 11.895
Token: ACTTTC, Annotations: ['CMV enhancer'], Activation: 11.729
Token: ACTTTC, Annotations: ['CMV enhancer'], Activation: 11.707
Token: ACTTTC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 11.623
Token: ACTTTC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 11.548
Token: ACTTTT, Annotations: [], Activation: 11.507
Token: ACTTTC, Annotations: ['CMV enhancer'], Activation: 11.504
Token: ACTTTT, Annotations: [], Activation: 11.456
Token: ACTCGC, Annotations: ['araC'], Activation: 11.380
Token: ACTTTC, Annotations: ['CMV enhancer'], Activation: 11.360
Token: ACTCGC, Annotations: ['araC'], Activation: 11.

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3864 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: CTGAAC, Annotations: ['ori'], Activation: 16.279
Token: CTGAAC, Annotations: ['ori'], Activation: 15.633
Token: CTGAAC, Annotations: ['ori'], Activation: 15.479
Token: GATTGG, Annotations: ['cPPT/CTS', 'gag-pol'], Activation: 15.343
Token: GGGATT, Annotations: ['cPPT/CTS', 'gag-pol'], Activation: 15.296
Token: GGGATT, Annotations: ['cPPT/CTS', 'gag-pol'], Activation: 15.072
Token: CTGAAC, Annotations: ['ori'], Activation: 15.004
Token: CTGAAC, Annotations: ['ori'], Activation: 15.000
Token: GATTGG, Annotations: ['cPPT/CTS', 'gag-pol'], Activation: 14.908
Token: CTGAAC, Annotations: ['ori'], Activation: 14.862
Token: CTGAAC, Annotations: ['ori'], Activation: 14.850
Token: CCTCGA, Annotations: ['MCS'], Activation: 14.710
Token: CTGAAC, Annotations: ['ori'], Activation: 14.584
Token: GGATTG, Annotations: ['cPPT/CTS', 'gag-pol'], Activation: 14.519
Token: CTGAAC, Annotations: ['ori'], Activation:

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3866 appears to detect: {'RRE'}
Top 20 activating tokens and their annotations:
Token: CAGCAG, Annotations: ['ULK1'], Activation: 14.258
Token: CAGCAG, Annotations: ['VPE_VICSA'], Activation: 14.131
Token: CAGCAG, Annotations: ['YPB1_ECOLX'], Activation: 13.611
Token: CAGCAG, Annotations: ['RRE', 'RRE', 'env'], Activation: 13.084
Token: CAGCAG, Annotations: ['RRE', 'RRE', 'env'], Activation: 13.033
Token: CAGCAG, Annotations: ['ULK1'], Activation: 13.012
Token: CAGCAG, Annotations: ['NeoR/KanR'], Activation: 13.001
Token: CAGCAG, Annotations: ['RRE', 'RRE', 'env'], Activation: 12.920
Token: CAGCAG, Annotations: ['RRE', 'RRE', 'env'], Activation: 12.743
Token: CAGCAG, Annotations: ['resD'], Activation: 12.520
Token: CAGCAG, Annotations: ['YPB1_ECOLX'], Activation: 12.519
Token: CAGCAG, Annotations: ['UCHL5'], Activation: 12.358
Token: AGCAGC, Annotations: ['MYO19'], Activation: 12.206
Token: CAGCAG, Annotations: ['avrXa10'], Activation: 12.135
Token: CAGCAG, Annotations: ['SNCA'

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3870 appears to detect: {'SV40 promoter'}
Top 20 activating tokens and their annotations:
Token: TCCCCA, Annotations: ['SV40 promoter'], Activation: 19.346
Token: TCCCCA, Annotations: ['SV40 promoter'], Activation: 19.178
Token: TCCCCA, Annotations: ['SV40 promoter'], Activation: 19.168
Token: TCCCCA, Annotations: ['SV40 promoter'], Activation: 18.520
Token: TCCCCA, Annotations: ['SV40 promoter'], Activation: 18.293
Token: TCCCCA, Annotations: ['SV40 promoter'], Activation: 18.166
Token: CCCCAG, Annotations: ['SV40 promoter'], Activation: 18.107
Token: TCCCCA, Annotations: ['SV40 promoter'], Activation: 18.008
Token: TCCCCA, Annotations: ['SV40 promoter'], Activation: 17.629
Token: TCCCCA, Annotations: ['SV40 promoter'], Activation: 17.366
Token: TCCCCA, Annotations: ['SV40 promoter'], Activation: 16.778
Token: TCCCCA, Annotations: ['SV40 promoter'], Activation: 16.692
Token: TCCCCA, Annotations: ['SV40 promoter'], Activation: 16.465
Token: CCCCAG, Annotations: ['SV40 promoter'

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3875 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: CTACAG, Annotations: ['ori'], Activation: 11.745
Token: CTACAG, Annotations: ['ori'], Activation: 11.603
Token: CTACAG, Annotations: ['ori'], Activation: 11.484
Token: CTACAG, Annotations: ['ori'], Activation: 11.356
Token: CTACAG, Annotations: ['ori'], Activation: 11.209
Token: CTACAG, Annotations: ['ori'], Activation: 11.022
Token: CTACAG, Annotations: ['ori'], Activation: 10.734
Token: CTACAG, Annotations: ['small t antigen'], Activation: 10.726
Token: CTACAG, Annotations: ['ori'], Activation: 10.603
Token: CTACAG, Annotations: ['ori'], Activation: 10.589
Token: CTACAG, Annotations: ['ori'], Activation: 10.581
Token: CTACAG, Annotations: ['ori'], Activation: 10.505
Token: CTACAG, Annotations: ['ori'], Activation: 10.497
Token: CTACAG, Annotations: ['ori'], Activation: 10.433
Token: CTACGG, Annotations: ['ori'], Activation: 10.356
Token: CTACAG, Annotations: [], Activation: 10.351
Token: CT

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3916 appears to detect: {'CMV enhancer', 'CMV IE94 promoter'}
Top 20 activating tokens and their annotations:
Token: TTGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 21.055
Token: TTGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 20.679
Token: TTGACG, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 20.359
Token: TTGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 20.337
Token: TTGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 20.197
Token: TTGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 20.033
Token: TTGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 19.848
Token: TTGACG, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 19.780
Token: TTGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 19.759
Token: TTGACG, Annotations: ['CMV promoter', 'UL126', 'CMV enhancer'], Activation: 19.737
Token: TTG

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3928 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: AATAGG, Annotations: ['CMV enhancer'], Activation: 18.470
Token: AATAGG, Annotations: ['CMV enhancer'], Activation: 18.430
Token: AATAGG, Annotations: ['CMV enhancer'], Activation: 18.341
Token: AATAGG, Annotations: ['CMV enhancer'], Activation: 18.283
Token: AATAGG, Annotations: ['CMV enhancer'], Activation: 18.282
Token: AATAGG, Annotations: ['CMV enhancer'], Activation: 18.224
Token: AATAGG, Annotations: ['CMV enhancer'], Activation: 18.133
Token: AATAGG, Annotations: ['CMV enhancer'], Activation: 18.020
Token: AATAGG, Annotations: ['CMV enhancer'], Activation: 17.950
Token: AATAGG, Annotations: ['CMV enhancer'], Activation: 17.942
Token: AATAGG, Annotations: ['CMV enhancer'], Activation: 17.941
Token: AATAGG, Annotations: ['CMV enhancer'], Activation: 17.931
Token: AATAGG, Annotations: ['CMV enhancer'], Activation: 17.903
Token: AATAGG, Annotations: ['CMV enhancer'], Activation: 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3932 appears to detect: {'SV40 promoter'}
Top 20 activating tokens and their annotations:
Token: GTATGC, Annotations: ['SV40 promoter'], Activation: 17.277
Token: GTATGC, Annotations: ['SV40 promoter'], Activation: 17.235
Token: GTATGC, Annotations: ['SV40 promoter'], Activation: 17.105
Token: GTATGC, Annotations: ['SV40 promoter'], Activation: 17.071
Token: GTATGC, Annotations: ['SV40 promoter'], Activation: 15.972
Token: GTATGC, Annotations: ['SV40 promoter'], Activation: 15.030
Token: GTATGC, Annotations: ['SV40 promoter'], Activation: 15.009
Token: GTATGC, Annotations: ['SV40 promoter'], Activation: 14.403
Token: ATATAT, Annotations: ['U6 promoter'], Activation: 14.357
Token: ATGTAT, Annotations: ['RBS'], Activation: 13.921
Token: GTATGC, Annotations: ['SV40 promoter'], Activation: 13.686
Token: GTATGC, Annotations: ['SV40 promoter'], Activation: 13.619
Token: AGTATG, Annotations: ['SV40 promoter'], Activation: 13.601
Token: GTATGC, Annotations: ['SV40 promoter'], Activatio

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3938 appears to detect: {'ori', 'RNAI'}
Top 20 activating tokens and their annotations:
Token: CAAACA, Annotations: ['ori', 'RNAI'], Activation: 14.934
Token: CAAACA, Annotations: ['ori', 'RNAI'], Activation: 14.637
Token: CAAACA, Annotations: ['ori', 'RNAI'], Activation: 13.750
Token: CAAACA, Annotations: ['ori', 'RNAI'], Activation: 13.670
Token: CAAACA, Annotations: ['ori', 'RNAI'], Activation: 13.637
Token: CAAACA, Annotations: ['ori', 'RNAI'], Activation: 13.312
Token: CAAACA, Annotations: ['ori', 'RNAI'], Activation: 13.026
Token: CAAACA, Annotations: ['ori', 'RNAI'], Activation: 12.854
Token: CAAACA, Annotations: ['ori', 'RNAI'], Activation: 12.850
Token: CAAACA, Annotations: ['ori', 'RNAI'], Activation: 12.614
Token: CAAACA, Annotations: ['ori', 'RNAI'], Activation: 12.384
Token: CAAACA, Annotations: ['ori', 'RNAI'], Activation: 12.257
Token: CAAACA, Annotations: [], Activation: 12.219
Token: CAAACA, Annotations: ['ori', 'RNAI'], Activation: 12.193
Token: CAAACA, Annota

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3939 appears to detect: {'ori'}
Top 20 activating tokens and their annotations:
Token: CTTGAG, Annotations: ['ori'], Activation: 11.986
Token: CTTGAG, Annotations: ['ori'], Activation: 11.645
Token: CTTGAG, Annotations: ['ori'], Activation: 11.643
Token: CTTGAG, Annotations: ['ori'], Activation: 11.470
Token: CTTGAG, Annotations: ['ori'], Activation: 11.455
Token: CTTGAG, Annotations: ['ori'], Activation: 11.433
Token: CTTGAG, Annotations: ['ori'], Activation: 11.247
Token: CTTGAG, Annotations: ['ori'], Activation: 11.129
Token: CTTGAG, Annotations: ['ori'], Activation: 11.076
Token: CTTGAG, Annotations: ['ori'], Activation: 11.058
Token: CTTGAG, Annotations: ['ori'], Activation: 11.017
Token: CTTGAG, Annotations: ['ori'], Activation: 10.940
Token: CTTGAG, Annotations: ["5' LTR (truncated)"], Activation: 10.908
Token: CTTGAG, Annotations: ["5' LTR (truncated)"], Activation: 10.879
Token: CTTGAG, Annotations: ['CloDF13 ori'], Activation: 10.866
Token: CTTGAG, Annotations: ['LYS2

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3961 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: GGTAAA, Annotations: ['CMV enhancer'], Activation: 18.160
Token: GGTAAA, Annotations: ['CMV enhancer'], Activation: 17.580
Token: GGTAAA, Annotations: ['CMV enhancer'], Activation: 17.370
Token: GGTAAA, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 17.216
Token: GGTAAA, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 17.177
Token: GGTAAA, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 17.011
Token: GGTAAA, Annotations: ['CMV enhancer'], Activation: 16.979
Token: GGTAAA, Annotations: ['CMV enhancer'], Activation: 16.976
Token: GGTAAA, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 16.847
Token: GGTAAA, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 16.846
Token: GGTAAA, Annotations: ['CMV enhancer'], Activation: 16.745
Token: GGTAAA, Annotations: ['CMV enhancer'], Activation: 16.702
Token: GGTAAA, Ann

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 3985 appears to detect: {'CMV promoter'}
Top 20 activating tokens and their annotations:
Token: GGGACT, Annotations: ['CMV enhancer'], Activation: 13.985
Token: GGGACT, Annotations: ['DsRed2'], Activation: 12.478
Token: GGGATT, Annotations: [], Activation: 12.460
Token: GGGACT, Annotations: ['UL126'], Activation: 12.448
Token: GGGGAT, Annotations: ['UL126'], Activation: 12.397
Token: GGGACT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.284
Token: GGGACT, Annotations: ['CMV promoter', 'UL126'], Activation: 12.203
Token: GGGAAA, Annotations: [], Activation: 11.766
Token: GGGACT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 11.473
Token: GGGACT, Annotations: ['CMV promoter', 'UL126'], Activation: 11.384
Token: GGGACT, Annotations: ['CMV promoter', 'UL126'], Activation: 11.347
Token: GGACTT, Annotations: ['Fgfr4'], Activation: 11.313
Token: GGGACT, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 11.234
Token: GGGACT, Annotations: ['CMV enhancer', 

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 3989 appears to detect: {'CMV promoter', 'UL126'}
Top 20 activating tokens and their annotations:
Token: GCACCA, Annotations: ['EYFP'], Activation: 14.503
Token: GCACCA, Annotations: ['CMV promoter', 'UL126'], Activation: 13.641
Token: GCACCA, Annotations: ['CMV promoter', 'UL126'], Activation: 13.555
Token: GCACCA, Annotations: ['CMV promoter', 'UL126'], Activation: 13.432
Token: GCACCA, Annotations: ['CMV promoter', 'UL126'], Activation: 13.170
Token: GCACCA, Annotations: ['bom'], Activation: 12.941
Token: GCACCA, Annotations: ['CMV promoter', 'UL126'], Activation: 12.835
Token: GCACCA, Annotations: ['CMV promoter', 'UL126'], Activation: 12.601
Token: GCACCA, Annotations: [], Activation: 12.527
Token: GCACCA, Annotations: ['CMV promoter', 'UL126'], Activation: 12.407
Token: GCACCA, Annotations: ['CMV promoter', 'UL126'], Activation: 12.385
Token: GCACCA, Annotations: ['GAP promoter'], Activation: 12.348
Token: GCACCA, Annotations: ['CMV promoter', 'UL126'], Activation: 12.241

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df


Latent 4061 appears to detect: {'CMV enhancer', 'CMV IE94 promoter'}
Top 20 activating tokens and their annotations:
Token: ATGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 14.757
Token: ATGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 14.087
Token: ATGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 13.536
Token: ATGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 13.492
Token: ATGACG, Annotations: ['CMV enhancer'], Activation: 13.147
Token: ATGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 12.863
Token: ATGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 12.536
Token: ATGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 12.515
Token: ATGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'], Activation: 12.363
Token: ATGACG, Annotations: ['CMV enhancer'], Activation: 11.604
Token: ATGACG, Annotations: ['CMV enhancer', 'CMV IE94 promoter'

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()



Latent 4067 appears to detect: {'CMV enhancer'}
Top 20 activating tokens and their annotations:
Token: TATTAC, Annotations: ['CMV enhancer', 'chicken β-actin promoter'], Activation: 15.338
Token: TATTAC, Annotations: ['CMV enhancer', 'chicken β-actin promoter'], Activation: 14.709
Token: TATTAC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 14.205
Token: TTATTA, Annotations: [], Activation: 14.116
Token: TATTAT, Annotations: ['AmpR promoter'], Activation: 14.045
Token: TATTAT, Annotations: [], Activation: 13.994
Token: TATTAC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 13.924
Token: TATTAT, Annotations: ['AmpR promoter'], Activation: 13.894
Token: TATTAC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 13.881
Token: TATTAC, Annotations: ['CMV enhancer', 'chicken β-actin promoter'], Activation: 13.876
Token: TATTAC, Annotations: ['CMV enhancer'], Activation: 13.831
Token: TATTAC, Annotations: ['CMV enhancer', 'CMV promoter'], Activation: 13.745
T

  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
  token_df

2. Searching *syntactic* SAE latents

In [None]:
import pandas as pd
from tqdm import tqdm

## here we create a short list of candidate monosemantic latents for **kmers** by looking at the top-50 most activating tokens
## and asking: do at least half of them share a kmer?

def analyze_latent_features_fast(token_df, combined_latents, k=4, n_latents=4096, top_n=10):
    """Optimized version of latent feature analysis"""

    def get_kmers(token, k):
        if not isinstance(token, str) or k <= 0:
            return set()
        token = token.strip()
        if not token or len(token) < k:
            return set()
        return {token[i:i+k] for i in range(len(token) - k + 1)}

    # Convert combined_latents to numpy once
    activations_array = combined_latents.cpu().detach().numpy()

    # Pre-compute valid tokens and their k-mers
    valid_tokens = token_df['tokens'].dropna()
    token_to_kmers = {token: get_kmers(str(token), k) for token in valid_tokens}

    latent_dict = {}
    tokens_array = token_df['tokens'].values

    # Process in batches for better memory usage
    batch_size = 100
    for batch_start in tqdm(range(0, n_latents, batch_size)):
        batch_end = min(batch_start + batch_size, n_latents)

        # Process batch of latents
        batch_activations = activations_array[:, batch_start:batch_end]

        # Find top_n indices for each latent in batch
        top_indices = np.argpartition(-batch_activations, top_n, axis=0)[:top_n]

        # Process each latent in batch
        for i, latent_id in enumerate(range(batch_start, batch_end)):
            # Get tokens for top activations
            top_tokens = tokens_array[top_indices[:, i]]

            # Get k-mer sets for valid tokens
            kmer_sets = [token_to_kmers[token] for token in top_tokens
                        if pd.notna(token) and token in token_to_kmers]

            if kmer_sets:
                common_kmers = set.intersection(*kmer_sets)
                if common_kmers:
                    latent_dict[latent_id] = common_kmers
                    print(f"\nLatent {latent_id} appears to detect: {common_kmers}")

    return latent_dict

kmer_latent_dict_ = analyze_latent_features_fast(token_df, combined_latents, k=4, n_latents=4096, top_n=50) ## set n_latents to 100 to quickly test

In [None]:
#save latent dict as csv file
import pandas as pd

df = pd.DataFrame(list(latent_dict.items()), columns=['latent_id', 'annotation'])
df

# save
df.to_csv('/content/drive/MyDrive/SAEs_for_Genomics/latent_dict_vals2_nt50m_sae_l10_+40mtokens.csv', index=False)

In [None]:
dict_values = latent_dict.values()

# turn list of sets into one large set
flat_set = set.union(*dict_values)
print(flat_set)

## Auto-Searching of MLP

1. Of the MLP neurons are there any that are somewhat monosemantic for a functional annotation?

In [None]:
for latent_id in range(512):
    # we avoid modifying token_df directly as its very time-consuming to reload if we mess it up
    token_df_copy = token_df.copy()

    # get the activation value for the N-th unit in the SAE for each input in batch
    hidden_act_feature_id = mlp_act[:, latent_id] # N = feature_id

    # add this to the dataframe
    token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()

    # print latent_id if the five most activating tokens share a token annotation
    most_activating_tokens = token_df_copy.sort_values(f"latent-{latent_id}-act", ascending=False).head(5)

    # Get annotations as lists
    annotations = most_activating_tokens['token_annotations'].tolist()

    # Check if there's any intersection between all annotation lists
    if annotations:
        # Convert all annotations to sets for intersection
        annotation_sets = [set(ann) for ann in annotations]
        common_annotations = set.intersection(*annotation_sets)
        filtered_annotations = common_annotations - {'special token: <cls>', 'special token: <pad>'}

        if filtered_annotations:  # If there are any shared annotations
            print(f"\nLatent {latent_id} appears to detect: {common_annotations}")
            print("Top 5 activating tokens and their annotations:")
            for _, row in most_activating_tokens.iterrows():
                print(f"Token: {row['tokens']}, Annotations: {row['token_annotations']}, "
                      f"Activation: {row[f'latent-{latent_id}-act']:.3f}")


2. Of the MLP neurons are there any that are somewhat monosemantic for some *syntactic* pattern?

In [None]:
k = 4 ## kmer length
latent_dict = {}

for latent_id in range(512):
    # we avoid modifying token_df directly as its very time-consuming to reload if we mess it up
    token_df_copy = token_df.copy()

    # get the activation value for the N-th unit in the SAE for each input in batch
    hidden_act_feature_id = combined_acts[:, latent_id] # N = feature_id

    # add this to the dataframe
    token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()

    # print latent_id if the five most activating tokens share a token annotation
    most_activating_tokens = token_df_copy.sort_values(f"latent-{latent_id}-act", ascending=False).head(10)


    # Get a set of all kmers for each most activating token
    def get_kmers(token, k):
        if not isinstance(token, str) or k <= 0:
            raise ValueError("Invalid input: token must be string and k must be positive")
        if len(token) < k:
            return set()
        return {token[i:i+k] for i in range(len(token) - k + 1)}

    kmer_sets = [get_kmers(token, k) for token in most_activating_tokens['tokens']]

    # Check if there's any intersection between all kmer sets stored
    if kmer_sets:
        common_kmers = set.intersection(*kmer_sets)
        if common_kmers:  # If there are any shared kmers
            latent_dict[latent_id] = common_kmers
            print(f"\nLatent {latent_id} appears to detect: {common_kmers}")
            print("Top 5 activating tokens and their annotations:")
            for _, row in most_activating_tokens.iterrows():
                print(f"Token: {row['tokens']}, Annotations: {row['token_annotations']}, "
                      f"Activation: {row[f'latent-{latent_id}-act']:.3f}")



In [None]:
latent_id = 188  #np.random.randint(0, 4096) # or set particular int value in this range


# we avoid modifying token_df directly as its very time-consuming to reload if we mess it up
token_df_copy = token_df.copy()

# get the activation value for the N-th unit in the SAE for each input in batch
hidden_act_feature_id = combined_acts[:, latent_id] # N = feature_id

# add this to the dataframe
token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()

# sort to show the most activating tokens on top, add colours
token_df_copy.sort_values(f"latent-{latent_id}-act", ascending=False).head(300).style.background_gradient("coolwarm")


## Calc sensitivity and specificity of SAE latent for Functional or Syntactic Feature

In [None]:
def contains_kmers(tokens: str, kmers: list) -> bool:
    """Check if a token sequence contains any of the kmers in the list"""
    if not isinstance(tokens, str):
        return False
    return any(k in tokens for k in kmers)

def contains_annotations(token_annotation: str, annotations: list) -> bool:
    """Check if a token sequence contains any of the given annotations"""
    if not isinstance(token_annotation, str):
        return False
    return any(annotation in token_annotation for annotation in annotations)

def calculate_stats(df, act_threshold, meaning, check: str):
    """Calculate various statistics about TAG tokens and activations"""

    # Create a function that's partially applied with the specific annotation
    if check == 'kmer':
      check_kmer = lambda x: contains_kmers(x, meaning)
      check_fn = check_kmer
      col = 'tokens'

    elif check == 'annotation':
      check_annotation = lambda x: contains_annotations(x, meaning)
      check_fn = check_annotation
      col = 'token_annotations'

    else: raise ValueError("check must be 'kmer' or 'annotation'")

    # Q1: Fraction of above-threshold activations containing TAG
    above_threshold = df[df[latent_column] > act_threshold]
    above_threshold_tag_fraction = above_threshold[col].apply(check_fn).mean()

    # Q2: Fraction of below-or-equal threshold activations containing TAG
    below_threshold = df[df[latent_column] <= act_threshold]
    below_threshold_tag_fraction = below_threshold[col].apply(check_fn).mean()

    # Q3: Overall fraction of rows containing TAG
    overall_tag_fraction = df[col].apply(check_fn).mean()

    # Q4: Fraction of tokens with positive activation
    positive_activation_fraction = (df[latent_column] > act_threshold).mean()

    # Q5: For rows containing TAG, fraction with positive activation
    tag_rows = df[df[col].apply(check_fn)]
    tag_positive_fraction = (tag_rows[latent_column] > act_threshold).mean()

    return {
        'above_threshold_tag': above_threshold_tag_fraction,
        'below_threshold_tag': below_threshold_tag_fraction,
        'overall_tag': overall_tag_fraction,
        'positive_activation': positive_activation_fraction,
        'tag_positive': tag_positive_fraction
    }

def find_largest_consecutive_tag_sequence(df):
    """Find largest N where top-N rows all contain TAG"""
    sorted_df = df.sort_values(latent_column, ascending=False)
    check_cmv = lambda x: contains_annotations(x, ['CMV enhancer', 'CMV promoter', 'CMV IE94 promoter'])

    for N in range(1, len(df) + 1):
        top_n = sorted_df.head(N)
        if not all(top_n['token_annotations'].apply(check_cmv)):
            return N - 1
    return len(df)

# create empty pd df with column for latent_id, annotation and evidence_for_act_from_tag
columns = ['latent_id', 'annotation', 'evidence_for_act_from_ann', 'evidence_for_ann_from_act', 'precision', 'recall']
df = pd.DataFrame(columns=columns)

using_kmer = False
using_annotation = not using_kmer

# Iterate over latent dict
for latent_id, meaning in latent_dict.items():

    # Calculate all statistics
    act_threshold = 0.0  # Activation threshold
    latent_column = f"latent-{latent_id}-act"


    # get most activating tokens for latent id
    token_df_copy = token_df_1k_s1.copy()
    hidden_act_feature_id = combined_latents[:, latent_id] # N = feature_id
    token_df_copy[f"latent-{latent_id}-act"] = hidden_act_feature_id.cpu().detach().numpy()
    token_df_copy.sort_values(f"latent-{latent_id}-act", ascending=False).head(300).style.background_gradient("coolwarm")

    ### input all to calc stats
    if using_annotation:
      annotation = list(meaning)
      stats = calculate_stats(token_df_copy, act_threshold, meaning = annotation, check = 'annotation')

    elif using_kmer:
      kmer_strings = [''.join(kmer) for kmer in meaning]
      stats = calculate_stats(token_df_copy, act_threshold, meaning = kmer_strings, check = 'kmer')



    ## Calculate the posterior to prior odds ratios
    evidence_for_act_from_tag = (stats['tag_positive']/(1-stats['tag_positive'])) / (stats['positive_activation']/(1-stats['positive_activation']))
    evidence_for_tag_from_act = (stats['above_threshold_tag']/(1-stats['above_threshold_tag'])) / (stats['overall_tag']/(1-stats['overall_tag']))


    # only print at least moderately-monosemantic latents
    if min(evidence_for_act_from_tag, evidence_for_tag_from_act) > 20 or max(evidence_for_act_from_tag, evidence_for_tag_from_act)>200:

        print(f"\nLatent {latent_id} appears to detect: {meaning}")

        print(f"Strength of evidence for act > {act_threshold} from {meaning} (as BayesF): {evidence_for_act_from_tag:.3f}")
        print(f"Strength of evidence for {meaning} from act > {act_threshold} (as BayesF): {evidence_for_tag_from_act:.3f}")



            # Print results in a formatted way
        print(f"\n {meaning} Token Analysis Results")
        print("=" * 50)
        print(f"Analysis for activation threshold: {act_threshold}")
        print("-" * 50)
        print(f" P(token annotated with {meaning}):                      {stats['overall_tag']:.3f}")
        print(f" P(token annotated with {meaning}|activation > {act_threshold}):     {stats['above_threshold_tag']:.3f}")
        print(f" P(activation > {act_threshold}):                        {stats['positive_activation']:.3f}")
        print(f" P(activation > {act_threshold}|token annotated with {meaning}):     {stats['tag_positive']:.3f}")


        # add to df
        df.loc[len(df)] = [latent_id, meaning, evidence_for_act_from_tag, evidence_for_tag_from_act, stats['above_threshold_tag'], stats['tag_positive']]

        # Find and print largest consecutive sequence0
        #largest_n = find_largest_consecutive_tag_sequence(token_df_copy)
        #print("-" * 50)
        #print(f"Largest N where top-N rows all contain {annotation}: {largest_n}")

# save df
#df.to_csv('/content/drive/MyDrive/SAEs_for_Genomics/Latent_dict_4MER_monosem_nt50m_sae_+40mtokens.csv', index=False)


Latent 52 appears to detect: {"5' LTR"}
Strength of evidence for act > 0.0 from {"5' LTR"} (as BayesF): 38.160
Strength of evidence for {"5' LTR"} from act > 0.0 (as BayesF): 31.390

 {"5' LTR"} Token Analysis Results
Analysis for activation threshold: 0.0
--------------------------------------------------
 P(token annotated with {"5' LTR"}):                      0.005
 P(token annotated with {"5' LTR"}|activation > 0.0):     0.133
 P(activation > 0.0):                        0.011
 P(activation > 0.0|token annotated with {"5' LTR"}):     0.291

Latent 73 appears to detect: {'small t antigen'}
Strength of evidence for act > 0.0 from {'small t antigen'} (as BayesF): 217.102
Strength of evidence for {'small t antigen'} from act > 0.0 (as BayesF): 240.966

 {'small t antigen'} Token Analysis Results
Analysis for activation threshold: 0.0
--------------------------------------------------
 P(token annotated with {'small t antigen'}):                      0.001
 P(token annotated with {'sm