In [1]:
from pathlib import Path
import pickle
import sys
import warnings

# Silence FutureWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)

# The parent of the current working directory is treated as the project root
# and put on sys.path so that the project's `src` package can be imported.
# NOTE(review): this assumes the kernel is started from a sub-directory of
# the project (e.g. a notebooks folder) -- confirm.
project_root = str(Path().resolve().parent)

# Guard the insert so that re-running this cell does not pile up duplicate
# sys.path entries.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

In [2]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

from src.embed import embed

In [3]:
# --- Model and data locations ---
MODEL_ID = "AlphaGaO/Qwen3-4B-GPTQ"
DATASET_PATH = f"{project_root}/data/dataset.csv"
HIDDEN_STATES_PATH = f"{project_root}/data/activations/layer20.pkl"

# --- Extraction settings (forwarded to embed() further down) ---
MAX_LEN = 768
KV_CACHE_SIZE = 896
# The original, misspelled name is kept as an alias because another cell
# still refers to it.
KV_CAHCE_SIZE = KV_CACHE_SIZE
LAYERS = [19, 20]

# --- Device selection ---
# DEVICE is the torch device object; MAPPING is the value later passed as
# `device_map` when the model is loaded.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
    MAPPING = "cuda:0"
else:
    DEVICE = torch.device("cpu")
    MAPPING = "auto"
print(f"{DEVICE} device is available")

cuda device is available


In [4]:
# Tokenizer and model are both loaded from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

# FlashAttention-2 kernels are CUDA-only, so requesting them on the CPU
# fallback path would make model loading fail. Fall back to PyTorch's
# built-in scaled-dot-product attention when no GPU is in use.
attn_implementation = "flash_attention_2" if DEVICE.type == "cuda" else "sdpa"

# NOTE(review): with `device_map` already placing the weights, the trailing
# `.to(DEVICE)` is presumably redundant -- kept to preserve behaviour.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map=MAPPING,
    dtype=torch.float16,
    attn_implementation=attn_implementation,
).eval().to(DEVICE)

# Prompts to embed: the `prompt` column of the dataset CSV as a plain list.
texts = pd.read_csv(DATASET_PATH)["prompt"].tolist()

CUDA extension not installed.
CUDA extension not installed.
`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.


In [5]:
# Options forwarded to the project's embed() helper. The key names are the
# helper's keyword arguments; the values come from the configuration cell.
embed_options = {
    "layers": LAYERS,
    "kv_cache_size": KV_CAHCE_SIZE,
    "max_length": MAX_LEN,
}

# Run the helper over every prompt. The cells below iterate over the result
# and index each item by layer number.
hidden_states = embed(texts, model, tokenizer, **embed_options)

Processing batches:   0%|          | 0/294 [00:00<?, ?it/s]

Transform the cumulative hidden states of successive layers (each layer's own activation plus everything carried over by the residual connections) into pure per-layer activation increments by subtracting the previous layer's hidden state from each layer's. The first layer in the list has no predecessor to subtract, so it yields no increment and is dropped.

In [6]:
# For every text, walk the consecutive (lower, upper) pairs of LAYERS and keep
# `upper - lower`, with both operands cast to float16 first. The first layer
# in LAYERS only ever appears as the subtrahend, so it produces no entry of
# its own. The result is one flat list: texts in order, and within each text
# the layer pairs in order.
activations = [
    text_result[upper].to(torch.float16) - text_result[lower].to(torch.float16)
    for text_result in hidden_states
    for lower, upper in zip(LAYERS, LAYERS[1:])
]

Let's check the total number of tokens processed.

In [7]:
# Add up the leading dimension of the first stored tensor of every text.
# NOTE(review): this presumes dimension 0 is the token axis and that all
# layers of one text share the same length -- confirm against embed().
num_tokns = 0
for text_dct in hidden_states:
    first_layer_states = list(text_dct.values())[0]
    num_tokns += first_layer_states.shape[0]

print(f"A total of {num_tokns:,} tokens were processed!")

A total of 263,123 tokens were processed!


Save the computed activation increments with pickle so that exactly this data structure can be restored later.

In [None]:
# Make sure the target directory exists before writing; otherwise the dump
# fails with FileNotFoundError only after the expensive extraction has run.
output_path = Path(HIDDEN_STATES_PATH)
output_path.parent.mkdir(parents=True, exist_ok=True)

# NOTE(review): if these tensors live on the GPU, loading the pickle on a
# CPU-only machine may fail -- confirm where embed() places its outputs.
# Only unpickle this file from a trusted location.
with output_path.open('wb') as f:
    pickle.dump(activations, f)