In [None]:
from nnsight import NNsight


class TransformerDecoderWrapper(NNsight):
    def get_latent_activations(self, sentences, chosen_layers):
        """
        Extracts latent activations from specified layers.

        Args:
            sentences (list of str): List of input sentences.
            chosen_layers (int or list of int): Layer(s) from which to extract activations.

        Returns:
            dict: Dictionary of extracted activations {layer_name: activation}.
        """
        if isinstance(chosen_layers, int):
            chosen_layers = [chosen_layers]

        layers_out = {}
        with self.trace(sentences):
            for layer in chosen_layers:
                layers_out[f"layer{layer}"] = self.transformer.h[layer].output[0].save()

            # Stop tracing at the last chosen layer
            self.transformer.h[max(chosen_layers)].output.stop()

        return layers_out

    def run_end_model_from_layer(self, layer_activations, start_layer):
        """
        Runs the model from a specific layer onwards until the final logits.

        Args:
            layer_activations (dict): Dictionary of latent activations from `get_latent_activations`.
            start_layer (int): The layer from which to resume forward propagation.

        Returns:
            Tensor: Logits from the model.
        """
        if f"layer{start_layer}" not in layer_activations:
            raise ValueError(f"Layer {start_layer} activations not found.")

        hidden_state = layer_activations[f"layer{start_layer}"]

        for i, layer in enumerate(self.transformer.h[(start_layer + 1) :]):
            hidden_state = layer(hidden_state) if i == 0 else layer(hidden_state[0])

        logits = self.lm_head(hidden_state[0])
        return logits

In [None]:
model = TransformerDecoderWrapper("openai-community/gpt2", device_map="auto")


layers_out = model.get_latent_activations(
    ["Hello, how are you?", "Nice to meet you!", "Yes it can be cool.", "Ok nice, i can't wait!!"], [2, 3, 5]
)

logits = model.run_end_model_from_layer(layers_out, 5)

In [None]:
from nnsight import NNsight
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
model_hug = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
model = NNsight(model_hug)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
model

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [9]:
model.trace("The quick brown fox jumps over the lazy dog.")

<nnsight.intervention.contexts.interleaving.InterleavingTracer at 0x7fee3d2b2890>

In [None]:
sentence = ["The quick brown fox jumps over the lazy dog."]  # , "The dog is lazy."]
chosen_layers = [0, 1, 2]


layers_out = {}

with model.trace(sentence):
    for layer in chosen_layers:
        layers_out[f"layer{layer}"] = model.transformer.h[layer].output[0].save()

    # Stop tracing at the last chosen layer
    model.transformer.h[max(chosen_layers)].output.stop()

NNsightError: 'list' object has no attribute 'size'

In [None]:
import torch
from nnsight import LanguageModel
from torch.nn.utils.rnn import pad_sequence


class TransformerDecoderWrapper(LanguageModel):
    def get_latent_activations(self, sentences, chosen_layers, batch_size=4, padding_value=0):
        """
        Extrait les activations latentes des couches spécifiées par batch et
        retourne, pour chaque couche, un unique tensor paddé regroupant tous les batchs.

        Args:
            sentences (list of str): Liste de phrases en entrée.
            chosen_layers (int ou list of int): Couches dont on souhaite extraire les activations.
            batch_size (int): Taille de batch (par défaut 4).
            padding_value (int ou float): Valeur de padding utilisée pour compléter les tenseurs.

        Returns:
            dict: Dictionnaire des activations extraites {nom_de_la_couche: tensor paddé}.
        """
        if isinstance(chosen_layers, int):
            chosen_layers = [chosen_layers]

        # Stockage temporaire sous forme de listes
        layers_out = {f"layer{layer}": [] for layer in chosen_layers}

        for i in range(0, len(sentences), batch_size):
            batch = sentences[i : i + batch_size]
            with self.trace(batch):
                for layer in chosen_layers:
                    activation = self.transformer.h[layer].output[0].save()
                    layers_out[f"layer{layer}"].append(activation)
                self.transformer.h[max(chosen_layers)].output.stop()

        # Padding et concaténation de tous les batchs pour chaque couche
        for key in layers_out:
            layers_out[key] = pad_sequence(layers_out[key], batch_first=True, padding_value=padding_value)

        return layers_out

    def run_end_model_from_layer(self, layer_activations, start_layer, batch_size=4):
        """
        Exécute le modèle à partir d'une couche spécifique jusqu'aux logits finaux,
        en traitant les activations latentes par batch.

        Note : Ici, on suppose que `layer_activations[f"layer{start_layer}"]` est un tensor unique
        obtenu après padding de tous les batchs.

        Args:
            layer_activations (dict): Dictionnaire des activations latentes.
            start_layer (int): La couche à partir de laquelle reprendre la propagation.
            batch_size (int): Taille de batch (par défaut 4).

        Returns:
            Tensor: Logits du modèle, éventuellement recomposés à partir des batchs.
        """
        key = f"layer{start_layer}"
        if key not in layer_activations:
            raise ValueError(f"Activations de la couche {start_layer} non trouvées.")

        hidden_state = layer_activations[key]
        logits_batches = []

        for i in range(0, hidden_state.shape[0], batch_size):
            batch = hidden_state[i : i + batch_size]
            out = batch
            for j, layer in enumerate(self.transformer.h[(start_layer + 1) :]):
                if j == 0:
                    out = layer(out)
                else:
                    out = layer(out[0])
            logits = self.lm_head(out[0])
            logits_batches.append(logits)

        # Vous pouvez concaténer les logits si les dimensions le permettent
        return torch.cat(logits_batches, dim=0)