In [4]:
import torch
from transformers import AutoTokenizer, AutoModel
from captum.attr import IntegratedGradients

# Load the BGE encoder and its tokenizer. eval() puts the module in inference
# mode so the repeated forward passes done during attribution are deterministic.
model_name = "BAAI/bge-large-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.eval()

# Sample prompt (adjacent literals are concatenated into a single string)
prompt = (
    "Flask offers suggestions, but doesn't enforce any dependencies or project layout. "
    "It is up to the developer to choose the tools and libraries they want to use. "
    "There are many extensions provided by the community that make adding new functionality easy."
)

inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]

# === Embed inputs ===
# Attribution is taken with respect to the embeddings, not the integer ids.
# The lookup runs under no_grad so the result is a leaf tensor that is not
# connected to the embedding weights; gradients are then enabled on the leaf.
embedding_layer = model.get_input_embeddings()
with torch.no_grad():
    input_embeddings = embedding_layer(input_ids)  # shape: [1, seq_len, hidden_dim]
input_embeddings.requires_grad_()

# === Custom forward: dot product with concept vector ===
# All-ones placeholder: the score is simply the sum of the [CLS] features.
# Replace with a meaningful direction (e.g. a query embedding) for real use.
concept_vector = torch.ones(model.config.hidden_size)  # or define a better one

def custom_forward(embeds):
    """Score each sequence by the dot product of its [CLS] state and the concept vector.

    Args:
        embeds: Input embeddings of shape [batch, seq_len, hidden_dim]. When
            called by Captum, ``batch`` covers several interpolated copies of
            the original input, so it can exceed ``attention_mask``'s batch.

    Returns:
        A 1-D tensor with one scalar score per row of ``embeds``.
    """
    batch_size = embeds.shape[0]
    mask = attention_mask
    if mask.shape[0] != batch_size:
        # Tile the mask to the incoming batch rather than relying on implicit
        # broadcasting inside the model's attention layers.
        mask = mask.repeat(batch_size // mask.shape[0], 1)
    mask = mask.to(embeds.device)

    outputs = model(inputs_embeds=embeds, attention_mask=mask)
    cls_embedding = outputs.last_hidden_state[:, 0, :]  # [batch_size, hidden_dim]

    # Match dtype as well as device so matmul also works for non-float32 models.
    concept = concept_vector.to(device=cls_embedding.device, dtype=cls_embedding.dtype)
    return torch.matmul(cls_embedding, concept)  # [batch_size]

# === Captum Integrated Gradients ===
# Without internal_batch_size every interpolation step is pushed through the
# model as one batch, which is very heavy for a large encoder. Chunking the
# steps bounds peak memory; the steps are still all evaluated, just in
# sequential chunks.
ig = IntegratedGradients(custom_forward)
attributions, delta = ig.attribute(
    input_embeddings,
    n_steps=50,  # Captum's default, spelled out so it is easy to tune
    internal_batch_size=8,
    return_convergence_delta=True,
)

# === Tokens and attribution scores ===
# squeeze(0) drops only the batch axis, so a short sequence is never collapsed.
tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze(0).tolist())
token_attributions = attributions.squeeze(0).sum(dim=-1)  # sum across embedding dim

# === Print ===
for token, score in zip(tokens, token_attributions):
    print(f"{token:>12} : {score.item():.4f}")

# A large delta means the integral approximation is poor; raise n_steps then.
print(f"convergence delta: {delta.abs().max().item():.4f}")
    


KeyboardInterrupt: 

In [None]:

# === Show each token next to its summed attribution score ===
for tok, attr in zip(tokens, token_attributions.tolist()):
    print(f"{tok:>12} : {attr:.4f}")