In [1]:
import sys
import os
import nltk
import json
import nest_asyncio
import matplotlib.pyplot as plt
import numpy as np
import torch
import time        
import warnings

from IPython.display import display
from huggingface_hub import login
from llama_cloud_services import LlamaParse
from dotenv import load_dotenv
from context_cite import ContextCiter
from context_cite.utils import aggregate_logit_probs
from context_cite.context_partitioner import SentencePeriodPartitioner
from transformers import AutoTokenizer, AutoModelForCausalLM
from scipy.stats import spearmanr

In [2]:
# Session setup. load_dotenv() runs first so that the os.getenv() lookup
# further down can see values defined in a local .env file.
load_dotenv()
# Patch asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably required because LlamaParse drives async code from
# inside the notebook's own event loop — confirm.
nest_asyncio.apply()
# NOTE(review): this silences every Python warning for the rest of the
# session, including ones that may point at real problems.
warnings.filterwarnings("ignore")
# NOTE(review): `transformers` is already imported in the first cell; confirm
# that setting this environment variable after the import still takes effect.
os.environ['TRANSFORMERS_VERBOSITY'] = 'error'
# One-time setup steps, currently disabled:
# login(token=os.getenv("HF_TOKEN"))
# nltk.download('punkt_tab')
# os.getenv() returns None when LLAMA_CLOUD_API_TOKEN is not set; the parser is
# only called by the PDF branch of input_handler.
parser = LlamaParse(api_key=os.getenv("LLAMA_CLOUD_API_TOKEN"))

In [3]:
# Relative path of the document whose text is used as the attribution context.
document = 'documents/sample.txt'
# Short model label (no organisation prefix).
# NOTE(review): not referenced in the visible cells and differs from
# `model_name` only by case — confirm it is used further down, or rename.
MODEL_NAME = "Llama-3.2-1B-Instruct"
# Model identifier handed to ContextCiter.from_pretrained.
model_name = "meta-llama/Llama-3.2-1B-Instruct" # 3.2 1B Instruct for faster inference, 3.1 8B for better performance

In [4]:
# Disabled inline PDF loading, kept for reference only. The PDF branch of
# `input_handler` performs the same steps (parse, drop chunks shorter than
# 32 characters, concatenate with spaces).
# docs = parser.load_data(document)
# data = ""
# for doc in docs:
#     if len(doc.text) >= 32:
#         data += doc.text + " "
# len(data)

In [5]:
def plot(cc: ContextCiter, path: str = None):
    """Scatter predicted against actual log-probabilities for a ContextCiter.

    The predicted values come from ``cc._logit_probs`` passed through
    ``aggregate_logit_probs``; the actual values come from
    ``cc._actual_logit_probs``. Both are flattened and drawn on a single
    8x8 figure (predicted on x, actual on y) together with a dashed
    ``y = x`` reference line. The Spearman correlation of the two flattened
    arrays is reported in the title.

    Args:
        cc: ContextCiter instance exposing ``_logit_probs`` and
            ``_actual_logit_probs``.
            NOTE(review): these are private attributes; presumably they are
            populated once attributions have been computed — confirm.
        path: Optional output path. When truthy, the figure is saved there
            before it is shown.

    Raises:
        AssertionError: If the flattened arrays differ in length.
    """
    predicted = aggregate_logit_probs(cc._logit_probs).flatten()
    actual = cc._actual_logit_probs.flatten()
    assert len(predicted) == len(actual), f"{len(predicted)} != {len(actual)}"

    # Rank correlation is computed on the untouched arrays; the p-value is unused.
    rho, _ = spearmanr(predicted, actual)

    fig, ax = plt.subplots(figsize=(8, 8))
    ax.scatter(predicted, actual, alpha=0.3, label="Context ablations")

    # Reference diagonal spanning the combined range of both arrays.
    lower = min(predicted.min(), actual.min())
    upper = max(predicted.max(), actual.max())
    diagonal = np.linspace(lower, upper, 100)
    ax.plot(diagonal, diagonal, '--', color='gray', label="y = x")

    ax.set_xlabel("Predicted log-probability")
    ax.set_ylabel("Actual log-probability")
    ax.set_title(f"Predicted vs. Actual log-probability\nSpearman correlation: {rho:.2f}")
    ax.legend()
    ax.grid(True)

    if path:
        fig.savefig(path)
    plt.show()

In [6]:
def input_handler(path: str, min_chars: int = 32) -> str:
    """Load the text of a document from disk.

    Args:
        path: Path to a ``.pdf`` or ``.txt`` file. The extension is matched
            case-insensitively, so ``REPORT.PDF`` and ``notes.TXT`` work too.
        min_chars: Parsed PDF chunks whose text is shorter than this many
            characters are dropped. Defaults to 32, the threshold that was
            previously hard-coded.

    Returns:
        For ``.txt`` files, the file contents decoded as UTF-8.
        For ``.pdf`` files, the text of every kept chunk returned by the
        module-level ``parser.load_data``, each followed by a single space.

    Raises:
        ValueError: If the path ends in neither ``.pdf`` nor ``.txt``.
    """
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        docs = parser.load_data(path)
        # join() avoids repeated string reallocation; the trailing space after
        # every chunk (including the last) is kept for backward compatibility.
        return "".join(doc.text + " " for doc in docs if len(doc.text) >= min_chars)

    if lowered.endswith(".txt"):
        # Explicit encoding: without it, decoding depends on the platform locale.
        with open(path, "r", encoding="utf-8") as file:
            return file.read()

    raise ValueError("Invalid file format")

In [7]:
# Build the ContextCiter for a single (context, query) pair. The context is the
# text of `document` as loaded by input_handler.
cc = ContextCiter.from_pretrained(
    model_name,
    context=input_handler(document),
    query="What is Transformer?",
    # Fall back to CPU when no CUDA device is visible, instead of failing at
    # model-load time on machines without a GPU.
    device="cuda" if torch.cuda.is_available() else "cpu",
    num_ablations=64
)

In [8]:
# Request the attributions as a DataFrame limited to the top 5 entries
# (as_dataframe=True, top_k=5). The bare `res` on the last line makes the
# notebook render it as the cell output.
res = cc.get_attributions(as_dataframe=True, top_k=5)
res

Attributed: The Transformer is a simple neural network architecture proposed by the authors, which is based solely on attention mechanisms, rather than recurrence or convolutional neural networks. It is a model that connects the encoder and decoder through an attention mechanism, allowing for global dependencies to be modeled between input and output sequences. This architecture is designed to be more parallelizable and efficient than traditional models, making it suitable for large-scale training.


  0%|          | 0/63 [00:00<?, ?it/s]

Unnamed: 0,Score,Source
0,52.571,"We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely."
1,27.777,"In this work we propose the Transformer, a model architecture eschewing recurrence and instead relying entirely on an attention mechanism to draw global dependencies between input and output."
2,10.927,The best performing models also connect the encoder and decoder through an attention mechanism.
3,7.229,"Attention mechanisms have become an integral part of compelling sequence modeling and transduction models in various tasks, allowing modeling of dependencies without regard to their distance in the input or output sequences [2, 19]."
4,7.033,Attention Is All You Need Abstract The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder.
