In [1]:
import os
import openai
import chromadb
import nltk
import warnings
import nest_asyncio
import re

from llama_parse import LlamaParse
from llama_index.core import Document, VectorStoreIndex, get_response_synthesizer, StorageContext, QueryBundle
from llama_index.core.retrievers import VectorIndexRetriever, QueryFusionRetriever, BaseRetriever
from llama_index.core.node_parser import SemanticSplitterNodeParser, SemanticDoubleMergingSplitterNodeParser, LanguageConfig
from llama_index.core.evaluation import FaithfulnessEvaluator, RetrieverEvaluator
from llama_index.core.schema import NodeWithScore
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms import openai as OpenAI
from llama_index.vector_stores.chroma import ChromaVectorStore
from typing import Literal, List, Optional
from dotenv import load_dotenv
from tqdm.auto import tqdm
from context_cite import ContextCiter

resource module not available on Windows


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load variables from a local .env file into the process environment
# (the API keys read via os.getenv further down come from here).
load_dotenv()
# Patch asyncio so that an event loop can be re-entered; Jupyter already runs
# one, and the async clients used below would otherwise fail to start theirs.
nest_asyncio.apply()
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices from the libraries used below.
warnings.filterwarnings("ignore")

In [3]:
# Hand the OpenAI key from the environment to the openai client.
# NOTE(review): os.getenv returns None when OPENAI_API_KEY is unset, so a
# missing key is not reported here -- it only surfaces on the first API call.
openai.api_key = os.getenv("OPENAI_API_KEY")

# spaCy language settings consumed by the splitter below. The model must be
# installed beforehand, e.g. `python -m spacy download en_core_web_md`.
config = LanguageConfig(language="english", spacy_model="en_core_web_md") # must download the model first
# chroma_client = chromadb.PersistentClient()

# NOTE(review): embed_model is not passed to the splitter or to the index in
# the cells visible here -- confirm it is actually used somewhere.
embed_model = OpenAIEmbedding()


# Semantic chunker. Threshold meanings below follow the LlamaIndex docs for
# this parser -- TODO confirm against the installed version.
splitter = SemanticDoubleMergingSplitterNodeParser(
    initial_threshold=0.7,    # similarity needed to start a new chunk
    appending_threshold=0.9,  # similarity needed to append a sentence to the current chunk
    merging_threshold=0.9,    # similarity needed to merge whole chunks in the second pass
    language_config=config,
    max_chunk_size=1024,      # upper bound on chunk size
)

In [4]:
# LlamaParse client used to convert the PDF into markdown text.
# NOTE(review): os.getenv returns None when LLAMACLOUD_API_KEY is unset; the
# failure would then only show up when the parser is first used.
parser = LlamaParse(
    api_key=os.getenv("LLAMACLOUD_API_KEY"),
    num_workers=8,
    show_progress=True,
    result_type="markdown"  # ask for markdown rather than plain text output
)

In [24]:
# NOTE(review): this cell's execution count (24) is out of sequence with its
# position in the notebook; on a fresh Restart & Run All it must still run
# before the cells that use `model_name`.
# NOTE(review): `document` is not referenced by the other cells shown here --
# the PDF that is actually parsed is `file` ("documents/sample.pdf").
document = 'documents/attention.pdf'
# NOTE(review): MODEL_NAME repeats the model id without the organisation
# prefix and is not referenced in the visible cells; `model_name` is the one
# passed to ContextCiter.
MODEL_NAME = "Llama-3.2-1B-Instruct"
model_name = "meta-llama/Llama-3.2-1B-Instruct" # 3.2 1B Instruct for faster inference, 3.1 8B for better performance

In [5]:
# Path of the PDF handed to the parser in the next cell. Check for it up
# front so a wrong path fails here with a clear message instead of inside
# the parsing call.
file = "documents/sample.pdf"
if not os.path.exists(file):
    raise FileNotFoundError(f"File {file} not found")

In [6]:
# Parse the PDF with LlamaParse (the recorded output shows a parsing job being
# started). The result is a collection of documents consumed by the splitter.
documents = parser.load_data(file)

Started parsing the file under job_id b0703cfb-3b25-48ce-920b-06e302337f02


In [8]:
# Chunk the parsed documents into nodes with the semantic splitter configured
# above; show_progress renders the progress bar seen in the output.
nodes = splitter.get_nodes_from_documents(documents, show_progress=True)

Parsing nodes: 100%|██████████| 6/6 [00:04<00:00,  1.39it/s]


In [12]:
# Sanity check: report how many nodes the splitter produced from how many
# parsed documents.
print(f"Splitted {len(nodes)} nodes from {len(documents)} documents")

Splitted 156 nodes from 6 documents


In [13]:
# Create a default storage context and register every node in its docstore
# before the index is built on top of the same context.
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

# Embed the nodes and build the vector index; the progress bar in the output
# comes from show_progress.
vector_index = VectorStoreIndex(
    nodes=nodes,
    insert_batch_size=1024,
    storage_context=storage_context,
    show_progress=True,
)

Generating embeddings: 100%|██████████| 156/156 [00:05<00:00, 28.08it/s]


In [15]:
# Dense retriever: embedding-similarity search over the vector index,
# returning the 5 best matches.
dense_retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
# Sparse retriever: BM25 scoring over the same nodes, also limited to 5.
sparse_retriever = BM25Retriever.from_defaults(nodes=nodes, similarity_top_k=5)

# Response synthesizer with library defaults; it turns the retrieved nodes
# into the final answer inside the query engine.
res_synth = get_response_synthesizer()

In [16]:
class HybridRetriever(BaseRetriever):
    """Retriever that merges the results of a dense and a sparse retriever.

    Both wrapped retrievers receive the same query bundle and their hits are
    combined by node id:

    * ``"OR"``  -- keep every node returned by either retriever (union).
    * ``"AND"`` -- keep only nodes returned by both retrievers (intersection).

    Results come back in a deterministic order: dense hits in the order the
    dense retriever returned them, followed by sparse-only hits in the order
    the sparse retriever returned them. When both retrievers return the same
    node, the ``NodeWithScore`` from the sparse retriever is the one kept (at
    the position of the dense hit). Scores are passed through untouched; the
    two retrievers' scores are not normalised against each other.

    Args:
        dense_retriever: Retriever queried first. NOTE: the default is the
            notebook-level ``dense_retriever`` object as it existed when this
            class was defined; re-running the cell that builds the retrievers
            does not refresh it, so pass the retriever explicitly.
        sparse_retriever: Second retriever; the same caveat applies to its
            default.
        mode: ``"OR"`` (default) or ``"AND"``, as described above.
        **kwargs: Forwarded unchanged to ``BaseRetriever.__init__``.
    """

    def __init__(self, dense_retriever: BaseRetriever = dense_retriever, 
                 sparse_retriever: BaseRetriever = sparse_retriever,
                 mode: Literal["AND", "OR"] = "OR",
                 **kwargs) -> None:
        self.dense_retriever = dense_retriever
        self.sparse_retriever = sparse_retriever
        self.mode = mode

        super().__init__(**kwargs)

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Query both retrievers and merge their hits according to ``self.mode``.

        Args:
            query_bundle: The query passed to both wrapped retrievers.

        Returns:
            The merged hits, in the deterministic order described on the class.

        Raises:
            ValueError: If ``self.mode`` is neither ``"AND"`` nor ``"OR"``.
        """
        # Validate before querying so an invalid mode does not cost a
        # retrieval round-trip (the dense retriever may call an embedding API).
        if self.mode not in ("AND", "OR"):
            raise ValueError("Invalid mode. Must be either 'AND' or 'OR'.")

        dense_res = self.dense_retriever.retrieve(query_bundle)
        sparse_res = self.sparse_retriever.retrieve(query_bundle)

        dense_ids = {n.node.node_id for n in dense_res}
        sparse_ids = {n.node.node_id for n in sparse_res}

        # A dict keeps insertion order, and overwriting an existing key keeps
        # that key's position: dense hits first, then sparse-only hits.
        merged = {n.node.node_id: n for n in dense_res}
        merged.update({n.node.node_id: n for n in sparse_res})

        if self.mode == "AND":
            wanted_ids = dense_ids & sparse_ids
        else:
            wanted_ids = dense_ids | sparse_ids

        # Walk the ordered dict rather than the id set: set iteration order
        # for strings varies between interpreter sessions, which made the
        # context given to the LLM irreproducible.
        return [hit for node_id, hit in merged.items() if node_id in wanted_ids]

In [17]:
# Combine the two retrievers. `mode` is not passed, so the class default
# ("OR": union of both result sets) applies.
hybrid_retriever = HybridRetriever(dense_retriever=dense_retriever, sparse_retriever=sparse_retriever)

In [18]:
# Query engine: hybrid retrieval followed by answer synthesis with the
# default response synthesizer.
query_engine = RetrieverQueryEngine(retriever=hybrid_retriever, response_synthesizer=res_synth)

In [19]:
# Smoke-test the pipeline end to end with a question about the parsed PDF.
res = query_engine.query("What does printf mean in C?")
# NOTE(review): the bare expression displays the whole Response repr,
# including every source node; `res.response` would show just the answer.
res

Response(response='printf in C stands for "print formatted".', source_nodes=[NodeWithScore(node=TextNode(id_='8cdc46eb-de13-4eb3-af4e-fb13d6ff2e68', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='0e7a93df-d3b6-4964-aaa4-2f0b191edea7', node_type='4', metadata={}, hash='0e21741e5655b2140ca5424d788011fde501d9208075dbc9f3fbbe54b3e7d3dc'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='9fcf1dbb-70b8-4f53-bcb1-c4aa77e9411d', node_type='1', metadata={}, hash='da3b31ba797dbe383339b6c38060c118d5b5ee9df02cea39375730dcef134c3a'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='9b98ef09-a3b6-400a-888b-6534d2c645f4', node_type='1', metadata={}, hash='7f5e520fd1f480c7fc6a6e39a67447d4d64372b4352c6d41aa202d14f38eb4eb')}, metadata_template='{key}: {value}', metadata_separator='\n', text='The example program from the book prints "hello, world", and was inherited from a

In [None]:
# Dump the text of every node the answer was synthesised from, each followed
# by a "----" separator line, to inspect what the hybrid retriever returned.
snodes = res.source_nodes
for sn in snodes:
    print(sn.text, "----", sep="\n")

The example program from the book prints "hello, world", and was inherited from a 1974 Bell Laboratories internal memorandum by Brian Kernighan, Programming in C: A Tutorial*:3

main( ) {
printf("hello, world");
}

In the above example, the main( ) function defines where the "Hello, World!"
----
*n');
}
a 'hell';
b 'o, w';
c 'orld';

The program above prints hello, world!
----
The function body consists of a single statement, a call to the printf() function, which stands for "print formatted"; it outputs to the console whatever is passed to it as the parameter, in this case the string "hello, world". The C-language version was preceded by Kernighan's own 1972 *A Tutorial Introduction to the Language B*,4 where the first known version of the program is found in an example used to illustrate external variables:

main( ) {
extrn a, b, c;
putchar(a); putchar(b); putchar(c); putchar('!
----
Retrieved 23 August 2014.
----
In contrast, the equivalent code in C++7 requires the import of the in

In [None]:
def rerank(query, nodes,
           model=model_name,
           top_k=5,
           device="cuda"):
    """Compute ContextCite attributions over the retrieved nodes (unfinished).

    The contents of ``nodes`` are joined with blank lines into one context
    string, a ``ContextCiter`` is built for ``query`` over that context, and
    the attribution scores plus the partitioner's source segments are
    computed.

    NOTE(review): the function is incomplete. The attributions and segments
    are computed but never used, ``top_k`` is never used, and nothing is
    returned (callers receive ``None``). The actual re-ranking step still has
    to be written.

    Args:
        query: Question text given to ContextCite.
        nodes: Retrieved items; each must expose ``.node.get_content()``.
        model: Model name or path passed to ``ContextCiter.from_pretrained``.
            Defaults to the notebook-level ``model_name``.
        top_k: Intended number of nodes to keep -- currently unused.
        device: Device passed to ``ContextCiter.from_pretrained``. Defaults
            to ``"cuda"``, the value that used to be hard-coded.
    """
    context = "\n\n".join(node.node.get_content() for node in nodes)

    # Use the `model` argument: the global `model_name` used to be passed
    # here, which silently ignored whatever the caller supplied.
    cc = ContextCiter.from_pretrained(
        model,
        context=context,
        query=query,
        device=device,
    )

    # as_dataframe=False, so this is presumably the raw score array rather
    # than a DataFrame -- confirm against the context_cite version in use.
    attributions = cc.get_attributions(as_dataframe=False, top_k=len(nodes))
    segments = cc.partitioner.sources
    

In [None]:
def ee_query(query):
    global query_engine
    if query_engine is None:
        return "Query engine not initialized, please upload a document first"
    
    nodes = query_engine.retriever.retrieve(QueryBundle(query=query))
    context = "\n\n".join([node.node.get_content() for node in nodes])

    cc = ContextCiter.from_pretrained(
        model_name,
        context=context,
        query=query,
        device="cuda"
    )

    response = cc.response
    attributions = cc.attributions(