In [1]:
import os

from langchain_ollama import OllamaEmbeddings
from langchain_postgres import PGVector

# Embedding function handed to the vector store; the model name is passed
# straight to OllamaEmbeddings.
embeddings = OllamaEmbeddings(model="embeddinggemma:latest")

# Take the database URL from the environment so that real credentials never
# have to be committed with the notebook. When PGVECTOR_CONNECTION is unset,
# fall back to the local development database used so far.
connection = os.environ.get(
    "PGVECTOR_CONNECTION",
    "postgresql+psycopg://langchain:langchain@localhost:6024/rag",
)

# Name of the PGVector collection queried by the rest of the notebook.
collection_name = "arxiv"

# Vector store handle shared by the retrieval cells below.
# NOTE(review): use_jsonb=True presumably selects JSONB metadata storage,
# which the `$eq` metadata filter used later relies on — confirm against the
# langchain-postgres documentation.
vector_store = PGVector(
    embeddings,
    collection_name=collection_name,
    connection=connection,
    use_jsonb=True,
)

In [2]:
# Ask the vector store for the ten closest matches to the query, keeping only
# entries whose `doc_type` metadata equals "text".
metadata_filter = {"doc_type": {"$eq": "text"}}
results = vector_store.similarity_search(
    "gradient descent",
    k=10,
    filter=metadata_filter,
)

# Preview just the first three hits; each one is followed by a divider line.
# zip() against range(3) stops at whichever runs out first, so fewer than
# three results are handled without an explicit slice.
for index, doc in zip(range(3), results):
    print(
        f"* {index}. {doc.page_content} [{doc.metadata}]",
        "----------------------------------------------------",
        sep="\n",
    )

* 0. Keywords: sparse attention, Sinkhorn normalization, block sparse attention, locality
Key Objects: Queries, Keys, Key Blocks, Query Blocks, Sorting Network
Refers to Images: None
Hypothetical Questions:
- How does Sinkhorn normalization contribute to the sparse attention mechanism?
- Why is it beneficial to limit attention to key blocks instead of individual keys?
- In what ways does this approach complement or differ from other sparse attention techniques like block local attention?
---
Summary:
Sparse Sinkhorn Attention assigns key blocks to query blocks using a sorting network and Sinkhorn normalization, limiting each query's attention to keys within its assigned block to enhance the model's ability to capture locality.
Original Text:
Sparse Sinkhorn Attention [132] first splits queries and keys into several blocks and assigns a key block to each query block. Each query is only allowed to attend to the keys in the key block that is assigned to its corresponding query block. The 

In [None]:
from langchain.retrievers import ContextualCompressionRetriever
from ragatouille import RAGPretrainedModel

# Load the pretrained "colbert-ir/colbertv2.0" checkpoint through RAGatouille.
RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")

# Plain retriever over the vector store (default search settings).
base_retriever = vector_store.as_retriever()

# Wrap the loaded model as a LangChain document compressor and combine it with
# the base retriever.
# NOTE(review): presumably the compressor re-ranks the documents returned by
# the base retriever — confirm against the RAGatouille/LangChain docs.
colbert_compressor = RAG.as_langchain_document_compressor()
compression_retriever = ContextualCompressionRetriever(
    base_retriever=base_retriever,
    base_compressor=colbert_compressor,
)

In [None]:
# Run a sample question through the compression retriever and dump whatever
# it returns so the result can be inspected.
question = "What is Gradient Descent?"
compressed_docs = compression_retriever.invoke(question)
print(compressed_docs)