In [16]:
# Import Packages
from typing import List
import requests

from langchain_core.retrievers import BaseRetriever
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain.callbacks import get_openai_callback
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings
from langchain.retrievers import ContextualCompressionRetriever
from langchain_openai import OpenAI, ChatOpenAI
from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline, LLMChainExtractor
from langchain_community.document_transformers import EmbeddingsRedundantFilter

import warnings
warnings.filterwarnings("ignore")

In [7]:
import getpass
import os

os.environ["OPENAI_API_KEY"] = getpass.getpass()

 ········


In [8]:
os.environ["CONTEXTCRUNCH_API_KEY"] = getpass.getpass()

 ········


## Contextual Compression with LangChain

In [1]:
# Helper function for printing docs
def pretty_print_docs(docs):
    print(
        f"\n{'-' * 100}\n".join(
            [f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)]
        )
    )

In [4]:
documents = TextLoader("state_of_the_union.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
retriever = FAISS.from_documents(texts, HuggingFaceEmbeddings()).as_retriever()

docs = retriever.get_relevant_documents(
    "What did the president say about Ketanji Brown Jackson?"
)

pretty_print_docs(docs)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Document 1:

Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. 

Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. 

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.
----------------------------------------------------------------------------------------------------
Document 2:

A former top litigator in private practice. A former federal public defender. And fro

In [9]:
llm = OpenAI(temperature=0) # Could be any llm of your choice
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)
compressed_docs = compression_retriever.get_relevant_documents(
    "What did the president say about Ketanji Jackson Brown"
)
pretty_print_docs(compressed_docs)

Document 1:

I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson.
----------------------------------------------------------------------------------------------------
Document 2:

Frances Haugen, social media platforms, national experiment, children, profit, privacy protections, targeted advertising, tech companies, personal data, mental health services, Americans, veterans, war, care, families, job training, housing, lower-income veterans, VA care, debt-free, troops, Iraq, Afghanistan, dangers.


In [10]:
original_contexts_len = len("\n\n".join([d.page_content for i, d in enumerate(docs)]))
compressed_contexts_len = len("\n\n".join([d.page_content for i, d in enumerate(compressed_docs)]))

print("Original context length:", original_contexts_len)
print("Compressed context length:", compressed_contexts_len)
print("Compressed Ratio:", f"{original_contexts_len/(compressed_contexts_len + 1e-5):.2f}x")

Original context length: 3565
Compressed context length: 415
Compressed Ratio: 8.59x


### Using EmbeddingsFilter

In [12]:
embeddings = HuggingFaceBgeEmbeddings() # could be any embedding of your choice
embeddings_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=embeddings_filter, base_retriever=retriever
)

compressed_docs = compression_retriever.get_relevant_documents(
    "What did the president say about Ketanji Jackson Brown"
)
pretty_print_docs(compressed_docs)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/90.3k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/720 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

Document 1:

Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. 

Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. 

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.


In [13]:
original_contexts_len = len("\n\n".join([d.page_content for i, d in enumerate(docs)]))
compressed_contexts_len = len("\n\n".join([d.page_content for i, d in enumerate(compressed_docs)]))

print("Original context length:", original_contexts_len)
print("Compressed context length:", compressed_contexts_len)
print("Compressed Ratio:", f"{original_contexts_len/(compressed_contexts_len + 1e-5):.2f}x")

Original context length: 3565
Compressed context length: 787
Compressed Ratio: 4.53x


### Stringing compressors and document transformers together

In [15]:
embeddings = HuggingFaceBgeEmbeddings()
splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0, separator=". ")
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
relevant_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[splitter, redundant_filter, relevant_filter]
)

In [17]:
compression_retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor, base_retriever=retriever
)

compressed_docs = compression_retriever.get_relevant_documents(
    "What did the president say about Ketanji Jackson Brown?"
)
pretty_print_docs(compressed_docs)

Document 1:

One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 

And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson


In [18]:
original_contexts_len = len("\n\n".join([d.page_content for i, d in enumerate(docs)]))
compressed_contexts_len = len("\n\n".join([d.page_content for i, d in enumerate(compressed_docs)]))

print("Original context length:", original_contexts_len)
print("Compressed context length:", compressed_contexts_len)
print("Compressed Ratio:", f"{original_contexts_len/(compressed_contexts_len + 1e-5):.2f}x")

Original context length: 3565
Compressed context length: 237
Compressed Ratio: 15.04x


## Using ContextCrunch

In [5]:
# Instantiate Chat Model
gpt4_chat = ChatOpenAI(model_name="gpt-4-1106-preview", max_tokens=50, temperature=0)

In [6]:
# Create BaseRetriever Class
class MockBaseRetriever(BaseRetriever):
    documents: List[Document] = []
    def __init__(self, documents):
        super().__init__()
        self.documents = documents
    def get_relevant_documents(self, *args, **kwargs):
        return self.documents

In [8]:
# Read txt file as a string
with open('rag_text.txt', 'r') as file:
    text = file.read().replace('\n', '')
# Create langchain documents
documents = [Document(page_content=content) for content in text.split('\n\n')]

In [9]:
# Create a retriever
retriever = MockBaseRetriever(documents=documents)

In [12]:
# Define a Prompt Template
prompt_template = ChatPromptTemplate.from_template(
    """
    Write a high quality answer for the given question using only the provided search results. {context}
    Question: {question}
    Answer:
    """
)
question = "in the dynastic cycle what is the right to rule called."

## Baseline Usage & Performance without Compression

In [13]:
rag_chain = (
    prompt_template
    | gpt4_chat
    | StrOutputParser()
)

In [14]:
with get_openai_callback() as cb:
    result = rag_chain.invoke({"question": question, "context": text})
    print(f'Result: {result},\\n callback: {cb}')
    original_prompt_cost = cb.total_cost

Result: In the context of the dynastic cycle, the right to rule is often referred to as the "Mandate of Heaven." This concept originated in ancient China and was used to justify the rule of the Emperor of China. According to this belief,,\n callback: Tokens Used: 3892
	Prompt Tokens: 3842
	Completion Tokens: 50
Successful Requests: 1
Total Cost (USD): $0.039920000000000004


## Contextual Compression

In [16]:
from contextcrunch_langchain import ContextCrunchDocumentCompressor
cc_compressor = ContextCrunchDocumentCompressor(compression_ratio=0.9)
from langchain.retrievers import ContextualCompressionRetriever
contextcrunch_compression_retriever = ContextualCompressionRetriever(base_compressor=cc_compressor, base_retriever=retriever)

In [17]:
def format_docs(docs):
    return "\\n\\n".join(doc.page_content for doc in docs)

rag_chain_2 = (
    {"context": contextcrunch_compression_retriever | format_docs, "question": RunnablePassthrough()}
    | prompt_template
    | gpt4_chat 
    | StrOutputParser()
)

In [18]:
with get_openai_callback() as cb:
    result = rag_chain_2.invoke(question)
    print(f'Result: {result},\\n callback: {cb}')
    new_tokens = cb.total_cost

Result: In the dynastic cycle, the right to rule is called the "Mandate of Heaven." This concept originated in ancient China and was used to justify the rule of the emperor. It held that the emperor was granted the divine right to govern by,\n callback: Tokens Used: 727
	Prompt Tokens: 677
	Completion Tokens: 50
Successful Requests: 1
Total Cost (USD): $0.008270000000000001


In [19]:
cost_savings = (original_prompt_cost - new_tokens) / original_prompt_cost
print(f"Cost savings: {cost_savings*100}%")

Cost savings: 79.28356713426854%
