# Context Compression

In [None]:
import chromadb
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import WikipediaLoader
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor


## Download text, split and create embeddings

In [None]:
# Pull the Wikipedia article(s) matching the query into LangChain documents.
wiki_query = "MKUltra"
loader = WikipediaLoader(query=wiki_query)
documents = loader.load()

In [None]:
# Split the loaded articles into chunks; the size limit is measured with the
# tiktoken encoder rather than in raw characters.
chunk_size_limit = 500
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=chunk_size_limit,
)
docs = text_splitter.split_documents(documents)

In [None]:
# Embed every chunk with OpenAI and store the vectors in a Chroma collection
# persisted under ./MKUltra.
embedding_function = OpenAIEmbeddings()
db = Chroma.from_documents(
    documents=docs,
    embedding=embedding_function,
    persist_directory="./MKUltra",
)
db.persist()

## Creating the contextual compressor

In [None]:
# temperature=0 asks the chat model for the least random output.
llm = ChatOpenAI(temperature=0)
# The extractor uses the LLM to pull out the parts of a document relevant to a query.
compressor = LLMChainExtractor.from_llm(llm=llm)

In [None]:
# Combine the vector-store retriever with the compressor: chunks fetched by
# the base retriever are passed through the compressor before being returned.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=db.as_retriever(),
)

In [None]:
# Baseline: a plain similarity search, which returns whole chunks as stored.
# NOTE(review): this rebinds `docs`, which earlier held the split chunks used
# to build the index; re-running the indexing cell after this one would
# therefore index the search results instead.
search_query = "when was this declassified?"
docs = db.similarity_search(search_query)
top_hit = docs[0]
print(top_hit.page_content)

In [None]:
# Same question as the baseline search, this time through the compressing retriever.
compression_query = "when was this declassified?"
compressed_docs = compression_retriever.get_relevant_documents(compression_query)


In [None]:
# Show the compressed result for the question.
# The extractor puts the query-relevant passage in `page_content`; `metadata`
# is carried over unchanged from the source chunk, so metadata["summary"]
# would only print the Wikipedia article summary attached by the loader,
# not the compressed text.
if compressed_docs:
    print(compressed_docs[0].page_content)
else:
    # Chunks in which the LLM finds nothing relevant are dropped, so the
    # result list can be empty; avoid an IndexError in that case.
    print("No relevant content was extracted for this query.")