In [7]:
import add_packages
import dotenv, yaml, os
from pprint import pprint

from my_langchain import (
  retrievers, vectorstores, document_loaders, text_splitters, text_embedding_models,
  chat_models, chains, documents
)
from my_configs import constants

dotenv.load_dotenv()

True

In [8]:
class CustomOpenAIEmbeddings(text_embedding_models.OpenAIEmbeddings):
    """OpenAI embeddings wrapper that takes its API key from the environment.

    Instances are also callable: ``instance(input)`` forwards to
    ``_embed_documents`` so the object can be used wherever a plain
    embedding callable is expected.
    """

    def __init__(self, *args, **kwargs):
        """Build the embeddings object, defaulting the key to ``OPENAI_API_KEY``.

        An explicitly supplied ``openai_api_key`` keyword takes precedence.
        ``setdefault`` is used so that an explicit key no longer collides with
        the injected one (which raised ``TypeError: got multiple values for
        keyword argument``).
        """
        kwargs.setdefault("openai_api_key", os.getenv("OPENAI_API_KEY"))
        super().__init__(*args, **kwargs)

    def _embed_documents(self, texts):
        """Return one embedding per text, in input order.

        Args:
            texts: An iterable of strings, or a single string. A single string
                is treated as one document instead of being iterated character
                by character.

        Returns:
            A list with one entry per text, each taken from
            ``self.client.create(...).data[0].embedding``.
        """
        if isinstance(texts, str):
            # Iterating a str yields characters; embed it as a single document.
            texts = [texts]
        # NOTE(review): the model name is hard-coded here and any model
        # configured on the instance is not consulted — confirm this is intended.
        return [
            self.client.create(
                input=text, model="text-embedding-ada-002").data[0].embedding
            for text in texts
        ]

    def __call__(self, input):
        """Embed ``input`` (a string or an iterable of strings)."""
        return self._embed_documents(input)

# Vector store-backed retriever

In [5]:
# Load the movie fixture (documents, metadata field descriptions and the
# content description) from YAML. The encoding is pinned to UTF-8 so parsing
# does not depend on the platform's locale default.
with open(f'{add_packages.APP_PATH}/data/movies.yaml', 'r', encoding='utf-8') as file:
  data = yaml.safe_load(file)

In [9]:
# Turn each YAML record under 'docs' into a Document.
docs = [
  documents.Document(
    page_content=record['page_content'],
    metadata=record['metadata'],
  )
  for record in data['docs']
]

# Turn each YAML record under 'metadata_field_info' into an AttributeInfo.
metadata_field_info = [
  chains.AttributeInfo(
    name=field['name'],
    description=field['description'],
    type=field['type'],
  )
  for field in data['metadata_field_info']
]

document_content_description = data["document_content_description"]

# Embedding model and chat model shared by the cells below.
embeddings = CustomOpenAIEmbeddings()
llm = chat_models.chat_openai

# Index the documents in Chroma, then build a self-query retriever on top of
# that vector store.
vectorstore = vectorstores.chroma.Chroma.from_documents(docs, embeddings)
retriever = retrievers.SelfQueryRetriever.from_llm(
  llm=llm,
  vectorstore=vectorstore,
  document_contents=document_content_description,
  metadata_field_info=metadata_field_info,
  verbose=True,
)

In [None]:
# Alternative setup, currently disabled: a FAISS retriever with Cohere
# embeddings over the State of the Union text.
# doc = document_loaders.TextLoader(f"{add_packages.APP_PATH}/data/state_of_the_union.txt").load()
# text_splitter = text_splitters.RecursiveCharacterTextSplitter(
#   chunk_size=500, chunk_overlap=100,
# )
# docs = text_splitter.split_documents(doc)
# embeddings = text_embedding_models.CohereEmbeddings(
#   model=constants.EMBEDDINGS["COHERE"]["EMBED-ENGLISH-V2.0"]
# )
# retriever = vectorstores.faiss.FAISS.from_documents(docs, embeddings).as_retriever(
#   search_type="mmr",
#   search_kwargs={
#     "k": 10,
#   }
# )

# llm = chat_models.chat_openai

# `query` must stay active: every retrieval cell below passes it to
# get_relevant_documents, and with this line commented out a fresh
# "Restart Kernel -> Run All" stops with NameError.
# NOTE(review): this question targets the State of the Union text, while the
# active retriever is built from movies.yaml — confirm which corpus is intended.
query = "What did the president say about Ketanji Jackson Brown"

# [MultiQueryRetriever](https://python.langchain.com/docs/modules/data_connection/retrievers/MultiQueryRetriever)

Distance-based vector database retrieval embeds the query in a high-dimensional space and finds similar embedded documents based on distance. Retrieval results may change with slight differences in query wording, or when the embeddings do not capture the semantics of the data well. Manual prompt engineering or tuning is often used to address these issues, but it can be laborious.

The MultiQueryRetriever automates prompt tuning using an LLM to generate multiple queries from various perspectives. It retrieves relevant documents for each query and combines them to get a larger set of potentially relevant documents. Generating multiple perspectives can overcome limitations of distance-based retrieval and provide richer results.



## Simple usage

Specify LLM for query generation, retriever will handle the rest.


In [None]:
# Wrap the base retriever in a MultiQueryRetriever, handing it the LLM.
retriever_multi_query = retrievers.MultiQueryRetriever.from_llm(
  llm=llm,
  retriever=retriever,
)

In [None]:
# Fetch documents for `query` through the multi-query retriever and
# pretty-print them. `query` is not assigned in this cell; it has to exist in
# the kernel already.
unique_docs = retriever_multi_query.get_relevant_documents(query)
pprint(unique_docs)


## Supplying your own prompt

Supply a prompt with an output parser to split results into a list of queries.



# [Contextual compression](https://python.langchain.com/docs/modules/data_connection/retrievers/contextual_compression)

One challenge with retrieval is not knowing the specific queries your document storage system will face when ingesting data. This can result in relevant information being buried in a document with irrelevant text, leading to costly LLM calls and poor responses.

Contextual compression compresses retrieved documents based on the query context to only return relevant information.

To use the Contextual Compression Retriever, you need a base retriever and a Document Compressor.

The Contextual Compression Retriever passes each query to the base retriever, takes the initial documents it returns, and runs them through the Document Compressor. The compressor shortens the result by reducing the content of documents or dropping documents altogether.



## Contextual compression enhancement with LLMChainExtractor

Wrap base retriever with ContextualCompressionRetriever. Add LLMChainExtractor to iterate over returned documents and extract relevant content for query.


In [14]:
# Compressor: an LLMChainExtractor built from the shared LLM.
compressor = retrievers.LLMChainExtractor.from_llm(llm)

# Pair the compressor with the base retriever.
retriever_compression = retrievers.ContextualCompressionRetriever(
  base_retriever=retriever,
  base_compressor=compressor,
)

In [15]:
# Run `query` through the current `retriever_compression` and pretty-print the
# result. Both names come from earlier cells.
docs_compressed = retriever_compression.get_relevant_documents(query)
pprint(docs_compressed)



[Document(page_content='nominated Circuit Court of Appeals Judge Ketanji Brown Jackson.', metadata={'source': '/Users/thung/Documents/Me/Coding/Learn-LLM/Apps/data/state_of_the_union.txt'})]



## More built-in compressors: filters



### LLMChainFilter

LLMChainFilter: Simple yet robust compressor using LLM chain to filter out documents and return others without altering content.


In [12]:
# Compressor: an LLMChainFilter built from the shared LLM.
compressor = retrievers.LLMChainFilter.from_llm(llm)

# Rebind retriever_compression to use this filter over the base retriever.
retriever_compression = retrievers.ContextualCompressionRetriever(
  base_retriever=retriever,
  base_compressor=compressor,
)

In [13]:
# Query the LLMChainFilter-backed compression retriever and pretty-print the
# documents it returns. `query` and `retriever_compression` come from earlier
# cells.
docs_compressed = retriever_compression.get_relevant_documents(query)
pprint(docs_compressed)



[Document(page_content='One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '/Users/thung/Documents/Me/Coding/Learn-LLM/Apps/data/state_of_the_union.txt'})]



### EmbeddingsFilter

Making an extra LLM call for each document is costly and slow. The EmbeddingsFilter instead embeds the documents and the query, and returns only the documents whose embeddings are sufficiently similar to the query's.


In [10]:
# Compressor: an EmbeddingsFilter using the shared embeddings and a 0.76
# similarity threshold.
compressor = retrievers.EmbeddingsFilter(
  similarity_threshold=0.76,
  embeddings=embeddings,
)

# Rebind retriever_compression to use this filter over the base retriever.
retriever_compression = retrievers.ContextualCompressionRetriever(
  base_retriever=retriever,
  base_compressor=compressor,
)

In [11]:
# Query the EmbeddingsFilter-backed compression retriever and pretty-print the
# result. `query` and `retriever_compression` come from earlier cells.
docs_compressed = retriever_compression.get_relevant_documents(query)
pprint(docs_compressed)

[]



## Stringing compressors and document transformers together

Using the DocumentCompressorPipeline allows combining multiple compressors in sequence. BaseDocumentTransformers can also be added to the pipeline, performing transformations on a set of documents. For instance, TextSplitters split documents into smaller pieces, while EmbeddingsRedundantFilter filters out redundant documents based on embedding similarity.


In [26]:
# Embedding-based stages. They are constructed here but are currently left out
# of the pipeline's transformer list below.
filter_embeddings_redundant = retrievers.EmbeddingsRedundantFilter(
  embeddings=embeddings,
)
filter_embeddings_relevant = retrievers.EmbeddingsFilter(
  similarity_threshold=0.76,
  embeddings=embeddings,
)

# LLM-based stages.
filter_llmchain = retrievers.LLMChainFilter.from_llm(llm)
extractor_llmchain = retrievers.LLMChainExtractor.from_llm(llm)

# Active pipeline: LLM filter first, then LLM extractor.
compressor_pipeline = retrievers.DocumentCompressorPipeline(
  transformers=[
    # filter_embeddings_redundant,
    # filter_embeddings_relevant,
    filter_llmchain,
    extractor_llmchain,
  ],
)

retriever_compression = retrievers.ContextualCompressionRetriever(
  base_retriever=retriever,
  base_compressor=compressor_pipeline,
)

In [27]:
# Query the pipeline-backed compression retriever and pretty-print the result.
# `query` and `retriever_compression` come from earlier cells.
docs_compressed = retriever_compression.get_relevant_documents(query)
pprint(docs_compressed)



[Document(page_content='nominated Circuit Court of Appeals Judge Ketanji Brown Jackson.', metadata={'source': '/Users/thung/Documents/Me/Coding/Learn-LLM/Apps/data/state_of_the_union.txt'})]


# Cohere Reranker

In [None]:
# Alternative chat model, kept for reference:
# llm = chat_models.ChatCohere(
#   model=constants.MODELS["COHERE"]["COMMAND"]
# )
llm = chat_models.chat_openai

# Compressor: CohereRerank with its default arguments.
compressor = retrievers.CohereRerank()

# Rebind retriever_compression to use the reranker over the base retriever.
retriever_compression = retrievers.ContextualCompressionRetriever(
  base_retriever=retriever,
  base_compressor=compressor,
)

In [None]:
# Query the CohereRerank-backed compression retriever and pretty-print the
# result. `query` and `retriever_compression` come from earlier cells.
docs_compressed = retriever_compression.get_relevant_documents(query)
pprint(docs_compressed)