In [1]:
import add_packages
import dotenv, yaml, os, logging
from pprint import pprint

from toolkit.langchain import (
  retrievers, vectorstores, document_loaders, text_splitters, text_embedding_models,
  chat_models, chains, documents
)
from my_configs import constants

dotenv.load_dotenv()

True

# Function

In [3]:
# Shared chat model and embedding model used by the cells below.
# NOTE(review): `chat_openai` is referenced without being called — presumably a
# ready-made chat model instance exposed by toolkit.langchain; confirm.
llm = chat_models.chat_openai
embeddings = text_embedding_models.OpenAIEmbeddings()

# Docs

In [None]:
# Load the movie corpus, its metadata schema and a content description from
# YAML. The explicit encoding keeps the read independent of the OS locale.
with open(f'{add_packages.APP_PATH}/data/movies.yaml', 'r', encoding='utf-8') as file:
  data = yaml.safe_load(file)

# One Document per entry under the `docs` key.
docs = [
  documents.Document(
    page_content=doc_data['page_content'], metadata=doc_data['metadata']
  )
  for doc_data in data['docs']
]

# Recreate metadata_field_info list (one AttributeInfo per described field).
metadata_field_info = [
  chains.AttributeInfo(
    name=info_data['name'], description=info_data['description'],
    type=info_data['type']
  )
  for info_data in data['metadata_field_info']
]

document_content_description = data["document_content_description"]

# Index the movie documents with the `embeddings` model defined earlier.
vectorstore = vectorstores.chroma.Chroma.from_documents(docs, embeddings)

# Sample questions for this corpus; later cells index into this list.
queries = [
  "I want to watch a movie rated higher than 8.5",
  "Has Greta Gerwig directed any movies about women",
  "What's a highly rated (above 8.5) science fiction film?",
  "What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated",
]

In [None]:
# Load the State of the Union text file and split it into overlapping chunks
# (chunk_size=500, chunk_overlap=100).
# NOTE: this rebinds `docs`, replacing whatever another "Docs" cell left there.
doc = document_loaders.TextLoader(f"{add_packages.APP_PATH}/data/state_of_the_union.txt").load()
text_splitter = text_splitters.RecursiveCharacterTextSplitter(
  chunk_size=500, chunk_overlap=100,
)
docs = text_splitter.split_documents(doc)

# Single query consumed by the cells that call get_relevant_documents(query).
query = "What did the president say about Ketanji Jackson Brown"

In [None]:
# Tiny in-memory corpus: one Document per string, in the order listed.
docs = [
  documents.Document(page_content=text)
  for text in (
    "foo",
    "bar",
    "world",
    "hello",
    "foo bar",
    "Langchain supports cohere RAG!",
    "The sky is blue!",
  )
]


In [4]:
# Alternative corpus: load a web page from the given URL, split it into chunks
# (chunk_size=500, no overlap) and index the chunks in a Chroma vector store.
# NOTE: this cell rebinds `embeddings`, `docs`, `vectorstore` and `queries`.
embeddings = text_embedding_models.CustomOpenAIEmbeddings()

loader = document_loaders.WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
doc = loader.load()

text_splitter = text_splitters.RecursiveCharacterTextSplitter(
  chunk_size=500, chunk_overlap=0,
)
docs = text_splitter.split_documents(doc)

vectorstore = vectorstores.chroma.Chroma.from_documents(docs, embedding=embeddings)

# NOTE: only two entries here, so any later cell that reads queries[2] raises
# IndexError when this is the most recently run "Docs" cell.
queries = [
  "Hi I'm Lance. What are the approaches to Task Decomposition?",
  "I live in San Francisco. What are the Types of Memory?",
]

# Vector store-backed retriever

In [None]:
# Select the embedding model and chat model used by the retriever cells below.
# The commented-out lines are an alternative Cohere embedding configuration.
embeddings = text_embedding_models.CustomOpenAIEmbeddings()
# embeddings = text_embedding_models.CohereEmbeddings(
#   model=constants.EMBEDDINGS["COHERE"]["EMBED-ENGLISH-V2.0"]
# )

llm = chat_models.chat_openai


# [Self-querying Retriever](https://python.langchain.com/docs/modules/data_connection/retrievers/self_query)

A self-querying retriever can query itself by using a query-constructing LLM chain to write a structured query and applying it to its VectorStore. This allows the retriever to compare user-input queries with stored documents for semantic similarity and extract filters from user queries to execute on document metadata.

![tmp](https://python.langchain.com/assets/images/self_querying-26ac0fc8692e85bc3cd9b8640509404f.jpg)


## Creating

Instantiate the retriever by providing information on metadata fields and document contents.


In [None]:
# Build the self-querying retriever from the currently bound `llm` and
# `vectorstore`. `document_content_description` and `metadata_field_info` are
# only defined by the movies.yaml cell in the "Docs" section, so that cell must
# have been run first.
retriever = retrievers.SelfQueryRetriever.from_llm(
  llm=llm,
  vectorstore=vectorstore,
  document_contents=document_content_description,
  metadata_field_info=metadata_field_info,
  verbose=True,
  # structured_query_translator=None,
)

In [None]:
# NOTE: queries[2] exists only in the four-item movie `queries` list; with the
# two-item list from the web-page cell this line raises IndexError.
retriever.get_relevant_documents(queries[2])


## Filter k

Use the self query retriever to specify the number of documents to fetch by passing enable_limit=True to the constructor.



## Building from the ground up with LCEL

Reconstruct retriever from scratch for custom control and insight into operations.

Create a query-construction chain to generate a StructuredQuery object from a user query. Helper functions are available for prompt creation and output parsing with tunable parameters.

The query constructor is crucial for the self-query retriever. To create an effective retrieval system, ensure the query constructor is optimized by adjusting prompts, examples, and attribute descriptions. Refer to this cookbook for a detailed example using hotel inventory data.

The key element is the structured query translator, responsible for translating the StructuredQuery object into a metadata filter in the syntax of the vector store. LangChain has built-in translators available in the Integrations section.



# [MultiQueryRetriever](https://python.langchain.com/docs/modules/data_connection/retrievers/MultiQueryRetriever)

Distance-based vector database retrieval embeds queries in high-dimensional space to find similar embedded documents based on distance. Retrieval results may vary with slight changes in query wording or inadequate semantics captured by the embeddings. Manual prompt engineering or tuning is often used to address these issues, but it can be laborious.

The MultiQueryRetriever automates prompt tuning using an LLM to generate multiple queries from various perspectives. It retrieves relevant documents for each query and combines them to get a larger set of potentially relevant documents. Generating multiple perspectives can overcome limitations of distance-based retrieval and provide richer results.



## Simple usage

Specify LLM for query generation, retriever will handle the rest.


In [None]:
# Wrap whichever `retriever` is currently bound; `llm` generates the
# alternative phrasings of the query.
retriever_multi_query = retrievers.MultiQueryRetriever.from_llm(
  retriever=retriever, llm=llm,
)

In [None]:
# `query` must already be bound by an earlier cell (for example the
# State of the Union cell or a RePhraseQuery cell); it is not defined here.
unique_docs = retriever_multi_query.get_relevant_documents(query)
pprint(unique_docs)


## Supplying your own prompt

Supply a prompt with an output parser to split results into a list of queries.



# [Contextual compression](https://python.langchain.com/docs/modules/data_connection/retrievers/contextual_compression)

One challenge with retrieval is not knowing the specific queries your document storage system will face when ingesting data. This can result in relevant information being buried in a document with irrelevant text, leading to costly LLM calls and poor responses.

Contextual compression compresses retrieved documents based on the query context to only return relevant information.

To use the Contextual Compression Retriever, you need a base retriever and a Document Compressor.

The Contextual Compression Retriever passes each query to the base retriever, then sends the returned documents through the Document Compressor, which shortens the list by reducing document contents or dropping documents entirely.



## Contextual compression enhancement with LLMChainExtractor

Wrap base retriever with ContextualCompressionRetriever. Add LLMChainExtractor to iterate over returned documents and extract relevant content for query.


In [None]:
# Compressor built from `llm`, wrapped around the currently bound `retriever`.
# NOTE: `compressor` and `retriever_compression` are rebound by later cells.
compressor = retrievers.LLMChainExtractor.from_llm(llm)
retriever_compression = retrievers.ContextualCompressionRetriever(
  base_compressor=compressor, base_retriever=retriever,
)

In [None]:
# Uses the `query` and `retriever_compression` left by previously run cells.
docs_compressed = retriever_compression.get_relevant_documents(query)
pprint(docs_compressed)


## More built-in compressors: filters



### LLMChainFilter

LLMChainFilter is a simpler but more robust compressor: it uses an LLM chain to decide which of the retrieved documents to drop and which to return, without altering their contents.


In [None]:
# Same wrapper as the LLMChainExtractor cell, but with an LLMChainFilter as the
# compressor. Rebinds `compressor` and `retriever_compression`.
compressor = retrievers.LLMChainFilter.from_llm(llm)
retriever_compression = retrievers.ContextualCompressionRetriever(
  base_compressor=compressor, base_retriever=retriever
)

In [None]:
# Uses the `query` and `retriever_compression` left by previously run cells.
docs_compressed = retriever_compression.get_relevant_documents(query)
pprint(docs_compressed)


### EmbeddingsFilter

Making an extra LLM call for each document is costly and slow. The EmbeddingsFilter embeds the documents and the query, and returns only the documents whose embeddings are sufficiently similar to the query.


In [None]:

# Embedding-based compressor using the currently bound `embeddings` model.
# NOTE(review): similarity_threshold=0.76 is presumably the minimum
# query/document similarity for a document to be kept — confirm against the
# EmbeddingsFilter documentation.
compressor = retrievers.EmbeddingsFilter(
  embeddings=embeddings, similarity_threshold=0.76,
)
retriever_compression = retrievers.ContextualCompressionRetriever(
  base_compressor=compressor, base_retriever=retriever
)

In [None]:
# Uses the `query` and `retriever_compression` left by previously run cells.
docs_compressed = retriever_compression.get_relevant_documents(query)
pprint(docs_compressed)


## Stringing compressors and document transformers together

Using the DocumentCompressorPipeline allows combining multiple compressors in sequence. BaseDocumentTransformers can also be added to the pipeline, performing transformations on a set of documents. For instance, TextSplitters split documents into smaller pieces, while EmbeddingsRedundantFilter filters out redundant documents based on embedding similarity.


In [None]:
# Build the individual compressors/filters, then chain a subset of them.
# NOTE: both embedding-based filters are constructed but currently commented
# out of `transformers`, so only the two LLM-based steps run, in list order.
filter_embeddings_redundant = retrievers.EmbeddingsRedundantFilter(embeddings=embeddings)
filter_embeddings_relevant = retrievers.EmbeddingsFilter(
  embeddings=embeddings, similarity_threshold=0.76,
)
filter_llmchain = retrievers.LLMChainFilter.from_llm(llm)
extractor_llmchain = retrievers.LLMChainExtractor.from_llm(llm)
compressor_pipeline = retrievers.DocumentCompressorPipeline(
  transformers=[
    # filter_embeddings_redundant, 
    # filter_embeddings_relevant,
    filter_llmchain,
    extractor_llmchain,
  ]
)

# The pipeline is used as the compressor around the currently bound `retriever`.
retriever_compression = retrievers.ContextualCompressionRetriever(
  base_compressor=compressor_pipeline, base_retriever=retriever,
)

In [None]:
# Uses the `query` and `retriever_compression` left by previously run cells.
docs_compressed = retriever_compression.get_relevant_documents(query)
pprint(docs_compressed)

# [Ensemble Retriever](https://python.langchain.com/docs/modules/data_connection/retrievers/ensemble)

The EnsembleRetriever combines retrievers' results and reranks them using the [Reciprocal Rank Fusion](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) algorithm.

EnsembleRetriever achieves better performance by combining strengths of different algorithms.

The most common pattern is to combine a sparse retriever with a dense retriever, known as "hybrid search". The sparse retriever finds relevant documents based on keywords, while the dense retriever finds relevant documents based on semantic similarity.

Retrievers can also be configured at runtime by marking fields as configurable; for example, the FAISS retriever can be made to return only one source by passing the relevant configuration at call time.



# Cohere

## Cohere Reranker

In [None]:
# Switch to Cohere embeddings and rebuild the vector store (FAISS) from the
# currently bound `docs`. Rebinds `embeddings` and `vectorstore`.
embeddings = text_embedding_models.CohereEmbeddings(
  model=constants.EMBEDDINGS["COHERE"]["EMBED-MULTILINGUAL-V3.0"]
)

vectorstore = vectorstores.faiss.FAISS.from_documents(
  docs, embeddings
)

In [None]:

# NOTE: `create_retriever` is defined further down, in the "Test" / "Best"
# section; that cell must be run before this one on a fresh kernel.
retriever = create_retriever(
  embeddings=embeddings,
  retriever_types=[
    "CohereRerank",
  ],
  vectorstore=vectorstore,
  # search_kwargs=
)

In [None]:
# Echo the query, then show the documents returned by the current `retriever`.
print(query)
retriever.get_relevant_documents(query)

In [None]:
# NOTE: repeats the embeddings setup and create_retriever call from the cells
# above without rebuilding the vector store. `create_retriever` is defined in
# the "Test" / "Best" section below and must be run first.
embeddings = text_embedding_models.CohereEmbeddings(
  model=constants.EMBEDDINGS["COHERE"]["EMBED-MULTILINGUAL-V3.0"]
)

retriever = create_retriever(
  embeddings=embeddings,
  retriever_types=[
    "CohereRerank",
  ],
  vectorstore=vectorstore,
  # search_kwargs=
)

## [Cohere RAG retriever](https://python.langchain.com/docs/integrations/retrievers/cohere)

In [None]:
# Cohere RAG retriever backed by a freshly constructed ChatCohere model.
retriever_cohere_rag = retrievers.CohereRagRetriever(
  llm=chat_models.ChatCohere(), 
)

In [None]:
# Cell output is the list returned for this fixed question.
retriever_cohere_rag.get_relevant_documents("What is cohere ai?")

# [BM25](https://python.langchain.com/docs/integrations/retrievers/bm25)

BM25, also known as Okapi BM25, is a ranking function used in information retrieval systems to estimate the relevance of documents to a given search query.


In [None]:
# Build a BM25 retriever directly from the currently bound `docs`.
# NOTE: this rebinds the shared `retriever` name used by other sections.
retriever = retrievers.BM25Retriever.from_documents(docs)
retriever.get_relevant_documents("foo")

# [LLMLingua](https://python.langchain.com/docs/integrations/retrievers/llmlingua) 

LLMLingua Document Compressor uses a compact, well-trained language model to identify and remove non-essential tokens in prompts, enabling efficient inference with large language models. Up to 20x compression is achieved with minimal performance loss.



## Vectorstore

Initialize a simple vector store retriever and store the 2023 State of the Union speech in chunks. Set up the retriever to retrieve a high number of docs (20).



## Compression

Wrap base retriever with ContextualCompressionRetriever using LLMLinguaCompressor as compressor.



## QA generation

See the result of using this in the generation step now.



# [RePhraseQuery](https://python.langchain.com/docs/integrations/retrievers/re_phrase)

RePhraseQuery is a simple retriever that applies an LLM between the user input and the query passed to the underlying retriever.

Pre-process user input effectively.



## Setting up

Create vector store.



## Default prompt

The default prompt in the from_llm classmethod.

```
DEFAULT_TEMPLATE = """You are an assistant tasked with taking a natural language 
query from a user and converting it into a query for a vectorstore. 
In this process, you strip out information that is not relevant for 
the retrieval task. Here is the user query: {question}"""
```

In [13]:

# Re-phrasing retriever over the current vector store's default retriever.
llm = chat_models.chat_openai
retriever_rephrase_query = retrievers.RePhraseQueryRetriever.from_llm(
  retriever=vectorstore.as_retriever(), llm=llm
)

In [14]:
# Take the first entry of whichever `queries` list is currently bound and
# rebind `query` to it before retrieving.
query = queries[0]
print(query)
retriever_rephrase_query.get_relevant_documents(query)

Hi I'm Lance. What are the approaches to Task Decomposition?


INFO:langchain.retrievers.re_phraser:Re-phrased question: Query for vectorstore: approaches to Task Decomposition


[Document(page_content='Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.', metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log"}),
 Document(page_content='Tree of Thoughts (Yao et al. 2023) ext

In [17]:
# Same RePhraseQuery retrieval, built through the create_retriever factory.
# NOTE: `create_retriever` is defined in the "Test" / "Best" section below and
# must be run before this cell on a fresh kernel.
retriever = create_retriever(
  llm=chat_models.chat_openai,
  vectorstore=vectorstore,
  embeddings=text_embedding_models.OpenAIEmbeddings(),
  retriever_types=[
    "RePhraseQueryRetriever",
  ],
  search_kwargs={"k": 4},
  search_type='similarity',
)

query = queries[0]
print(query)
retriever.get_relevant_documents(query)

[32m2024-04-03 11:37:37.341[0m | [1mINFO    [0m | [36m__main__[0m:[36mcreate_retriever[0m:[36m122[0m - [1mRetrievers: ['RePhraseQueryRetriever'][0m


Hi I'm Lance. What are the approaches to Task Decomposition?


INFO:langchain.retrievers.re_phraser:Re-phrased question: Query for vectorstore: Approaches to Task Decomposition


[Document(page_content='Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.', metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log"}),
 Document(page_content='Fig. 1. Overview of a LLM-powered auto


## Custom prompt



In [None]:
# todo

# Test

## Best

In [None]:
from loguru import logger
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.vectorstores import VectorStore
from langchain_core.embeddings import Embeddings
from typing import Literal, Union

def create_portion(input_list):
  """Return equal weights, one per element of `input_list`.

  Args:
    input_list: Any sized collection; only its length is used.

  Returns:
    A list of `len(input_list)` floats, each equal to `1 / len(input_list)`.
    An empty input yields an empty list instead of raising ZeroDivisionError.
  """
  length = len(input_list)
  if length == 0:
    # Nothing to weight; avoid dividing by zero.
    return []
  return [1 / length] * length

def create_retriever(
  llm: Union[BaseChatModel, None] = None,
  vectorstore: Union[VectorStore, None] = None,
  embeddings: Union[Embeddings, None] = None,
  retriever_types: Union[list[Literal[
    'base', 'SelfQueryRetriever', 'MultiQueryRetriever', 'CohereRerank',
    'BM25Retriever', 'RePhraseQueryRetriever',
    'ContextualCompressionRetriever',
  ]], None] = None,
  compressor_types: Union[list[Literal[
    'EmbeddingsRedundantFilter', 'EmbeddingsFilter', 'LLMChainFilter',
    'LLMChainExtractor',
  ]], None] = None,
  search_type: Literal['mmr', 'similarity'] = "mmr",
  search_kwargs: Union[dict, None] = None,
  document_content_description: Union[str, None] = None,
  metadata_field_info: Union[list, None] = None,
  documents: Union[list, None] = None,
):
  """Build an EnsembleRetriever from the requested retriever types.

  Every requested retriever is built on top of `vectorstore` (or, for BM25,
  on `documents`) and the results are combined with equal weights.

  Args:
    llm: Chat model passed to the LLM-based retrievers and compressors.
    vectorstore: Vector store providing the base retriever. Required.
    embeddings: Embedding model passed to the embedding-based compressors.
    retriever_types: Names of the retrievers to include. Defaults to none.
    compressor_types: Names of the compressors to chain when
      'ContextualCompressionRetriever' is requested. Defaults to none.
    search_type: Search type passed to `vectorstore.as_retriever`.
    search_kwargs: Search kwargs passed to `vectorstore.as_retriever`;
      defaults to `{"k": 10}`.
    document_content_description: Passed as `document_contents` to
      SelfQueryRetriever.
    metadata_field_info: Passed as `metadata_field_info` to SelfQueryRetriever.
    documents: Documents used to build the BM25 retriever; required only when
      'BM25Retriever' is requested.

  Returns:
    An EnsembleRetriever over the retrievers that were built.

  Raises:
    ValueError: If `vectorstore` is missing, if 'BM25Retriever' is requested
      without `documents`, or if no retriever was selected.
  """
  # Fresh containers per call: avoids sharing mutable defaults across calls
  # and avoids mutating the caller's objects.
  retriever_types = list(retriever_types) if retriever_types is not None else []
  compressor_types = list(compressor_types) if compressor_types is not None else []
  search_kwargs = {"k": 10} if search_kwargs is None else dict(search_kwargs)

  if vectorstore is None:
    raise ValueError("create_retriever() requires a `vectorstore`.")

  my_retrievers = []
  my_compressors = []

  #*----------------------------------------------------------------------------
  retriever_base = vectorstore.as_retriever(
    search_type=search_type,
    search_kwargs=search_kwargs,
  )

  if "base" in retriever_types:
    my_retrievers.append(retriever_base)

  if "SelfQueryRetriever" in retriever_types:
    retriever_self_querying = retrievers.SelfQueryRetriever.from_llm(
      llm=llm,
      vectorstore=vectorstore,
      document_contents=document_content_description,
      metadata_field_info=metadata_field_info,
      verbose=True,
    )
    my_retrievers.append(retriever_self_querying)

  if "MultiQueryRetriever" in retriever_types:
    retriever_multi_query = retrievers.MultiQueryRetriever.from_llm(
      retriever=retriever_base, llm=llm,
    )
    my_retrievers.append(retriever_multi_query)

  if "CohereRerank" in retriever_types:
    # The caller's `embeddings` is left untouched here; the vector store is
    # already built, so the warning is only a reminder for the caller.
    logger.warning("Remember to use CohereEmbeddings for Vectorstore.")
    compressor_cohere = retrievers.CohereRerank()
    retriever_cohere_rerank = retrievers.ContextualCompressionRetriever(
      base_compressor=compressor_cohere, base_retriever=retriever_base,
    )
    my_retrievers.append(retriever_cohere_rerank)

  if "RePhraseQueryRetriever" in retriever_types:
    retriever_rephrase_query = retrievers.RePhraseQueryRetriever.from_llm(
      retriever=retriever_base, llm=llm
    )
    my_retrievers.append(retriever_rephrase_query)

  if "BM25Retriever" in retriever_types:
    # BM25 is built from raw documents rather than from the vector store.
    if not documents:
      raise ValueError(
        "'BM25Retriever' requires a non-empty `documents` argument."
      )
    retriever_bm25 = retrievers.BM25Retriever.from_documents(documents)
    my_retrievers.append(retriever_bm25)
  #*----------------------------------------------------------------------------

  if "ContextualCompressionRetriever" in retriever_types:
    # Compressors are chained in this fixed order, not in the caller's order.
    if "EmbeddingsRedundantFilter" in compressor_types:
      filter_embeddings_redundant = retrievers.EmbeddingsRedundantFilter(embeddings=embeddings)
      my_compressors.append(filter_embeddings_redundant)

    if "EmbeddingsFilter" in compressor_types:
      filter_embeddings_relevant = retrievers.EmbeddingsFilter(
        embeddings=embeddings, similarity_threshold=0.75,
      )
      my_compressors.append(filter_embeddings_relevant)

    if "LLMChainFilter" in compressor_types:
      filter_llmchain = retrievers.LLMChainFilter.from_llm(llm)
      my_compressors.append(filter_llmchain)

    if "LLMChainExtractor" in compressor_types:
      extractor_llmchain = retrievers.LLMChainExtractor.from_llm(llm)
      my_compressors.append(extractor_llmchain)

    compressor_pipeline = retrievers.DocumentCompressorPipeline(
      transformers=my_compressors,
    )

    retriever_contextual_compression = retrievers.ContextualCompressionRetriever(
      base_compressor=compressor_pipeline, base_retriever=retriever_base,
    )

    my_retrievers.append(retriever_contextual_compression)

  logger.info(f"Retrievers: {retriever_types}")

  if not my_retrievers:
    # Fail with a clear message instead of a ZeroDivisionError from the
    # weight computation.
    raise ValueError(
      "No retriever was built; pass at least one supported name in "
      "`retriever_types`."
    )

  retriever_ensemble = retrievers.EnsembleRetriever(
    retrievers=my_retrievers,
    weights=create_portion(my_retrievers),
  )

  return retriever_ensemble

In [None]:
# End-to-end check of create_retriever: an ensemble of the CohereRerank and
# RePhraseQuery retrievers over the currently bound `vectorstore`.
# `document_content_description` and `metadata_field_info` come from the
# movies.yaml cell in the "Docs" section and must already be defined.
embeddings = text_embedding_models.CohereEmbeddings(
  model=constants.EMBEDDINGS["COHERE"]["EMBED-MULTILINGUAL-V3.0"]
)

retriever = create_retriever(
  llm=chat_models.chat_openai,
  embeddings=embeddings,
  retriever_types=[
    # "base",
    # "SelfQueryRetriever",
    "CohereRerank",
    "RePhraseQueryRetriever",
  ],
  compressor_types=[

  ],
  vectorstore=vectorstore,
  search_kwargs={"k": 4},
  search_type='similarity',
  document_content_description=document_content_description,
  metadata_field_info=metadata_field_info,
)

# TODOs

[Qdrant Self Query](https://python.langchain.com/docs/integrations/retrievers/self_query/qdrant_self_query)

[Tavily Search API](https://python.langchain.com/docs/integrations/retrievers/tavily)

[Wikipedia](https://python.langchain.com/docs/integrations/retrievers/wikipedia)

[You.com](https://python.langchain.com/docs/integrations/retrievers/you-retriever)