From d4c0678aafbdd9617d1c0096d6e8e21b613c8abe Mon Sep 17 00:00:00 2001 From: seehi <6580@pm.me> Date: Mon, 6 May 2024 14:52:46 +0800 Subject: [PATCH 1/5] fix potential NoneType error --- metagpt/rag/retrievers/bm25_retriever.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/metagpt/rag/retrievers/bm25_retriever.py b/metagpt/rag/retrievers/bm25_retriever.py index 241820cf4..dc75d87b0 100644 --- a/metagpt/rag/retrievers/bm25_retriever.py +++ b/metagpt/rag/retrievers/bm25_retriever.py @@ -40,8 +40,10 @@ def add_nodes(self, nodes: list[BaseNode], **kwargs) -> None: self._corpus = [self._tokenizer(node.get_content()) for node in self._nodes] self.bm25 = BM25Okapi(self._corpus) - self._index.insert_nodes(nodes, **kwargs) + if self._index: + self._index.insert_nodes(nodes, **kwargs) def persist(self, persist_dir: str, **kwargs) -> None: """Support persist.""" - self._index.storage_context.persist(persist_dir) + if self._index: + self._index.storage_context.persist(persist_dir) From 53369ee6da1b3b1090bd9399b537ef99f2e5218a Mon Sep 17 00:00:00 2001 From: seehi <6580@pm.me> Date: Mon, 6 May 2024 15:52:18 +0800 Subject: [PATCH 2/5] update comment in rag_pipeline example --- examples/rag_pipeline.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/examples/rag_pipeline.py b/examples/rag_pipeline.py index 7dbca35a6..75888e480 100644 --- a/examples/rag_pipeline.py +++ b/examples/rag_pipeline.py @@ -18,13 +18,13 @@ ) from metagpt.utils.exceptions import handle_exception +LLM_TIP = "If you not sure, just answer I don't know." + DOC_PATH = EXAMPLE_DATA_PATH / "rag/writer.txt" -QUESTION = "What are key qualities to be a good writer?" +QUESTION = f"What are key qualities to be a good writer? {LLM_TIP}" TRAVEL_DOC_PATH = EXAMPLE_DATA_PATH / "rag/travel.txt" -TRAVEL_QUESTION = "What does Bob like?" - -LLM_TIP = "If you not sure, just answer I don't know." +TRAVEL_QUESTION = f"What does Bob like? {LLM_TIP}" class Player(BaseModel): @@ -40,21 +40,21 @@ def rag_key(self) -> str: class RAGExample: - """Show how to use RAG. - - Default engine use LLM Reranker, if the answer from the LLM is incorrect, may encounter `IndexError: list index out of range`. - """ + """Show how to use RAG.""" - def __init__(self, engine: SimpleEngine = None): + def __init__(self, engine: SimpleEngine = None, use_llm_ranker: bool = False): self._engine = engine + self._use_llm_ranker = use_llm_ranker @property def engine(self): if not self._engine: + ranker_configs = [LLMRankerConfig()] if self._use_llm_ranker else None + self._engine = SimpleEngine.from_docs( input_files=[DOC_PATH], retriever_configs=[FAISSRetrieverConfig()], - ranker_configs=[LLMRankerConfig()], + ranker_configs=ranker_configs, ) return self._engine @@ -105,7 +105,7 @@ async def add_docs(self): """ self._print_title("Add Docs") - travel_question = f"{TRAVEL_QUESTION}{LLM_TIP}" + travel_question = f"{TRAVEL_QUESTION}" travel_filepath = TRAVEL_DOC_PATH logger.info("[Before add docs]") @@ -240,8 +240,14 @@ async def _retrieve_and_print(self, question): async def main(): - """RAG pipeline.""" - e = RAGExample() + """RAG pipeline. + + Note: + 1. If `use_llm_ranker` is True, then will use LLM Reranker to get better result, but it is not always guaranteed that the output will be parseable for reranking, + prefer `gpt-4-turbo`, otherwise might encounter `IndexError: list index out of range` or `ValueError: invalid literal for int() with base 10`. + """ + e = RAGExample(use_llm_ranker=False) + await e.run_pipeline() await e.add_docs() await e.add_objects() From 01833aa997a33d1cf6989accda0d8a0ad318ce79 Mon Sep 17 00:00:00 2001 From: seehi <6580@pm.me> Date: Tue, 7 May 2024 15:43:20 +0800 Subject: [PATCH 3/5] update comment in rag_pipeline example --- examples/rag_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/rag_pipeline.py b/examples/rag_pipeline.py index 75888e480..fbbc2cc39 100644 --- a/examples/rag_pipeline.py +++ b/examples/rag_pipeline.py @@ -42,7 +42,7 @@ def rag_key(self) -> str: class RAGExample: """Show how to use RAG.""" - def __init__(self, engine: SimpleEngine = None, use_llm_ranker: bool = False): + def __init__(self, engine: SimpleEngine = None, use_llm_ranker: bool = True): self._engine = engine self._use_llm_ranker = use_llm_ranker From 400e0af8144ebc6752adfe96334127d8395fef4c Mon Sep 17 00:00:00 2001 From: seehi <6580@pm.me> Date: Wed, 15 May 2024 17:31:44 +0800 Subject: [PATCH 4/5] format --- metagpt/rag/benchmark/base.py | 2 +- metagpt/rag/factories/ranker.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/metagpt/rag/benchmark/base.py b/metagpt/rag/benchmark/base.py index c1fd297d9..b5d265b35 100644 --- a/metagpt/rag/benchmark/base.py +++ b/metagpt/rag/benchmark/base.py @@ -121,7 +121,7 @@ def mean_reciprocal_rank(self, nodes: list[NodeWithScore], reference_docs: list[ return mrr_sum return mrr_sum - + async def semantic_similarity(self, response: str, reference: str) -> float: result = await self.evaluator.aevaluate( response=response, diff --git a/metagpt/rag/factories/ranker.py b/metagpt/rag/factories/ranker.py index b75745a1f..7abda162a 100644 --- a/metagpt/rag/factories/ranker.py +++ b/metagpt/rag/factories/ranker.py @@ -8,11 +8,11 @@ from metagpt.rag.rankers.object_ranker import ObjectSortPostprocessor from metagpt.rag.schema import ( BaseRankerConfig, + BGERerankConfig, + CohereRerankConfig, ColbertRerankConfig, LLMRankerConfig, ObjectRankerConfig, - CohereRerankConfig, - BGERerankConfig ) @@ -60,13 +60,15 @@ def _create_cohere_rerank(self, config: CohereRerankConfig, **kwargs) -> LLMRera def _create_bge_rerank(self, config: BGERerankConfig, **kwargs) -> LLMRerank: try: - from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker + from llama_index.postprocessor.flag_embedding_reranker import ( + FlagEmbeddingReranker, + ) except ImportError: raise ImportError( "`llama-index-postprocessor-flag-embedding-reranker` package not found, please run `pip install llama-index-postprocessor-flag-embedding-reranker`" ) return FlagEmbeddingReranker(**config.model_dump()) - + def _create_object_ranker(self, config: ObjectRankerConfig, **kwargs) -> LLMRerank: return ObjectSortPostprocessor(**config.model_dump()) From b71ea5f407c268a846eff5fabc0886c7e74a05b4 Mon Sep 17 00:00:00 2001 From: seehi <6580@pm.me> Date: Wed, 15 May 2024 17:40:21 +0800 Subject: [PATCH 5/5] update comment --- examples/rag_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/rag_pipeline.py b/examples/rag_pipeline.py index fbbc2cc39..5b716ce03 100644 --- a/examples/rag_pipeline.py +++ b/examples/rag_pipeline.py @@ -243,7 +243,7 @@ async def main(): """RAG pipeline. Note: - 1. If `use_llm_ranker` is True, then will use LLM Reranker to get better result, but it is not always guaranteed that the output will be parseable for reranking, + 1. If `use_llm_ranker` is True, then it will use LLM Reranker to get better result, but it is not always guaranteed that the output will be parseable for reranking, prefer `gpt-4-turbo`, otherwise might encounter `IndexError: list index out of range` or `ValueError: invalid literal for int() with base 10`. """ e = RAGExample(use_llm_ranker=False)