From b96e08f03365decce6881171b27df649f8149f1d Mon Sep 17 00:00:00 2001 From: Wendong Date: Sat, 4 May 2024 16:06:47 +0800 Subject: [PATCH 1/6] make top_k works for auto retriever --- camel/retrievers/auto_retriever.py | 58 ++++++++++++++++-------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/camel/retrievers/auto_retriever.py b/camel/retrievers/auto_retriever.py index 0c2d6c9a9..de977e41c 100644 --- a/camel/retrievers/auto_retriever.py +++ b/camel/retrievers/auto_retriever.py @@ -233,8 +233,7 @@ def run_vector_retriever( vr = VectorRetriever() - retrieved_infos = "" - retrieved_infos_text = "" + all_retrieved_info = [] for content_input_path in content_input_paths: # Generate a valid collection name @@ -283,36 +282,41 @@ def run_vector_retriever( retrieved_info = vr.query( query, vector_storage_instance, top_k, similarity_threshold ) - # Reorganize the retrieved info with original query - for info in retrieved_info: - retrieved_infos += "\n" + str(info) - retrieved_infos_text += "\n" + str(info['text']) - output = ( - "Original Query:" - + "\n" - + "{" - + query - + "}" - + "\n" - + "Retrieved Context:" - + retrieved_infos - ) - output_text = ( - "Original Query:" - + "\n" - + "{" - + query - + "}" - + "\n" - + "Retrieved Context:" - + retrieved_infos_text - ) - + all_retrieved_info.extend(retrieved_info) except Exception as e: raise RuntimeError( f"Error in auto vector retriever processing: {e!s}" ) from e + # Splitting records into those with and without a 'similarity_score' + with_score = [ + info for info in all_retrieved_info if 'similarity score' in info + ] + without_score = [ + info + for info in all_retrieved_info + if 'similarity score' not in info + ] + + # Sorting only the list with scores + with_score_sorted = sorted( + with_score, key=lambda x: x['similarity score'], reverse=True + ) + + # Merging back the sorted scored items with the non-scored items + all_retrieved_info_sorted = with_score_sorted + without_score + + # 
Selecting the top 'top_k' results + all_retrieved_info = all_retrieved_info_sorted[:top_k] + + retrieved_infos = "\n".join(str(info) for info in all_retrieved_info) + retrieved_infos_text = "\n".join( + info['text'] for info in all_retrieved_info if 'text' in info + ) + + output = f"Original Query:\n{{ {query} }}\nRetrieved Context:\n{retrieved_infos}" + output_text = f"Original Query:\n{{ {query} }}\nRetrieved Context:\n{retrieved_infos_text}" + if return_detailed_info: return output else: From 55a2b3ea0c6dbf1032a2eaf4eaf0c90709a3403b Mon Sep 17 00:00:00 2001 From: Wendong Date: Sat, 4 May 2024 16:26:35 +0800 Subject: [PATCH 2/6] add retrieval function, remove unstructured io from camel functions --- camel/functions/__init__.py | 5 +-- camel/functions/retrieval_functions.py | 53 ++++++++++++++++++++++++++ camel/retrievers/auto_retriever.py | 20 ++++------ camel/retrievers/vector_retriever.py | 2 +- 4 files changed, 64 insertions(+), 16 deletions(-) create mode 100644 camel/functions/retrieval_functions.py diff --git a/camel/functions/__init__.py b/camel/functions/__init__.py index 38d8d20dc..e87bfd328 100644 --- a/camel/functions/__init__.py +++ b/camel/functions/__init__.py @@ -11,8 +11,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. 
=========== - -from ..loaders.unstructured_io import UnstructuredIO from .google_maps_function import MAP_FUNCS from .math_functions import MATH_FUNCS from .openai_function import ( @@ -20,6 +18,7 @@ get_openai_function_schema, get_openai_tool_schema, ) +from .retrieval_functions import RETRIEVAL_FUNCS from .search_functions import SEARCH_FUNCS from .twitter_function import TWITTER_FUNCS from .weather_functions import WEATHER_FUNCS @@ -33,5 +32,5 @@ 'WEATHER_FUNCS', 'MAP_FUNCS', 'TWITTER_FUNCS', - 'UnstructuredIO', + 'RETRIEVAL_FUNCS', ] diff --git a/camel/functions/retrieval_functions.py b/camel/functions/retrieval_functions.py new file mode 100644 index 000000000..f63626da2 --- /dev/null +++ b/camel/functions/retrieval_functions.py @@ -0,0 +1,53 @@ +# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. =========== +# Licensed under the Apache License, Version 2.0 (the “License”); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an “AS IS” BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. =========== +from typing import List, Union +from camel.functions import OpenAIFunction +from camel.retrievers import AutoRetriever +from camel.types import StorageType + +def information_retrieval(query: str, content_input_paths: Union[str, List[str]]) -> str: + r"""Performs an auto retrieal for information. Given a query, + this function will retrieve the information from the remote vector storage, + and return the retrieved information back. It is useful for information + retrieve. 
+ + Args: + query (string): Question you want to be answered. + content_input_paths (Union[str, List[str]]): Paths to local + files or remote URLs. + + Returns: + str: Aggregated information retrieved in response to the query. + + Example: + information_retrieval(query = "what is camel?",content_input_paths=["https://lablab.ai/t/camel-tutorial-building-communicative-agents-for-large-scale-language-model-exploration", "https://www.camel-ai.org/"]) + """ + auto_retriever = AutoRetriever( + url_and_api_key=("Your Milvus URI","Your Milvus Token"), + storage_type=StorageType.MILVUS) + + retrieved_info = auto_retriever.run_vector_retriever( + query=query, + content_input_paths=content_input_paths, + top_k=3 + ) + return retrieved_info + +# add the function to OpenAIFunction list +RETRIEVAL_FUNCS: List[OpenAIFunction] = [ + OpenAIFunction(func) + for func in [ + information_retrieval, + ] +] \ No newline at end of file diff --git a/camel/retrievers/auto_retriever.py b/camel/retrievers/auto_retriever.py index de977e41c..aebc408d6 100644 --- a/camel/retrievers/auto_retriever.py +++ b/camel/retrievers/auto_retriever.py @@ -234,7 +234,6 @@ def run_vector_retriever( vr = VectorRetriever() all_retrieved_info = [] - for content_input_path in content_input_paths: # Generate a valid collection name collection_name = self._collection_name_generator( @@ -288,7 +287,7 @@ def run_vector_retriever( f"Error in auto vector retriever processing: {e!s}" ) from e - # Splitting records into those with and without a 'similarity_score' + # Split records into those with and without a 'similarity_score' with_score = [ info for info in all_retrieved_info if 'similarity score' in info ] @@ -297,16 +296,13 @@ def run_vector_retriever( for info in all_retrieved_info if 'similarity score' not in info ] - - # Sorting only the list with scores + # Sort only the list with scores with_score_sorted = sorted( with_score, key=lambda x: x['similarity score'], reverse=True ) - - # Merging back the sorted 
scored items with the non-scored items + # Merge back the sorted scored items with the non-scored items all_retrieved_info_sorted = with_score_sorted + without_score - - # Selecting the top 'top_k' results + # Select the 'top_k' results all_retrieved_info = all_retrieved_info_sorted[:top_k] retrieved_infos = "\n".join(str(info) for info in all_retrieved_info) @@ -314,10 +310,10 @@ def run_vector_retriever( info['text'] for info in all_retrieved_info if 'text' in info ) - output = f"Original Query:\n{{ {query} }}\nRetrieved Context:\n{retrieved_infos}" - output_text = f"Original Query:\n{{ {query} }}\nRetrieved Context:\n{retrieved_infos_text}" + detailed_info = f"Original Query:\n{{ {query} }}\nRetrieved Context:\n{retrieved_infos}" + text_info = f"Original Query:\n{{ {query} }}\nRetrieved Context:\n{retrieved_infos_text}" if return_detailed_info: - return output + return detailed_info else: - return output_text + return text_info diff --git a/camel/retrievers/vector_retriever.py b/camel/retrievers/vector_retriever.py index f94a94d79..935268a94 100644 --- a/camel/retrievers/vector_retriever.py +++ b/camel/retrievers/vector_retriever.py @@ -14,7 +14,7 @@ from typing import Any, Dict, List, Optional from camel.embeddings import BaseEmbedding, OpenAIEmbedding -from camel.functions import UnstructuredIO +from camel.loaders import UnstructuredIO from camel.retrievers.base import BaseRetriever from camel.storages import BaseVectorStorage, VectorDBQuery, VectorRecord From 86854cefd4bb577e7919ccff60881ef81ba9982c Mon Sep 17 00:00:00 2001 From: Wendong Date: Sun, 5 May 2024 21:53:56 +0800 Subject: [PATCH 3/6] format fix --- camel/functions/retrieval_functions.py | 31 ++++++++++++++------------ 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/camel/functions/retrieval_functions.py b/camel/functions/retrieval_functions.py index f63626da2..32e44ef16 100644 --- a/camel/functions/retrieval_functions.py +++ b/camel/functions/retrieval_functions.py @@ -12,42 +12,45 
@@ # limitations under the License. # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. =========== from typing import List, Union + from camel.functions import OpenAIFunction from camel.retrievers import AutoRetriever from camel.types import StorageType -def information_retrieval(query: str, content_input_paths: Union[str, List[str]]) -> str: - r"""Performs an auto retrieal for information. Given a query, - this function will retrieve the information from the remote vector storage, - and return the retrieved information back. It is useful for information - retrieve. + +def information_retrieval( + query: str, content_input_paths: Union[str, List[str]] +) -> str: + r"""Retrieves information from a remote vector storage based on the specified query. This function connects to a remote vector storage system and retrieves relevant information by processing the input query. It is essential to use this function when the answer to a question requires external knowledge sources. Args: - query (string): Question you want to be answered. + query (str): The question or query for which an answer is required. content_input_paths (Union[str, List[str]]): Paths to local files or remote URLs. Returns: - str: Aggregated information retrieved in response to the query. + str: The information retrieved in response to the query, aggregated and formatted as a string. Example: - information_retrieval(query = "what is camel?",content_input_paths=["https://lablab.ai/t/camel-tutorial-building-communicative-agents-for-large-scale-language-model-exploration", "https://www.camel-ai.org/"]) + # Retrieve information about CAMEL AI. 
+ information_retrieval(query = "what is CAMEL AI?", + content_input_paths="https://www.camel-ai.org/") """ auto_retriever = AutoRetriever( - url_and_api_key=("Your Milvus URI","Your Milvus Token"), - storage_type=StorageType.MILVUS) + url_and_api_key=("Your Milvus URI", "Your Milvus Token"), + storage_type=StorageType.MILVUS, + ) retrieved_info = auto_retriever.run_vector_retriever( - query=query, - content_input_paths=content_input_paths, - top_k=3 + query=query, content_input_paths=content_input_paths, top_k=3 ) return retrieved_info + # add the function to OpenAIFunction list RETRIEVAL_FUNCS: List[OpenAIFunction] = [ OpenAIFunction(func) for func in [ information_retrieval, ] -] \ No newline at end of file +] From b7e6f6da36390e52516daf58ab52802da7c3f4c0 Mon Sep 17 00:00:00 2001 From: Wendong Date: Sun, 5 May 2024 21:57:34 +0800 Subject: [PATCH 4/6] use local storage --- camel/functions/retrieval_functions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/camel/functions/retrieval_functions.py b/camel/functions/retrieval_functions.py index 32e44ef16..009f9e606 100644 --- a/camel/functions/retrieval_functions.py +++ b/camel/functions/retrieval_functions.py @@ -21,7 +21,7 @@ def information_retrieval( query: str, content_input_paths: Union[str, List[str]] ) -> str: - r"""Retrieves information from a remote vector storage based on the specified query. This function connects to a remote vector storage system and retrieves relevant information by processing the input query. It is essential to use this function when the answer to a question requires external knowledge sources. + r"""Retrieves information from a local vector storage based on the specified query. This function connects to a local vector storage system and retrieves relevant information by processing the input query. It is essential to use this function when the answer to a question requires external knowledge sources. 
Args: query (str): The question or query for which an answer is required. @@ -37,8 +37,8 @@ def information_retrieval( content_input_paths="https://www.camel-ai.org/") """ auto_retriever = AutoRetriever( - url_and_api_key=("Your Milvus URI", "Your Milvus Token"), - storage_type=StorageType.MILVUS, + vector_storage_local_path="camel/temp_storage", + storage_type=StorageType.QDRANT, ) retrieved_info = auto_retriever.run_vector_retriever( From 5543425d636e99e8e8dd0d5a74c7911e6a81dab9 Mon Sep 17 00:00:00 2001 From: Wendong Date: Mon, 6 May 2024 16:21:55 +0800 Subject: [PATCH 5/6] fix neo4j test issue --- test/storages/graph_storages/test_neo4j_graph.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/storages/graph_storages/test_neo4j_graph.py b/test/storages/graph_storages/test_neo4j_graph.py index c698291e8..c3b77c2cf 100644 --- a/test/storages/graph_storages/test_neo4j_graph.py +++ b/test/storages/graph_storages/test_neo4j_graph.py @@ -120,8 +120,10 @@ def test_neo4j_timeout() -> None: except Exception as e: assert ( e.code # type: ignore[attr-defined] - == "Neo.ClientError.Transaction." 
- "TransactionTimedOutClientConfiguration" + in [ + "Neo.ClientError.Transaction.TransactionTimedOutClientConfiguration", + "Neo.ClientError.Transaction.LockClientStopped", + ] ) From 93cfa07a85460c03236f68a84ab8ae45a0bac5d7 Mon Sep 17 00:00:00 2001 From: Wendong Date: Sun, 12 May 2024 23:04:02 +0800 Subject: [PATCH 6/6] add comment --- camel/functions/retrieval_functions.py | 9 +++++++-- camel/retrievers/auto_retriever.py | 8 ++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/camel/functions/retrieval_functions.py b/camel/functions/retrieval_functions.py index 009f9e606..53cc8fc7a 100644 --- a/camel/functions/retrieval_functions.py +++ b/camel/functions/retrieval_functions.py @@ -21,7 +21,11 @@ def information_retrieval( query: str, content_input_paths: Union[str, List[str]] ) -> str: - r"""Retrieves information from a local vector storage based on the specified query. This function connects to a local vector storage system and retrieves relevant information by processing the input query. It is essential to use this function when the answer to a question requires external knowledge sources. + r"""Retrieves information from a local vector storage based on the + specified query. This function connects to a local vector storage system + and retrieves relevant information by processing the input query. It is + essential to use this function when the answer to a question requires + external knowledge sources. Args: query (str): The question or query for which an answer is required. @@ -29,7 +33,8 @@ def information_retrieval( files or remote URLs. Returns: - str: The information retrieved in response to the query, aggregated and formatted as a string. + str: The information retrieved in response to the query, aggregated + and formatted as a string. Example: # Retrieve information about CAMEL AI. 
diff --git a/camel/retrievers/auto_retriever.py b/camel/retrievers/auto_retriever.py index aebc408d6..a3de64369 100644 --- a/camel/retrievers/auto_retriever.py +++ b/camel/retrievers/auto_retriever.py @@ -63,7 +63,8 @@ def _initialize_vector_storage( self, collection_name: Optional[str] = None, ) -> BaseVectorStorage: - r"""Sets up and returns a vector storage instance with specified parameters. + r"""Sets up and returns a vector storage instance with specified + parameters. Args: collection_name (Optional[str]): Name of the collection in the @@ -195,7 +196,8 @@ def run_vector_retriever( similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD, return_detailed_info: bool = False, ) -> str: - r"""Executes the automatic vector retriever process using vector storage. + r"""Executes the automatic vector retriever process using vector + storage. Args: query (str): Query string for information retriever. @@ -288,6 +290,8 @@ def run_vector_retriever( ) from e # Split records into those with and without a 'similarity_score' + # Records whose score is lower than 'similarity_threshold' will not + # have a 'similarity score' key in the output content with_score = [ info for info in all_retrieved_info if 'similarity score' in info ]