Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add agent example use case to generate query, positive and negative examples #451

Merged
merged 8 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 14 additions & 0 deletions camel/configs/openai_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,19 @@ class ChatGPTConfig(BaseConfig):
(default: :obj:`1.0`)
n (int, optional): How many chat completion choices to generate for
each input message. (default: :obj:`1`)
response_format (object, optional): An object specifying the format
that the model must output. Compatible with GPT-4 Turbo and all
GPT-3.5 Turbo models newer than gpt-3.5-turbo-1106. Setting to
{"type": "json_object"} enables JSON mode, which guarantees the
message the model generates is valid JSON. Important: when using
JSON mode, you must also instruct the model to produce JSON
yourself via a system or user message. Without this, the model
may generate an unending stream of whitespace until the generation
reaches the token limit, resulting in a long-running and seemingly
"stuck" request. Also note that the message content may be
partially cut off if finish_reason="length", which indicates the
generation exceeded max_tokens or the conversation exceeded the
max context length.
stream (bool, optional): If True, partial message deltas will be sent
as data-only server-sent events as they become available.
(default: :obj:`False`)
Expand Down Expand Up @@ -95,6 +108,7 @@ class ChatGPTConfig(BaseConfig):
stop: str | Sequence[str] | NotGiven = NOT_GIVEN
max_tokens: int | NotGiven = NOT_GIVEN
presence_penalty: float = 0.0
response_format: dict | NotGiven = NOT_GIVEN
frequency_penalty: float = 0.0
logit_bias: dict = field(default_factory=dict)
user: str = ""
Expand Down
4 changes: 4 additions & 0 deletions camel/prompts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
from .code import CodePromptTemplateDict
from .descripte_video_prompt import DescriptionVideoPromptTemplateDict
from .evaluation import EvaluationPromptTemplateDict
from .generate_text_embedding_data import (
GenerateTextEmbeddingDataPromptTemplateDict,
)
from .misalignment import MisalignmentPromptTemplateDict
from .object_recognition import ObjectRecognitionPromptTemplateDict
from .prompt_templates import PromptTemplateGenerator
Expand All @@ -37,6 +40,7 @@
'TaskPromptTemplateDict',
'PromptTemplateGenerator',
'SolutionExtractionPromptTemplateDict',
'GenerateTextEmbeddingDataPromptTemplateDict',
'ObjectRecognitionPromptTemplateDict',
'DescriptionVideoPromptTemplateDict',
]
79 changes: 79 additions & 0 deletions camel/prompts/generate_text_embedding_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import Any

from camel.prompts import TextPrompt, TextPromptDict
from camel.types import RoleType


# flake8: noqa :E501
class GenerateTextEmbeddingDataPromptTemplateDict(TextPromptDict):
    r"""A :obj:`TextPrompt` dictionary for synthetic text-embedding data
    generation, based on the `"Improving Text Embeddings with Large
    Language Models" <https://arxiv.org/abs/2401.00368>`_ paper. It bundles
    one prompt for brainstorming retrieval tasks and one for producing a
    (query, positive document, hard-negative document) triple for a task.

    Attributes:
        GENERATE_TASKS (TextPrompt): A prompt asking the model to
            brainstorm :obj:`num_tasks` synthetic text-embedding
            (retrieval) tasks, output as a numbered Python list of
            strings.
        ASSISTANT_PROMPT (TextPrompt): A system prompt instructing the
            assistant to emit a JSON object containing a synthetic
            :obj:`user_query`, :obj:`positive_document` and
            :obj:`hard_negative_document` for a given :obj:`task`,
            parameterized by :obj:`query_type`, :obj:`query_length`,
            :obj:`clarity`, :obj:`num_words`, :obj:`language` and
            :obj:`difficulty`.
    """

    GENERATE_TASKS = TextPrompt(
        """You are an expert to brainstorm a list of {num_tasks} potentially useful text retrieval tasks
Here are a few examples for your reference:
- Provided a scientific claim as query, retrieve documents that help verify or refute the claim.
- Search for documents that answers a FAQ-style query on children's nutrition.
Please adhere to the following guidelines:
- Specify what the query is, and what the desired documents are.
- Each retrieval task should cover a wide range of queries, and should not be too specific.
Your output should always be a python list of strings starting with `1.`, `2.` etc.
And each element corresponds to a distinct retrieval task in one sentence.
Do not explain yourself or output anything else.
Be creative!"""
    )

    ASSISTANT_PROMPT = TextPrompt(
        """You have been assigned a retrieval task: {task}
Your mission is to write one text retrieval example for this task in JSON format. The JSON object must
contain the following keys:
- "user_query": a string, a random user search query specified by the retrieval task.
- "positive_document": a string, a relevant document for the user query.
- "hard_negative_document": a string, a hard negative document that only appears relevant to the query.
Please adhere to the following guidelines:
- The "user_query" should be {query_type}, {query_length}, {clarity}, and diverse in topic.
- All documents must be created independent of the query. Avoid copying the query verbatim.
It's acceptable if some parts of the "positive_document" are not topically related to the query.
- All documents should be at least {num_words} words long.
- The "hard_negative_document" contains some useful information, but it should be less useful or comprehensive compared to the "positive_document".
- Both the query and documents should be in {language}.
- Do not provide any explanation in any document on why it is relevant or not relevant to the query.
- Both the query and documents require {difficulty} level education to understand.
Your output must always be a JSON object only (starting and ending with curly brackets), do not explain yourself or output anything else. Be creative!"""
    )

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        # Register both templates: the task-brainstorming prompt under a
        # plain string key, and the triple-generation prompt under the
        # assistant role enum used by the prompt-template machinery.
        templates = {
            "generate_tasks": self.GENERATE_TASKS,
            RoleType.ASSISTANT: self.ASSISTANT_PROMPT,
        }
        self.update(templates)
4 changes: 4 additions & 0 deletions camel/prompts/task_prompt_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
from camel.prompts.evaluation import (
EvaluationPromptTemplateDict,
)
from camel.prompts.generate_text_embedding_data import (
GenerateTextEmbeddingDataPromptTemplateDict,
)
from camel.prompts.misalignment import MisalignmentPromptTemplateDict
from camel.prompts.object_recognition import (
ObjectRecognitionPromptTemplateDict,
Expand Down Expand Up @@ -60,6 +63,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
TaskType.SOLUTION_EXTRACTION: SolutionExtractionPromptTemplateDict(), # noqa: E501
TaskType.ROLE_DESCRIPTION: RoleDescriptionPromptTemplateDict(),
TaskType.OBJECT_RECOGNITION: ObjectRecognitionPromptTemplateDict(), # noqa: E501
TaskType.GENERATE_TEXT_EMBEDDING_DATA: GenerateTextEmbeddingDataPromptTemplateDict(), # noqa: E501
TaskType.VIDEO_DESCRIPTION: DescriptionVideoPromptTemplateDict(), # noqa: E501
}
)
1 change: 1 addition & 0 deletions camel/types/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ class TaskType(Enum):
EVALUATION = "evaluation"
SOLUTION_EXTRACTION = "solution_extraction"
ROLE_DESCRIPTION = "role_description"
GENERATE_TEXT_EMBEDDING_DATA = "generate_text_embedding_data"
OBJECT_RECOGNITION = "object_recognition"
DEFAULT = "default"
VIDEO_DESCRIPTION = "video_description"
Expand Down