Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add agent example use case to generate query, positive and negative examples #451

Merged
merged 8 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 14 additions & 0 deletions camel/configs/openai_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,19 @@ class ChatGPTConfig(BaseConfig):
(default: :obj:`1.0`)
n (int, optional): How many chat completion choices to generate for
each input message. (default: :obj:`1`)
response_format (object, optional): An object specifying the format
that the model must output. Compatible with GPT-4 Turbo and all
GPT-3.5 Turbo models newer than gpt-3.5-turbo-1106. Setting to
{"type": "json_object"} enables JSON mode, which guarantees the
message the model generates is valid JSON. Important: when using
JSON mode, you must also instruct the model to produce JSON
yourself via a system or user message. Without this, the model
may generate an unending stream of whitespace until the generation
reaches the token limit, resulting in a long-running and seemingly
"stuck" request. Also note that the message content may be
partially cut off if finish_reason="length", which indicates the
generation exceeded max_tokens or the conversation exceeded the
max context length.
stream (bool, optional): If True, partial message deltas will be sent
as data-only server-sent events as they become available.
(default: :obj:`False`)
Expand Down Expand Up @@ -95,6 +108,7 @@ class ChatGPTConfig(BaseConfig):
stop: str | Sequence[str] | NotGiven = NOT_GIVEN
max_tokens: int | NotGiven = NOT_GIVEN
presence_penalty: float = 0.0
response_format: dict | NotGiven = NOT_GIVEN
frequency_penalty: float = 0.0
logit_bias: dict = field(default_factory=dict)
user: str = ""
Expand Down
4 changes: 4 additions & 0 deletions camel/prompts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
from .code import CodePromptTemplateDict
from .descripte_video_prompt import DescriptionVideoPromptTemplateDict
from .evaluation import EvaluationPromptTemplateDict
from .generate_text_embedding_data import (
GenerateTextEmbeddingDataPromptTemplateDict,
)
from .misalignment import MisalignmentPromptTemplateDict
from .object_recognition import ObjectRecognitionPromptTemplateDict
from .prompt_templates import PromptTemplateGenerator
Expand All @@ -37,6 +40,7 @@
'TaskPromptTemplateDict',
'PromptTemplateGenerator',
'SolutionExtractionPromptTemplateDict',
'GenerateTextEmbeddingDataPromptTemplateDict',
'ObjectRecognitionPromptTemplateDict',
'DescriptionVideoPromptTemplateDict',
]
79 changes: 79 additions & 0 deletions camel/prompts/generate_text_embedding_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import Any

from camel.prompts import TextPrompt, TextPromptDict
from camel.types import RoleType


# flake8: noqa :E501
class GenerateTextEmbeddingDataPromptTemplateDict(TextPromptDict):
    r"""A :obj:`TextPrompt` dictionary for synthetic text-embedding data
    generation, based on the `"Improving Text Embeddings with Large
    Language Models" <https://arxiv.org/abs/2401.00368>`_ paper. It bundles
    one prompt for brainstorming retrieval tasks and one for producing a
    (query, positive document, hard-negative document) triple for a task.

    Attributes:
        GENERATE_TASKS (TextPrompt): A prompt asking the model to
            brainstorm :obj:`num_tasks` synthetic text-embedding
            (retrieval) tasks, output as a numbered Python list of
            strings.
        ASSISTANT_PROMPT (TextPrompt): A system prompt instructing the
            assistant to emit a JSON object containing a synthetic
            :obj:`user_query`, :obj:`positive_document` and
            :obj:`hard_negative_document` for a given :obj:`task`,
            parameterized by :obj:`query_type`, :obj:`query_length`,
            :obj:`clarity`, :obj:`num_words`, :obj:`language` and
            :obj:`difficulty`.
    """

    GENERATE_TASKS = TextPrompt(
        """You are an expert to brainstorm a list of {num_tasks} potentially useful text retrieval tasks
Here are a few examples for your reference:
- Provided a scientific claim as query, retrieve documents that help verify or refute the claim.
- Search for documents that answers a FAQ-style query on children's nutrition.
Please adhere to the following guidelines:
- Specify what the query is, and what the desired documents are.
- Each retrieval task should cover a wide range of queries, and should not be too specific.
Your output should always be a python list of strings starting with `1.`, `2.` etc.
And each element corresponds to a distinct retrieval task in one sentence.
Do not explain yourself or output anything else.
Be creative!"""
    )

    ASSISTANT_PROMPT = TextPrompt(
        """You have been assigned a retrieval task: {task}
Your mission is to write one text retrieval example for this task in JSON format. The JSON object must
contain the following keys:
- "user_query": a string, a random user search query specified by the retrieval task.
- "positive_document": a string, a relevant document for the user query.
- "hard_negative_document": a string, a hard negative document that only appears relevant to the query.
Please adhere to the following guidelines:
- The "user_query" should be {query_type}, {query_length}, {clarity}, and diverse in topic.
- All documents must be created independent of the query. Avoid copying the query verbatim.
It's acceptable if some parts of the "positive_document" are not topically related to the query.
- All documents should be at least {num_words} words long.
- The "hard_negative_document" contains some useful information, but it should be less useful or comprehensive compared to the "positive_document".
- Both the query and documents should be in {language}.
- Do not provide any explanation in any document on why it is relevant or not relevant to the query.
- Both the query and documents require {difficulty} level education to understand.
Your output must always be a JSON object only (starting and ending with curly brackets), do not explain yourself or output anything else. Be creative!"""
    )

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__(*args, **kwargs)
        # Register both templates: the task-brainstorming prompt under a
        # plain string key, and the triple-generation prompt under the
        # assistant role enum used by the prompt-template machinery.
        templates = {
            "generate_tasks": self.GENERATE_TASKS,
            RoleType.ASSISTANT: self.ASSISTANT_PROMPT,
        }
        self.update(templates)
4 changes: 4 additions & 0 deletions camel/prompts/task_prompt_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
from camel.prompts.evaluation import (
EvaluationPromptTemplateDict,
)
from camel.prompts.generate_text_embedding_data import (
GenerateTextEmbeddingDataPromptTemplateDict,
)
from camel.prompts.misalignment import MisalignmentPromptTemplateDict
from camel.prompts.object_recognition import (
ObjectRecognitionPromptTemplateDict,
Expand Down Expand Up @@ -60,6 +63,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
TaskType.SOLUTION_EXTRACTION: SolutionExtractionPromptTemplateDict(), # noqa: E501
TaskType.ROLE_DESCRIPTION: RoleDescriptionPromptTemplateDict(),
TaskType.OBJECT_RECOGNITION: ObjectRecognitionPromptTemplateDict(), # noqa: E501
TaskType.GENERATE_TEXT_EMBEDDING_DATA: GenerateTextEmbeddingDataPromptTemplateDict(), # noqa: E501
TaskType.VIDEO_DESCRIPTION: DescriptionVideoPromptTemplateDict(), # noqa: E501
}
)
1 change: 1 addition & 0 deletions camel/types/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ class TaskType(Enum):
EVALUATION = "evaluation"
SOLUTION_EXTRACTION = "solution_extraction"
ROLE_DESCRIPTION = "role_description"
GENERATE_TEXT_EMBEDDING_DATA = "generate_text_embedding_data"
OBJECT_RECOGNITION = "object_recognition"
DEFAULT = "default"
VIDEO_DESCRIPTION = "video_description"
Expand Down