Update the branch feature/role-generation from master (#265)
Co-authored-by: zhiyu-01 <121875294+zhiyu-01@users.noreply.github.com>
Co-authored-by: Guohao Li <lightaime@gmail.com>
Co-authored-by: MorphlingEd <s1973609@ed.ac.uk>
Co-authored-by: Tianqi Xu <40522713+dandansamax@users.noreply.github.com>
Co-authored-by: Wenxuan Li <55635778+MorphlingEd@users.noreply.github.com>
6 people committed Nov 3, 2023
1 parent c316fc9 commit 5a1da91
Showing 5 changed files with 35 additions and 288 deletions.
256 changes: 3 additions & 253 deletions camel/functions/search_functions.py
@@ -14,10 +14,9 @@
import os
from typing import Any, Dict, List

import camel.agents
from camel.functions import OpenAIFunction
from camel.messages import BaseMessage
from camel.prompts import TextPrompt
import wikipedia

from .openai_function import OpenAIFunction


def search_wiki(entity: str) -> str:
@@ -31,7 +30,6 @@ def search_wiki(entity: str) -> str:
string: The search result. If the page corresponding to the entity
exists, return the summary of this entity in a string.
"""
import wikipedia
result: str

try:
@@ -49,254 +47,6 @@ def search_wiki(entity: str) -> str:
return result


def search_google(query: str) -> List[Dict[str, Any]]:
r"""Use Google search engine to search information for the given query.
Args:
query (string): The query to be searched.
Returns:
List[Dict[str, Any]]: A list of dictionaries where each dictionary
represents a website.
Each dictionary contains the following keys:
- 'result_id': The index of the result, starting from 1.
- 'title': The title of the website.
- 'description': A brief description of the website.
- 'long_description': More detail of the website.
- 'url': The URL of the website.
Example:
{
'result_id': 1,
'title': 'OpenAI',
'description': 'An organization focused on ensuring that
artificial general intelligence benefits all of humanity.',
'long_description': 'OpenAI is a non-profit artificial
intelligence research company. Our goal is to advance digital
intelligence in the way that is most likely to benefit humanity
as a whole',
'url': 'https://www.openai.com'
}
title, description, url of a website.
"""
import requests

# https://developers.google.com/custom-search/v1/overview
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# https://cse.google.com/cse/all
SEARCH_ENGINE_ID = os.getenv("SEARCH_ENGINE_ID")

# Using the first page
start_page_idx = 1
# Different languages may return different results
search_language = "en"
# How many search results to return (the API allows at most 10)
num_result_pages = 10
# Constructing the URL
# Doc: https://developers.google.com/custom-search/v1/using_rest
url = f"https://www.googleapis.com/customsearch/v1?" \
f"key={GOOGLE_API_KEY}&cx={SEARCH_ENGINE_ID}&q={query}&start=" \
f"{start_page_idx}&lr={search_language}&num={num_result_pages}"

responses = []
# Fetch the results given the URL
try:
# Make the get
result = requests.get(url)
data = result.json()

# Get the result items
if "items" in data:
search_items = data.get("items")

# Iterate over the returned search results
for i, search_item in enumerate(search_items, start=1):
if "og:description" in search_item["pagemap"]["metatags"][0]:
long_description = \
search_item["pagemap"]["metatags"][0]["og:description"]
else:
long_description = "N/A"
# Get the page title
title = search_item.get("title")
# Page snippet
snippet = search_item.get("snippet")

# Extract the page url
link = search_item.get("link")
response = {
"result_id": i,
"title": title,
"description": snippet,
"long_description": long_description,
"url": link
}
responses.append(response)
else:
responses.append({"error": "google search failed."})

except requests.RequestException:
responses.append({"error": "google search failed."})

return responses


def text_extract_from_web(url: str) -> str:
r"""Get the text information from given url.
Args:
url (string): The web site you want to search.
Returns:
string: All texts extract from the web.
"""
import requests
from bs4 import BeautifulSoup

try:
# Request the target page
response_text = requests.get(url).text

# Parse the obtained page
soup = BeautifulSoup(response_text, features="html.parser")

for script in soup(["script", "style"]):
script.extract()

text = soup.get_text()
# Strip text
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines
for phrase in line.split(" "))
text = ".".join(chunk for chunk in chunks if chunk)

except requests.RequestException:
text = f"can't access {url}"

return text


# Split a text into smaller chunks of size n
def create_chunks(text: str, n: int) -> List[str]:
r"""Returns successive n-sized chunks from provided text."
Args:
text (string): The text to be split.
n (int): The max length of a single chunk.
Returns:
List[str]: A list of splited texts.
"""

chunks = []
i = 0
while i < len(text):
# Find the nearest end of sentence within a range of 0.8 * n
# and 1.2 * n characters
j = min(i + int(1.2 * n), len(text))
while j > i + int(0.8 * n):
# Check whether the chunk ends with a full stop or newline
chunk = text[i:j]
if chunk.endswith(".") or chunk.endswith("\n"):
break
j -= 1
# If no end of sentence is found, use n characters as the chunk size
if j == i + int(0.8 * n):
j = min(i + n, len(text))
chunks.append(text[i:j])
i = j
return chunks
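
As a quick illustration of the chunking behaviour being removed here, the following hypothetical snippet (not part of the diff) splits a short string with create_chunks; chunk boundaries fall between roughly 0.8 * n and 1.2 * n characters and prefer to end at a full stop or newline.

# Hypothetical usage of create_chunks as defined above; the exact chunk
# boundaries depend on where full stops fall and are not asserted here.
sample = ("First sentence. Second sentence. Third sentence.\n"
          "Fourth sentence. Fifth sentence.")
for idx, chunk in enumerate(create_chunks(sample, n=20), start=1):
    # Each chunk is about 16-24 characters (0.8*n to 1.2*n), except
    # possibly the last one, and ends at "." or "\n" where possible.
    print(idx, repr(chunk))
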


def prompt_single_step_agent(prompt: str) -> str:
"""Prompt a single-step agent to summarize texts or answer a question."""

assistant_sys_msg = BaseMessage.make_assistant_message(
role_name="Assistant",
content="You are a helpful assistant.",
)
agent = camel.agents.ChatAgent(assistant_sys_msg)
agent.reset()

user_msg = BaseMessage.make_user_message(
role_name="User",
content=prompt,
)
assistant_response = agent.step(user_msg)
if assistant_response.msgs is not None:
return assistant_response.msg.content
return ""


def summarize_text(text: str, query: str) -> str:
r"""Summarize the information from the text, base on the query if query is
given.
Args:
text (string): Text to summarise.
query (string): What information you want.
Returns:
string: Strings with information.
"""
summary_prompt = TextPrompt(
'''Gather information from this text that is relevant to the question, but
do not directly answer the question.\nquestion: {query}\ntext ''')
summary_prompt = summary_prompt.format(query=query)
# Max length of each chunk
max_len = 3000
results = ""
chunks = create_chunks(text, max_len)
# Summarize
for i, chunk in enumerate(chunks, start=1):
prompt = summary_prompt + str(i) + ": " + chunk
result = prompt_single_step_agent(prompt)
results += result + "\n"

# Final summary
final_prompt = TextPrompt(
'''Here are some summarized texts split from one text. Use the
information to answer the question: {query}.\n\nText: ''')
final_prompt = final_prompt.format(query=query)
prompt = final_prompt + results

response = prompt_single_step_agent(prompt)

return response


def search_google_and_summarize(query: str) -> str:
r"""Search webs for information. Given a query, this function will use
the Google search engine to search for related information from the
internet, and then return a summarized answer.
Args:
query (string): Question you want to be answered.
Returns:
string: Summarized information from webs.
"""
# Google search will return a list of urls
responses = search_google(query)
for item in responses:
if "url" in item:
url = item.get("url")
# Extract text
text = text_extract_from_web(str(url))
# Use ChatGPT to summarize the text
answer = summarize_text(text, query)

# Let ChatGPT decide whether to continue searching or not
prompt = TextPrompt(
'''Do you think the answer: {answer} can answer the query:
{query}. Use only 'yes' or 'no' to answer.''')
prompt = prompt.format(answer=answer, query=query)
reply = prompt_single_step_agent(prompt)
if "yes" in str(reply).lower():
return answer

return "Failed to find the answer from google search."


SEARCH_FUNCS: List[OpenAIFunction] = [
OpenAIFunction(func)
for func in [search_wiki, search_google_and_summarize]
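
The trimmed module still exposes SEARCH_FUNCS for function-calling agents. Below is a minimal sketch (not part of this commit) of wiring it into a ChatAgent, mirroring the pattern the new role-playing test further down uses for MATH_FUNCS; the import paths for FunctionCallingConfig and ModelType, and the assumption that ChatAgent accepts the same model/model_config/function_list keyword arguments that RolePlaying forwards via assistant_agent_kwargs, may differ across camel versions.

from camel.agents import ChatAgent
from camel.configs import FunctionCallingConfig  # assumed import path
from camel.functions.search_functions import SEARCH_FUNCS
from camel.messages import BaseMessage
from camel.typing import ModelType  # assumed import path

# Build a function-calling config from the exported search functions.
assistant_model_config = FunctionCallingConfig.from_openai_function_list(
    function_list=SEARCH_FUNCS)

# Assumed keyword arguments, matching what the role-playing test passes
# through assistant_agent_kwargs.
agent = ChatAgent(
    BaseMessage.make_assistant_message(
        role_name="Assistant",
        content="You are a helpful assistant.",
    ),
    model=ModelType.GPT_3_5_TURBO,
    model_config=assistant_model_config,
    function_list=SEARCH_FUNCS,
)
agent.reset()

user_msg = BaseMessage.make_user_message(
    role_name="User",
    content="Who wrote 'On the Origin of Species'?",
)
response = agent.step(user_msg)
print(response.msg.content)
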
1 change: 1 addition & 0 deletions camel/societies/role_playing.py
@@ -19,6 +19,7 @@
TaskPlannerAgent,
TaskSpecifyAgent,
)
from camel.agents.chat_agent import ChatAgentResponse
from camel.generators import SystemMessageGenerator
from camel.human import Human
from camel.messages import BaseMessage
5 changes: 0 additions & 5 deletions pyproject.toml
@@ -137,10 +137,5 @@ module = [
"huggingface_hub",
"huggingface_hub.utils._errors",
"wikipedia",
"jsonschema.*",
"bs4.*",
"docx2txt",
"PyMuPDF",
"fitz"
]
ignore_missing_imports = true
29 changes: 29 additions & 0 deletions test/agents/test_role_playing.py
@@ -117,3 +117,32 @@ def test_role_playing_step(task_type, extend_sys_msg_meta_dicts,
assert isinstance(response.terminated, bool)
assert response.terminated is False
assert isinstance(response.info, dict)


@pytest.mark.model_backend
def test_role_playing_with_function():
function_list = [*MATH_FUNCS]
assistant_model_config = FunctionCallingConfig.from_openai_function_list(
function_list=function_list)

role_playing = RolePlaying(
assistant_role_name="AI Assistant",
assistant_agent_kwargs=dict(model=ModelType.GPT_3_5_TURBO,
model_config=assistant_model_config,
function_list=function_list),
user_role_name="AI User",
user_agent_kwargs=dict(model=ModelType.GPT_3_5_TURBO),
task_prompt="Perform the task",
task_specify_agent_kwargs=dict(model=ModelType.GPT_3_5_TURBO),
task_type=TaskType.AI_SOCIETY,
)

init_assistant_msg, _ = role_playing.init_chat()
assistant_response, user_response = role_playing.step(init_assistant_msg)
for response in (assistant_response, user_response):
assert isinstance(response.msgs, list)
assert len(response.msgs) == 1
assert isinstance(response.msgs[0], BaseMessage)
assert isinstance(response.terminated, bool)
assert response.terminated is False
assert isinstance(response.info, dict)
32 changes: 2 additions & 30 deletions test/functions/test_search_functions.py
@@ -11,15 +11,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
import os

import requests
import wikipedia

from camel.functions.search_functions import (
search_google_and_summarize,
search_wiki,
)
from camel.functions.search_functions import search_wiki


def test_search_wiki_normal():
@@ -41,28 +35,6 @@ def test_search_wiki_not_found():


def test_search_wiki_with_ambiguity():
expected_output = wikipedia.summary("New York City", sentences=5,
expected_output = wikipedia.summary("New York (state)", sentences=5,
auto_suggest=False)
assert search_wiki("New York") == expected_output


def test_google_api():
# Check the Google search api

# https://developers.google.com/custom-search/v1/overview
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# https://cse.google.com/cse/all
SEARCH_ENGINE_ID = os.getenv("SEARCH_ENGINE_ID")

url = f"https://www.googleapis.com/customsearch/v1?" \
f"key={GOOGLE_API_KEY}&cx={SEARCH_ENGINE_ID}&q=any"
result = requests.get(url)

assert result.status_code == 200


def test_web_search():
query = "What big things are happening in 2023?"
answer = search_google_and_summarize(query)

assert answer is not None
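
The updated test_search_wiki_with_ambiguity above expects the "New York" query to resolve to the "New York (state)" page. The following standalone sketch shows one way such a fallback can work with the wikipedia package; the helper and its name are hypothetical, and the collapsed body of search_wiki in this diff is not necessarily identical.

import wikipedia

def summarize_with_disambiguation(entity: str, sentences: int = 5) -> str:
    # Hypothetical helper: look up a summary and, if the title is
    # ambiguous, fall back to the first suggested option.
    try:
        return wikipedia.summary(entity, sentences=sentences,
                                 auto_suggest=False)
    except wikipedia.exceptions.DisambiguationError as e:
        # For "New York" the first option is currently "New York (state)",
        # which is what the updated test asserts.
        return wikipedia.summary(e.options[0], sentences=sentences,
                                 auto_suggest=False)

print(summarize_with_disambiguation("New York")[:200])
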
