Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add web search #274

Merged
merged 35 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
fd98180
add web search
zhiyu-01 Sep 3, 2023
a19aba2
update
zhiyu-01 Sep 3, 2023
ce8a4a6
update
zhiyu-01 Sep 5, 2023
1ce288b
Merge branch 'master' into function
zhiyu-01 Sep 5, 2023
c593ad3
Merge branch 'master' into function
zhiyu-01 Sep 5, 2023
7f8450d
Update web_search.py
zhiyu-01 Sep 7, 2023
5193c01
Merge branch 'master' into function
zhiyu-01 Sep 9, 2023
1f75055
Merge branch 'master' into function
zhiyu-01 Sep 11, 2023
591fa47
update
zhiyu-01 Sep 11, 2023
a882e26
Merge branch 'function' of https://github.com/camel-ai/camel into fun…
zhiyu-01 Sep 11, 2023
5f16411
Update web_search.py
zhiyu-01 Sep 11, 2023
b224c1d
Merge branch 'master' into function
zhiyu-01 Sep 13, 2023
9ecb3e4
Update camel/functions/web_search.py
zhiyu-01 Sep 14, 2023
51d9ca2
Update camel/functions/web_search.py
zhiyu-01 Sep 14, 2023
1cf40ff
Update camel/functions/web_search.py
zhiyu-01 Sep 14, 2023
f3f65dc
Update camel/functions/web_search.py
zhiyu-01 Sep 14, 2023
e8ba969
update
zhiyu-01 Sep 14, 2023
48515fc
update
zhiyu-01 Sep 14, 2023
eb9521c
update
zhiyu-01 Sep 14, 2023
f48f46c
Merge branch 'master' into function
zhiyu-01 Sep 18, 2023
25590ba
update
zhiyu-01 Sep 18, 2023
6444d85
Merge branch 'function' of https://github.com/camel-ai/camel into fun…
zhiyu-01 Sep 18, 2023
13a3bb8
update
zhiyu-01 Sep 18, 2023
70c6ef6
update
zhiyu-01 Sep 19, 2023
685e7d4
update
zhiyu-01 Sep 20, 2023
a7d8dbe
Update search_functions.py
zhiyu-01 Sep 20, 2023
a9435b2
update
zhiyu-01 Sep 20, 2023
5c826ba
Update chat_agent.py
zhiyu-01 Sep 20, 2023
db92799
update
zhiyu-01 Sep 20, 2023
4fc33f7
Merge branch 'master' into function
zhiyu-01 Sep 24, 2023
84f032c
update
zhiyu-01 Sep 24, 2023
a14213f
Merge branch 'function' of https://github.com/camel-ai/camel into fun…
zhiyu-01 Sep 24, 2023
3e2514c
Update test_role_playing.py
zhiyu-01 Sep 24, 2023
2f0dc19
update
zhiyu-01 Sep 27, 2023
6d9b1eb
Merge branch 'master' into function
lightaime Oct 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions camel/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
from .openai_function import OpenAIFunction
from .math_functions import MATH_FUNCS
from .search_functions import SEARCH_FUNCS
from .web_search import WEB_FUNCS

__all__ = [
'OpenAIFunction',
'MATH_FUNCS',
'SEARCH_FUNCS',
'WEB_FUNCS',
zhiyu-01 marked this conversation as resolved.
Show resolved Hide resolved
]
243 changes: 243 additions & 0 deletions camel/functions/web_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
import os
from typing import Any, Dict, List

import requests
from bs4 import BeautifulSoup

import camel.agents
from camel.messages import BaseMessage

from .openai_function import OpenAIFunction
zhiyu-01 marked this conversation as resolved.
Show resolved Hide resolved


def search_google(query: str) -> List[Dict[str, Any]]:
    r"""Use the Google Custom Search JSON API to search for the given query.

    Args:
        query (string): The query to be searched.

    Returns:
        List[Dict[str, Any]]: One dict per search result with the keys
            `Result_id` (1-based position), `Title`, `Description` (the
            result snippet), `Long_description` (the page's
            `og:description` meta tag, or "N/A" when it is missing) and
            `URL`. When the request fails, the list holds a single
            `{"error": ...}` dict. When the response carries no `items`,
            the list is empty.
    """
    # https://developers.google.com/custom-search/v1/overview
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    # https://cse.google.com/cse/all
    SEARCH_ENGINE_ID = os.getenv("SEARCH_ENGINE_ID")

    # doc: https://developers.google.com/custom-search/v1/using_rest
    url = "https://www.googleapis.com/customsearch/v1"
    # Passing the values as `params` lets requests URL-encode them, so a
    # query containing spaces, `&` or `#` no longer corrupts the request.
    params = {
        "key": GOOGLE_API_KEY,
        "cx": SEARCH_ENGINE_ID,
        "q": query,
        # index of the first result to return (first page)
        "start": 1,
        # different language may get different result
        # NOTE(review): the API reference lists `lr` values in the form
        # `lang_xx` (e.g. `lang_en`) - confirm that "en" is accepted.
        "lr": "en",
        # how many results to return
        "num": 10,
    }
    # seconds to wait before giving up on the request
    timeout = 30

    responses: List[Dict[str, Any]] = []
    try:
        result = requests.get(url, params=params, timeout=timeout)
        # Turn HTTP error statuses (bad key, quota exceeded, ...) into a
        # RequestException so they are reported like any other failure.
        result.raise_for_status()
        data = result.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a body that is not valid JSON.
        responses.append({"error": "google search failed."})
        return responses

    # `items` is absent when the search has no hits.
    search_items = data.get("items") or []

    for i, search_item in enumerate(search_items, start=1):
        try:
            long_description = \
                search_item["pagemap"]["metatags"][0]["og:description"]
        except (KeyError, IndexError, TypeError):
            # the page exposes no usable `og:description` meta tag
            long_description = "N/A"

        responses.append({
            "Result_id": i,
            "Title": search_item.get("title"),
            "Description": search_item.get("snippet"),
            "Long_description": long_description,
            "URL": search_item.get("link"),
        })

    return responses


def text_extract_from_web(url: str) -> str:
    r"""Download a web page and return its visible text.

    Args:
        url (string): The address of the page to read.

    Returns:
        string: The non-empty text fragments of the page joined with ".",
            or the message "can't access <url>" when the request fails.
    """
    try:
        page_source = requests.get(url).text
    except requests.RequestException:
        return f"can't access {url}"

    soup = BeautifulSoup(page_source, features="html.parser")

    # drop the elements whose content is code rather than readable text
    for tag in soup.find_all(["script", "style"]):
        tag.extract()

    # collect every non-empty, stripped fragment of every line
    fragments = []
    for raw_line in soup.get_text().splitlines():
        for phrase in raw_line.strip().split(" "):
            piece = phrase.strip()
            if piece:
                fragments.append(piece)

    return ".".join(fragments)


# Split a text into smaller chunks of size n
def create_chunks(text: str, n: int) -> List[str]:
    r"""Split the text into successive chunks of roughly n characters.

    A chunk is preferably cut right after a full stop or a newline located
    between 0.8 * n and 1.2 * n characters from the start of the chunk.
    When no such boundary exists, the chunk is cut at exactly n characters.

    Args:
        text (string): The text to be split.
        n (int): The target length of a chunk in characters. Must be
            positive.

    Returns:
        List[str]: The chunks in their original order. Concatenating them
            gives back the original text.

    Raises:
        ValueError: If n is not positive.
    """
    if n <= 0:
        # A non-positive size never advances the cursor below, which would
        # loop forever on any non-empty text.
        raise ValueError(f"n must be a positive integer, got {n}")

    shortest = int(0.8 * n)
    longest = int(1.2 * n)

    chunks = []
    i = 0
    while i < len(text):
        # Start from the longest allowed cut and walk backwards until a
        # sentence end is found or the shortest allowed cut is reached.
        j = min(i + longest, len(text))
        while j > i + shortest:
            if text[j - 1] in ".\n":
                break
            j -= 1
        # If no end of sentence was found, cut at n characters instead.
        if j == i + shortest:
            j = min(i + n, len(text))
        chunks.append(text[i:j])
        i = j
    return chunks


def single_step_agent(prompt: str) -> str:
    r"""Send one prompt to a freshly created chat agent and return its reply.

    Args:
        prompt (string): The content of the user message.

    Returns:
        string: The content of the agent's reply, or an empty string when
            the response carries no messages.
    """
    # TODO(review): a reviewer suggested annotating `prompt` as TextPrompt.
    system_message = BaseMessage.make_assistant_message(
        role_name="Assistant",
        content="You are a helpful assistant.",
    )
    chat_agent = camel.agents.ChatAgent(system_message)
    chat_agent.reset()

    reply = chat_agent.step(
        BaseMessage.make_user_message(
            role_name="User",
            content=prompt,
        ))
    if reply.msgs is None:
        return ""
    return reply.msg.content


def summarise_text(text: str, query: str) -> str:
    r"""Summarise the information in the text that relates to the query.

    The text is split into chunks, each chunk is condensed by a single-step
    agent, and the partial summaries are then handed to the agent once more
    to answer the query.

    Args:
        text (string): The text to summarise.
        query (string): The question the summary should address.

    Returns:
        string: The agent's answer built from the partial summaries.
    """
    # TODO(review): reviewers prefer the American spelling `summarize_text`
    # and building both prompts with TextPrompt, formatted explicitly.

    # max length of each chunk
    chunk_size = 3000

    instruction = (
        "Gather information from this text that relative to "
        "the question, but do not directly answer "
        f"the question.\nquestion: {query}\ntext ")

    # first pass: condense every chunk on its own
    partial_summaries = [
        single_step_agent(f"{instruction}{index}: {piece}")
        for index, piece in enumerate(
            create_chunks(text, chunk_size), start=1)
    ]
    combined = "".join(summary + "\n" for summary in partial_summaries)

    # second pass: answer the query from the condensed chunks
    closing_instruction = (
        "Here are some summarised texts which split from one "
        "text, Using the information to "
        f"answer the question: {query}.\n\nText: ")

    return single_step_agent(closing_instruction + combined)


def search_web(query: str) -> str:
    r"""Search the web for the query and summarise the first useful page.

    The Google results are visited in order. Each page is downloaded and
    summarised with respect to the query, and an agent is then asked
    whether the summary answers the query. The loop stops at the first
    summary the agent accepts.

    Args:
        query (string): The question to be answered.

    Returns:
        string: The summary of the last visited page followed by a
            "From: <url>" line, or an empty string when no result with a
            URL was available.
    """
    # TODO(review): reviewers suggested renaming this function to
    # `search_google_and_summarize` and asked whether the page title
    # should be added to the source line.
    answer: str = ""
    for item in search_google(query):
        url = item.get("URL")
        if not url:
            # Entries without a URL (such as the error entry produced when
            # the search request fails) cannot be fetched. Skip them
            # instead of downloading and summarising the address "None".
            continue

        # extract text
        text = text_extract_from_web(str(url))
        # using chatgpt summarise text
        answer = summarise_text(text, query)

        # let chatgpt decide whether to continue search or not
        prompt = f"Do you think the answer: {answer} can answer the query: " \
                 f"{query}. Use only 'yes' or 'no' to answer."
        reply = single_step_agent(prompt)

        # add the source
        answer += f"\nFrom: {url}"

        if "yes" in str(reply).lower():
            break

    return answer
zhiyu-01 marked this conversation as resolved.
Show resolved Hide resolved


# The web-search helpers of this module wrapped as OpenAIFunction objects.
WEB_FUNCS: List[OpenAIFunction] = [
    OpenAIFunction(search_web),
    OpenAIFunction(search_google),
]
4 changes: 2 additions & 2 deletions examples/function_call/role_playing_with_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from camel.agents.chat_agent import FunctionCallingRecord
from camel.configs import ChatGPTConfig, FunctionCallingConfig
from camel.functions import MATH_FUNCS, SEARCH_FUNCS
from camel.functions import MATH_FUNCS, SEARCH_FUNCS, WEB_FUNCS
from camel.societies import RolePlaying
from camel.typing import ModelType
from camel.utils import print_text_animated
Expand All @@ -29,7 +29,7 @@ def main(model_type=ModelType.GPT_4) -> None:

user_model_config = ChatGPTConfig(temperature=0.0)

function_list = [*MATH_FUNCS, *SEARCH_FUNCS]
function_list = [*WEB_FUNCS, *MATH_FUNCS, *SEARCH_FUNCS]
assistant_model_config = FunctionCallingConfig.from_openai_function_list(
function_list=function_list,
kwargs=dict(temperature=0.0),
Expand Down
29 changes: 29 additions & 0 deletions test/functions/test_web_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
import requests


def test_google_api():
    r"""Check that the Google Custom Search JSON API answers with HTTP 200.

    The credentials are read from the `GOOGLE_API_KEY` and
    `SEARCH_ENGINE_ID` environment variables. When either of them is not
    set, the check is not performed and the test returns early.
    """
    import os

    # Credentials must never be committed to the repository. Any key that
    # was previously hard-coded here should be treated as leaked and
    # revoked.
    # https://developers.google.com/custom-search/v1/overview
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    # https://cse.google.com/cse/all
    SEARCH_ENGINE_ID = os.getenv("SEARCH_ENGINE_ID")
    if not GOOGLE_API_KEY or not SEARCH_ENGINE_ID:
        # no credentials configured in this environment, nothing to check
        return

    url = "https://www.googleapis.com/customsearch/v1"
    params = {
        "key": GOOGLE_API_KEY,
        "cx": SEARCH_ENGINE_ID,
        "q": "any",
    }
    # a timeout keeps the test run from hanging on a stalled connection
    result = requests.get(url, params=params, timeout=30)

    assert result.status_code == 200