In [219]:
import json
import os
import re

from openai import OpenAI, AssistantEventHandler
from langchain.retrievers import WikipediaRetriever
from langchain.tools import DuckDuckGoSearchResults
from langchain.document_loaders import WebBaseLoader

def search_wiki(inputs):
    """Look up a term on Wikipedia and return the text of the matching pages.

    Args:
        inputs: Tool-call arguments; must contain the key "term" holding the
            search term.

    Returns:
        The page contents of the retrieved documents joined with newlines
        (an empty string when nothing is retrieved).

    Raises:
        KeyError: If "term" is missing from ``inputs``.
    """
    topic = inputs["term"]
    # The retriever's option is spelled `top_k_results`. The earlier spelling
    # `top_k_result` is not a recognised option, so the limit of 4 was not
    # being configured.
    retriever = WikipediaRetriever(top_k_results=4)
    docs = retriever.invoke(topic)
    return "\n".join(doc.page_content for doc in docs)

def search_duckduckgo(inputs):
    """Search DuckDuckGo and return the result links as a JSON string.

    Args:
        inputs: Tool-call arguments; must contain the key "query".

    Returns:
        A JSON object string with a single key "urls" whose value is the
        links, in the order they appear in the search output, joined by
        newlines. The value is an empty string when no link is found.

    Raises:
        KeyError: If "query" is missing from ``inputs``.
    """
    query = inputs["query"]
    raw_results = DuckDuckGoSearchResults().invoke(query)
    # Each hit carries a "link: <url>" field. The URL may be followed by "]",
    # by ", <next field>", or by the end of the text. Reading up to the next
    # whitespace or "]" handles all three; scanning for "]" alone never
    # advanced (and so never terminated) when no bracket followed a link.
    matches = re.findall(r"link: (https?://[^\s\]]+)", raw_results)
    # A comma directly after the URL is the field separator, not part of it.
    urls = [url.rstrip(",") for url in matches]
    return json.dumps({"urls": "\n".join(urls)})

def web_scrapping(inputs):
    """Load up to two web pages and return their text.

    Args:
        inputs: Tool-call arguments; must contain the key "urls", a string of
            URLs separated by "|" or by newlines.

    Returns:
        The text of each loaded page with newlines replaced by spaces, pages
        separated by a blank line. An empty string when no URL is supplied.

    Raises:
        KeyError: If "urls" is missing from ``inputs``.
    """
    # search_duckduckgo returns newline-separated URLs while this tool's
    # schema asks for "|". Accept both so a list passed straight through is
    # not treated as one long URL.
    candidates = inputs["urls"].replace("\n", "|").split("|")
    urls_list = [url.strip() for url in candidates if url.strip()]
    if not urls_list:
        return ""
    # At most the first two URLs are fetched, as before; slicing already
    # copes with a shorter list, so no length check is needed.
    loader = WebBaseLoader(urls_list[:2])
    docs = loader.load()
    return "\n\n".join(
        doc.page_content.replace("\n", " ").replace("  ", " ") for doc in docs
    )

def save_content_to_txt(inputs):
    """Write research content to ./.cache/agent/<filename>.txt.

    Args:
        inputs: Tool-call arguments; must contain "content" (the text to
            write) and "filename" (the file name without extension).

    Returns:
        The content that was written, unchanged.

    Raises:
        KeyError: If "content" or "filename" is missing from ``inputs``.
    """
    content = inputs["content"]
    filename = inputs["filename"]
    folder_dir = "./.cache/agent"
    os.makedirs(folder_dir, exist_ok=True)
    # The file name is chosen by the model. A path separator in it would
    # point into a sub-directory that does not exist (open() fails) or, with
    # "..", outside the cache folder, so separators are flattened to "_".
    safe_name = filename.replace("/", "_").replace("\\", "_")
    with open(f"{folder_dir}/{safe_name}.txt", "w", encoding="utf-8") as file:
        file.write(content)
    return content

# Dispatch table from the tool name requested in a tool call to the local
# function that implements it. Each tool is registered under its own name.
functions_map = {
    tool.__name__: tool
    for tool in (
        search_wiki,
        search_duckduckgo,
        web_scrapping,
        save_content_to_txt,
    )
}

def _function_tool(name, description, parameters):
    """Build one function-tool schema entry.

    ``parameters`` is a sequence of (parameter name, description) pairs; every
    parameter is declared as a required string.
    """
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": {
                    param: {"type": "string", "description": text}
                    for param, text in parameters
                },
                "required": [param for param, _ in parameters],
            },
        },
    }


# Tool schemas passed to the assistant; the names match the keys of
# functions_map.
functions = [
    _function_tool(
        "search_wiki",
        "Receives a single term, searches Wikipedia, and returns information about that term.",
        [("term", "The term you will search for on Wikipedia.")],
    ),
    _function_tool(
        "search_duckduckgo",
        "It receives a query, searches the input content using DuckDuckGo, and returns list of the website address(url)",
        [("query", "The query you will search for. Example query: Research about the XZ backdoor")],
    ),
    _function_tool(
        "web_scrapping",
        # The embedded newline and the indentation after it are part of the
        # description text and are kept exactly.
        "This function accesses the specified URL and extracts information from them.\n"
        "            This function receives URL list, scrapes the information from those URL, and returns web information.",
        [("urls", "url list to extracts web information. Example url list: 'https://en.wikipedia.org/wiki/URL|https://naver.com|https://daum.net'")],
    ),
    _function_tool(
        "save_content_to_txt",
        "This tool receives content and a filename and saves them as a text file.",
        [
            ("content", "Content to be saved as a text file."),
            ("filename", "Filename to save the content. A single word that contains the content."),
        ],
    ),
]

In [117]:
# Instructions given to the assistant when it is created.
ASSISTANT_INSTRUCTIONS = """
You are a research expert.
When you get a research request, decide between Wikipedia and DuckDuckGo.
Perform the search using the better option.
If one source lacks information, gather more from the other.
With DuckDuckGo, obtain the website's URL.
Use the URL to extract information from the site.
Write all collected content.
And save all collected content as a text file.
Name the file with the key term.
"""

client = OpenAI()

# Create the assistant with the function-tool schemas from `functions`.
assistant = client.beta.assistants.create(
    model="gpt-4o-mini",
    name="Research Assistant",
    tools=functions,
    instructions=ASSISTANT_INSTRUCTIONS,
)

In [118]:
assistant_id = assistant.id
# Only the last expression of a cell is displayed; a bare name on an earlier
# line is evaluated and discarded. Keeping the object last shows it.
assistant

In [186]:
# Create the thread with the research request already in it. With the message
# commented out, a run started right after this cell has no user message to
# answer.
thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "Research about the XZ backdoor",
        }
    ]
)

In [187]:
thread_id = thread.id
# Only the last expression of a cell is displayed; a bare name on an earlier
# line is evaluated and discarded. Keeping the object last shows it.
thread

In [None]:
# Start a run of the assistant on the thread and remember its id, which the
# later cells pass to get_run / submit_tool_outputs.
run = client.beta.threads.runs.create(assistant_id=assistant_id, thread_id=thread_id)
run_id = run.id
run

In [257]:
def get_run(run_id, thread_id):
    """Fetch a run from the API.

    Args:
        run_id: Id of the run to look up.
        thread_id: Id of the thread the run belongs to.

    Returns:
        Whatever ``client.beta.threads.runs.retrieve`` returns for that run.
    """
    runs_api = client.beta.threads.runs
    current_run = runs_api.retrieve(thread_id=thread_id, run_id=run_id)
    return current_run

def get_messages(thread_id):
    """Print the messages of a thread, one "role:text" entry per message.

    Messages are printed in the reverse of the order the API lists them
    (presumably newest-first by default, so this reads oldest-first; confirm
    against the API reference).

    Only text content is printed. A message with several text blocks has
    them joined by newlines; a message with no text block (empty content, or
    only non-text blocks) prints as "role:" instead of raising.

    Args:
        thread_id: Id of the thread whose messages are printed.

    Returns:
        None. The messages are written to standard output.
    """
    listing = client.beta.threads.messages.list(
        thread_id=thread_id,
    )
    for message in reversed(list(listing)):
        # Indexing content[0].text directly fails on empty content and on a
        # first block that is not text, so pick out the text blocks instead.
        texts = [
            block.text.value
            for block in message.content
            if getattr(block, "text", None) is not None
        ]
        body = "\n".join(texts)
        print(f"{message.role}:{body}")

def add_message(thread_id, content):
    """Append a user message to a thread.

    Args:
        thread_id: Id of the thread to post to.
        content: Text of the message.

    Returns:
        Whatever ``client.beta.threads.messages.create`` returns.
    """
    message_fields = {"role": "user", "content": content}
    messages_api = client.beta.threads.messages
    return messages_api.create(thread_id=thread_id, **message_fields)

def get_tool_outputs(run_id, thread_id):
    """Execute the tool calls a run is waiting on and collect their results.

    Args:
        run_id: Id of the run to inspect.
        thread_id: Id of the thread the run belongs to.

    Returns:
        A list with one {"output": ..., "tool_call_id": ...} dict per
        requested tool call, in request order. The list is empty when the
        run's status is not "requires_action".
    """
    run = get_run(run_id, thread_id)
    print("========run========\n", run, "\n===========================")
    # Defined up front so every status has something to return; previously
    # any status other than "requires_action" ended in UnboundLocalError.
    outputs = []
    if run.status != "requires_action":
        # TODO: handle the expired case - the user then needs to be told to
        # send the message again.
        return outputs
    for action in run.required_action.submit_tool_outputs.tool_calls:
        function = action.function
        try:
            output = functions_map[function.name](json.loads(function.arguments))
        except Exception as error:
            # Report the failure as this call's output rather than aborting:
            # an exception here would leave every tool call unanswered.
            output = f"Error while running {function.name}: {error!r}"
        outputs.append({
            "output": output,
            "tool_call_id": action.id,
        })
    return outputs

def submit_tool_outputs(run_id, thread_id):
    """Run the pending tool calls of a run and send their results back.

    Args:
        run_id: Id of the run to answer.
        thread_id: Id of the thread the run belongs to.

    Returns:
        The list of tool outputs that was submitted; an empty list when the
        run had no pending tool calls, in which case nothing is submitted.
    """
    output = get_tool_outputs(run_id, thread_id)
    if not output:
        # Nothing to report, so there is nothing to submit.
        return output
    client.beta.threads.runs.submit_tool_outputs(
        run_id=run_id,
        thread_id=thread_id,
        tool_outputs=output,
    )
    return output

In [258]:
# Show the run's current status (the recorded output below is 'requires_action').
get_run(run_id, thread_id).status

'requires_action'

In [253]:
# Print the thread's messages as role:text lines.
get_messages(thread_id)

user:Research about the XZ backdoor
assistant:I have completed the research on the XZ backdoor. Here's a summary of the findings:

### XZ Utils Backdoor Overview

#### Background
In February 2024, a malicious backdoor was introduced to the Linux utility xz within the liblzma library in versions 5.6.0 and 5.6.1 by an account using the name "Jia Tan". This backdoor allows attackers with a specific Ed448 private key to have remote code execution capabilities on affected Linux systems. The issue was assigned a Common Vulnerabilities and Exposures (CVE) number, CVE-2024-3094, and received a CVSS score of 10.0, indicating a critical vulnerability.

While xz is commonly present in most Linux distributions, it was noted that the compromised version had not yet been widely deployed to production systems, although it was found in development versions of major distributions. The discovery was made by the software developer Andres Freund, who reported it on March 29, 2024.

Freund's investigation 

In [None]:
# Post another user request to the same thread.
add_message(thread_id, "Research about the Onepiece")

In [None]:
# Print the full run object as retrieved from the API.
print(get_run(run_id, thread_id))

In [259]:
# Execute the tool calls the run is waiting on and submit their outputs.
# NOTE: the recorded output below ends in a TypeError raised during this call.
submit_tool_outputs(run_id, thread_id)

 Run(id='run_1kw4nMZIzw6b4DskD3UsJBAk', assistant_id='asst_85obwClXjFOH4vFg5oX3lT7F', cancelled_at=None, completed_at=None, created_at=1726406183, expires_at=1726406783, failed_at=None, incomplete_details=None, instructions="\nYou are a research expert.\nWhen you get a research request, decide between Wikipedia and DuckDuckGo.\nPerform the search using the better option.\nIf one source lacks information, gather more from the other.\nWith DuckDuckGo, obtain the website's URL.\nUse the URL to extract information from the site.\nWrite all collected content.\nAnd save all collected content as a text file.\nName the file with the key term.\n", last_error=None, max_completion_tokens=None, max_prompt_tokens=None, metadata={}, model='gpt-4o-mini', object='thread.run', parallel_tool_calls=True, required_action=RequiredAction(submit_tool_outputs=RequiredActionSubmitToolOutputs(tool_calls=[RequiredActionFunctionToolCall(id='call_Kf3l6Yq0QlNQehicgEhGrHGS', function=Function(arguments='{"content":"

TypeError: list indices must be integers or slices, not str