## Setup things

In [1]:
from openai import OpenAI
import re
import httpx
import requests
import xml.etree.ElementTree as ET
import json

In [2]:
from dotenv import load_dotenv
load_dotenv() # OPENAI_API_KEY
client = OpenAI() 

ARXIV_NAMESPACE = '{http://www.w3.org/2005/Atom}'

## Check if OpenAI works

In [3]:
chat_completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hello there!"}]
)
chat_completion.choices[0].message.content

'Hello! How can I assist you today?'

In [4]:
class ChatBot:
    def __init__(self, system=""):
        self.system = system
        self.messages = []
        if self.system:
            self.messages.append({"role": "system", "content": system})
    
    def __call__(self, message):
        self.messages.append({"role": "user", "content": message})
        result = self.run_llm()
        self.messages.append({"role": "assistant", "content": result})
        return result

    def run_llm(self):
        completion = client.chat.completions.create(
                        model="gpt-4o", 
                        temperature=0,
                        messages=self.messages)
        return completion.choices[0].message.content

In [5]:
prompt = """
You run in a loop of Thought, Action, PAUSE, Observation.
At the end of the loop you output an Answer
Use Thought to describe your thoughts about the question you have been asked.
Use Action to run one of the actions available to you - then return PAUSE.
Observation will be the result of running those actions.

Your available actions are:

calculate:
e.g. calculate: 4 * 7 / 3
Runs a calculation and returns the number - uses Python so be sure to use floating point syntax if necessary

wikipedia:
e.g. wikipedia:
Returns a summary from searching Wikipedia

arxiv_search:
e.g. arxiv_search:
Returns a summary of research papers

Example session:

Question: What is the capital of France?
Thought: I should look up France on Wikipedia
Action: wikipedia: France
PAUSE

You will be called again with this:

Observation: France is a country. The capital is Paris.

You then output:

Answer: The capital of France is Paris
""".strip()


In [6]:
# abot = Agent(prompt)
action_re = re.compile('^Action: (\w+): (.*)$')

  action_re = re.compile('^Action: (\w+): (.*)$')


In [30]:
class Agent:
    def __init__(self, system_prompt="", max_turns=1, known_actions=None):
        self.max_turns = max_turns
        self.bot = ChatBot(system_prompt)
        self.known_actions = known_actions
        
    def run(self, question):
        i = 0
        next_prompt = question
        
        while i < self.max_turns:
            i += 1
            result = self.bot(next_prompt)
            print(result)
            actions = [action_re.match(a) for a in result.split('\n') if action_re.match(a)]
            if actions:
                # There is an action to run
                action, action_input = actions[0].groups()
                if action not in self.known_actions:
                    raise Exception("Unknown action: {}: {}".format(action, action_input))
                print(" -- running {} {}".format(action, action_input))
                observation = self.known_actions[action](action_input)
                print("Observation:", observation)
                next_prompt = "Observation: {}".format(observation)
            else:
                return

## Tools / Actions at the disposal of the Agent

In [31]:

def wikipedia(q):
    return httpx.get("https://en.wikipedia.org/w/api.php", params={
        "action": "query",
        "list": "search",
        "srsearch": q,
        "format": "json"
    }).json()["query"]["search"][0]["snippet"]


def arxiv_search(q):
    url = f'http://export.arxiv.org/api/query?search_query=all:{q}&start=0&max_results=1'
    res = requests.get(url)
    et_root = ET.fromstring(res.content)
    for entry in et_root.findall(f"{ARXIV_NAMESPACE}entry"):
        title = entry.find(f"{ARXIV_NAMESPACE}title").text.strip()
        summary = entry.find(f"{ARXIV_NAMESPACE}summary").text.strip()
    return json.dumps({"title" : title, "summary" : summary})


def calculate(what):
    return eval(what)


## Test our Agent

In [32]:
known_actions = {
    "wikipedia": wikipedia,
    "calculate": calculate,
    "arxiv_search": arxiv_search
}
agent = Agent(prompt, max_turns=3, known_actions=known_actions)
agent.run("what is the capital of indonesia?")

Thought: I should look up Indonesia on Wikipedia to find out its capital.
Action: wikipedia: Indonesia
PAUSE
 -- running wikipedia Indonesia
Observation: <span class="searchmatch">Indonesia</span>, officially the Republic of <span class="searchmatch">Indonesia</span>, is a country in Southeast Asia and Oceania, between the Indian and Pacific oceans. Comprising over 17
Answer: The capital of Indonesia is Jakarta.


In [35]:
agent.run("what is the sum of 7 and 5?")

Thought: I need to calculate the sum of 7 and 5.
Action: calculate: 7 + 5
PAUSE
 -- running calculate 7 + 5
Observation: 12
Answer: The sum of 7 and 5 is 12.


In [37]:
agent.run("explain the lightrag paper")

Thought: I should search for the "lightrag paper" to find relevant information about it, as it seems to be a specific research topic or publication.
Action: arxiv_search: lightrag
PAUSE
 -- running arxiv_search lightrag
Observation: {"title": "LightRAG: Simple and Fast Retrieval-Augmented Generation", "summary": "Retrieval-Augmented Generation (RAG) systems enhance large language models\n(LLMs) by integrating external knowledge sources, enabling more accurate and\ncontextually relevant responses tailored to user needs. However, existing RAG\nsystems have significant limitations, including reliance on flat data\nrepresentations and inadequate contextual awareness, which can lead to\nfragmented answers that fail to capture complex inter-dependencies. To address\nthese challenges, we propose LightRAG, which incorporates graph structures into\ntext indexing and retrieval processes. This innovative framework employs a\ndual-level retrieval system that enhances comprehensive information retr

In [39]:
agent = Agent(prompt, max_turns=3, known_actions=known_actions)
agent.run("AI Bites")

Thought: "AI Bites" could refer to a variety of topics, such as a series of short educational pieces on artificial intelligence, a podcast, or a specific concept within AI. To provide a more accurate answer, I should look up "AI Bites" to see if it is a recognized term or series. 
Action: wikipedia: AI Bites
PAUSE
 -- running wikipedia AI Bites
Observation: &quot;Retrieval Augmented Generation(RAG) — A quick and comprehensive introduction&quot;. <span class="searchmatch">ai</span>-<span class="searchmatch">bites</span>.net. Kiela Douwe, Ho Alan (Oct 13, 2023). &quot;Where did Retrieval Augmented
Thought: It seems that "AI Bites" might be related to a website or series that discusses topics in artificial intelligence, such as Retrieval Augmented Generation (RAG). To provide a more comprehensive answer, I should search for more information on "AI Bites" to understand its context and purpose. 
Action: arxiv_search: AI Bites
PAUSE
 -- running arxiv_search AI Bites
Observation: {"title": "A