In [None]:
import ollama
import wikipedia
import json

# Names of the models that `ollama.list()` reports; displayed as the cell output.
[model["name"] for model in ollama.list()["models"]]

# Intro

In the research world, there are two concepts that, in my opinion, make up what we today call an "AI Agent":
1. Chain-of-Thought prompting: https://arxiv.org/abs/2201.11903
2. Reasoning and Acting in Language Models (ReAct): https://arxiv.org/abs/2210.03629

As this is still very much an open research topic, there is no single clear definition of what an Agent or AI Agent is.

I like to think about it the following way:
An AI Agent is a system that can autonomously solve a task by interacting with its environment.

In the software world, we can then translate/simplify that to:

A Software Program that gets a task in text form as input, and produces a sequence of function/API calls as output.

## Giving hard questions to an LLM

In [None]:
# Ollama model tag passed as `model=` to every `ollama.chat` call in this notebook.
MODEL = 'llama3.1:8b-instruct-q4_0'
# Sampling temperature forwarded to Ollama through the `options` dict.
TEMPERATURE = 0
# The question given to the model in every experiment below.
TASK = 'How old is the oldest crocodile and what is that number divided by 3.67897?'

In [None]:
# Baseline: send the task as a single user message, with no system prompt and no tools.
baseline_messages = [{"role": "user", "content": TASK}]
response = ollama.chat(
    model=MODEL,
    messages=baseline_messages,
    options={"temperature": TEMPERATURE},
)
print(response["message"]["content"])

## Function Calling

In [None]:
def wiki_search(query: str) -> str:
    """Return the result of `wikipedia.summary(query)`.

    Any exception raised by the lookup is caught and its message is returned
    as the result string instead, so the caller always receives text.

    Args:
        query: Search term forwarded to `wikipedia.summary`.

    Returns:
        The summary text, or `str(exception)` if the lookup raised.
    """
    try:
        summary = wikipedia.summary(query)
    except Exception as error:
        return str(error)
    return summary

def calculator(problem: str) -> str:
    """Evaluate a single binary arithmetic expression such as ``"5 + 3"``.

    The input must consist of exactly three whitespace-separated tokens:
    a number, one of the operators ``+ - * /``, and another number.
    Both numbers are parsed with ``float``.

    Args:
        problem: The expression text, e.g. ``"1994 + 20"``.

    Returns:
        The result formatted with ``str`` (e.g. ``"8.0"``), or a short
        human-readable error message when the input cannot be evaluated.
        The function never raises; any exception is returned as its message.
    """
    try:
        # split() without an argument splits on any whitespace run and drops
        # leading/trailing whitespace, so "5  +  3" or " 5 + 3\n" still yields
        # three terms. split(" ") would produce empty tokens and reject them.
        terms = problem.split()
        if len(terms) != 3:
            return "Only input 2 terms and one operator e.g. '5 + 3'"
        left, op, right = terms
        n1, n2 = float(left), float(right)
        if op == "+":
            return str(n1 + n2)
        elif op == "-":
            return str(n1 - n2)
        elif op == "*":
            return str(n1 * n2)
        elif op == "/":
            if n2 != 0:
                return str(n1 / n2)
            else:
                return "Can't divide by zero!"
        else:
            return "Can't recognize operator!"
    except Exception as e:
        # Report the failure (e.g. a non-numeric term) as text rather than raising.
        return str(e)

def finish(result: str) -> str:
    """Print the final answer and report completion.

    Args:
        result: The final answer text to print.

    Returns:
        The fixed string "Finished".
    """
    status = "Finished"
    print(result)
    return status

# Registry mapping each tool's function name to the callable itself, so a tool
# can be looked up by the "action" string found in the model's JSON reply.
ACTIONS = {tool.__name__: tool for tool in (wiki_search, calculator, finish)}

In [None]:
# System prompt for a single tool call: one wiki_search example plus the JSON
# schema the reply must follow. The JSON examples must not end with a trailing
# comma: that is invalid JSON, and a model that copies it produces a reply that
# json.loads rejects.
system_prompt = """
Think step by step.
You can call tools to solve steps.
Only generate one function call at time.

These Tools are available to you:
- wiki_search:
{
    "thought": "The user wants me to search the web to find the president of America",
    "action": "wiki_search",
    "action_input": "President of a America"
}

Always uset this json format to generate a repsonse. NEVER WRITE ANYTHING OUTSIDE.
```json
{
    "thought": "<YOUR THOUGHT>",
    "action": "<YOUR ACTION>",
    "action_input": "<YOUR ACTION INPUT>"
}
```
"""

# Conversation so far: the instructions above, followed by the task.
memory = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": TASK},
]

response = ollama.chat(model=MODEL, messages=memory, options={"temperature": TEMPERATURE})
print(response["message"]["content"])

In [None]:
# The system prompt shows the schema inside a ```json fence, so the model may wrap
# its reply in one (or add stray text around it). Keep only the outermost {...}
# span before parsing; a reply that is already a bare JSON object is unchanged.
raw_reply = response["message"]["content"]
action = json.loads(raw_reply[raw_reply.find("{"): raw_reply.rfind("}") + 1])
action

In [None]:
# Look up the tool the model named and call it with the model-supplied input.
tool = ACTIONS[action["action"]]
result = tool(action["action_input"])
result

In [None]:
# Ask the task again, this time with the tool result supplied as the system message.
memory = [
    {"role": "system", "content": result},
    {"role": "user", "content": TASK},
]
response = ollama.chat(
    model=MODEL,
    messages=memory,
    options={"temperature": TEMPERATURE},
)
print(response["message"]["content"])

## CoT + ReAct (Function Calling in a loop) --> AI Agent

In [None]:
# System prompt for the agent loop: explains the thought / action / action_input
# cycle, fixes the JSON reply schema, and gives one example call for each tool
# (wiki_search, calculator, finish).
system_prompt = """
You are an AI Agent that uses thought, action, action input and observation to solve a problem.
You get a list of tools available to you. You work in a loop where you generate thoughts and actions + action_inputs. You will get observations from your actions.
Think step by step. Split the problem into a sequence of actions.
Use the finish action once you are done.

Always uset this json format to generate a repsonse. NEVER WRITE ANYTHING OUTSIDE.
```json
{
    "thought": "<YOUR THOUGHT>",
    "action": "<YOUR ACTION>",
    "action_input": "<YOUR ACTION INPUT>"
}
```

These Tools are available to you:
- wiki_search:
{
    "thought": "The user wants me to search the web to find the president of America",
    "action": "wiki_search",
    "action_input": "President of a America"
}
- calculator:
{
    "thought": "The user wants me to calculate the age of the president plus twenty. I have to remember that this tool can only take 2 numbers and an operator as input.",
    "action": "calculator",
    "action_input": "1994 + 20"
}
- finish:
{
    "thought": "I am done with the task",
    "action": "finish",
    "action_input": "The answer to the questions is ..."
}
"""

# Hard upper bound on agent iterations, so a model that never emits "finish"
# cannot keep the cell running forever.
MAX_STEPS = 10

memory = [
    {"role": "system", "content": system_prompt + "\n"},
    {"role": "user", "content": TASK + "\n"},
]
action = None

for _ in range(MAX_STEPS):
    response = ollama.chat(model=MODEL, messages=memory, options={"temperature": TEMPERATURE})
    reply = response["message"]["content"]
    print(reply)

    # Only a successfully executed "finish" may end the loop, so reset each turn.
    action = None
    try:
        # The prompt shows the schema inside a ```json fence, so the model may echo
        # the fence or add stray text; parse only the outermost {...} span.
        output = json.loads(reply[reply.find("{"): reply.rfind("}") + 1])
        requested_action = output["action"]
        observation = ACTIONS[requested_action](output["action_input"])
        action = requested_action
    except json.JSONDecodeError as e:
        # Return the parse error as an observation so the model can correct itself.
        observation = f"Could not parse your response as JSON: {e}"
    except (KeyError, TypeError) as e:
        # Unknown tool name, missing "action"/"action_input" key, or a non-object reply.
        observation = (
            f"Invalid action request ({e}). "
            f"Available actions: {', '.join(ACTIONS)}"
        )
    print(observation)

    # Record both sides of the turn so the next call sees the full history.
    memory.append({"role": "assistant", "content": reply + "\n"})
    memory.append({"role": "user", "content": observation + "\n"})

    if action == "finish":
        break
else:
    print(f"Stopped after {MAX_STEPS} steps without a finish action.")

### How that looks for the LLM

In [None]:
from transformers import AutoTokenizer
# Hugging Face repository id whose tokenizer is loaded below.
huggingface_model_id = "meta-llama/Llama-3.1-8B-Instruct"
# NOTE(review): presumably the same model family as the Ollama MODEL tag, so the
# chat template should match what Ollama applies — confirm.
tokenizer = AutoTokenizer.from_pretrained(huggingface_model_id)

In [None]:
# Render the accumulated conversation through the tokenizer's chat template as
# plain text (tokenize=False) and print it.
rendered_conversation = tokenizer.apply_chat_template(conversation=memory, tokenize=False)
print(rendered_conversation)