# Wikipedia-searching agent

Python 3.11

In [None]:
!pip install chromadb instructor wikipedia

In [2]:
import instructor
import pydantic
import wikipedia

In [106]:
# OpenAI setup

import os

from dotenv import load_dotenv
from openai import OpenAI


# Load variables from a .env file (python-dotenv) before reading the key.
load_dotenv()

# Stop right away when the key is absent or empty, rather than failing later
# on the first API request.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if openai_api_key is None or openai_api_key == "":
    raise ValueError("OpenAI API key not found")

# Shared client used by openai_request below.
client = OpenAI(api_key=openai_api_key)

def openai_request(prompt, system=None, model="gpt-3.5-turbo"):
    """Send a single-turn chat request and return the reply text.

    Args:
        prompt: Text sent as the user message.
        system: Text sent as the system message; a generic assistant
            instruction is used when it is None or empty.
        model: Name of the chat model to query.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or an empty string when the reply carries no text content.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system or "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
        temperature=0,
    )
    # The SDK types `content` as optional; guard against None so that
    # .strip() cannot raise AttributeError.
    content = response.choices[0].message.content
    return (content or "").strip()

## Calculator
LLMs are challenged by computations because words are represented as embeddings. Observations:
- it is hard to make an LLM produce responses in a consistent format: it usually sticks to the requested format but drifts away every once in a while (e.g. "x minus y" -> "subtract y from x", which is problematic because the order of the operands is reversed); this is influenced by the temperature
- problematic order of operands
- commands such as "repeat n times" work fine for n<6; for larger values they produce unreliable results

In [178]:
# Generate execution plan

# question = "How much is fifty five plus eleven minus twenty two?"  # 1. Fifty five plus eleven.\n2. Value from step 1 minus twenty two.
# Sample arithmetic questions: spelled-out numbers, digits, parentheses and a
# multi-step instruction.
# NOTE(review): "hudred" in the first question is misspelled; unclear whether
# that is a deliberate robustness probe - confirm before correcting.
questions = [
    "Calculate a hudred divided by five",
    "Tell me the result for nine times eighteen",
    "How much is fifty five plus eleven minus twenty two?",
    "50 / 5",
    "What is 1 + 9 * 3",
    "Calculate 4 * (5 + 5) / 20",
    "Tell me how much is 3 * (9 - (10 - 9))",
    "I want to know how much is (4+6)/(2*2.5)",
    "Take 140 plus 4 and divide it by two three times, that minus 1"
    ]
# System prompt for the planning request: asks for numbered, single-operation
# steps and forbids giving the solution.
# NOTE(review): the text contains typos ("setps", "using using") and numbers two
# instructions "4."; left untouched here because any edit changes what the
# model receives - fix deliberately and re-check the outputs.
system = """You are a helpful instructor. Your goal is to help the user to create a list of steps that solve a mathematical problem. You should not give the solutions of problems.

Execute the following steps to help the user prepare the list of steps:

1. Determine the correct order of operations. Mind the precedence of operators and focus on the position of operands. You can use values obtained in previous steps as input to next setps. The number of steps can be one or more, it should be as low as possible. 

2. Write down the steps. While formulating each step, you should choose a single specific math operation. If the calculation requires using using several operators, list them all as separate steps. 

3. If the values proceed from previous steps, always name the exact step number in which the value was obtained, for example ```value from step 1```.

4. Rephrase the steps to always use operator names such as "five minus two", "ten divided by five" rather than verbs. While rephrasing, it is very important to keep the sense of computation unchanged.

4. Put the steps in separate lines and number them. Only output the steps.

For example, a list of steps for the question "What is 1+10/5?" would be:
1. 10 divided by 5
2. 1 plus value from step 1
"""
# User-message template; fill the {question} placeholder with str.format
# before sending (a plain string, not an f-string).
prompt = "Create a list of steps according to the instructions to answer the question: ```{question}```"

# response = openai_request(prompt=prompt, system=system)
# for question in questions[-1:]:
#     print(f"Question: {question}")
#     response = openai_request(prompt=prompt.format(question=question), system=system)
#     print(response)

In [81]:
# Extract list of steps and their details with Instructor package

from enum import Enum

from typing import Union
from pydantic import BaseModel


# Base model shared by every kind of plan step.
# NOTE(review): documented with `#` comments on purpose - a class docstring
# would presumably be exported into the schema used for extraction; confirm
# before adding one.
class Step(BaseModel):
    step_number: int  # number of the step, e.g. 1 and 2 in the sample output

# Placeholder operand meaning "use the value produced by an earlier step";
# execute_plan swaps it for the stored result of that step.
class ResultOfPreviousStep(BaseModel):
    number_of_step: int  # which step's result to reuse

# Closed set of arithmetic operations a calculator step may request.
# Mixing in `str` makes each member compare equal to its plain-string value.
# NOTE(review): the class name misspells "Operation"; it is referenced under
# this spelling elsewhere in the file, so a rename must be coordinated.
class CalculatorOpeartion(str, Enum):
    Add = "Add"
    Subtract = "Subtract"
    Multiply = "Multiply"
    Divide = "Divide"

# One binary arithmetic operation of the plan.
# The extraction was observed to swap the operands, which breaks subtraction
# and division. The field descriptions spell out which operand is which; they
# are part of the model schema, which instructor uses for extraction.
# Field names and types are unchanged and both operands remain required.
class CalculatorStep(Step):
    operation: CalculatorOpeartion
    first_value: Union[int, float, ResultOfPreviousStep] = pydantic.Field(
        ...,
        description=(
            "Left-hand operand, written BEFORE the operator: the number being "
            "added to, subtracted from, multiplied, or divided "
            "(the 7 in 'seven minus two')."
        ),
    )
    second_value: Union[int, float, ResultOfPreviousStep] = pydantic.Field(
        ...,
        description=(
            "Right-hand operand, written AFTER the operator: the number that "
            "is added, subtracted, multiplied by, or divided by "
            "(the 2 in 'seven minus two')."
        ),
    )

# Step models the extractor may emit; only calculator steps at this point.
STEP_TYPES = [CalculatorStep]

# A whole plan: a list of steps, each validated as one of STEP_TYPES.
class StepsPlan(BaseModel):
    steps_list: list[Union[*STEP_TYPES]]  # star-unpacking inside a subscript needs Python 3.11+

# Instructor-wrapped OpenAI client; create_plan calls it with `response_model=`.
# NOTE(review): this builds a fresh OpenAI() instead of reusing `client` from
# the setup cell; presumably the key is picked up from the environment - confirm.
instructor_client = instructor.from_openai(OpenAI())

def create_plan(data: str) -> StepsPlan:
    """Turn a free-text list of steps into a structured StepsPlan.

    Args:
        data: The step list as plain text; it is embedded verbatim in the
            user message.

    Returns:
        The StepsPlan returned by the instructor client for that text.
    """
    user_message = {
        "role": "user",
        "content": f"Convert the following list of steps to a plan: {data}.",
    }
    completions = instructor_client.chat.completions
    return completions.create(
        model="gpt-3.5-turbo",
        response_model=StepsPlan,
        messages=[user_message],
    )

# Structure the planner's text reply and show each extracted step with its type.
# NOTE(review): in the visible cells above, the only assignment to `response`
# in the calculator planning cell is commented out, so on a fresh top-to-bottom
# run this name may be unbound here - confirm which reply is meant to be passed.
plan = create_plan(response)
for step in plan.steps_list:
    print(type(step), step) 
# <class '__main__.CalculatorStep'> step_number=1 operation=<CalculatorOpeartion.Add: 'Add'> first_value=55 second_value=11
# <class '__main__.CalculatorStep'> step_number=2 operation=<CalculatorOpeartion.Subtract: 'Subtract'> first_value=22 second_value=ResultOfPreviousStep(number_of_step=1) # wrong order of values

<class '__main__.CalculatorStep'> step_number=1 operation=<CalculatorOpeartion.Add: 'Add'> first_value=55 second_value=11
<class '__main__.CalculatorStep'> step_number=2 operation=<CalculatorOpeartion.Subtract: 'Subtract'> first_value=ResultOfPreviousStep(number_of_step=1) second_value=22


In [82]:
# Execute the plan with an agent

from abc import ABC, abstractmethod
from dataclasses import dataclass


class Agent(ABC):
    """Abstract base for agents; concrete subclasses must provide ``execute``."""

    @classmethod
    @abstractmethod
    def execute(cls):
        """Run the agent. The base implementation does nothing and returns None."""
        return None

@dataclass
class CalculatorInput:
    """Operator and two numeric operands for a single calculator call."""

    operation: CalculatorOpeartion
    value1: int | float
    value2: int | float
    
class CalculatorAgent(Agent):
    """Agent that evaluates one binary arithmetic operation."""

    @classmethod
    def _calculate(cls, input: CalculatorInput) -> Union[int, float]:
        """Apply ``input.operation`` to ``input.value1`` and ``input.value2``.

        Returns:
            The arithmetic result; division uses ``/`` and so yields a float.

        Raises:
            ValueError: If the operation is none of Add, Subtract, Multiply
                or Divide.
            ZeroDivisionError: If a division is requested with a zero
                ``value2``.
        """
        if input.operation == CalculatorOpeartion.Add:
            return input.value1 + input.value2
        if input.operation == CalculatorOpeartion.Subtract:
            return input.value1 - input.value2
        if input.operation == CalculatorOpeartion.Multiply:
            return input.value1 * input.value2
        if input.operation == CalculatorOpeartion.Divide:
            return input.value1 / input.value2
        # Reject anything else explicitly rather than treating it as a division.
        raise ValueError(f"Unsupported calculator operation: {input.operation!r}")

    @classmethod
    def execute(cls, input: CalculatorInput) -> Union[int, float]:
        """Public entry point: return the result of the requested operation."""
        return cls._calculate(input)

def execute_plan(plan: StepsPlan) -> dict:
    """Run every step of ``plan`` in list order and collect the results.

    Args:
        plan: Plan whose ``steps_list`` is executed front to back.

    Returns:
        Dict mapping the 1-based position of each step in ``steps_list`` to
        its computed result.

    Raises:
        KeyError: If an operand refers to a step whose result has not been
            stored (yet).
        TypeError: If the plan contains a step that is not a CalculatorStep.
    """
    execution = {}

    def resolve(value):
        # A ResultOfPreviousStep points at an earlier result; plain numbers
        # pass through unchanged. Results are stored by list position, so this
        # assumes the plan numbers its steps 1..n in order.
        if isinstance(value, ResultOfPreviousStep):
            return execution[value.number_of_step]
        return value

    for i, step in enumerate(plan.steps_list, 1):
        if not isinstance(step, CalculatorStep):
            # Without this guard the result variable would be unbound on the
            # first step, or silently carry over the previous step's value.
            raise TypeError(f"Unsupported step type: {type(step).__name__}")
        calculator_input = CalculatorInput(
            operation=step.operation,
            value1=resolve(step.first_value),
            value2=resolve(step.second_value),
        )
        execution[i] = CalculatorAgent.execute(calculator_input)
    return execution

# Execute the plan extracted above; as the cell's last expression, the
# returned {step position: result} dict is displayed.
execute_plan(plan)

{1: 66, 2: 44}

In [None]:
# Full loop
for q in questions:
    # Plan in free text, structure the plan, then compute every step.
    steps = openai_request(prompt=prompt.format(question=q), system=system)
    plan = create_plan(steps)
    execution = execute_plan(plan)
    # Report only after the whole pipeline succeeded for this question.
    print(f"Question: {q}", f"Steps:\n{steps}", "Plan and execution:", sep="\n")
    # Pair each structured step with the value computed for it.
    for step, v in zip(plan.steps_list, execution.values()):
        print(step, "|", v)
    print("-" * 10)

# Question: Calculate a hudred divided by five
# Steps:
# 1. 100 divided by 5
# Plan and execution:
# step_number=1 operation=<CalculatorOpeartion.Divide: 'Divide'> first_value=100 second_value=5 | 20.0
# ----------
# Question: Tell me the result for nine times eighteen
# Steps:
# 1. Nine times eighteen
# Plan and execution:
# step_number=1 operation=<CalculatorOpeartion.Multiply: 'Multiply'> first_value=9 second_value=18 | 162
# ----------
# Question: How much is fifty five plus eleven minus twenty two?
# Steps:
# 1. Fifty five plus eleven
# 2. Twenty two minus value from step 1
# Plan and execution:
# step_number=1 operation=<CalculatorOpeartion.Add: 'Add'> first_value=55 second_value=11 | 66
# step_number=2 operation=<CalculatorOpeartion.Subtract: 'Subtract'> first_value=22 second_value=ResultOfPreviousStep(number_of_step=1) | -44
# ----------
# Question: 50 / 5
# Steps:
# 1. 50 divided by 5
# Plan and execution:
# step_number=1 operation=<CalculatorOpeartion.Divide: 'Divide'> first_value=50 second_value=5 | 10.0
# ----------
# Question: What is 1 + 9 * 3
# Steps:
# 1. 9 multiplied by 3
# 2. 1 plus value from step 1
# Plan and execution:
# step_number=1 operation=<CalculatorOpeartion.Multiply: 'Multiply'> first_value=9 second_value=3 | 27
# step_number=2 operation=<CalculatorOpeartion.Add: 'Add'> first_value=1 second_value=ResultOfPreviousStep(number_of_step=1) | 28
# ----------
# Question: Calculate 4 * (5 + 5) / 20
# Steps:
# 1. 5 plus 5
# 2. 4 times value from step 1
# 3. 20 divided by value from step 2 # WRONG
# Plan and execution:
# step_number=1 operation=<CalculatorOpeartion.Add: 'Add'> first_value=5 second_value=5 | 10
# step_number=2 operation=<CalculatorOpeartion.Multiply: 'Multiply'> first_value=4 second_value=ResultOfPreviousStep(number_of_step=1) | 40
# step_number=3 operation=<CalculatorOpeartion.Divide: 'Divide'> first_value=20 second_value=ResultOfPreviousStep(number_of_step=2) | 0.5
# ----------
# Question: Tell me how much is 3 * (9 - (10 - 9))
# Steps:
# 1. 10 minus 9
# 2. 9 minus value from step 1
# 3. 3 multiplied by value from step 2
# Plan and execution:
# step_number=1 operation=<CalculatorOpeartion.Subtract: 'Subtract'> first_value=10 second_value=9 | 1
# step_number=2 operation=<CalculatorOpeartion.Subtract: 'Subtract'> first_value=9 second_value=ResultOfPreviousStep(number_of_step=1) | 8
# step_number=3 operation=<CalculatorOpeartion.Multiply: 'Multiply'> first_value=3 second_value=ResultOfPreviousStep(number_of_step=2) | 24
# ----------
# Question: I want to know how much is (4+6)/(2*2.5)
# Steps:
# 1. 4 plus 6
# 2. 2 multiplied by 2.5
# 3. value from step 1 divided by value from step 2
# Plan and execution:
# step_number=1 operation=<CalculatorOpeartion.Add: 'Add'> first_value=4 second_value=6 | 10
# step_number=2 operation=<CalculatorOpeartion.Multiply: 'Multiply'> first_value=2 second_value=2.5 | 5.0
# step_number=3 operation=<CalculatorOpeartion.Divide: 'Divide'> first_value=ResultOfPreviousStep(number_of_step=1) second_value=ResultOfPreviousStep(number_of_step=2) | 2.0
# ----------
# Question: Take 140 plus 4 and divide it by two three times, that minus 1
# Steps:
# 1. 140 plus 4
# 2. Divide value from step 1 by 2
# 3. Divide value from step 2 by 2
# 4. Divide value from step 3 by 2
# 5. Value from step 4 minus 1
# Plan and execution:
# step_number=1 operation=<CalculatorOpeartion.Add: 'Add'> first_value=140 second_value=4 | 144
# step_number=2 operation=<CalculatorOpeartion.Divide: 'Divide'> first_value=ResultOfPreviousStep(number_of_step=1) second_value=2 | 72.0
# step_number=3 operation=<CalculatorOpeartion.Divide: 'Divide'> first_value=ResultOfPreviousStep(number_of_step=2) second_value=2 | 36.0
# step_number=4 operation=<CalculatorOpeartion.Divide: 'Divide'> first_value=ResultOfPreviousStep(number_of_step=3) second_value=2 | 18.0
# step_number=5 operation=<CalculatorOpeartion.Subtract: 'Subtract'> first_value=ResultOfPreviousStep(number_of_step=4) second_value=1 | 17.0
# ----------

## Wikipedia

In [None]:
# Generate execution plan

# question = "What is the time difference between Lisbon and Buenos Aires?"
# Question driving the Wikipedia + calculator planning experiment.
question = "What is the time difference between Lisbon and Buenos Aires?"
# System prompt restricting the planner to two tools: wikipedia search and calculator.
# NOTE(review): the text contains typos ("plannninng", "valye") and the example
# "subtract the value from step from the value from step 3" appears to miss a
# step number; left untouched because any edit changes what the model receives.
system = """You are a helpful assistant that uses its tools to solve problems. 

Help the user to create an algorithm as a list of steps that have to be performed in order to answer to the question. 

While plannninng the steps, you can only use the following tools:
- wikipedia search - you can phrase the step as: search wikipedia page of topic,
- calculator - should name the specific calculation operator, eg. subtract or add. If the values proceed from previous steps, name the step in which the valye was obtained. For example, you can phrase the step as: ```subtract the value from step from the value from step 3.```.

Use specific terms and give concise instructions. The number of steps should be as low as possible. Put the steps in separate lines and number them.
"""
# Unlike the calculator section's template, this is an f-string: the question
# is interpolated right here, so `prompt` is ready to send as is.
prompt = f"Create a list of steps that have to be performed in order to answer to the question: ```{question}```"

# Ask the model for the plan; the trailing comment records one sample reply.
response = openai_request(prompt=prompt, system=system)
response  # '1. Search Wikipedia page for current local time in Lisbon.\n2. Search Wikipedia page for current local time in Buenos Aires.\n3. Calculate the time difference between the two cities by subtracting the time in Lisbon from the time in Buenos Aires.'

In [None]:
# Extract list of steps and their details with Instructor package

from typing import Union
from pydantic import BaseModel

# Base model for the steps of the Wikipedia plan.
# NOTE: running this cell rebinds the name `Step` used in the calculator section.
class Step(BaseModel):
    step_number: int  # number of the step, e.g. 1..3 in the sample output

# A lookup step: which Wikipedia page to open and what to look for on it.
class WikipediaStep(Step):
    wikipedia_page: str  # page title passed to wikipedia.page, e.g. 'Lisbon'
    what_to_find: str  # free-text description of the fact wanted, e.g. 'current local time'

# Calculator step for the Wikipedia plan: operator and operands are free text.
# NOTE: running this cell rebinds the name `CalculatorStep`, replacing the
# numeric model defined in the calculator section.
class CalculatorStep(Step):
    operation: str  # operator name as text, e.g. 'subtract' in the sample output
    value1: str  # first operand as text, e.g. 'Lisbon' in the sample output
    value2: str  # second operand as text, e.g. 'Buenos Aires' in the sample output

# STEP_TYPES = [Wikipedia, Calculator]

# Plan whose steps may be Wikipedia lookups or calculator operations.
# NOTE: running this cell rebinds `StepsPlan` from the calculator section.
class StepsPlan(BaseModel):
    steps_list: list[Union[WikipediaStep, CalculatorStep]]

# Instructor-wrapped OpenAI client used by create_plan below.
# NOTE(review): builds another OpenAI() instead of reusing an existing client;
# presumably the key is picked up from the environment - confirm.
instructor_client = instructor.from_openai(OpenAI())

def create_plan(data: str) -> StepsPlan:
    """Turn a free-text list of steps into a structured StepsPlan.

    Args:
        data: The step list as plain text; it is embedded verbatim in the
            user message.

    Returns:
        The StepsPlan returned by the instructor client for that text.
    """
    user_message = {
        "role": "user",
        "content": f"Convert the following list of steps to a plan: {data}",
    }
    completions = instructor_client.chat.completions
    return completions.create(
        model="gpt-3.5-turbo",
        response_model=StepsPlan,
        messages=[user_message],
    )

# list() over the returned model yields (field_name, value) pairs, so the
# result has the shape [('steps_list', [...])] - see the sample output below.
classification = list(create_plan(response))
classification
# [('steps_list',
#   [WikipediaStep(step_number=1, wikipedia_page='Lisbon', what_to_find='current local time'),
#    WikipediaStep(step_number=2, wikipedia_page='Buenos Aires', what_to_find='current local time'),
#    CalculatorStep(step_number=3, operation='subtract', value1='Lisbon', value2='Buenos Aires')])]

In [152]:
# classification[0] is the ('steps_list', [...]) pair; [1] selects the steps.
# NOTE(review): the three-way unpacking assumes exactly three steps (two
# Wikipedia lookups and one calculation), which the model is not guaranteed
# to return; any other count raises ValueError here.
wiki_step1, wiki_step2, calc_step = classification[0][1]
wiki_step1

WikipediaStep(step_number=1, wikipedia_page='Lisbon', what_to_find='current local time')

In [158]:
# Get wikipedia page and search by item similarity
def get_wikipedia_page_html(wikipedia_query: WikipediaStep) -> str:
    """Fetch the Wikipedia page named by the step and return its HTML.

    Args:
        wikipedia_query: Step whose ``wikipedia_page`` is handed to
            ``wikipedia.page``.

    Returns:
        The value of ``html()`` on the retrieved page.
    """
    title = wikipedia_query.wikipedia_page
    return wikipedia.page(title).html()

def create_vector_db(html_content: str) -> str:
    """Placeholder for building a vector database from page HTML.

    The argument is currently ignored; a fixed marker string is returned.
    """
    placeholder = "Vector database"
    return placeholder


# Download the HTML for the first lookup step; `page` holds the function's
# return value (annotated as str), not a page object.
page = get_wikipedia_page_html(wiki_step1)

In [None]:
# `page` already holds the HTML returned by get_wikipedia_page_html, so display
# it directly; calling .html() on that string would raise AttributeError.
page