# Wikipedia-searching agent

In [None]:
!pip install chromadb wikipedia

In [1]:
import instructor
import pydantic
import wikipedia

In [10]:
# OpenAI setup

import os

from dotenv import load_dotenv
from openai import OpenAI


# Let python-dotenv populate the environment before the key is read.
load_dotenv()

# Fail early when the key is missing or empty rather than at the first request.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("OpenAI API key not found")

# Shared client used by openai_request below.
client = OpenAI(api_key=openai_api_key)

def openai_request(prompt, system=None, model="gpt-3.5-turbo"):
    """Send a single-turn chat request and return the reply text.

    Args:
        prompt: Text sent as the user message.
        system: Optional system message; a generic assistant instruction is
            used when it is falsy.
        model: Name of the chat model to query.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or an empty string when the reply carries no text content.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system or "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    # The SDK types message content as optional; guard so a content-less
    # reply does not crash on None.strip().
    content = response.choices[0].message.content
    return (content or "").strip()

In [117]:
# Generate execution plan

# The user question the agent has to answer.
question = "What is the time difference between Lisbon and Buenos Aires?"

# System prompt: restricts the planner to the two available tools and asks for
# a short, numbered list of steps. The calculator example names both source
# steps explicitly so the model sees a well-formed reference pattern.
system = """You are a helpful assistant that uses its tools to solve problems.

Help the user to create an algorithm as a list of steps that have to be performed in order to answer to the question.

While planning the steps, you can only use the following tools:
- wikipedia search - you can phrase the step as: search wikipedia page of topic,
- calculator - should name the specific calculation operator, eg. subtract or add. If the values proceed from previous steps, name the step in which the value was obtained. For example, you can phrase the step as: ```subtract the value from step 1 from the value from step 3.```.

Use specific terms and give concise instructions. The number of steps should be as low as possible. Put the steps in separate lines and number them.
"""
prompt = f"Create a list of steps that have to be performed in order to answer to the question: ```{question}```"

# Free-text plan; it is converted into structured steps in a later cell.
response = openai_request(prompt=prompt, system=system)
response  # '1. Search Wikipedia page for current local time in Lisbon.\n2. Search Wikipedia page for current local time in Buenos Aires.\n3. Calculate the time difference between the two cities by subtracting the time in Lisbon from the time in Buenos Aires.'

'1. Search Wikipedia page for current local time in Lisbon.\n2. Search Wikipedia page for current local time in Buenos Aires.\n3. Calculate the time difference between the two cities by subtracting the time in Lisbon from the time in Buenos Aires.'

In [142]:
# Extract list of steps and their details with Instructor package

from typing import Union
from pydantic import BaseModel


# Common base for plan steps: every step carries its number within the plan.
class Step(BaseModel):
    step_number: int  # numbering of the step, e.g. 1, 2, 3 in the sample output

# Plan step that looks something up on a Wikipedia page.
class WikipediaStep(Step):
    wikipedia_page: str  # title handed to wikipedia.page() by get_wikipedia_page_html
    what_to_find: str  # what to look for on that page, e.g. 'current local time'

# Plan step that applies a named calculator operation to two values.
class CalculatorStep(Step):
    operation: str  # operation name, e.g. 'subtract'
    value1: str  # first operand, held as text (sample output: 'Lisbon')
    value2: str  # second operand, held as text (sample output: 'Buenos Aires')

# STEP_TYPES = [Wikipedia, Calculator]

# Container for the whole plan; passed as response_model in create_plan.
class StepsPlan(BaseModel):
    # Each entry is either a WikipediaStep or a CalculatorStep.
    steps_list: list[Union[WikipediaStep, CalculatorStep]]

# Wrap the already-configured client so the validated API key is reused
# instead of building a second OpenAI client from the environment.
instructor_client = instructor.from_openai(client)

def create_plan(data: str) -> StepsPlan:
    """Turn a free-text list of steps into a structured StepsPlan.

    Args:
        data: The steps as plain text.

    Returns:
        The StepsPlan produced by the instructor client for the request.
    """
    conversion_request = {
        "role": "user",
        "content": f"Convert the following list of steps to a plan: {data}",
    }
    return instructor_client.chat.completions.create(
        model="gpt-3.5-turbo",
        response_model=StepsPlan,
        messages=[conversion_request],
    )

# Iterating the returned model yields (field_name, value) pairs, so the list
# holds a single ('steps_list', [...steps...]) tuple.
structured_plan = create_plan(response)
classification = list(structured_plan)
classification
# [('steps_list',
#   [WikipediaStep(step_number=1, wikipedia_page='Lisbon', what_to_find='current local time'),
#    WikipediaStep(step_number=2, wikipedia_page='Buenos Aires', what_to_find='current local time'),
#    CalculatorStep(step_number=3, operation='subtract', value1='Lisbon', value2='Buenos Aires')])]

[('steps_list',
  [WikipediaStep(step_number=1, wikipedia_page='Lisbon', what_to_find='current local time'),
   WikipediaStep(step_number=2, wikipedia_page='Buenos Aires', what_to_find='current local time'),
   CalculatorStep(step_number=3, operation='subtract', value1='Lisbon', value2='Buenos Aires')])]

In [152]:
# classification[0] is the ('steps_list', steps) pair; the unpacking below
# requires the plan to contain exactly three steps.
planned_steps = classification[0][1]
wiki_step1, wiki_step2, calc_step = planned_steps
wiki_step1

WikipediaStep(step_number=1, wikipedia_page='Lisbon', what_to_find='current local time')

In [158]:
# Get wikipedia page and search by item similarity
def get_wikipedia_page_html(wikipedia_query: WikipediaStep) -> str:
    """Fetch the Wikipedia page named by a step and return its HTML.

    Args:
        wikipedia_query: Step whose wikipedia_page field is handed to
            wikipedia.page().

    Returns:
        The result of calling html() on the fetched page.
    """
    page_title = wikipedia_query.wikipedia_page
    return wikipedia.page(page_title).html()

def create_vector_db(html_content: str) -> str:
    """Placeholder for vector-database creation.

    Args:
        html_content: HTML of the page; currently unused.

    Returns:
        The fixed string "Vector database".
    """
    placeholder = "Vector database"
    return placeholder


# `page` holds the HTML string returned for the first Wikipedia step.
page = get_wikipedia_page_html(wikipedia_query=wiki_step1)

In [168]:
# `page` is already the HTML string returned by get_wikipedia_page_html,
# so display it directly; calling .html() on a str raises AttributeError.
page

'<div class="mw-content-ltr mw-parser-output" lang="en" dir="ltr"><div class="shortdescription nomobile noexcerpt noprint searchaux" style="display:none">Capital and largest city of Portugal</div>\n<style data-mw-deduplicate="TemplateStyles:r1236090951">.mw-parser-output .hatnote{font-style:italic}.mw-parser-output div.hatnote{padding-left:1.6em;margin-bottom:0.5em}.mw-parser-output .hatnote i{font-style:normal}.mw-parser-output .hatnote+link+.hatnote{margin-top:-0.5em}@media print{body.ns-0 .mw-parser-output .hatnote{display:none!important}}</style><div role="note" class="hatnote navigation-not-searchable">This article is about the Governmental capital city. For other uses, see <a href="/wiki/Lisbon_(disambiguation)" class="mw-disambig" title="Lisbon (disambiguation)">Lisbon (disambiguation)</a>.</div>\n<link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1236090951"><div role="note" class="hatnote navigation-not-searchable">"Lisboa" redirects here. For other uses, s