In [1]:
from dotenv import load_dotenv, find_dotenv
# Load variables from a discovered .env file into the process environment
# before anything reads them (LLM.__init__ reads OPENAI_API_KEY from os.environ).
load_dotenv(find_dotenv())

True

In [2]:
import os
import openai
from grammarflow import *
from pydantic import BaseModel, Field
import random
import time

In [3]:
class LLM:
    """Thin callable wrapper around the OpenAI chat-completions client.

    ``llm(prompt)`` sends ``prompt`` as a single user message to
    ``gpt-3.5-turbo`` and returns the text of the first choice.
    """

    def __init__(self):
        # Raises KeyError when OPENAI_API_KEY is missing from the environment.
        self.client = openai.OpenAI(
            api_key=os.environ["OPENAI_API_KEY"],
        )

    def invoke(self, config: dict):
        """Build a prompt from ``config`` and return the model's reply.

        Args:
            config: passed unchanged to ``PromptContextManager``; the value
                that context manager yields is used as the prompt text.

        Returns:
            The reply text produced by ``__call__`` at temperature 0.01.
        """
        with PromptContextManager(config) as filled_prompt:
            # This class defines no `request` method; the sender is __call__,
            # so the instance itself is invoked here.
            return self(filled_prompt, temperature=0.01)

    def __call__(self, prompt, temperature=0.2, context=None):
        """Send ``prompt`` to the chat model and return the reply text.

        Args:
            prompt: content of the single user message.
            temperature: sampling temperature forwarded to the API.
            context: accepted for call compatibility; not used.

        Returns:
            ``message.content`` of the first choice in the response.
        """
        response = self.client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
        )
        return response.choices[0].message.content


# Module-level client instance; webthink calls it as llm(prompt, temperature=...).
llm = LLM()

In [4]:
class Step(BaseModel):
    # Schema for one reasoning step. It is passed to format_prompt as the
    # grammar model, and the parsed reply is read back as response.Step.
    # The description strings below are runtime text (presumably surfaced to
    # the model through the generated grammar — confirm in grammarflow).

    # NOTE(review): this description ends mid-sentence ("Concisely describe the ")
    # — confirm the intended wording.
    thought: str = Field(..., description="Concisely describe the ")
    # Combined with action_input as "action[action_input]" before being sent to the environment.
    action: str = Field(..., description="You only have 3 options: search | lookup | finish")
    # When the environment reports done, this value is returned as the final answer.
    action_input: str = Field(..., description="Your input to the above action.")

In [5]:
def check_previous_interaction(id_):
  """Tell whether the step counter has moved past the first step.

  Used as the `enable_on` predicate for the history section of the prompt:
  it is True only when `id_` is greater than 1.
  """
  past_first_step = id_ > 1
  return past_first_step

def make_prompt():
  """Assemble the five-section prompt template for the agent.

  Sections, in order: task instructions (placeholder `question`), the grammar
  definition, a worked example (placeholder `example`), the instruction to
  emit the next Step, and the history block (placeholder `history`), which is
  gated by `check_previous_interaction`.

  Returns:
    The populated PromptBuilder.
  """
  # The prompt wording is runtime text sent to the model; it is kept exactly
  # as authored, including the two-space indent on each instruction line.
  task_text = (
    "\n"
    "  Goal: {question}\n"
    "\n"
    "  Your goal is to solve the above QA task in steps. First, think about what information you need to answer the question. \n"
    "  Use `search` to get the information you need. This needs a single keyword or noun as input. If it can't find anything, itll give alternate keywords. You can find them in your thinking history below, use them to search again. \n"
    "  Use `lookup` to find more information from the paragraph returned by search. This matches the action_input you give to setences in search. \n"
    "  Use `finish` to return your final complete answer as input field. Use this if you believe you have enough information to completely answer the task."
  )
  example_text = "\nExample thinking process:{example}\n"
  next_step_text = "Create the next Step using the information available you to below.\n"
  history_text = (
    "DO NOT REPEAT THOUGHTS OR ACTIONS FROM YOUR PAST. ENSURE EACH STEP IS UNIQUE. "
    "Below is the history of your thinking process and corresponding observations.\n"
    "{history}\n"
  )

  builder = PromptBuilder()
  builder.add_section(text=task_text, placeholders=["question"])
  builder.add_section(define_grammar=True)
  builder.add_section(text=example_text, placeholders=["example"])
  builder.add_section(text=next_step_text)
  # The history section is only enabled when the predicate accepts the step id.
  builder.add_section(
    text=history_text,
    placeholders=["history"],
    enable_on=check_previous_interaction,
  )
  return builder

In [6]:
import wikienv, wrappers
# Build the environment from the inside out: the base WikiEnv is wrapped by
# HotPotQAWrapper (split="dev"), which is in turn wrapped by LoggingWrapper.
env = wrappers.LoggingWrapper(
    wrappers.HotPotQAWrapper(wikienv.WikiEnv(), split="dev")
)
# Worked example substituted into the prompt's {example} placeholder.
example = wrappers.EXAMPLE

def step(env, action, max_attempts=10):
    """Call ``env.step(action)``, retrying when the request times out.

    Args:
        env: object exposing ``step(action)``.
        action: action passed through to ``env.step`` unchanged.
        max_attempts: number of tries before giving up (default 10).

    Returns:
        Whatever ``env.step(action)`` returns on the first successful try.

    Raises:
        requests.exceptions.Timeout: when every attempt timed out. Previously
            the function fell off the end and returned None, which made the
            caller fail later with an unrelated tuple-unpacking error.
    """
    # Imported here because the notebook's import cell does not import
    # `requests`; without it the except clause itself raised NameError.
    import requests

    last_timeout = None
    for _ in range(max_attempts):
        try:
            return env.step(action)
        except requests.exceptions.Timeout as exc:
            last_timeout = exc
    raise requests.exceptions.Timeout(
        f"env.step({action!r}) timed out on all {max_attempts} attempts"
    ) from last_timeout

In [7]:
def webthink(idx=None, env=env, to_print=False):
  """Answer one question by looping think -> act -> observe with the LLM.

  Each round renders the prompt (question, example, accumulated history),
  asks `llm` for the next Step, sends "action[action_input]" to the
  environment through `step`, and stores the observation in the history.
  At most 10 rounds run; if the environment never reports done, "finish[]"
  is sent and the answer is reported as "Failed".

  Args:
    idx: forwarded to env.reset(idx=idx) to choose the question.
    env: environment to run against; the default is bound to the module-level
      `env` at the time this function is defined.
    to_print: when True, also print the raw reply, the prompt and step details.

  Returns:
    (final, info['gt_answer'], id_): `final` is the last action_input when the
    environment reported done, otherwise "Failed"; `id_` is the step counter,
    which starts at 1 and is incremented after every executed step.
  """

  def log(st, st_=None):
    # Verbose-only output; the second value is printed only when truthy.
    if not to_print:
      return
    if st_:
      print(st, st_)
    else:
      print(st)

  def render_history(entries):
    # One four-line block per recorded step, blocks separated by a newline.
    blocks = []
    for num, entry in entries.items():
      blocks.append(
        f"Thought {num}: {entry['thought']}\n"
        f"Action {num}: {entry['action']}\n"
        f"Action_Input {num}: {entry['action_input']}\n"
        f"Observation {num}: {entry['observation']}"
      )
    return "\n".join(blocks)

  thin_rule = '---------------'
  wide_rule = '--------------------------'

  # Per-question state
  thought = action = observation = None
  id_ = 1
  history = {}
  history_text = None

  # Fetch the question
  question = env.reset(idx=idx)
  print('Question: ', question)

  for _ in range(10):
    if history:
      history_text = render_history(history)

    with Constrain(make_prompt()) as manager:
      manager.set_config(format='xml')
      manager.format_prompt(
        placeholders={
          "question": question,
          'example': example,
          "history": history_text,
        },
        grammars=[{'description': 'Your thinking state:', 'model': Step}],
        enable_on={'id_': id_},
      )

      raw_reply = llm(manager.prompt, temperature=0.01)
      log(raw_reply)
      parsed = manager.parse(raw_reply)
      log(thin_rule)

      parsed_step = parsed.Step
      thought = parsed_step.thought
      action = parsed_step.action
      action_input = parsed_step.action_input

    observation, _reward, done, info = step(env, f"{action}[{action_input}]")
    # Replaces the two-character sequence backslash + 'n', not a real newline.
    observation = observation.replace("\\n", " ")

    log(wide_rule)
    log(thin_rule)
    log('Prompt:', manager.prompt)
    log(thin_rule)
    log(thin_rule)
    print(f'Step {id_}')
    log('Thought:', thought)
    log('Action:', action)
    log('Action Input:', action_input)
    log('Observation:', observation)
    log(wide_rule)

    history[id_] = dict(
      thought=thought,
      action=action,
      action_input=action_input,
      observation=observation,
    )
    id_ += 1
    if done:
      final = action_input
      break

  if not done:
    # Step budget exhausted: close the episode so `info` carries the answer.
    final = "Failed"
    observation, _reward, done, info = step(env, "finish[]")

  return final, info['gt_answer'], id_

In [9]:
# Run the agent on a small, reproducible sample of questions.
NUM_QUESTIONS = 7405  # presumably the number of questions in the split — confirm
SHUFFLE_SEED = 233    # fixed seed so the sampled questions are the same on every run
SAMPLE_SIZE = 5       # how many shuffled questions to attempt

idxs = list(range(NUM_QUESTIONS))
random.Random(SHUFFLE_SEED).shuffle(idxs)

errors = []   # indices of questions whose run raised
logs = {}     # index -> {'action_input', 'answer', 'steps'} for completed runs
old_time = time.time()
for i in idxs[:SAMPLE_SIZE]:
    try:
        action_input, answer, id_ = webthink(i, env)
        logs[i] = {
            'action_input': action_input,
            'answer': answer,
            'steps': id_
        }
        print('Answer:', answer)
        print('Action Input:', action_input)
        print('-----------')
        print()
    except Exception as exc:
        # Best-effort: record the failed index and keep going, but report the
        # cause. Catching Exception (not a bare except) lets KeyboardInterrupt
        # and SystemExit stop the run.
        errors.append(i)
        print(f'Question {i} failed: {exc!r}')