# Multi-hop question answering with agent

In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
import nest_asyncio
nest_asyncio.apply()

In [3]:
import warnings
from pathlib import Path

import pandas as pd
import weaviate
from pydantic import BaseModel
from llama_index import Document, ServiceContext
from llama_index.prompts.base import Prompt
from llama_index.prompts.prompt_type import PromptType
from llama_index import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores import WeaviateVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index.llms import ChatMessage, LiteLLM, LangChainLLM, OpenAI
from llama_index.tools import QueryEngineTool, ToolMetadata
from llama_index.agent import OpenAIAgent
from llama_index.tools.function_tool import FunctionTool
from llama_index.callbacks import CallbackManager, LlamaDebugHandler

from bellek.llama_index.obs import make_phoenix_trace_callback_handler
from bellek.text.utils import fuzzy_match
from bellek.utils import generate_time_id, set_seed

set_seed(42)

/Users/bdsaglam/knowledge/bellek/.venv/lib/python3.10/site-packages/pydantic/_internal/_config.py:219: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.1.1/migration/
/Users/bdsaglam/knowledge/bellek/.venv/lib/python3.10/site-packages/litellm/proxy/_types.py:83: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.1.1/migration/
  extra = Extra.allow  # Allow extra fields
/Users/bdsaglam/knowledge/bellek/.venv/lib/python3.10/site-packages/litellm/proxy/_types.py:86: PydanticDeprecatedSince20: Pydantic V1 style `@root_validator` validators are deprecated. You should migrate to Pydantic V2 style `@model_validator` validators, see the migration guid

In [4]:
phoenix_handler = make_phoenix_trace_callback_handler(Path(f"/tmp/phoenix/thesis-kg-llm/baseline-agent/traces-{generate_time_id()}.jsonl"))
callback_manager = CallbackManager(handlers=[
    phoenix_handler,
    # LlamaDebugHandler(print_trace_on_end=True),
])

In [5]:
llm = OpenAI(
    model="gpt-3.5-turbo", 
    temperature=0,
    api_base="http://localhost:11000/",
)
embed_model = HuggingFaceEmbedding("sentence-transformers/all-MiniLM-L6-v2")
service_context = ServiceContext.from_defaults(
    llm=llm, 
    embed_model=embed_model,
    callback_manager=callback_manager
)

In [6]:
ds_df = pd.read_json('/Users/bdsaglam/knowledge/bellek/data/generated/musique-kg-llm/train/dataset.jsonl', orient='records', lines=True)
qd_df = pd.read_json('/Users/bdsaglam/knowledge/bellek/data/generated/musique-kg-llm/train/question-decomposition.jsonl', orient='records', lines=True)
df = pd.merge(ds_df.drop(columns=['question', 'question_decomposition']), qd_df, on='id', suffixes=('', ''))
df.head()

Unnamed: 0,id,paragraphs,answer,answer_aliases,answerable,text,question,question_decomposition
0,2hop__128801_205185,"[{'idx': 0, 'title': 'Pama, Burkina Faso', 'pa...",Midland County,"[Midland County, Texas]",True,Pama is a town located in the province of Komp...,What county is the town where KNFM is licensed...,[{'question': 'In which town is KNFM licensed?...
1,2hop__719559_217649,"[{'idx': 0, 'title': 'Antoine Marchand', 'para...",Warner Bros.,[],True,Antoine Marchand is a record label established...,What's the record label of the artist who put ...,[{'question': 'Who is the artist who released ...
2,2hop__128806_205185,"[{'idx': 0, 'title': 'Spanish Town', 'paragrap...",Midland County,"[Midland County, Texas]",True,Spanish Town is the capital and the largest to...,What region is the town where KQRX is liscense...,[{'question': 'In which town is KQRX licensed?...
3,2hop__837090_278127,"[{'idx': 0, 'title': 'The Opening (album)', 'p...",Roc-A-Fella Records,[],True,The Opening is a live album by American jazz p...,What is the record label of the Do It Again pe...,[{'question': 'Who is the performer of the son...
4,2hop__128895_11424,"[{'idx': 0, 'title': 'Ehrhardt, South Carolina...",15504,[],True,"Ehrhardt is a town in Bamberg County, South Ca...",How many households were there in the town WPU...,[{'question': 'In which town is WPUR licensed?...


In [7]:
def make_docs(example, only_supporting=False):
    ps = example["paragraphs"]
    for p in ps:
        if only_supporting and not p["is_supporting"]:
            continue
        idx = p["idx"]
        title = p["title"]
        body = p["paragraph_text"]
        is_supporting = p["is_supporting"]
        text = f"# {title}\n{body}"
        yield Document(
            text=text,
            metadata={"parent_id": example["id"], "idx": idx, "is_supporting": is_supporting},
            excluded_llm_metadata_keys=["parent_id", "idx", "is_supporting"],
        )

In [8]:
weaviate_client = weaviate.Client("http://localhost:50080")
vector_store = WeaviateVectorStore(weaviate_client=weaviate_client)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

def make_query_engine(example):
    documents = list(make_docs(example, only_supporting=False))
    vector_index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    query_engine = vector_index.as_query_engine(
        service_context=service_context,
        vector_store_query_mode="hybrid",
        alpha=0.6,
        similarity_top_k=3,
    )
    return query_engine

In [9]:
def make_answer_sub_question(example):
    def answer_sub_question(question: str) -> str:
        """Use this tool to answer a sub-question."""
        return make_query_engine(example).query(question).response
    return FunctionTool.from_defaults(fn=answer_sub_question)

In [10]:
def make_persist_tool():
    outputs = []
    def persist_answers(sub_answers: list[str], final_answer: str) -> str:
        """Save the sub-answers and final answer to a database at the end."""
        outputs.append(dict(final_answer=final_answer, sub_answers=sub_answers))
        return "SUCCESS"

    tool =  FunctionTool.from_defaults(fn=persist_answers)
    def _get_output():
        if outputs:
            return outputs[-1]
        else:
            return None
    tool._get_output = _get_output
    return tool

In [11]:
SYSTEM_PROMPT_STR = """
You are helpful multi-hop question answering assistant that answers the given question by answering each sub-question. You must use `answer_sub_question` tool to answer each sub-question. After you get the answer for the first question, you reformulate the second sub-question and repeat the same procedure. 
Your answers must be in 2-4 words. When you reach the final answer, you persist your answers to a database by calling `persist_answers` function only once. After that, you output "FINISH".
""".strip()

def make_mhqa_agent(example):
    tools = [
        make_answer_sub_question(example),
        make_persist_tool(),
    ]
    prefix_messages = [
        ChatMessage(content=SYSTEM_PROMPT_STR, role="system"),
    ]
    return OpenAIAgent.from_tools(
        tools, 
        prefix_messages=prefix_messages,
        # verbose=True,
    )

In [12]:
def format_question(example):
    sub_questions = '\n'.join([f"\t{i+1}.{item['question']}" for i, item in enumerate(example['question_decomposition'])])
    return f"{example['question']}\n\n{sub_questions}"

In [13]:
def mhqa(example):
    """Multi-hop question answering."""
    agent = make_mhqa_agent(example)
    response = agent.query(format_question(example)).response
    output = agent.agent_worker._get_tools(None)[1]._get_output()
    if output is None:
        example['predicted_answer'] = None
        example['predicted_sub_answers'] = None
    else:
        example['predicted_answer'] = output.get("final_answer")
        example['predicted_sub_answers'] = output.get('sub_answers')
    return example

In [17]:
def is_correct(example):
    pred, ref = example['predicted_answer'], example['answer']
    return pred is not None and ((pred in ref) or fuzzy_match(pred, ref))

In [None]:
# i = 0
# example = df.iloc[i]
# example_ = mhqa(example)
# print("Reference answer:", example['answer'])
# print("Predictions:")
# print(example_['predicted_answer'])
# print(example_['predicted_sub_answers'])

In [15]:
df = df.apply(mhqa, axis=1)
df.to_json('/Users/bdsaglam/knowledge/bellek/data/generated/musique-kg-llm/train/baseline-agent.jsonl', orient='records', lines=True)

FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
I'm sorry, but there is no information available about the city where Robinella was formed.
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH
FINISH


In [18]:
df['is_correct'] = df.apply(is_correct, axis=1)
df['is_correct'].mean()

0.54

In [None]:
df[['id', 'question', 'answer', 'predicted_answer', 'predicted_sub_answers', 'is_correct']]

Unnamed: 0,id,question,answer,predicted_answer,predicted_sub_answers,is_correct
0,2hop__128801_205185,What county is the town where KNFM is licensed...,Midland County,,"[Senmonorom, ]",True
1,2hop__719559_217649,What's the record label of the artist who put ...,Warner Bros.,Columbia Records,"[Paul Simon, Columbia Records]",False
2,2hop__128806_205185,What region is the town where KQRX is liscense...,Midland County,,"[Midland, Texas]",True
3,2hop__837090_278127,What is the record label of the Do It Again pe...,Roc-A-Fella Records,Roc-A-Fella Records,"[Jay-Z, Roc-A-Fella Records]",True
4,2hop__128895_11424,How many households were there in the town WPU...,15504,15504,"[Atlantic City, New Jersey, 15,504]",True
