# Multi-hop question answering with agent

In [16]:
from dotenv import load_dotenv
load_dotenv()

True

In [17]:
import nest_asyncio
nest_asyncio.apply()

In [18]:
import os
import warnings
from pathlib import Path

import pandas as pd
import weaviate
from pydantic import BaseModel
from llama_index import Document, ServiceContext
from llama_index.prompts.base import Prompt
from llama_index.prompts.prompt_type import PromptType
from llama_index import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores import WeaviateVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index.llms import ChatMessage, OpenAI
from llama_index.agent import OpenAIAgent
from llama_index.tools.function_tool import FunctionTool
from llama_index.callbacks import CallbackManager, LlamaDebugHandler

from bellek.llama_index.obs import make_phoenix_trace_callback_handler
from bellek.text.utils import fuzzy_match
from bellek.utils import generate_time_id, set_seed

set_seed(42)

In [19]:
phoenix_handler = make_phoenix_trace_callback_handler(Path(f"/tmp/phoenix/thesis-kg-llm/baseline-agent/traces-{generate_time_id()}.jsonl"))
callback_manager = CallbackManager(handlers=[
    phoenix_handler,
    # LlamaDebugHandler(print_trace_on_end=True),
])

In [20]:
llm = OpenAI(
    model="gpt-3.5-turbo", 
    temperature=0,
)
embed_model = HuggingFaceEmbedding("sentence-transformers/all-MiniLM-L6-v2")
service_context = ServiceContext.from_defaults(
    llm=llm, 
    embed_model=embed_model,
    callback_manager=callback_manager
)

In [21]:
ds_df = pd.read_json('../../data/generated/musique-evaluation/dataset.jsonl', orient='records', lines=True)
qd_df = pd.read_json('../../data/generated/musique-evaluation/question-decomposition.jsonl', orient='records', lines=True)
df = pd.merge(ds_df.drop(columns=['question', 'question_decomposition']), qd_df, on='id', suffixes=('', ''))
df.head()

Unnamed: 0,id,paragraphs,answer,answer_aliases,answerable,question,question_decomposition
0,2hop__128801_205185,"[{'idx': 0, 'title': 'Pama, Burkina Faso', 'pa...",Midland County,"[Midland County, Texas]",True,What county is the town where KNFM is licensed...,[{'question': 'Which town is KNFM licensed in?...
1,2hop__719559_217649,"[{'idx': 0, 'title': 'Antoine Marchand', 'para...",Warner Bros.,[],True,What's the record label of the artist who put ...,[{'question': 'Who is the artist behind the so...
2,2hop__128806_205185,"[{'idx': 0, 'title': 'Spanish Town', 'paragrap...",Midland County,"[Midland County, Texas]",True,What region is the town where KQRX is liscense...,[{'question': 'In which town is KQRX licensed?...
3,2hop__837090_278127,"[{'idx': 0, 'title': 'The Opening (album)', 'p...",Roc-A-Fella Records,[],True,What is the record label of the Do It Again pe...,[{'question': 'Who is the performer of the son...
4,2hop__128895_11424,"[{'idx': 0, 'title': 'Ehrhardt, South Carolina...",15504,[],True,How many households were there in the town WPU...,[{'question': 'In which town is WPUR licensed?...


In [22]:
def make_docs(example, only_supporting=False):
    ps = example["paragraphs"]
    for p in ps:
        if only_supporting and not p["is_supporting"]:
            continue
        idx = p["idx"]
        title = p["title"]
        body = p["paragraph_text"]
        is_supporting = p["is_supporting"]
        text = f"# {title}\n{body}"
        yield Document(
            text=text,
            metadata={"parent_id": example["id"], "idx": idx, "is_supporting": is_supporting},
            excluded_llm_metadata_keys=["parent_id", "idx", "is_supporting"],
        )

In [23]:
weaviate_client = weaviate.Client(os.getenv('WEAVIATE_CLUSTER_URL'))
vector_store = WeaviateVectorStore(weaviate_client=weaviate_client)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

def make_query_engine(example):
    documents = list(make_docs(example, only_supporting=False))
    vector_index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    query_engine = vector_index.as_query_engine(
        service_context=service_context,
        vector_store_query_mode="hybrid",
        alpha=0.6,
        similarity_top_k=3,
    )
    return query_engine

In [24]:
def make_answer_sub_question(example):
    def answer_sub_question(question: str) -> str:
        """Use this tool to answer a sub-question."""
        return make_query_engine(example).query(question).response
    return FunctionTool.from_defaults(fn=answer_sub_question)

In [25]:
def make_persist_tool():
    outputs = []
    def persist_answers(sub_answers: list[str], final_answer: str) -> str:
        """Save the sub-answers and final answer to a database at the end."""
        outputs.append(dict(final_answer=final_answer, sub_answers=sub_answers))
        return "SUCCESS"

    tool =  FunctionTool.from_defaults(fn=persist_answers)
    def _get_output():
        if outputs:
            return outputs[-1]
        else:
            return None
    tool._get_output = _get_output
    return tool

In [26]:
SYSTEM_PROMPT_STR = """
You are helpful multi-hop question answering assistant that answers the given question by answering each sub-question. You must use `answer_sub_question` tool to answer each sub-question. After you get the answer for the first question, you reformulate the second sub-question and repeat the same procedure. 
Your answers must be in 2-4 words. When you reach the final answer, you persist your answers to a database by calling `persist_answers` function only once. After that, you output "FINISH".
""".strip()

def make_mhqa_agent(example):
    tools = [
        make_answer_sub_question(example),
        make_persist_tool(),
    ]
    prefix_messages = [
        ChatMessage(content=SYSTEM_PROMPT_STR, role="system"),
    ]
    return OpenAIAgent.from_tools(
        tools, 
        prefix_messages=prefix_messages,
        # verbose=True,
    )

In [27]:
def format_question(example):
    sub_questions = '\n'.join([f"\t{i+1}.{item['question']}" for i, item in enumerate(example['question_decomposition'])])
    return f"{example['question']}\n\n{sub_questions}"

In [28]:
def mhqa(example):
    """Multi-hop question answering."""
    agent = make_mhqa_agent(example)
    response = agent.query(format_question(example)).response
    output = agent.agent_worker._get_tools(None)[1]._get_output()
    if output is None:
        example['predicted_answer'] = None
        example['predicted_sub_answers'] = None
    else:
        example['predicted_answer'] = output.get("final_answer")
        example['predicted_sub_answers'] = output.get('sub_answers')
    return example

In [29]:
# i = 0
# example = df.iloc[i]
# example_ = mhqa(example)
# print("Reference answer:", example['answer'])
# print("Predictions:")
# print(example_['predicted_answer'])
# print(example_['predicted_sub_answers'])

In [30]:
df = df.apply(mhqa, axis=1)

In [31]:
df.to_json('../../data/generated/musique-evaluation/baseline-agent.jsonl', orient='records', lines=True)

In [32]:
def _exact_match(example):
    pred, ref = example['predicted_answer'], example['answer']
    return pred is not None and pred == ref

def _fuzzy_match(example):
    pred, ref = example['predicted_answer'], example['answer']
    return pred is not None and ((pred in ref) or (ref in pred) or fuzzy_match(pred, ref))

In [33]:
df["exact_match"] = df.apply(_exact_match, axis=1)
df["fuzzy_match"] = df.apply(_fuzzy_match, axis=1)

# log scores
scores = {
    "exact_match": df["exact_match"].mean(),
    "fuzzy_match": df["fuzzy_match"].mean(),
}
print(scores)

{'exact_match': 0.43, 'fuzzy_match': 0.63}


In [36]:
df[['id', 'question', 'answer', 'predicted_answer', 'predicted_sub_answers', 'fuzzy_match']]

Unnamed: 0,id,question,answer,predicted_answer,predicted_sub_answers,fuzzy_match
0,2hop__128801_205185,What county is the town where KNFM is licensed...,Midland County,Ector County,"[Midland-Odessa, Ector County]",False
1,2hop__719559_217649,What's the record label of the artist who put ...,Warner Bros.,Antoine Marchand,"[Paul Simon, Antoine Marchand]",False
2,2hop__128806_205185,What region is the town where KQRX is liscense...,Midland County,Texas,[Midland],False
3,2hop__837090_278127,What is the record label of the Do It Again pe...,Roc-A-Fella Records,Roc-A-Fella Records,"[Jay-Z, Roc-A-Fella Records]",True
4,2hop__128895_11424,How many households were there in the town WPU...,15504,,,False
...,...,...,...,...,...,...
95,2hop__651488_94210,Who was the place where Pieta is located desig...,Giorgio Vasari,Pietro Belluschi,"[Pietro Belluschi, Richard Rogers]",False
96,2hop__362083_467995,What is the record label of the performer of M...,RCA Records,RCA Records,"[Christina Aguilera, RCA Records]",True
97,2hop__525596_543261,The Roman Catholic Diocese of Jim Norton's bir...,Delaware,Delaware,[Wilmington is located in Delaware.],True
98,2hop__394596_8607,What metro area is JAKAZiD's birthplace a part...,South Hampshire,South Hampshire,"[Portsmouth, England, South Hampshire]",True
