# MuSiQue single-hop baseline

In [None]:
#|default_exp musique.singlehop

In [None]:
#|hide
from fastcore.test import *
from nbdev.showdoc import *

In [None]:
#|export

from typing import Callable

import pandas as pd
from tqdm.auto import tqdm

from bellek.musique.eval import compute_scores_dataframe, aggregate_scores

tqdm.pandas()

In [None]:
#|export

def make_docs(example):
    """Yield one document dict per paragraph of a MuSiQue example.

    Each document has the keys ``text`` (the paragraph title as a ``# ``
    heading followed by the paragraph body), ``is_supporting``, ``parent_id``
    (the id of `example`) and ``idx`` (the paragraph's own index).
    """
    for paragraph in example["paragraphs"]:
        position = paragraph["idx"]
        heading, content = paragraph["title"], paragraph["paragraph_text"]
        yield {
            "text": f"# {heading}\n{content}",
            "is_supporting": paragraph["is_supporting"],
            "parent_id": example["id"],
            "idx": position,
        }

In [None]:
#|export

class BaselineSingleHop:
    """Single-hop QA baseline: retrieve once with the question, then answer.

    `retrieval_func` is called as ``retrieval_func(docs, query)`` and must
    return the documents (dicts with a ``text`` key) to use as context.
    `qa_func` is called as ``qa_func(context=..., question=...)`` and must
    return an object whose ``.get("answer")`` gives the predicted answer.
    """

    def __init__(self, qa_func, retrieval_func):
        self.qa_func = qa_func
        self.retrieval_func = retrieval_func

    def _call(self, example) -> dict:
        """Answer `example` and return ``{'answer': ..., 'hops': [hop]}``.

        The single hop record keeps the question, the query, the retrieved
        documents, the joined context, the answer and the raw QA result.
        """
        docs = list(make_docs(example))
        question = example["question"]
        # Single hop: the question itself is used as the retrieval query.
        query = question
        retrieved_docs = self.retrieval_func(docs, query)
        # Documents are separated by a blank line in the prompt context.
        context = "\n\n".join(doc['text'] for doc in retrieved_docs)
        qa_result = self.qa_func(context=context, question=question)
        answer = qa_result.get("answer")
        hop = {
            "question": question,
            "query" : query,
            "retrieved_docs": retrieved_docs,
            "context": context,
            "answer": answer,
            "qa_result": qa_result,
        }
        return {'answer': answer, 'hops': [hop]}

    def __call__(self, example, ignore_errors: bool = False) -> dict:
        """Answer `example`, optionally turning failures into an "N/A" answer.

        `example` is a mapping-like record (a dict or a DataFrame row). With
        ``ignore_errors=False`` any exception propagates unchanged. With
        ``ignore_errors=True`` the failure is printed and a placeholder
        ``{'answer': 'N/A', 'hops': [{'error': <message>}]}`` is returned.
        """
        try:
            return self._call(example)
        except Exception as exc:
            if not ignore_errors:
                raise
            # Look the id up defensively: an example without an "id" must not
            # raise a second error here and defeat ignore_errors.
            example_id = example.get("id", "<unknown>")
            print(f"Failed to answer the question {example_id}\n{exc}")
            return dict(answer="N/A", hops=[{'error': str(exc)}])

In [None]:
# |export


def benchmark(
    dataf: pd.DataFrame,
    qa_func: Callable,
    retrieval_func: Callable,
    ignore_errors: bool = False,
) -> tuple[pd.DataFrame, dict]:
    """Run the single-hop baseline over every row of `dataf` and score it.

    Each row is answered by a `BaselineSingleHop` built from `qa_func` and
    `retrieval_func`, and gains three columns: ``predicted_answer``,
    ``raw_output`` (the full pipeline output) and ``answers`` (the reference
    answer followed by its aliases). `ignore_errors` is forwarded to the
    pipeline call for every row.

    Returns the frame produced by `compute_scores_dataframe` together with
    the result of `aggregate_scores` on that frame.
    """
    answerer = BaselineSingleHop(qa_func, retrieval_func)

    def annotate(row):
        result = answerer(row, ignore_errors=ignore_errors)
        row["predicted_answer"] = result["answer"]
        row["raw_output"] = result
        row["answers"] = [row["answer"], *row["answer_aliases"]]
        return row

    # progress_apply is the tqdm-instrumented variant of DataFrame.apply.
    annotated = dataf.progress_apply(annotate, axis=1)
    scored = compute_scores_dataframe(annotated)
    return scored, aggregate_scores(scored)

In [None]:
from dotenv import load_dotenv
load_dotenv()

In [None]:
from datasets import load_dataset
# Load the "train" split of the "answerable" configuration and convert it to a
# pandas DataFrame for the cells below.
df = load_dataset("bdsaglam/musique-mini", "answerable", split="train").to_pandas()
print(len(df))
df.head()

300


Unnamed: 0,id,paragraphs,question,question_decomposition,answer,answer_aliases,answerable,n_hops
0,2hop__129127_90527,"[{'idx': 0, 'is_supporting': False, 'paragraph...",What is the average summer temperature in the ...,"[{'answer': 'Missouri', 'id': 129127, 'paragra...",75 ° F,[],True,2
1,2hop__97238_154727,"[{'idx': 0, 'is_supporting': False, 'paragraph...",What year did the war Jameel Sayhood fought in...,"[{'answer': 'Gulf War', 'id': 97238, 'paragrap...",2 August 1990,[],True,2
2,2hop__819850_32467,"[{'idx': 0, 'is_supporting': False, 'paragraph...",How old are some of the private schools in the...,"[{'answer': 'Charleston', 'id': 819850, 'parag...",150 years,[],True,2
3,2hop__71611_90450,"[{'idx': 0, 'is_supporting': False, 'paragraph...",Who was president when the place where the maj...,"[{'answer': 'Iowa', 'id': 71611, 'paragraph_su...",President James K. Polk,[James K. Polk],True,2
4,2hop__612535_47295,"[{'idx': 0, 'is_supporting': False, 'paragraph...",What is the area code for the state where Thom...,"[{'answer': 'Hawaii', 'id': 612535, 'paragraph...",808,[],True,2


In [None]:
from bellek.musique.qa import answer_question_standard

qa_func = answer_question_standard


def retrieval_func(docs, query):
    """Pass-through retriever: return every document, ignoring the query."""
    return docs


pipeline = BaselineSingleHop(qa_func, retrieval_func)

In [None]:
# Answer a single example end to end and compare the prediction with the
# reference answer.
i = 1
example = df.iloc[i].to_dict()
output = pipeline(example)
print("Question:", example['question'])
print("Reference answer:", example['answer'])
print("Predicted answer:", output['answer'])
# 'hops' holds the one hop record: question, query, retrieved docs, context,
# answer and the raw QA result.
print("Reasoning:", output['hops'])

Question: What year did the war Jameel Sayhood fought in start?
Reference answer: 2 August 1990
Predicted answer: 1990
Reasoning: [{'question': 'What year did the war Jameel Sayhood fought in start?', 'query': 'What year did the war Jameel Sayhood fought in start?', 'retrieved_docs': [{'text': "# Muslim Atayev\nMuslim Atayev (June 24, 1973 – January 27, 2005), also known as Emir Sayfullah, was the founder of the militant organization Yarmuk Jamaat, which later became part of the Caucasus Front's Kabardino-Balkarian Sector in the Russian-held Caucasian Muslim state Kabardino-Balkaria of the Second Chechen War. Atayev was an ethnic Balkar and started his military career as a volunteer fighting in Chechnya.", 'is_supporting': False, 'parent_id': '2hop__97238_154727', 'idx': 0}, {'text': '# Warsaw Pact\nFor 36 years, NATO and the Warsaw Pact never directly waged war against each other in Europe; the United States and the Soviet Union and their respective allies implemented strategic polici

In [None]:
# Smoke-test the benchmark on two sampled rows. No random_state is passed to
# sample(), so the selected rows (and therefore the scores) vary between runs.
mdf, scores = benchmark(df.sample(2), qa_func, retrieval_func)
print(scores)
mdf

  0%|          | 0/2 [00:00<?, ?it/s]

{'exact_match': 0.5, 'f1': 0.5, 'fuzzy_match': 0.5}


Unnamed: 0,id,paragraphs,question,question_decomposition,answer,answer_aliases,answerable,n_hops,predicted_answer,raw_output,answers,exact_match,f1,fuzzy_match
73,2hop__458962_37939,"[{'idx': 0, 'is_supporting': False, 'paragraph...",How many TEUs did the location of Villa Bianca...,"[{'answer': 'Thessaloniki', 'id': 458962, 'par...",273282,[],True,2,273282,"{'answer': '273,282', 'hops': [{'question': 'H...","[273,282]",1.0,1.0,1.0
58,2hop__297202_14970,"[{'idx': 0, 'is_supporting': False, 'paragraph...",What is the size of the continent Marian Cove ...,"[{'answer': 'Antarctica', 'id': 297202, 'parag...",5400000,[],True,2,Not provided,"{'answer': 'Not provided', 'hops': [{'question...","[5,400,000]",0.0,0.0,0.0


In [None]:
#|hide
import nbdev; nbdev.nbdev_export()