# MuSiQue single-hop baseline

In [None]:
#|default_exp musique.singlehop

In [None]:
#|hide
from fastcore.test import *
from nbdev.showdoc import *

In [None]:
#|export

from typing import Callable

import pandas as pd
from tqdm.auto import tqdm

from bellek.musique.eval import calculate_metrics, compare_answers

# Register tqdm's `progress_apply` on pandas objects; `benchmark` below calls
# `DataFrame.progress_apply`, which only exists after this call.
tqdm.pandas()

In [None]:
#|export

def make_docs(example):
    """Yield one document dict per paragraph of an example.

    Every yielded dict carries, in this order:

    - ``text``: the paragraph title rendered as a ``# `` heading line,
      followed by a newline and the paragraph body,
    - ``is_supporting``: the paragraph's ``is_supporting`` value,
    - ``parent_id``: the ``id`` of the example the paragraph belongs to,
    - ``idx``: the paragraph's ``idx`` value.

    This is a generator, so ``example`` is not read until iteration starts.
    """
    for paragraph in example["paragraphs"]:
        position = paragraph["idx"]
        heading = paragraph["title"]
        content = paragraph["paragraph_text"]
        supporting = paragraph["is_supporting"]
        yield {
            "text": f"# {heading}\n{content}",
            "is_supporting": supporting,
            "parent_id": example["id"],
            "idx": position,
        }

In [None]:
#|export

class BaselineSingleHop:
    """Single-hop question-answering baseline: retrieve once, then answer.

    Parameters
    ----------
    qa_func:
        Callable invoked as ``qa_func(context=..., question=...)``. Its result
        must support ``.get("answer")`` (for instance a dict).
    retrieval_func:
        Callable invoked as ``retrieval_func(docs, query)``. It returns the
        documents (dicts with a ``text`` key) that make up the context.
    """

    def __init__(self, qa_func: Callable, retrieval_func: Callable):
        self.qa_func = qa_func
        self.retrieval_func = retrieval_func

    @staticmethod
    def _example_id(example):
        """Return ``example["id"]``, or ``"<unknown>"`` when it cannot be read."""
        try:
            return example["id"]
        except (LookupError, TypeError):
            # Missing key (dict / pandas Series) or a non-subscriptable example.
            return "<unknown>"

    def _call(self, example) -> dict:
        """Answer ``example["question"]`` in a single retrieve-then-read hop.

        Returns a dict with the predicted ``answer`` and a one-element ``hops``
        list recording the question, query, retrieved documents, context,
        answer and raw QA result.
        """
        docs = list(make_docs(example))
        question = example["question"]
        # Single-hop baseline: the question itself is used as the retrieval query.
        query = question
        retrieved_docs = self.retrieval_func(docs, query)
        context = "\n\n".join(doc['text'] for doc in retrieved_docs)
        qa_result = self.qa_func(context=context, question=question)
        answer = qa_result.get("answer")
        hop = {
            "question": question,
            "query" : query,
            "retrieved_docs": retrieved_docs,
            "context": context,
            "answer": answer,
            "qa_result": qa_result,
        }
        return {'answer': answer, 'hops': [hop]}

    def __call__(self, example, ignore_errors: bool = False) -> dict:
        """Run the pipeline on ``example``.

        When ``ignore_errors`` is true, any exception raised while answering is
        reported on stdout and replaced by a placeholder output whose answer is
        ``"N/A"`` and whose single hop holds the error message. Otherwise the
        exception propagates unchanged.
        """
        try:
            return self._call(example)
        except Exception as exc:
            if not ignore_errors:
                raise
            # Look the id up defensively: a malformed example (e.g. one without
            # an "id") is a likely cause of the failure, and the error handler
            # must not raise while reporting it.
            example_id = self._example_id(example)
            print(f"Failed to answer the question {example_id}\n{exc}")
            return dict(answer="N/A", hops=[{'error': str(exc)}])

In [None]:
#|export

def benchmark(
    dataf: pd.DataFrame,
    qa_func: Callable,
    retrieval_func: Callable,
    ignore_errors: bool = False,
) -> tuple[pd.DataFrame, dict]:
    """Run the single-hop baseline over every row of ``dataf`` and score it.

    Parameters
    ----------
    dataf:
        One example per row; each row is passed to ``BaselineSingleHop``.
    qa_func, retrieval_func:
        Forwarded to ``BaselineSingleHop``.
    ignore_errors:
        Forwarded to the pipeline call for every row.

    Returns
    -------
    tuple[pd.DataFrame, dict]
        The frame extended with ``predicted_answer`` and ``raw_output`` and then
        passed through ``compare_answers``, plus the result of
        ``calculate_metrics`` with an added ``fuzzy_match`` entry (the mean of
        the frame's ``fuzzy_match`` column).

    Notes
    -----
    Uses ``DataFrame.progress_apply``, so ``tqdm.pandas()`` must have been
    called beforehand.
    """
    pipeline = BaselineSingleHop(qa_func, retrieval_func)

    def process(example):
        output = pipeline(example, ignore_errors=ignore_errors)
        # pandas does not support UDFs that mutate the object handed to
        # `apply`, so extend a copy of the row instead of the row itself.
        example = example.copy()
        example["predicted_answer"] = output['answer']
        example["raw_output"] = output
        return example

    dataf = dataf.progress_apply(process, axis=1)
    dataf = compare_answers(dataf)
    scores = calculate_metrics(dataf)
    # NOTE(review): `fuzzy_match` is presumably added by compare_answers — confirm.
    scores["fuzzy_match"] = dataf["fuzzy_match"].mean()
    return dataf, scores

In [None]:
# Load the evaluation examples from a JSON Lines file (one record per line),
# report how many there are, and preview the first rows.
df = pd.read_json('../data/generated/musique-evaluation/dataset.jsonl', orient='records', lines=True)
print(len(df))
df.head()

100


Unnamed: 0,id,paragraphs,question,question_decomposition,answer,answer_aliases,answerable,answers
0,2hop__131818_161450,"[{'idx': 0, 'title': 'Maria Carrillo High Scho...",Where is the Voshmgir District located?,"[{'id': 131818, 'question': 'Which state is Vo...",in the north-east of the country south of the ...,"[Caspian Sea, in the north-east of the country...",True,"[Caspian Sea, in the north-east of the country..."
1,2hop__711946_269414,"[{'idx': 0, 'title': 'Wild Thing (Tone Lōc son...",What record label is the performer who release...,"[{'id': 711946, 'question': 'All Your Faded Th...",Kill Rock Stars,[Kill Rock Stars],True,[Kill Rock Stars]
2,2hop__311931_417706,"[{'idx': 0, 'title': 'The Main Attraction (alb...",What record label does the performer of Emotio...,"[{'id': 311931, 'question': 'Emotional Rain >>...",Attic Records,"[Attic, Attic Records]",True,"[Attic, Attic Records]"
3,2hop__358582_189042,"[{'idx': 0, 'title': 'The Main Attraction (alb...",What is the record label of the Thrill of a Li...,"[{'id': 358582, 'question': 'Thrill of a Lifet...",New Renaissance Records,[New Renaissance Records],True,[New Renaissance Records]
4,2hop__341176_711757,"[{'idx': 0, 'title': 'Gmina Pabianice', 'parag...",What other district is found in the same count...,"[{'id': 341176, 'question': 'Gmina Stężyca, Lu...",Gmina Ryki,"[Ryki, Gmina Ryki]",True,"[Ryki, Gmina Ryki]"


In [None]:
from bellek.musique.qa import answer_question_standard


def retrieval_func(docs, query):
    """Pass-through retriever: hand every document to the reader unchanged."""
    return docs


# Question-answering backend used by the demo cells that follow.
qa_func = answer_question_standard
pipeline = BaselineSingleHop(qa_func, retrieval_func)

In [None]:
# Smoke test: run the pipeline on a single example and compare the reference
# answer with the prediction.
i = 1
# Convert the row to a plain dict before handing it to the pipeline.
example = df.iloc[i].to_dict()
output = pipeline(example)
print("Question:", example['question'])
print("Reference answer:", example['answer'])
print("Predicted answer:", output['answer'])
# The hops carry the full context string, so this line prints a lot of text.
print("Reasoning:", output['hops'])

Question: What record label is the performer who released All Your Faded Things on?
Reference answer: Kill Rock Stars
Predicted answer: 
Reasoning: [{'question': 'What record label is the performer who released All Your Faded Things on?', 'query': 'What record label is the performer who released All Your Faded Things on?', 'context': '# Wild Thing (Tone Lōc song)\n``Wild Thing \'\'Single by Tone Lōc from the album Lōc - ed After Dark Released January 1989 Recorded Genre Rap rock Length 4: 23 Label Delicious Vinyl Songwriter (s) Anthony Terrell Smith, Matt Dike, Marvin Young Producer (s) Matt Dike, Michael Ross Tone Lōc singles chronology`` Wild Thing\'\' (1989) ``Funky Cold Medina \'\'(1989)`` Wild Thing\'\' (1989) ``Funky Cold Medina \'\'(1989)\n# Jazz Contemporary\nJazz Contemporary is an album by American jazz trumpeter Kenny Dorham featuring performances recorded in 1960 and released on the Time label. The album features the recording debut of pianist Steve Kuhn.\n# It\'s What\'s H

In [None]:
# Smoke test of `benchmark` on two rows.
# NOTE: `df.sample(2)` is called without `random_state`, so the selected rows
# (and therefore the scores) can differ between runs.
mdf, scores = benchmark(df.sample(2), qa_func, retrieval_func)
print(scores)
mdf

  0%|          | 0/2 [00:00<?, ?it/s]

{'exact_match': 0.0, 'f1': 0.0, 'fuzzy_match': 0.5}


Unnamed: 0,id,paragraphs,question,question_decomposition,answer,answer_aliases,answerable,answers,predicted_answer,raw_output,exact_match,fuzzy_match
58,2hop__731584_700117,"[{'idx': 0, 'title': 'KAPE', 'paragraph_text':...",In which county is the city to which KKVU is l...,"[{'id': 731584, 'question': 'KKVU >> licensed ...",Berrien County,[Berrien County],True,[Berrien County],,"{'answer': '', 'hops': [{'question': 'In which...",False,True
8,2hop__271045_68633,"[{'idx': 0, 'title': 'Julio César Benítez', 'p...",Who is the president of the organization Julio...,"[{'id': 271045, 'question': 'Julio Maglione >>...",Thomas Bach,[Thomas Bach],True,[Thomas Bach],International Olympic Committee,"{'answer': 'International Olympic Committee', ...",False,False


In [None]:
#|hide
# Export the cells marked `#|export` to the module named by `#|default_exp`.
import nbdev; nbdev.nbdev_export()