In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
import json
import pandas as pd
from pathlib import Path
from copy import deepcopy
from functools import partial

from bellem.utils import set_seed, jprint
from bellem.musique.singlehop import benchmark

set_seed(89)

In [3]:
from tqdm.auto import tqdm
tqdm.pandas()

In [4]:
df = pd.read_json('../../data/generated/musique-common/base-dataset-train.jsonl', orient='records', lines=True)
df = df.iloc[:10].copy()

print(df.shape)
df.head()

(10, 8)


Unnamed: 0,id,paragraphs,question,question_decomposition,answer,answer_aliases,answerable,answers
0,2hop__128801_205185,"[{'idx': 0, 'title': 'Pama, Burkina Faso', 'pa...",What county is the town where KNFM is licensed...,"[{'id': 128801, 'question': 'What town is KNFM...",Midland County,"[Midland County, Midland County, Texas]",True,"[Midland County, Midland County, Texas]"
1,2hop__719559_217649,"[{'idx': 0, 'title': 'Antoine Marchand', 'para...",What's the record label of the artist who put ...,"[{'id': 719559, 'question': 'Me and Julio Down...",Warner Bros.,[Warner Bros.],True,[Warner Bros.]
2,2hop__128806_205185,"[{'idx': 0, 'title': 'Spanish Town', 'paragrap...",What region is the town where KQRX is liscense...,"[{'id': 128806, 'question': 'What town is KQRX...",Midland County,"[Midland County, Midland County, Texas]",True,"[Midland County, Midland County, Texas]"
3,2hop__837090_278127,"[{'idx': 0, 'title': 'The Opening (album)', 'p...",What is the record label of the Do It Again pe...,"[{'id': 837090, 'question': 'Do It Again >> pe...",Roc-A-Fella Records,[Roc-A-Fella Records],True,[Roc-A-Fella Records]
4,2hop__128895_11424,"[{'idx': 0, 'title': 'Ehrhardt, South Carolina...",How many households were there in the town WPU...,"[{'id': 128895, 'question': 'What town is WPUR...",15504,"[15,504]",True,"[15,504]"


In [5]:
perfect_retrieval_func = lambda docs, query: [doc for doc in docs if doc['is_supporting']]

In [6]:
SYSTEM_PROMPTS = [
    {
        "description": "Baseline",
        "prompt": """You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the context information given, without drawing on prior knowledge.

Before answering the question, first, you extract relevant entity-relation-entity triplets from the context. Then, you answer the question based on the triplets.""".strip(),
    },
    {
        "description": "Enhanced Entity Extraction for Q&A",
        "prompt": "As a sophisticated question-answering system, your primary task is to deliver precise and trustworthy answers. Begin by analyzing the provided text to identify and extract key entity-relation-entity triplets. Use these triplets exclusively to construct your response to the question, ensuring that your answers are based strictly on the extracted information and not on external knowledge.",
    },
    {
        "description": "Context-focused Q&A System",
        "prompt": "You are a reliable question-answering model designed to provide accurate responses based on specific text input. First, process the given context to systematically extract entity-relation-entity triplets. Subsequently, utilize these triplets to answer questions directly related to the context, avoiding the use of any information not explicitly mentioned in the text.",
    },
    {
        "description": "Triplets-based Answer Generation",
        "prompt": "As an advanced question-answering system, you are expected to offer precise answers by strictly adhering to the information provided in the context. Start by identifying entity-relation-entity triplets within the text. These triplets will form the basis of your answers, ensuring that all responses are directly derived from the text and do not incorporate any prior knowledge.",
    },
    {
        "description": "Entity-Relation Analysis for Accurate Q&A",
        "prompt": "Your role as a question-answering system is to provide reliable and exact answers by analyzing textual content. Initially, dissect the context to identify all relevant entity-relation-entity triplets. Answer the posed questions by referencing these triplets, maintaining a strict focus on the extracted data without referencing external information.",
    },
    {
        "description": "Focused Entity Extraction for Contextual Answers",
        "prompt": "As a high-performing question-answering interface, your task is to process textual content meticulously to identify entity-relation-entity triplets. Utilize these triplets to frame your responses to ensure that your answers remain grounded in the provided context and are independent of any pre-existing knowledge.",
    },
    {
        "description": "Precision-focused Triplet Extraction Q&A",
        "prompt": "You are an advanced question-answering system designed to provide precise responses. Start by pinpointing and extracting only the most essential entity-relation-entity triplets from the provided context. Use these pivotal triplets as the sole basis for your answers, ensuring a direct and focused response that does not incorporate external knowledge.",
    },
    {
        "description": "Key Triplet Analysis for Direct Q&A",
        "prompt": "As a specialized question-answering system, your role is to deliver highly accurate answers by first identifying key entity-relation-entity triplets within the text. Focus on these core triplets to formulate your answers, ensuring they are tightly aligned with the context's specific details without referencing any outside information.",
    },
    {
        "description": "Strategic Entity-Relation Extraction for Enhanced Q&A",
        "prompt": "Operate as a precise question-answering model, focusing on extracting strategic entity-relation-entity triplets from the text. Answer questions using these selected triplets to maintain a strong alignment with the provided context, ensuring your responses are detailed, accurate, and confined to the given information.",
    },
]


In [7]:
SYSTEM_PROMPT_SUFFIX = """
# Output format
Triplets: [A list of entity-relation-entity triplets extracted from the context.]
Answer: [answer in 2-4 words]
""".strip()

In [8]:
EXAMPLES = [
    {
        "context": """Glenhis Hernández (born 7 October 1990 in Havana) is a taekwondo practitioner from Cuba. She was the 2013 World
Champion in middleweight.

The current mayor of Havana ("President of the People's Power Provincial Assembly") is Marta Hernández Romero, she
was elected on March 5, 2011.""",
        "question": "Who is the current mayor of Havana?",
        "generation": """Triplets: 
Glenhis Hernández | birth place | Havana
Marta Hernández Romero | serves as | mayor of Havana

Answer: Marta Hernández Romero""",
    }
]

In [9]:
import openai

USER_PROMPT = """The context information is provided below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question.
{question}
"""

def answer_question_cte(
    context: str,
    question: str,
    model_name: str = "gpt-3.5-turbo",
    completion_kwargs: dict | None = None,
    client=None,
    system_prompt: str = "",
    examples: list = EXAMPLES,
) -> dict:
    if client is None:
        client = openai.Client()

    completion_kwargs = completion_kwargs or {}
    
    # Prepare the messages
    messages = [
        {
            "role": "system",
            "content": system_prompt,
        },
    ]
    for example in examples:
        messages.append(
            {
                "role": "user",
                "content": USER_PROMPT.format(context=example["context"], question=example["question"]),
            }
        )
        messages.append(
            {
                "role": "assistant",
                "content": example["generation"],
            }
        )
    messages.append(
        {
            "role": "user",
            "content": USER_PROMPT.format(context=context, question=question),
        },
    )
    
    # Generate the response
    chat_completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
        **completion_kwargs,
    )
    generation = chat_completion.choices[0].message.content
    
    # Parse the response
    answer = ""
    triplets = []
    for line in generation.splitlines():
        if line.startswith("Answer:"):
            answer = line.split("Answer:")[1].strip()
        elif "|" in line:
            triplets.append(line.strip())
    return dict(triplets=triplets, answer=answer, generation=generation)

In [10]:
N_RUNS = 3

In [None]:
results = []

for system_prompt_config in SYSTEM_PROMPTS:
    prompt_name = system_prompt_config["description"]
    system_prompt = system_prompt_config["prompt"] + "\n" + SYSTEM_PROMPT_SUFFIX
    qa_func = partial(answer_question_cte, system_prompt=system_prompt)
    for i in range(1, N_RUNS+1):
        df_cte, scores = benchmark(df, qa_func, perfect_retrieval_func, ignore_errors=False)
        results.append({**scores, 'prompt': prompt_name, "run": i})

In [17]:
report_df = pd.DataFrame.from_records(results, columns=['prompt', 'run', 'exact_match', 'f1'])
report_df.drop(columns=['run']).groupby(['prompt']).agg(['mean', 'std', 'min', 'max'])

Unnamed: 0_level_0,exact_match,exact_match,exact_match,exact_match,f1,f1,f1,f1
Unnamed: 0_level_1,mean,std,min,max,mean,std,min,max
prompt,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Baseline,0.8,0.0,0.8,0.8,0.8,0.0,0.8,0.8
Context-focused Q&A System,0.766667,0.057735,0.7,0.8,0.788889,0.019245,0.766667,0.8
Enhanced Entity Extraction for Q&A,0.8,0.0,0.8,0.8,0.8,0.0,0.8,0.8
Entity-Relation Analysis for Accurate Q&A,0.766667,0.057735,0.7,0.8,0.766667,0.057735,0.7,0.8
Focused Entity Extraction for Contextual Answers,0.733333,0.057735,0.7,0.8,0.755556,0.050918,0.7,0.8
Key Triplet Analysis for Direct Q&A,0.766667,0.057735,0.7,0.8,0.766667,0.057735,0.7,0.8
Precision-focused Triplet Extraction Q&A,0.7,0.0,0.7,0.7,0.75,0.0,0.75,0.75
Strategic Entity-Relation Extraction for Enhanced Q&A,0.766667,0.057735,0.7,0.8,0.775,0.043301,0.725,0.8
Triplets-based Answer Generation,0.733333,0.057735,0.7,0.8,0.755556,0.050918,0.7,0.8


In [12]:
print(report_df.drop(columns=['run']).groupby(['prompt']).mean().to_markdown())

| prompt                                                |   exact_match |       f1 |
|:------------------------------------------------------|--------------:|---------:|
| Baseline                                              |      0.8      | 0.8      |
| Context-focused Q&A System                            |      0.766667 | 0.788889 |
| Enhanced Entity Extraction for Q&A                    |      0.8      | 0.8      |
| Entity-Relation Analysis for Accurate Q&A             |      0.766667 | 0.766667 |
| Focused Entity Extraction for Contextual Answers      |      0.733333 | 0.755556 |
| Key Triplet Analysis for Direct Q&A                   |      0.766667 | 0.766667 |
| Precision-focused Triplet Extraction Q&A              |      0.7      | 0.75     |
| Strategic Entity-Relation Extraction for Enhanced Q&A |      0.766667 | 0.775    |
| Triplets-based Answer Generation                      |      0.733333 | 0.755556 |


## Inspect

In [13]:
fail_mask = ~(df_cte['fuzzy_match'])

In [14]:
df_cte.loc[fail_mask]['predicted_answer']

1    Columbia Records
7          Hear Music
Name: predicted_answer, dtype: object

In [15]:
i = 3
row = df_cte.loc[fail_mask].iloc[i]

print("="*80)
print(row['question'])
print(row['answers'])

print("="*80)
jprint(row['raw_output'])

IndexError: single positional indexer is out-of-bounds