# Question answering with an LLM

In [None]:
#|default_exp musique.qa

In [None]:
#|hide
from fastcore.test import *
from nbdev.showdoc import *

In [None]:
#|hide
from dotenv import load_dotenv

load_dotenv()

True

In [None]:
#|export
from typing import Callable

import openai

from bellem.logging import get_logger

log = get_logger(__name__)

In [None]:
#|export

# Chat model the QA functions use when the caller does not pass `model_name`.
DEFAULT_MODEL = "gpt-3.5-turbo"
# Keyword arguments forwarded to the chat-completion request when the caller
# does not pass `completion_kwargs`.
DEFAULT_COMPLETION_KWARGS = {"temperature": 0.1}

In [None]:
# Uncomment to override the default model for this notebook session
# (this cell is not exported, so the library default is unaffected):
# DEFAULT_MODEL = "gpt-4o"

# Sample 2-hop record used by the demo cells in this notebook.
# `context` holds two titled paragraphs separated by "# <title>" lines;
# `answers` is presumably the list of accepted gold answers (verify against the dataset).
TEST_EXAMPLE = {
    "id": "2hop__834974_332063",
    "context": "# N. Monroe Marshall\nNathaniel Monroe Marshall (June 13, 1854 Schuyler Falls, Clinton County, New York \u2013 February 16, 1935 Malone, Franklin County, New York) was an American banker and politician.\n# Perry Township, Clinton County, Indiana\nPerry Township is one of fourteen townships in Clinton County, Indiana. As of the 2010 census, its population was 1,459 and it contained 606 housing units. The township was named for Oliver Hazard Perry, an American naval officer in the War of 1812.",
    "question": "Which region shares border with one where Perry Township is located?",
    "answers": ["Franklin County"],
}

In [None]:
#|export

DEFAULT_USER_PROMPT_TEMPLATE = """The context information is provided below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question.
{question}
"""


def make_qa_func(
    system_prompt: str,
    user_prompt_template: str = DEFAULT_USER_PROMPT_TEMPLATE,
    few_shot_examples: list[dict] | None = None,
):
    few_shot_examples = few_shot_examples or []

    def answer_question(
        context: str,
        question: str,
        model_name: str = DEFAULT_MODEL,
        completion_kwargs: dict | None = None,
        client=None,
    ) -> dict:
        if client is None:
            client = openai.Client()

        if completion_kwargs is None:
            completion_kwargs = DEFAULT_COMPLETION_KWARGS

        # Prepare the messages
        few_shot_messages = []
        for example in few_shot_examples:
            few_shot_messages.extend(
                [
                    {
                        "role": "user",
                        "content": user_prompt_template.format(
                            context=example["context"],
                            question=example["question"],
                        ),
                    },
                    {
                        "role": "assistant",
                        "content": example["generation"],
                    },
                ]
            )
        messages = [
            {"role": "system", "content": system_prompt},
            *few_shot_messages,
            {
                "role": "user",
                "content": user_prompt_template.format(context=context, question=question),
            },
        ]
        chat_completion = client.chat.completions.create(
            model=model_name,
            messages=messages,
            **completion_kwargs,
        )
        generation = chat_completion.choices[0].message.content
        parts = generation.split("Answer:")
        if len(parts) < 2:
            return dict(answer="", generation=generation)
        answer = parts[1].strip()
        return dict(answer=answer, generation=generation)

    return answer_question

### Standard prompt

In [None]:
#|export

# System prompt for the "standard" technique: the model is asked to reply with
# only an `Answer:` line, which is the marker the QA function splits on.
# The surrounding blank lines are removed by `.strip()`.
SYSTEM_PROMPT_STANDARD = """
You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the context information given, without drawing on prior knowledge. 

# Output format
Answer: [answer in least number of words possible]
""".strip()


# QA function for the standard prompt, with the default user template and no
# few-shot examples.
answer_question_standard = make_qa_func(
    system_prompt=SYSTEM_PROMPT_STANDARD,
)

In [None]:
# Try the standard prompt on the sample record (this calls the OpenAI API):
# print the raw model output, then the answer parsed from it.
result = answer_question_standard(
    context=TEST_EXAMPLE["context"],
    question=TEST_EXAMPLE["question"],
)
print(result["generation"])
print(result["answer"])

Answer: Franklin County, New York
Franklin County, New York


### Chain-of-thought prompt

In [None]:
#|export

# System prompt for chain-of-thought: the model must write a `Reasoning:`
# section before the final `Answer:` line.
# NOTE(review): unlike SYSTEM_PROMPT_STANDARD and SYSTEM_PROMPT_CTE this literal
# is not `.strip()`-ed, so the prompt ends with a newline - confirm whether
# that difference is intentional.
SYSTEM_PROMPT_COT = """You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the context information given, without drawing on prior knowledge. Always provide clear and logical step-by-step reasoning in your response.

# Output format
Reasoning: [Step-by-step reasoning for the answer.]
Answer: [answer in least number of words possible]
"""

# Zero-shot chain-of-thought QA function (no few-shot examples).
answer_question_cot_zs = make_qa_func(
    system_prompt=SYSTEM_PROMPT_COT,
)

In [None]:
# Try zero-shot chain-of-thought on the sample record (this calls the OpenAI
# API): raw output, a separator line, then the parsed answer.
result = answer_question_cot_zs(
    context=TEST_EXAMPLE["context"],
    question=TEST_EXAMPLE["question"],
)
print(result["generation"])
print("=" * 80)
print(result["answer"])

Reasoning: Perry Township is located in Clinton County, Indiana. To determine a region that shares a border with Clinton County, Indiana, we need to identify neighboring counties. Clinton County, Indiana is bordered by Carroll County to the north, Howard County to the northeast, Tipton County to the east, Hamilton County to the south, Boone County to the west, and Montgomery County to the northwest. Therefore, a region that shares a border with a county where Perry Township is located is Hamilton County.

Answer: Hamilton County
Hamilton County


In [None]:
#|export

# Few-shot demonstrations for the chain-of-thought prompt. Each record supplies
# the `context` and `question` for a user turn and the `generation` replayed as
# the assistant turn, written in the Reasoning/Answer format requested by
# SYSTEM_PROMPT_COT. `id` is not read by the QA function.
FEW_SHOT_EXAMPLES_COT = [
    {
        "id": "2hop__784447_126070",
        "context": 'Glenhis Hern\u00e1ndez (born 7 October 1990 in Havana) is a taekwondo practitioner from Cuba. She was the 2013 World\nChampion in middleweight.\n\nThe current mayor of Havana ("President of the People\'s Power Provincial Assembly") is Marta Hern\u00e1ndez Romero, she\nwas elected on March 5, 2011.',
        "question": "Who is the current mayor of the city Glenhis Hern\u00e1ndez was born?",
        "generation": "Reasoning:\n1. Glenhis Hernández was born in Havana, as mentioned in the context.\n2. The current mayor of Havana mentioned in the context is Marta Hernández Romero.\n3. Therefore, the current mayor of the city where Glenhis Hernández was born is Marta Hernández Romero.\n\nAnswer: Marta Hernández Romero",
    },
    {
        "id": "2hop__823584_776926",
        "context": '# Rotst\u00f6ckli\nThe Rotst\u00f6ckli (2,901 m) is a peak of the Urner Alps below the Titlis, on the border between the Swiss cantons of Obwalden and Nidwalden. It is Nidwalden\'s highest point. The summit is split between the municipalities of Engelberg (Obwalden) and Wolfenschiessen (Nidwalden).\n# Uri Alps\nThe Uri Alps (also known as "Urner Alps", ) are a mountain range in Central Switzerland and part of the Western Alps. They extend into the cantons of Obwalden, Valais, Bern, Uri and Nidwalden and are bordered by the Bernese Alps (Grimsel Pass) and the Emmental Alps to the west (the four lakes: Lungerersee, Sarnersee, Wichelsee, and Alpnachersee), the Schwyzer Alps to the north (Lake Lucerne), the Lepontine Alps to the south (the valley of Urseren with Andermatt) and the Glarus Alps to the east (Reuss).',
        "question": "What area contains the region that encompasses Rotst\u00f6ckli?",
        "generation": "Reasoning:\n- The context indicates that the Rotstöckli is a peak within the Urner Alps.\n- It further describes the Urner Alps as part of the Western Alps, a larger mountain range.\n- Therefore, the larger area that contains the region encompassing the Rotstöckli is the Western Alps, as deduced from the hierarchical geographical categorization provided.\n\nAnswer: Western Alps",
    },
]

# Few-shot chain-of-thought QA function: same system prompt as the zero-shot
# variant, with the demonstrations above inserted before the real question.
answer_question_cot_fs = make_qa_func(
    system_prompt=SYSTEM_PROMPT_COT,
    few_shot_examples=FEW_SHOT_EXAMPLES_COT,
)

In [None]:
# Try few-shot chain-of-thought on the sample record (this calls the OpenAI
# API): raw output, a separator line, then the parsed answer.
result = answer_question_cot_fs(
    context=TEST_EXAMPLE["context"],
    question=TEST_EXAMPLE["question"],
)
print(result["generation"])
print("=" * 80)
print(result["answer"])

Reasoning:
- Perry Township is located in Clinton County, Indiana.
- To determine a region that shares a border with Clinton County, Indiana, we need to identify neighboring regions.
- Based on the context, since Nathaniel Monroe Marshall was born in Schuyler Falls, Clinton County, New York, we can infer that Clinton County, New York is a region that shares a border with Clinton County, Indiana.

Answer: Clinton County, New York
Clinton County, New York


### Connect-the-Entities prompt

In [None]:
# |export

# System prompt for the Connect-the-Entities technique: the model first lists
# entity-relation-entity triplets taken from the context (`Triplets:`) and then
# gives the final `Answer:` line. Surrounding blank lines are removed by `.strip()`.
SYSTEM_PROMPT_CTE = """
You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the context information given, without drawing on prior knowledge.

Before answering the question, first, you extract relevant entity-relation-entity triplets from the context. Then, you answer the question based on the triplets.

# Output format
Triplets: [A list of entity-relation-entity triplets extracted from the context.]
Answer: [answer in least number of words possible]
""".strip()

# Zero-shot Connect-the-Entities QA function (no few-shot examples).
answer_question_cte_zs = make_qa_func(
    system_prompt=SYSTEM_PROMPT_CTE,
)

In [None]:
# Try zero-shot Connect-the-Entities on the sample record (this calls the
# OpenAI API): raw output, a separator line, then the parsed answer.
result = answer_question_cte_zs(
    context=TEST_EXAMPLE["context"],
    question=TEST_EXAMPLE["question"],
)
print(result["generation"])
print("=" * 80)
print(result["answer"])

Triplets:
1. Perry Township - located in - Clinton County, Indiana
2. Clinton County, Indiana - shares border with - another region

Answer: Clinton County, Indiana shares a border with another region.
Clinton County, Indiana shares a border with another region.


In [None]:
#|export

# Few-shot demonstrations for the Connect-the-Entities prompt. Each record
# supplies the `context` and `question` for a user turn and the `generation`
# replayed as the assistant turn. `id` is not read by the QA function.
#
# Both generations follow one exact layout, because the model imitates it:
#   "Triplets:" / one "subject | relation | object" row per line / blank line /
#   "Answer: <answer>".
FEW_SHOT_EXAMPLES_CTE = [
    {
        "id": "2hop__784447_126070",
        "context": 'Glenhis Hern\u00e1ndez (born 7 October 1990 in Havana) is a taekwondo practitioner from Cuba. She was the 2013 World\nChampion in middleweight.\n\nThe current mayor of Havana ("President of the People\'s Power Provincial Assembly") is Marta Hern\u00e1ndez Romero, she\nwas elected on March 5, 2011.',
        "question": "Who is the current mayor of the city Glenhis Hern\u00e1ndez was born?",
        "generation": "Triplets:\nGlenhis Hern\u00e1ndez | birth place | Havana\nMarta Hern\u00e1ndez Romero | mayor of | Havana\n\nAnswer: Marta Hern\u00e1ndez Romero",
    },
    {
        "id": "2hop__823584_776926",
        "context": '# Rotst\u00f6ckli\nThe Rotst\u00f6ckli (2,901 m) is a peak of the Urner Alps below the Titlis, on the border between the Swiss cantons of Obwalden and Nidwalden. It is Nidwalden\'s highest point. The summit is split between the municipalities of Engelberg (Obwalden) and Wolfenschiessen (Nidwalden).\n# Uri Alps\nThe Uri Alps (also known as "Urner Alps", ) are a mountain range in Central Switzerland and part of the Western Alps. They extend into the cantons of Obwalden, Valais, Bern, Uri and Nidwalden and are bordered by the Bernese Alps (Grimsel Pass) and the Emmental Alps to the west (the four lakes: Lungerersee, Sarnersee, Wichelsee, and Alpnachersee), the Schwyzer Alps to the north (Lake Lucerne), the Lepontine Alps to the south (the valley of Urseren with Andermatt) and the Glarus Alps to the east (Reuss).',
        "question": "What area contains the region that encompasses Rotst\u00f6ckli?",
        "generation": "Triplets:\nRotst\u00f6ckli | part of | Urner Alps\nUrner Alps | part of | Western Alps\n\nAnswer: Western Alps",
    },
]

# Few-shot Connect-the-Entities QA function: same system prompt as the
# zero-shot variant, with FEW_SHOT_EXAMPLES_CTE replayed before the real question.
answer_question_cte_fs = make_qa_func(
    system_prompt=SYSTEM_PROMPT_CTE,
    few_shot_examples=FEW_SHOT_EXAMPLES_CTE,
)

In [None]:
# Try few-shot Connect-the-Entities on the sample record (this calls the
# OpenAI API): raw output, a separator line, then the parsed answer.
result = answer_question_cte_fs(
    context=TEST_EXAMPLE["context"],
    question=TEST_EXAMPLE["question"],
)
print(result["generation"])
print("=" * 80)
print(result["answer"])

Triplets:
Perry Township | located in | Clinton County, Indiana
Clinton County, Indiana | shares border with | Franklin County, New York

Answer: Franklin County, New York
Franklin County, New York


In [None]:
#|export

def load_qa_func(prompt_technique: str) -> Callable:
    """Return the QA function registered for `prompt_technique`.

    Supported names (case-insensitive):

    - ``"standard"``: answer-only prompt
    - ``"cot-zs"`` / ``"cot-fs"``: chain-of-thought, zero-shot / few-shot
    - ``"cte-zs"`` / ``"cte-fs"``: Connect-the-Entities, zero-shot / few-shot
    - ``"cte"``: alias of ``"cte-fs"``, kept for backward compatibility

    Args:
        prompt_technique: Name of the prompting technique.

    Returns:
        The matching ``answer_question_*`` function.

    Raises:
        ValueError: If the name is not one of the supported techniques.
    """
    # Built at call time so the lookup only needs the module-level QA
    # functions to exist when this function is actually used.
    qa_funcs = {
        "standard": answer_question_standard,
        "cot-zs": answer_question_cot_zs,
        "cot-fs": answer_question_cot_fs,
        "cte-zs": answer_question_cte_zs,
        "cte-fs": answer_question_cte_fs,
        # Historical name: "cte" has always meant the few-shot variant.
        "cte": answer_question_cte_fs,
    }
    prompt_technique = prompt_technique.lower()
    try:
        return qa_funcs[prompt_technique]
    except KeyError:
        raise ValueError(f"Unknown prompt technique: {prompt_technique}") from None

In [None]:
#|hide
# Run nbdev's export step so the cells marked `#|export` are written out as
# library code (target module is set by `#|default_exp` at the top).
import nbdev; nbdev.nbdev_export()