# Question answering with LLMs

In [None]:
#|default_exp musique.qa

In [None]:
#|hide
from fastcore.test import *
from nbdev.showdoc import *

In [None]:
#|hide
from dotenv import load_dotenv

load_dotenv()

True

In [None]:
#|export
import openai

from bellek.logging import get_logger

log = get_logger(__name__)

In [None]:
# |export

# Chat model used by the answer_question_* helpers when no `model_name` is given.
DEFAULT_MODEL = "gpt-3.5-turbo"
# DEFAULT_MODEL = "gpt-4-turbo"

# Keyword arguments forwarded to the chat-completions call when the caller
# does not supply `completion_kwargs`.
DEFAULT_COMPLETION_KWARGS = dict(temperature=0.1)

In [None]:
#|export

# Few-shot demonstrations replayed as user/assistant turns before the real
# question. Each entry carries the prompt inputs ("context", "question") and
# one canned assistant reply per prompting strategy:
#   - "cte_generation": reply in the triplets-then-answer format
#   - "cot_generation": reply in the reasoning-then-answer format
# Every canned reply must end with an "Answer: ..." line: the system prompts
# ask for it and the parsers look for a line starting with "Answer:". A
# demonstration without it teaches the model to omit the answer line.
FEW_SHOT_EXAMPLES = [
    {
        "id": "2hop__784447_126070",
        "context": 'Glenhis Hernández (born 7 October 1990 in Havana) is a taekwondo practitioner from Cuba. She was the 2013 World\nChampion in middleweight.\n\nThe current mayor of Havana ("President of the People\'s Power Provincial Assembly") is Marta Hernández Romero, she\nwas elected on March 5, 2011.',
        "question": "Who is the current mayor of Havana?",
        "cte_generation": "Triplets: \nGlenhis Hernández | birth place | Havana\nMarta Hernández Romero | serves as | mayor of Havana\n\nAnswer: Marta Hernández Romero",
        "cot_generation": (
            "Reasoning:\n"
            "- The context provides that Glenhis Hernández was born in Havana.\n"
            "- The context also specifies that the current mayor of Havana is Marta Hernández Romero, who was elected on March 5, 2011.\n"
            "- Since there is no information indicating a change in mayoral leadership since that election, it can be inferred that Marta Hernández Romero remains the mayor.\n"
            "\n"
            "Answer: Marta Hernández Romero"
        ),
    },
]

In [None]:
# Sample passage passed as `context` by the demo cells in this notebook.
test_context = """Sings America is an album released by David Hasselhoff in August 2004 (see 2004 in music). The album contains covers of songs originally made famous by artists such as Elvis Presley, The Beach Boys, Glen Campbell, Burt Bacharach and Madonna. The German release contains a bonus track, "More Than Words Can Say", which is the only original Hasselhoff composition on the album (written in conjunction with Wade Hubbard and Glenn Morrow)."""

# Sample question passed as `question` by the demo cells in this notebook.
test_question = "Which artist, known for covering songs by The Beach Boys and Glen Campbell in an album from 2004, included his own original song in the German version of this album?"

# Reference answer for `test_question`; not read by any of the visible cells.
test_answer = "David Hasselhoff"

In [None]:
#|export

# User-message template shared by every answer_question_* helper; the
# `{context}` and `{question}` placeholders are filled in with `str.format`.
USER_PROMPT = """The context information is provided below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question.
{question}
"""

In [None]:
#|export

# Example passage; `.strip()` removes the whitespace surrounding the literal
# (the newline after the opening quotes and before the closing ones).
EXAMPLE_CONTEXT = """
Glenhis Hernández (born 7 October 1990 in Havana) is a taekwondo practitioner from Cuba. She was the 2013 World
Champion in middleweight.

The current mayor of Havana ("President of the People's Power Provincial Assembly") is Marta Hernández Romero, she
was elected on March 5, 2011.
""".strip()

# Example question about `EXAMPLE_CONTEXT`.
EXAMPLE_QUESTION = "Who is the current mayor of the city Glenhis Hernández was born?"


### Standard prompt

In [None]:
#|export

# System message for the direct-answer strategy. The "Answer:" marker it asks
# for is the token `answer_question_standard` splits the model reply on.
SYSTEM_PROMPT_STANDARD = """
You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the context information given, without drawing on prior knowledge. 

# Output format
Answer: [answer in 2-4 words]
""".strip()

def answer_question_standard(
    context: str,
    question: str,
    model_name: str = DEFAULT_MODEL,
    completion_kwargs: dict | None = None,
    client=None,
) -> dict:
    """Answer `question` from `context` with a single direct prompt.

    Args:
        context: Passage inserted into `USER_PROMPT`.
        question: Question inserted into `USER_PROMPT`.
        model_name: Model name passed to the chat-completions call.
        completion_kwargs: Extra keyword arguments for
            `client.chat.completions.create`; `DEFAULT_COMPLETION_KWARGS`
            is used when this is `None`.
        client: Object exposing `chat.completions.create`; a fresh
            `openai.Client()` is created when this is `None`.

    Returns:
        A dict with two keys: `generation`, the raw model text (an empty
        string if the model returned no text), and `answer`, the stripped
        text following the first "Answer:" marker (an empty string if the
        marker is absent).
    """
    if client is None:
        client = openai.Client()

    if completion_kwargs is None:
        completion_kwargs = DEFAULT_COMPLETION_KWARGS

    # Prepare the messages
    messages = [
        {
            "role": "system",
            "content": SYSTEM_PROMPT_STANDARD,
        },
        {
            "role": "user",
            "content": USER_PROMPT.format(context=context, question=question),
        },
    ]
    chat_completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
        **completion_kwargs,
    )
    # `content` can be None when the API returns no text; fall back to an
    # empty string so the parsing below cannot raise AttributeError.
    generation = chat_completion.choices[0].message.content or ""

    parts = generation.split("Answer:")
    if len(parts) < 2:
        # The model ignored the output format: report an empty answer but
        # keep the raw text for inspection.
        return dict(answer="", generation=generation)
    return dict(answer=parts[1].strip(), generation=generation)

In [None]:
# Demo: run the direct-answer prompt on the sample passage (calls the API),
# then show the raw generation followed by the parsed answer.
result = answer_question_standard(context=test_context, question=test_question)
for field in ("generation", "answer"):
    print(result[field])

Answer: David Hasselhoff
David Hasselhoff


### Chain-of-thought prompt

In [None]:
# |export

# System message for the chain-of-thought strategy. The "Reasoning:" and
# "Answer:" markers it asks for are the ones `answer_question_cot_fs` parses.
# Unlike the other system prompts in this file, this literal is not
# `.strip()`-ed, so it keeps its trailing newline.
SYSTEM_PROMPT_COT_FS = """You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the context information given, without drawing on prior knowledge. Always provide clear and logical step-by-step reasoning in your response.

# Output format
Reasoning: [Step-by-step reasoning for the answer.]
Answer: [answer in 2-4 words]
"""

def answer_question_cot_fs(
    context: str,
    question: str,
    examples: list[dict] = FEW_SHOT_EXAMPLES,
    model_name: str = DEFAULT_MODEL,
    completion_kwargs: dict | None = None,
    client=None,
) -> dict:
    """Answer `question` from `context` using few-shot chain-of-thought prompting.

    Args:
        context: Passage inserted into `USER_PROMPT`.
        question: Question inserted into `USER_PROMPT`.
        examples: Demonstrations replayed before the real question. Each
            dict must provide `context`, `question` and `cot_generation`.
            The list is only read, never modified.
        model_name: Model name passed to the chat-completions call.
        completion_kwargs: Extra keyword arguments for
            `client.chat.completions.create`; `DEFAULT_COMPLETION_KWARGS`
            is used when this is `None`.
        client: Object exposing `chat.completions.create`; a fresh
            `openai.Client()` is created when this is `None`.

    Returns:
        A dict with `generation` (raw model text, empty string if the model
        returned no text), `answer` (text of the last line starting with
        "Answer:", or an empty string) and `reasoning` (all remaining
        lines, with a leading "Reasoning:" marker removed).
    """
    if client is None:
        client = openai.Client()

    if completion_kwargs is None:
        completion_kwargs = DEFAULT_COMPLETION_KWARGS

    # Prepare the messages
    messages = [
        {
            "role": "system",
            "content": SYSTEM_PROMPT_COT_FS,
        },
    ]
    # Each demonstration becomes a user turn followed by the canned
    # assistant reply.
    for example in examples:
        messages.append(
            {
                "role": "user",
                "content": USER_PROMPT.format(context=example["context"], question=example["question"]),
            }
        )
        messages.append({"role": "assistant", "content": example["cot_generation"]})

    messages.append(
        {
            "role": "user",
            "content": USER_PROMPT.format(context=context, question=question),
        },
    )

    chat_completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
        **completion_kwargs,
    )
    # `content` can be None when the API returns no text; fall back to an
    # empty string so the parsing below cannot raise AttributeError.
    generation = chat_completion.choices[0].message.content or ""

    # Parse the response
    answer = ""
    reasoning_lines = []
    for line in generation.splitlines():
        if line.startswith("Answer:"):
            answer = line.split("Answer:")[1].strip()
        else:
            # Drop the marker only where it is a prefix, so reasoning text
            # that mentions "Reasoning:" mid-sentence is left untouched.
            reasoning_lines.append(line.removeprefix("Reasoning:"))
    reasoning = "\n".join(reasoning_lines).strip()
    return dict(reasoning=reasoning, answer=answer, generation=generation)

In [None]:
# Demo: run the few-shot chain-of-thought prompt on the sample passage (calls
# the API), then show the raw generation and, below a separator, the parsed
# fields.
result = answer_question_cot_fs(context=test_context, question=test_question)
print(result["generation"])
print("=" * 80)
for field in ("reasoning", "answer"):
    print(result[field])

Reasoning:
- The context mentions that David Hasselhoff released an album called "Sings America" in August 2004.
- The album includes covers of songs by artists like The Beach Boys and Glen Campbell.
- The German release of the album contains a bonus track titled "More Than Words Can Say," which is the only original Hasselhoff composition on the album.
- Therefore, the artist known for covering songs by The Beach Boys and Glen Campbell in an album from 2004, who included his own original song in the German version of the album, is **David Hasselhoff**.
- The context mentions that David Hasselhoff released an album called "Sings America" in August 2004.
- The album includes covers of songs by artists like The Beach Boys and Glen Campbell.
- The German release of the album contains a bonus track titled "More Than Words Can Say," which is the only original Hasselhoff composition on the album.
- Therefore, the artist known for covering songs by The Beach Boys and Glen Campbell in an album 

In [None]:
def answer_question_cot(
    context: str,
    question: str,
    model_name: str = DEFAULT_MODEL,
    completion_kwargs: dict | None = None,
    client=None,
) -> dict:
    """Zero-shot chain-of-thought: `answer_question_cot_fs` with no examples.

    All arguments are forwarded unchanged; the return value is whatever
    `answer_question_cot_fs` returns.
    """
    # NOTE(review): unlike the other answer_question_* cells, this cell carries
    # no export directive, so it is presumably left out of the generated
    # module. Confirm whether that is intended.
    return answer_question_cot_fs(
        context=context,
        question=question,
        examples=[],
        model_name=model_name,
        completion_kwargs=completion_kwargs,
        client=client,
    )

In [None]:
# Demo: run the zero-shot chain-of-thought prompt on the sample passage (calls
# the API), then show the raw generation and, below a separator, the parsed
# fields.
result = answer_question_cot(context=test_context, question=test_question)
print(result["generation"])
print("=" * 80)
for field in ("reasoning", "answer"):
    print(result[field])

Reasoning: 
1. The artist known for covering songs by The Beach Boys and Glen Campbell in an album from 2004 is David Hasselhoff.
2. The German release of David Hasselhoff's album "Sings America" contains a bonus track, "More Than Words Can Say", which is the only original Hasselhoff composition on the album.

Answer: David Hasselhoff
1. The artist known for covering songs by The Beach Boys and Glen Campbell in an album from 2004 is David Hasselhoff.
2. The German release of David Hasselhoff's album "Sings America" contains a bonus track, "More Than Words Can Say", which is the only original Hasselhoff composition on the album.
David Hasselhoff


### Connect-the-Entities prompt

In [None]:
#|export

# System message for the connect-the-entities strategy: the model is asked to
# list entity-relation-entity triplets before giving the "Answer:" line that
# `answer_question_cte` parses.
SYSTEM_PROMPT_CTE = """
You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the context information given, without drawing on prior knowledge.

Before answering the question, first, you extract relevant entity-relation-entity triplets from the context. Then, you answer the question based on the triplets.

# Output format
Triplets: [A list of entity-relation-entity triplets extracted from the context.]
Answer: [answer in 2-4 words]
""".strip()

def answer_question_cte(
    context: str,
    question: str,
    examples: list[dict] = FEW_SHOT_EXAMPLES,
    model_name: str = DEFAULT_MODEL,
    completion_kwargs: dict | None = None,
    client=None,
) -> dict:
    """Answer `question` from `context` using the connect-the-entities prompt.

    The model is asked to list entity-relation-entity triplets first and to
    answer afterwards.

    Args:
        context: Passage inserted into `USER_PROMPT`.
        question: Question inserted into `USER_PROMPT`.
        examples: Demonstrations replayed before the real question. Each
            dict must provide `context`, `question` and `cte_generation`.
            The list is only read, never modified.
        model_name: Model name passed to the chat-completions call.
        completion_kwargs: Extra keyword arguments for
            `client.chat.completions.create`; `DEFAULT_COMPLETION_KWARGS`
            is used when this is `None`.
        client: Object exposing `chat.completions.create`; a fresh
            `openai.Client()` is created when this is `None`.

    Returns:
        A dict with `generation` (raw model text, empty string if the model
        returned no text), `answer` (text of the last line starting with
        "Answer:", or an empty string) and `triplets` (every other line
        containing a "|" character, stripped).
    """
    if client is None:
        client = openai.Client()

    if completion_kwargs is None:
        completion_kwargs = DEFAULT_COMPLETION_KWARGS

    # Prepare the messages
    messages = [
        {
            "role": "system",
            "content": SYSTEM_PROMPT_CTE,
        },
    ]
    # Each demonstration becomes a user turn followed by the canned
    # assistant reply.
    for example in examples:
        messages.append(
            {
                "role": "user",
                "content": USER_PROMPT.format(context=example["context"], question=example["question"]),
            }
        )
        messages.append(
            {
                "role": "assistant",
                "content": example["cte_generation"],
            }
        )
    messages.append(
        {
            "role": "user",
            "content": USER_PROMPT.format(context=context, question=question),
        },
    )

    # Generate the response
    chat_completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
        **completion_kwargs,
    )
    # `content` can be None when the API returns no text; fall back to an
    # empty string so the parsing below cannot raise AttributeError.
    generation = chat_completion.choices[0].message.content or ""

    # Parse the response
    answer = ""
    triplets = []
    for line in generation.splitlines():
        if line.startswith("Answer:"):
            answer = line.split("Answer:")[1].strip()
        elif "|" in line:
            # Any non-answer line containing a pipe is taken as a triplet.
            triplets.append(line.strip())
    return dict(triplets=triplets, answer=answer, generation=generation)

In [None]:
# Demo: run the connect-the-entities prompt on the sample passage (calls the
# API), then show the raw generation and, below a separator, the parsed fields.
result = answer_question_cte(context=test_context, question=test_question)
print(result["generation"])
print("=" * 80)
for field in ("triplets", "answer"):
    print(result[field])

Triplets:
David Hasselhoff | released | Sings America
David Hasselhoff | covered songs by | The Beach Boys
David Hasselhoff | covered songs by | Glen Campbell
David Hasselhoff | included | original song "More Than Words Can Say" in German release

Answer: David Hasselhoff
['David Hasselhoff | released | Sings America', 'David Hasselhoff | covered songs by | The Beach Boys', 'David Hasselhoff | covered songs by | Glen Campbell', 'David Hasselhoff | included | original song "More Than Words Can Say" in German release']
David Hasselhoff


In [None]:
#|hide
import nbdev; nbdev.nbdev_export()