# Question answering with LLMs

In [None]:
#|default_exp musique.qa

In [None]:
#|hide
from fastcore.test import *
from nbdev.showdoc import *

In [None]:
#|hide
from dotenv import load_dotenv
load_dotenv()

True

In [None]:
#|export
import openai

from bellek.logging import get_logger

log = get_logger(__name__)

In [None]:
# Sample passage, question and reference answer used by the demo cells in this notebook.
context = """Sings America is an album released by David Hasselhoff in August 2004 (see 2004 in music). The album contains covers of songs originally made famous by artists such as Elvis Presley, The Beach Boys, Glen Campbell, Burt Bacharach and Madonna. The German release contains a bonus track, "More Than Words Can Say", which is the only original Hasselhoff composition on the album (written in conjunction with Wade Hubbard and Glenn Morrow)."""

question = "Which artist, known for covering songs by The Beach Boys and Glen Campbell in an album from 2004, included his own original song in the German version of this album?"

answer = "David Hasselhoff"

In [None]:
#|export

# User-turn template shared by every answer_question_* function in this notebook.
# The `{context}` and `{question}` placeholders are filled in with `str.format`.
USER_PROMPT = """The context information is provided below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question.
{question}
"""

In [None]:
#|export

# Worked example (context + two-hop question) that `answer_question_cot_fs`
# renders into the demonstration user turn of its few-shot conversation.
# `.strip()` drops the newlines that the triple-quoted layout adds at both ends.
EXAMPLE_CONTEXT = """
Glenhis Hernández (born 7 October 1990 in Havana) is a taekwondo practitioner from Cuba. She was the 2013 World
Champion in middleweight.

The current mayor of Havana ("President of the People's Power Provincial Assembly") is Marta Hernández Romero, she
was elected on March 5, 2011.
""".strip()

EXAMPLE_QUESTION = "Who is the current mayor of the city Glenhis Hernández was born?"


### Standard prompt

In [None]:
#|export

# System prompt for the direct-answer strategy: the model is asked for a single
# "Answer: ..." line, which `answer_question_standard` then parses out.
SYSTEM_PROMPT_STANDARD = """
You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the context information given, without drawing on prior knowledge. 

# Output format
Answer: [answer in 2-4 words]
""".strip()

def answer_question_standard(
    context: str,
    question: str,
    model_name: str = "gpt-3.5-turbo",
    completion_kwargs: dict | None = None,
    client = None
) -> dict:
    """Answer `question` from `context` using the direct-answer system prompt.

    Args:
        context: Passage(s) the model is told to base its answer on.
        question: Question to answer.
        model_name: Model identifier passed to the chat completions call.
        completion_kwargs: Extra keyword arguments forwarded unchanged to
            `client.chat.completions.create`.
        client: Object exposing `chat.completions.create`; when omitted a new
            `openai.Client()` is created.

    Returns:
        A dict with two keys: `answer`, the stripped text after the first
        "Answer:" marker (up to any later marker), or "" when the marker is
        absent; and `generation`, the raw model output.
    """
    if client is None:
        client = openai.Client()

    completion_kwargs = completion_kwargs or {}
    messages = [
        {
            "role": "system",
            "content": SYSTEM_PROMPT_STANDARD,
        },
        {
            "role": "user",
            "content": USER_PROMPT.format(context=context, question=question),
        },
    ]
    chat_completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
        **completion_kwargs,
    )
    generation = chat_completion.choices[0].message.content
    if "Answer:" not in generation:
        return dict(answer="", generation=generation)
    # Split on exactly the marker the guard above checks. Splitting on
    # "Answer: " (with a trailing space) raised IndexError whenever the model
    # omitted the space or put the answer on the following line.
    answer = generation.split("Answer:")[1].strip()
    return dict(answer=answer, generation=generation)

In [None]:
# Try the standard prompt on the sample context/question; no `client` is passed,
# so the call goes through a freshly created `openai.Client()`.
result = answer_question_standard(context, question)
print(result['answer'])
print(result['generation'])

David Hasselhoff
Answer: David Hasselhoff


### Chain-of-thought prompt

In [None]:
#|export

# System prompt for the zero-shot chain-of-thought strategy: the model is asked
# for a "Reasoning:" section followed by an "Answer:" line, both of which
# `answer_question_cot` parses.
SYSTEM_PROMPT_COT = """
You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the context information given, without drawing on prior knowledge. Always provide clear and logical step-by-step reasoning in your response.

# Output format
Reasoning: [Step-by-step reasoning for the answer.]
Answer: [answer in 2-4 words]
""".strip()

def answer_question_cot(
    context: str,
    question: str,
    model_name: str = "gpt-3.5-turbo",
    completion_kwargs: dict | None = None,
    client = None
) -> dict:
    """Answer `question` from `context` using the zero-shot chain-of-thought prompt.

    Args:
        context: Passage(s) the model is told to base its answer on.
        question: Question to answer.
        model_name: Model identifier passed to the chat completions call.
        completion_kwargs: Extra keyword arguments forwarded unchanged to
            `client.chat.completions.create`.
        client: Object exposing `chat.completions.create`; when omitted a new
            `openai.Client()` is created.

    Returns:
        A dict with three keys: `reasoning`, every non-answer line with the
        "Reasoning:" label removed, joined by newlines; `answer`, the text
        after "Answer:" on the last line that starts with that marker, or ""
        if there is none; and `generation`, the raw model output.
    """
    if client is None:
        client = openai.Client()

    completion_kwargs = completion_kwargs or {}
    messages = [
        {
            "role": "system",
            "content": SYSTEM_PROMPT_COT,
        },
        {
            "role": "user",
            "content": USER_PROMPT.format(context=context, question=question),
        },
    ]
    chat_completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
        **completion_kwargs,
    )
    generation = chat_completion.choices[0].message.content

    # Parse the response
    answer = ""
    reasoning_lines = []
    for line in generation.splitlines():
        if line.startswith("Answer:"):
            answer = line.split("Answer:")[1].strip()
        else:
            reasoning_lines.append(line.replace("Reasoning:", "").strip())
    # Keep one reasoning step per line. Concatenating the stripped lines
    # directly fused consecutive steps together ("...Hasselhoff.2. The ...").
    reasoning = "\n".join(reasoning_lines).strip()
    return dict(reasoning=reasoning, answer=answer, generation=generation)

In [None]:
# Try the zero-shot chain-of-thought prompt on the sample context/question; no
# `client` is passed, so the call goes through a freshly created `openai.Client()`.
result = answer_question_cot(context, question)
print(result['reasoning'])
print(result['answer'])
print(result['generation'])

1. The artist known for covering songs by The Beach Boys and Glen Campbell in an album from 2004 is David Hasselhoff.2. The German release of the album "Sings America" by David Hasselhoff contains a bonus track, "More Than Words Can Say", which is the only original Hasselhoff composition on the album.
David Hasselhoff
Reasoning: 
1. The artist known for covering songs by The Beach Boys and Glen Campbell in an album from 2004 is David Hasselhoff.
2. The German release of the album "Sings America" by David Hasselhoff contains a bonus track, "More Than Words Can Say", which is the only original Hasselhoff composition on the album.

Answer: David Hasselhoff


In [None]:
#|export

# System prompt for the few-shot chain-of-thought strategy.
# NOTE: unlike the other system prompts in this notebook, this literal is not
# passed through `.strip()`, so it keeps its trailing newline.
SYSTEM_PROMPT_COT_FS = """You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the context information given, without drawing on prior knowledge. Always provide clear and logical step-by-step reasoning in your response.

# Output format
Reasoning: [Step-by-step reasoning for the answer.]
Answer: [answer in 2-4 words]
"""

# Demonstration assistant turn for the few-shot chain-of-thought conversation:
# the reasoning and answer for EXAMPLE_QUESTION over EXAMPLE_CONTEXT.
# `.strip()` drops the newlines that the triple-quoted layout adds at both ends.
EXAMPLE_COT_RESPONSE = """
Reasoning:
1. The context states that Glenhis Hernández was born in Havana.
2. It also mentions that the current mayor (President of the People's Power Provincial Assembly) of Havana is Marta Hernández Romero.
3. Marta Hernández Romero has been the mayor since her election on March 5, 2011.

Answer: Marta Hernández Romero
""".strip()

def answer_question_cot_fs(
    context: str,
    question: str,
    model_name: str = "gpt-3.5-turbo",
    completion_kwargs: dict | None = None,
    client = None
) -> dict:
    """Answer `question` from `context` with a one-shot chain-of-thought conversation.

    The conversation is: system prompt, the worked example as a user/assistant
    exchange, then the real context and question as the final user turn.

    Args:
        context: Passage(s) the model is told to base its answer on.
        question: Question to answer.
        model_name: Model identifier passed to the chat completions call.
        completion_kwargs: Extra keyword arguments forwarded unchanged to
            `client.chat.completions.create`.
        client: Object exposing `chat.completions.create`; when omitted a new
            `openai.Client()` is created.

    Returns:
        A dict with three keys: `reasoning`, the non-answer lines with the
        "Reasoning:" label removed, newline-joined and stripped; `answer`,
        the text after "Answer:" on the last line that starts with that
        marker, or "" if there is none; and `generation`, the raw model output.
    """
    llm = openai.Client() if client is None else client
    extra_kwargs = completion_kwargs if completion_kwargs else {}

    def _turn(role, content):
        # One chat message in the shape the completions endpoint is given.
        return {"role": role, "content": content}

    conversation = [
        _turn("system", SYSTEM_PROMPT_COT_FS),
        _turn("user", USER_PROMPT.format(context=EXAMPLE_CONTEXT, question=EXAMPLE_QUESTION)),
        _turn("assistant", EXAMPLE_COT_RESPONSE),
        _turn("user", USER_PROMPT.format(context=context, question=question)),
    ]
    response = llm.chat.completions.create(
        model=model_name,
        messages=conversation,
        **extra_kwargs,
    )
    generation = response.choices[0].message.content

    # Separate the answer line(s) from everything else in the output.
    answer = ""
    reasoning_lines = []
    for raw_line in generation.splitlines():
        if not raw_line.startswith("Answer:"):
            reasoning_lines.append(raw_line.replace("Reasoning:", ""))
            continue
        answer = raw_line.split("Answer:")[1].strip()

    return {
        "reasoning": "\n".join(reasoning_lines).strip(),
        "answer": answer,
        "generation": generation,
    }

In [None]:
# Try the few-shot chain-of-thought prompt on the sample context/question; no
# `client` is passed, so the call goes through a freshly created `openai.Client()`.
result = answer_question_cot_fs(context, question)
print(result['reasoning'])
print(result['answer'])
print(result['generation'])

1. The context mentions that David Hasselhoff released an album called "Sings America" in August 2004.
2. The album contains covers of songs originally made famous by artists such as Elvis Presley, The Beach Boys, Glen Campbell, Burt Bacharach, and Madonna.
3. The German release of the album includes a bonus track, "More Than Words Can Say", which is the only original composition by David Hasselhoff along with Wade Hubbard and Glenn Morrow.
David Hasselhoff
Reasoning:
1. The context mentions that David Hasselhoff released an album called "Sings America" in August 2004.
2. The album contains covers of songs originally made famous by artists such as Elvis Presley, The Beach Boys, Glen Campbell, Burt Bacharach, and Madonna.
3. The German release of the album includes a bonus track, "More Than Words Can Say", which is the only original composition by David Hasselhoff along with Wade Hubbard and Glenn Morrow.

Answer: David Hasselhoff


### Connect-the-Entities prompt

In [None]:
#|export

# System prompt for the connect-the-entities strategy: the model is asked to
# list entity-relation-entity triplets first and then give an "Answer:" line;
# `answer_question_cte` parses both.
SYSTEM_PROMPT_CTE = """
You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the context information given, without drawing on prior knowledge.

Before answering the question, first, you extract relevant entity-relation-entity triplets from the context. Then, you answer the question based on the triplets.

# Output format
Triplets: [A list of entity-relation-entity triplets extracted from the context.]
Answer: [answer in 2-4 words]
""".strip()

# Default few-shot demonstrations for `answer_question_cte`. Each entry supplies
# `context` and `question` (rendered into a user turn through USER_PROMPT) and
# `generation` (replayed verbatim as the assistant turn).
# NOTE(review): the first example's question is single-hop, yet its triplets
# include the birthplace fact; EXAMPLE_QUESTION asks the two-hop variant over the
# same context -- confirm which question is intended here.
FEW_SHOT_EXAMPLES_CTE = [
    {
        "context": 'Glenhis Hernández (born 7 October 1990 in Havana) is a taekwondo practitioner from Cuba. She was the 2013 World\nChampion in middleweight.\n\nThe current mayor of Havana ("President of the People\'s Power Provincial Assembly") is Marta Hernández Romero, she\nwas elected on March 5, 2011.',
        "question": "Who is the current mayor of Havana?",
        "generation": "Triplets: \nGlenhis Hernández | birth place | Havana\nMarta Hernández Romero | serves as | mayor of Havana\n\nAnswer: Marta Hernández Romero",
    },
    {
        "context": "# Andrzej Sławiński\nAndrzej Sławiński (born July 31, 1951 in Warsaw) is a Polish economist and Professor of Economics at the Warsaw School of Economics. He is a member of the Council of Monetary Policies since 2004 and a fellow of Collegium Invisibile.\n# Warsaw\nLegislative power in Warsaw is vested in a unicameral Warsaw City Council (Rada Miasta), which comprises 60 members. Council members are elected directly every four years. Like most legislative bodies, the City Council divides itself into committees which have the oversight of various functions of the city government. Bills passed by a simple majority are sent to the mayor (the President of Warsaw), who may sign them into law. If the mayor vetoes a bill, the Council has 30 days to override the veto by a two-thirds majority vote.",
        "question": "Who in the birthplace of Andrzej Sławiński has the power of legislative action?",
        "generation": "Triplets:\nAndrzej Sławiński | born in | Warsaw\nWarsaw City Council | legislative power | Warsaw\n\nAnswer: Warsaw City Council",
    },
]

def answer_question_cte(
    context: str,
    question: str,
    model_name: str = "gpt-3.5-turbo",
    completion_kwargs: dict | None = None,
    client=None,
    examples: list = FEW_SHOT_EXAMPLES_CTE,
) -> dict:
    """Answer `question` from `context` with the connect-the-entities prompt.

    The conversation is: system prompt, one user/assistant exchange per entry
    in `examples`, then the real context and question as the final user turn.

    Args:
        context: Passage(s) the model is told to base its answer on.
        question: Question to answer.
        model_name: Model identifier passed to the chat completions call.
        completion_kwargs: Extra keyword arguments forwarded unchanged to
            `client.chat.completions.create`.
        client: Object exposing `chat.completions.create`; when omitted a new
            `openai.Client()` is created.
        examples: Few-shot demonstrations; each item is indexed by the keys
            "context", "question" and "generation".

    Returns:
        A dict with three keys: `triplets`, the stripped non-answer lines
        that contain "|"; `answer`, the text after "Answer:" on the last line
        that starts with that marker, or "" if there is none; and
        `generation`, the raw model output.
    """
    llm = client if client is not None else openai.Client()
    extra_kwargs = completion_kwargs if completion_kwargs else {}

    # Build the conversation: system prompt, demonstrations, then the real query.
    conversation = [{"role": "system", "content": SYSTEM_PROMPT_CTE}]
    for shot in examples:
        demo_prompt = USER_PROMPT.format(context=shot["context"], question=shot["question"])
        conversation.extend(
            [
                {"role": "user", "content": demo_prompt},
                {"role": "assistant", "content": shot["generation"]},
            ]
        )
    conversation.append(
        {"role": "user", "content": USER_PROMPT.format(context=context, question=question)}
    )

    # Query the model.
    response = llm.chat.completions.create(
        model=model_name,
        messages=conversation,
        **extra_kwargs,
    )
    generation = response.choices[0].message.content

    # Pull the answer and the triplet lines out of the raw text.
    output_lines = generation.splitlines()
    answer_lines = [ln for ln in output_lines if ln.startswith("Answer:")]
    # When several answer lines are present, the last one wins.
    answer = answer_lines[-1].split("Answer:")[1].strip() if answer_lines else ""
    triplets = [
        ln.strip()
        for ln in output_lines
        if not ln.startswith("Answer:") and "|" in ln
    ]
    return {"triplets": triplets, "answer": answer, "generation": generation}

In [None]:
# Try the connect-the-entities prompt on the sample context/question; no
# `client` is passed, so the call goes through a freshly created `openai.Client()`.
result = answer_question_cte(context, question)
print(result['triplets'])
print(result['answer'])
print(result['generation'])

['David Hasselhoff | released | Sings America', 'David Hasselhoff | covers | songs by The Beach Boys and Glen Campbell', 'David Hasselhoff | includes | original song in German album']
David Hasselhoff
Triplets:
David Hasselhoff | released | Sings America
David Hasselhoff | covers | songs by The Beach Boys and Glen Campbell
David Hasselhoff | includes | original song in German album

Answer: David Hasselhoff


In [None]:
#|hide
import nbdev; nbdev.nbdev_export()