# Question answering with an LLM

In [None]:
#|default_exp qa.jerxrm

In [None]:
#|hide
from fastcore.test import *
from nbdev.showdoc import *

In [None]:
#|hide
from dotenv import load_dotenv
# Load variables from a local `.env` file into the process environment.
# The `True` recorded as this cell's output is the call's return value.
# NOTE(review): presumably this supplies the API credentials needed by the
# chat model configured below -- confirm.
load_dotenv()

True

In [None]:
#|export
import magentic
from pydantic import BaseModel, Field

from bellek.logging import get_logger

# Module-level logger obtained from the project's logging helper.
# NOTE(review): `log` is not referenced anywhere else in the visible code.
log = get_logger(__name__)

In [None]:
#|export
# Chat model passed as `model=` to every `magentic.chatprompt` function below.
# NOTE(review): temperature 0.1 is presumably chosen to keep answers close to
# deterministic -- confirm.
DEFAULT_MODEL = magentic.OpenaiChatModel("gpt-3.5-turbo", temperature=0.1)

In [None]:
#|export

# User-message template shared by all prompt functions below.
# The `{context}` and `{question}` placeholders carry the same names as the
# parameters of those functions; presumably magentic fills them in by name
# from the call arguments -- confirm against the magentic docs.
USER_PROMPT = """The context information below is provided.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question.
{question}
"""

In [None]:
#|export

# System message used by `answer_question` (answer only, no reasoning and no
# triplets). This text is sent to the model, so any edit changes the prompt.
SYSTEM_PROMPT = """You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the information given, without drawing on prior knowledge.

Guidelines:
1. Do not explicitly mention or refer to the provided information in your answers.
2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.
"""

# Return type of `answer_question`: a single short answer string.
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the LLM as part of the structured-output schema, so treat them
# as prompt text -- confirm before rewording either.
class QuestionAnsweringResult(BaseModel):
    """Data model for answering the question."""

    # The description asks for a 2-4 word answer; no length validation is
    # applied here.
    answer: str = Field(description="The answer to the question in 2-4 words.")


# Prompt function: answer `question` from `context` with a short answer only.
# The body is intentionally `...`; the `magentic.chatprompt` decorator is what
# supplies the behaviour, using SYSTEM_PROMPT as the system message,
# USER_PROMPT as the user message and DEFAULT_MODEL as the chat model.
# NOTE(review): presumably the reply is parsed into the annotated return type
# (`QuestionAnsweringResult`) -- confirm against the magentic docs.
@magentic.chatprompt(
    magentic.SystemMessage(SYSTEM_PROMPT), 
    magentic.UserMessage(USER_PROMPT),
    model=DEFAULT_MODEL,
)
def answer_question(
    context: str,
    question: str,
) -> QuestionAnsweringResult: ...

In [None]:
#|export

# System message used by `answer_question_with_reasoning`: the same two
# guidelines as SYSTEM_PROMPT plus a third one requesting step-by-step
# reasoning. This text is sent to the model, so any edit changes the prompt.
SYSTEM_PROMPT_REASONING = """You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the information given, without drawing on prior knowledge.

Guidelines:
1. Do not explicitly mention or refer to the provided information in your answers.
2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.
3. Always provide clear and logical step-by-step reasoning in your answers.
"""

# Return type of `answer_question_with_reasoning`: free-text reasoning plus a
# short answer. `reasoning` is declared before `answer`.
# NOTE(review): the field order presumably makes the model write its reasoning
# before committing to an answer, and the docstring / Field descriptions are
# presumably forwarded to the LLM in the schema -- confirm both.
class QuestionAnsweringResultWithReasoning(BaseModel):
    """Data model for answering the question."""

    reasoning: str = Field(description="Step-by-step reasoning for the answer.")
    answer: str = Field(description="The answer to the question in 2-4 words.")


# Prompt function: answer `question` from `context`, returning both the
# reasoning and the short answer.
# The body is intentionally `...`; the `magentic.chatprompt` decorator is what
# supplies the behaviour, using SYSTEM_PROMPT_REASONING as the system message,
# USER_PROMPT as the user message and DEFAULT_MODEL as the chat model.
# NOTE(review): presumably the reply is parsed into the annotated return type
# (`QuestionAnsweringResultWithReasoning`) -- confirm against the magentic docs.
@magentic.chatprompt(
    magentic.SystemMessage(SYSTEM_PROMPT_REASONING), 
    magentic.UserMessage(USER_PROMPT),
    model=DEFAULT_MODEL,
)
def answer_question_with_reasoning(
    context: str,
    question: str,
) -> QuestionAnsweringResultWithReasoning: ...

In [None]:
#|export

# System message used by `_answer_question_with_triplets`: the same two
# guidelines as SYSTEM_PROMPT, followed by an instruction to extract
# entity-relation-entity triplets first and a single worked example.
# NOTE(review): the example shows the triplets as one quoted multi-line
# string, whereas the return model declares `triplets: list[str]` -- confirm
# this is intended.
# NOTE(review): the literal ends with a blank line before the closing quotes,
# unlike SYSTEM_PROMPT_REASONING_WITH_TRIPLETS -- confirm whether that matters.
SYSTEM_PROMPT_WITH_TRIPLETS = """You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the information given, without drawing on prior knowledge.

Guidelines:
1. Do not explicitly mention or refer to the provided information in your answers.
2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.

Before answering the question, first, you extract relevant entity-relation-entity triplets from the context. Then, you answer the question based on the triplets. For instance, 

# Example
Context: "Glenhis Hernández (born 7 October 1990 in Havana) is a taekwondo practitioner from Cuba. She was the 2013 World
Champion in middleweight.

The current mayor of Havana ("President of the People's Power Provincial Assembly") is Marta Hernández Romero, she
was elected on March 5, 2011."

Question: "Who is the current mayor of Havana?"

Triplets: "Glenhis Hernández (Athlete) | born on | October 7, 1990
Glenhis Hernández (Athlete) | birth place | Havana
Glenhis Hernández (Athlete) | specializes in | taekwondo
Glenhis Hernández (Athlete) | won | 2013 World Champion title (Middleweight)
Marta Hernández Romero (Politician) | serves as | mayor of Havana
Marta Hernández Romero (Politician) | holds | the position of "President of the People's Power Provincial Assembly"
Marta Hernández Romero (Politician) | elected on | March 5, 2011."

Answer: "Marta Hernández Romero"

"""

# Return type of the private prompt function `_answer_question_with_triplets`:
# extracted triplets plus a short answer. It has no `reasoning` field.
# NOTE(review): the class docstring and the Field descriptions are presumably
# forwarded to the LLM as part of the structured-output schema -- confirm
# before rewording them.
class _QuestionAnsweringResultWithTriplets(BaseModel):
    """Data model for answering the question."""

    triplets: list[str] = Field(description="A list of entity-relation-entity triplets extracted from the context.")
    answer: str = Field(description="The answer to the question in 2-4 words.")

# Public result type returned by `answer_question_with_triplets`.
# Extends the private model with a `reasoning` attribute that defaults to the
# empty string, so the result exposes the same three attributes (triplets,
# reasoning, answer) as QuestionAnsweringResultReasoningWithTriplets.
# NOTE(review): presumably this lets callers handle both result types
# uniformly -- confirm against the callers.
class QuestionAnsweringResultWithTriplets(_QuestionAnsweringResultWithTriplets):
    reasoning: str = ""

# Private prompt function: extract triplets from `context`, then answer
# `question`. Wrapped by the public `answer_question_with_triplets`.
# The body is intentionally `...`; the `magentic.chatprompt` decorator is what
# supplies the behaviour, using SYSTEM_PROMPT_WITH_TRIPLETS as the system
# message, USER_PROMPT as the user message and DEFAULT_MODEL as the chat model.
# NOTE(review): presumably the reply is parsed into the annotated return type
# (`_QuestionAnsweringResultWithTriplets`) -- confirm against the magentic docs.
@magentic.chatprompt(
    magentic.SystemMessage(SYSTEM_PROMPT_WITH_TRIPLETS), 
    magentic.UserMessage(USER_PROMPT),
    model=DEFAULT_MODEL,
)
def _answer_question_with_triplets(
    context: str,
    question: str,
) -> _QuestionAnsweringResultWithTriplets: ...

def answer_question_with_triplets(
    context: str,
    question: str,
) -> QuestionAnsweringResultWithTriplets:
    """Answer `question` from `context` via triplet extraction.

    Delegates to the private prompt function and copies its `triplets` and
    `answer` into the public result type. `reasoning` is never set here, so
    it keeps its default empty string.
    """
    extracted = _answer_question_with_triplets(context, question)
    payload = {
        "triplets": extracted.triplets,
        "answer": extracted.answer,
    }
    return QuestionAnsweringResultWithTriplets(**payload)

In [None]:
#|export

# System message used by `answer_question_reasoning_with_triplets`: the three
# guidelines of SYSTEM_PROMPT_REASONING, followed by an instruction to extract
# entity-relation-entity triplets first and a single worked example that
# includes a "Reasoning:" line.
# NOTE(review): the example shows the triplets as one quoted multi-line
# string, whereas the return model declares `triplets: list[str]` -- confirm
# this is intended.
SYSTEM_PROMPT_REASONING_WITH_TRIPLETS = """You are an excellent question-answering system known for providing accurate and reliable answers. Your responses should be solely based on the information given, without drawing on prior knowledge.

Guidelines:
1. Do not explicitly mention or refer to the provided information in your answers.
2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.
3. Always provide clear and logical step-by-step reasoning in your answers.

Before answering the question, first, you extract relevant entity-relation-entity triplets from the context. Then, you answer the question based on the triplets. For instance, 

# Example
Context: "Glenhis Hernández (born 7 October 1990 in Havana) is a taekwondo practitioner from Cuba. She was the 2013 World
Champion in middleweight.

The current mayor of Havana ("President of the People's Power Provincial Assembly") is Marta Hernández Romero, she
was elected on March 5, 2011."

Question: "Who is the current mayor of Havana?"

Triplets: "Glenhis Hernández (Athlete) | born on | October 7, 1990
Glenhis Hernández (Athlete) | birth place | Havana
Glenhis Hernández (Athlete) | specializes in | taekwondo
Glenhis Hernández (Athlete) | won | 2013 World Champion title (Middleweight)
Marta Hernández Romero (Politician) | serves as | mayor of Havana
Marta Hernández Romero (Politician) | holds | the position of "President of the People's Power Provincial Assembly"
Marta Hernández Romero (Politician) | elected on | March 5, 2011."

Reasoning: There is a triplet "Marta Hernández Romero (Politician) | serves as | mayor of Havana" that can be used for answering the question.

Answer: "Marta Hernández Romero"
"""

# Return type of `answer_question_reasoning_with_triplets`: extracted
# triplets, free-text reasoning and a short answer, declared in that order.
# NOTE(review): the class docstring and the Field descriptions are presumably
# forwarded to the LLM as part of the structured-output schema -- confirm
# before rewording them.
class QuestionAnsweringResultReasoningWithTriplets(BaseModel):
    """Data model for answering the question."""

    triplets: list[str] = Field(description="A list of entity-relation-entity triplets extracted from the context.")
    reasoning: str = Field(description="Step-by-step reasoning for the answer.")
    answer: str = Field(description="The answer to the question in 2-4 words.")


# Prompt function: extract triplets from `context`, reason over them and
# answer `question`.
# The body is intentionally `...`; the `magentic.chatprompt` decorator is what
# supplies the behaviour, using SYSTEM_PROMPT_REASONING_WITH_TRIPLETS as the
# system message, USER_PROMPT as the user message and DEFAULT_MODEL as the
# chat model.
# NOTE(review): presumably the reply is parsed into the annotated return type
# (`QuestionAnsweringResultReasoningWithTriplets`) -- confirm against the
# magentic docs.
@magentic.chatprompt(
    magentic.SystemMessage(SYSTEM_PROMPT_REASONING_WITH_TRIPLETS), 
    magentic.UserMessage(USER_PROMPT),
    model=DEFAULT_MODEL,
)
def answer_question_reasoning_with_triplets(
    context: str,
    question: str,
) -> QuestionAnsweringResultReasoningWithTriplets: ...

In [None]:
# Sample passage, question and expected answer used by the demo cells below.
# NOTE(review): `answer` is not referenced by any of the visible cells that
# follow; the printed results are only compared by eye.
context = """Sings America is an album released by David Hasselhoff in August 2004 (see 2004 in music). The album contains covers of songs originally made famous by artists such as Elvis Presley, The Beach Boys, Glen Campbell, Burt Bacharach and Madonna. The German release contains a bonus track, "More Than Words Can Say", which is the only original Hasselhoff composition on the album (written in conjunction with Wade Hubbard and Glenn Morrow)."""
question = "Who performed Sings America?"
answer = "David Hasselhoff"

In [None]:
# Demo: plain question answering on the sample passage; prints only the answer.
result = answer_question(context=context, question=question)
print(result.answer)

David Hasselhoff


In [None]:
# Demo: answer with reasoning. The answer and the reasoning are printed with
# one blank line between them.
result = answer_question_with_reasoning(context=context, question=question)
print(result.answer, result.reasoning, sep="\n\n")

David Hasselhoff

The album 'Sings America' was released by David Hasselhoff in August 2004, as mentioned in the context information.


In [None]:
# Demo: triplet extraction plus reasoning. Output layout: the answer, a blank
# line, one triplet per line, a blank line, then the reasoning.
result = answer_question_reasoning_with_triplets(context=context, question=question)
print(result.answer, end="\n\n")
for triplet in result.triplets:
    print(triplet)
print(f"\n{result.reasoning}")

David Hasselhoff

David Hasselhoff (Musician) | released | Sings America album in August 2004
David Hasselhoff (Musician) | covers songs by | Elvis Presley, The Beach Boys, Glen Campbell, Burt Bacharach, and Madonna
David Hasselhoff (Musician) | composed | More Than Words Can Say (bonus track)

The triplets extracted from the context indicate that David Hasselhoff released the album 'Sings America' in August 2004, which contains covers of songs by various artists and includes a bonus track composed by him.


In [None]:
# Demo: triplet extraction without reasoning. Output layout: the answer, a
# blank line, one triplet per line, then two blank lines, because `reasoning`
# is left at its empty-string default by `answer_question_with_triplets`.
result = answer_question_with_triplets(context=context, question=question)
print(result.answer, end="\n\n")
for triplet in result.triplets:
    print(triplet)
print(f"\n{result.reasoning}")

David Hasselhoff

David Hasselhoff (Musician) | released | Sings America album
David Hasselhoff (Musician) | released in | August 2004
David Hasselhoff (Musician) | album contains | covers of songs by various artists including Elvis Presley, The Beach Boys, Glen Campbell, Burt Bacharach, and Madonna
David Hasselhoff (Musician) | album contains | bonus track 'More Than Words Can Say' (original composition by David Hasselhoff, Wade Hubbard, and Glenn Morrow)




In [None]:
#|hide
# Export the cells marked `#|export` from this notebook.
import nbdev

nbdev.nbdev_export()