# QA reward model for the JERX task

In [None]:
#|default_exp jerx.reward.qa

In [None]:
#|hide
from fastcore.test import *
from nbdev.showdoc import *

In [None]:
#|export
import os

from openai import BadRequestError, OpenAI
from pydantic import BaseModel, Field

from bellem.logging import get_logger
from bellem.text.utils import fuzzy_match

# Module-level logger; the functions below use it to report undecodable model
# output (error) and rejected API requests (warning).
log = get_logger(__name__)

In [None]:
#|hide
# Notebook-only setup (this cell is hidden and not exported).
# NOTE(review): presumably this loads OPENAI_API_KEY from a local .env file so
# that the bare `OpenAI()` client created further down can authenticate — confirm.
from dotenv import load_dotenv
load_dotenv()

True

In [None]:
#|export

import json

# System message sent verbatim with every request. It is NOT passed through
# `str.format`, so the literal braces in the JSON example below are safe.
# The JSON shape described here ("reasoning" / "answer" keys) is what
# `make_question_answer_func` later parses out of the model reply.
DEFAULT_SYSTEM_PROMPT = """You are an excellent Q&A system that is trusted around the world. You are given a question that requires multi-hop reasoning. Always answer the question using the provided context information, and not prior knowledge.

Some rules to follow:
1. Never directly reference the given context in your answer.
2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.

Output format:
Your output must be a single line in JSON such as:
{"reasoning": "Provide step by step multi-hop reasoning for the answer.", "answer": "Provide the final answer in 2-4 words."}
"""

# User message template. It IS rendered with `str.format`, filling the
# `{context}` and `{question}` placeholders; any other literal braces added
# here would have to be doubled.
USER_PROMPT = """The context information below is provided as a set of entity-relation-entity triplets from knowledge graph.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the question.
{question}
"""


class QuestionAnsweringResult(BaseModel):
    """Data model for answering the question."""

    # None of the fields declares a default, so all three must be supplied
    # when constructing an instance.

    # Value of the "reasoning" key in the model's JSON reply.
    reasoning: str = Field(description="Multi-hop reasoning for the answer.")
    # Value of the "answer" key in the model's JSON reply.
    answer: str = Field(description="The answer to the question in 2-4 words.")
    # Unparsed text of the model reply, kept so parsing failures can be inspected.
    raw_output: str = Field(description="The raw output from the model.")


def make_question_answer_func(
    model_name: str = "gpt-3.5-turbo",
    client: OpenAI | None = None,
    completion_kwargs: dict | None = None,
):
    """Build a function that answers a question from a triplet context.

    Args:
        model_name: Name of the chat model passed to the completions endpoint.
        client: OpenAI client to use; a default `OpenAI()` is created when omitted.
        completion_kwargs: Extra keyword arguments forwarded unchanged to
            `client.chat.completions.create` (in addition to `model` and
            `messages`).

    Returns:
        A callable `func(context, question) -> QuestionAnsweringResult`.
        When the model reply cannot be interpreted as a JSON object, the
        result falls back to `answer="N/A"` and `reasoning=""`; the reply
        text is always preserved in `raw_output`.
    """
    if client is None:
        client = OpenAI()

    if completion_kwargs is None:
        completion_kwargs = {}

    def _as_text(value, default: str) -> str:
        # The result model only accepts strings; the LLM may emit numbers or
        # null for a field (e.g. {"answer": 1996}), so normalise defensively.
        if value is None:
            return default
        return value if isinstance(value, str) else str(value)

    def func(context: str, question: str) -> QuestionAnsweringResult:
        messages = [
            {
                "role": "system",
                "content": DEFAULT_SYSTEM_PROMPT,
            },
            {
                "role": "user",
                "content": USER_PROMPT.format(context=context, question=question),
            },
        ]
        chat_completion = client.chat.completions.create(
            model=model_name,
            messages=messages,
            **completion_kwargs,
        )
        # `content` can be None; treat that as an empty reply so that both the
        # JSON parsing and the `raw_output` field receive a string.
        text = chat_completion.choices[0].message.content or ""
        try:
            output = json.loads(text)
        except json.JSONDecodeError:
            log.error("Failed to decode the JSON output: %s", text)
            output = {}
        if not isinstance(output, dict):
            # Valid JSON that is not an object (list, string, number, ...)
            # carries no "answer"/"reasoning" keys; use the same fallback.
            log.error("Expected a JSON object in the output: %s", text)
            output = {}
        return QuestionAnsweringResult(
            answer=_as_text(output.get("answer"), "N/A"),
            reasoning=_as_text(output.get("reasoning"), ""),
            raw_output=text,
        )

    return func

In [None]:
#|export

class RewardAssessment(QuestionAnsweringResult):
    """Question-answering result extended with the reward assigned to it."""

    # Set by `make_qa_reward_func` to 1.0 when the predicted answer matches one
    # of the reference answers and to 0.0 otherwise.
    reward: float = Field(description="The reward value for the answer.")

def make_qa_reward_func(
    model_name: str = "gpt-3.5-turbo",
    answer_comparator=fuzzy_match,
    completion_kwargs: dict | None = None,
):
    """Build a reward function that scores a context by question-answering accuracy.

    Args:
        model_name: Chat model name handed to `make_question_answer_func`.
        answer_comparator: Callable invoked as
            `answer_comparator(predicted_answer, reference_answer)`; a truthy
            return value counts as a match.
        completion_kwargs: Extra keyword arguments forwarded to
            `make_question_answer_func`.

    Returns:
        A callable `reward(context, question, answers) -> RewardAssessment`
        whose `reward` is 1.0 when the predicted answer matches any of the
        reference `answers` and 0.0 otherwise. If the API rejects the request
        with `BadRequestError`, the assessment has reward 0.0, an empty
        answer and raw output, and the error text as `reasoning`.
    """
    qa = make_question_answer_func(model_name, completion_kwargs=completion_kwargs)

    def reward(context: str, question: str, answers: list[str]) -> RewardAssessment:
        try:
            qa_result = qa(context, question)
        except BadRequestError as e:
            log.warning("Failed to assess generation: %s", e)
            # `raw_output` is a required field of the model; omitting it here
            # would raise a validation error instead of returning zero reward.
            return RewardAssessment(
                answer="",
                reasoning=str(e),
                raw_output="",
                reward=0.0,
            )
        correct = any(answer_comparator(qa_result.answer, answer) for answer in answers)
        # Fields are passed explicitly rather than via `qa_result.dict()`, which
        # is deprecated in pydantic v2.
        return RewardAssessment(
            answer=qa_result.answer,
            reasoning=qa_result.reasoning,
            raw_output=qa_result.raw_output,
            reward=1.0 if correct else 0.0,
        )

    return reward

In [None]:
# Demo input: knowledge-graph triplets in "entity | relation | entity" form.
triplets = [
    "Dominica | first competed at | Olympic Games in 1996",
    "Dominica | has participated in | each Games since then",
    "Dominica | has not won | any medals at the Olympic Games",
]

question = "When did the country where the River Quanery is found first compete in Olympic games?"
answer = "1996"
# One triplet per line, as the user prompt template expects a text block.
context = "\n".join(triplets)

# Default model and client; the last expression is displayed as the cell output.
qa = make_question_answer_func()
qa(context=context, question=question)

QuestionAnsweringResult(reasoning='1. Dominica first competed at Olympic Games in 1996. 2. River Quanery is found in Dominica.', answer='1996', raw_output='{"reasoning": "1. Dominica first competed at Olympic Games in 1996. 2. River Quanery is found in Dominica.", "answer": "1996"}')

In [None]:
# Smoke test: the demo context should earn full reward with gpt-3.5-turbo.
reward_func = make_qa_reward_func(model_name="gpt-3.5-turbo")
result = reward_func(context=context, question=question, answers=[answer])
assert result.reward == 1

In [None]:
# Smoke test: the demo context should earn full reward with gpt-4-turbo.
reward_func = make_qa_reward_func(model_name="gpt-4-turbo")
result = reward_func(context=context, question=question, answers=[answer])
assert result.reward == 1

In [None]:
#|hide
# Export the cells marked `#|export` from this notebook into the package module.
import nbdev

nbdev.nbdev_export()