# Demo - Context Relevance Intrinsic

This notebook demonstrates some examples of using the [Granite context relevance intrinsic](https://huggingface.co/generative-computing/rag-intrinsics-lib/tree/main/context_relevance/README.md). It uses the shared IO processing code for intrinsics when performing model inference with an OpenAI-compatible backend such as vLLM.

In [None]:
# Imports go in this cell
import os

import openai
import granite_common
from granite_common import ChatCompletion
from granite_common.intrinsics.constants import BASE_MODEL_TO_CANONICAL_NAME

## Constants

In [None]:
# Intrinsic demonstrated in this notebook, and the base model name that is used
# both to look up the intrinsic's IO configuration and as the request's model.
intrinsic_name = "context_relevance"
base_model_name = "ibm-granite/granite-3.3-8b-instruct"

# Location and API key of the OpenAI-compatible inference server.
# The defaults are the values this notebook has always used. To target a
# different server, set the INTRINSICS_OPENAI_BASE_URL and/or
# INTRINSICS_OPENAI_API_KEY environment variables before starting the kernel;
# that avoids editing this cell or saving a real API key into the notebook.
openai_base_url = os.environ.get(
    "INTRINSICS_OPENAI_BASE_URL", "http://localhost:55555/v1"
)
openai_api_key = os.environ.get("INTRINSICS_OPENAI_API_KEY", "rag_intrinsics_1234")

In [None]:
# Multi-turn conversation used as the `messages` of the example request.
# Each (role, content) pair below becomes one chat-message dictionary; the
# last user turn is the question the example asks about.
base_messages = [
    {"role": speaker, "content": text}
    for speaker, text in (
        ("assistant", "I'm here to help you prepare for your job interview!"),
        (
            "user",
            "I have a job interview next week for a marketing manager position.",
        ),
        (
            "assistant",
            "Congratulations! Marketing manager is an exciting role. "
            "How are you feeling about it?",
        ),
        (
            "user",
            "I'm nervous because I haven't interviewed in years, "
            "and this is a big career move for me.",
        ),
        (
            "assistant",
            "It's natural to feel nervous, but preparation will help "
            "boost your confidence.",
        ),
        (
            "user",
            "What should I expect them to ask about my experience with "
            "social media campaigns as a marketing manager?",
        ),
    )
]

## Instantiate IO processing classes

In [None]:
# Obtain the IO configuration YAML from the Hugging Face Hub for this
# intrinsic, keyed by the canonical name of the chosen base model.
io_yaml_file = granite_common.intrinsics.util.obtain_io_yaml(
    intrinsic_name,
    BASE_MODEL_TO_CANONICAL_NAME[base_model_name],
)

print(
    "Instantiating input and output processing from configuration file:\n"
    f"{io_yaml_file}"
)

# Both processors are built from the same configuration file: the rewriter is
# applied to requests before inference, the result processor to completions
# after inference.
intrinsics_rewriter = granite_common.IntrinsicsRewriter(
    config_file=io_yaml_file,
)
# NOTE: "intrinsincs" is misspelled, but later cells refer to this exact name,
# so it is kept as-is.
intrinsincs_result_processor = granite_common.IntrinsicsResultProcessor(
    config_file=io_yaml_file,
)

## Relevant Document Context Relevance Check

In [None]:
# Build the example chat completion request. The conversation ends with the
# user's question. The `documents` list is deliberately empty here: the single
# document being checked is supplied separately through `intrinsics_kwargs`.
chat_input = ChatCompletion.model_validate(
    dict(
        messages=base_messages,
        extra_body={"documents": []},
        model=base_model_name,
        temperature=0.0,
    )
)

# Extra keyword arguments handed to the input rewriter together with the
# request. `document_content` holds the text of the document being checked.
# The text is a single line (no newlines), assembled from adjacent string
# literals.
intrinsics_kwargs = {
    "document_content": (
        "Marketing manager interviews often focus on campaign experience and "
        "measurable results. "
        "Expect questions about social media ROI, audience engagement metrics, "
        "and conversion rates. "
        "Prepare specific examples of campaigns you've managed, including "
        "budget, timeline, and outcomes. "
        "Interviewers may ask about your experience with different social media "
        "platforms and their unique audiences. "
        "Be ready to discuss how you measure campaign success and adjust "
        "strategies based on performance data. "
        "Knowledge of current social media trends and emerging platforms "
        "demonstrates industry awareness."
    ),
}

# Show the request as indented JSON before it goes through input processing.
print(chat_input.model_dump_json(indent=2))

In [None]:
# Run request through input processing.
# Work on a deep copy so that `chat_input` itself is not modified, and set the
# copy's `model` field to the intrinsic's name in place of the base model name.
intrinsics_input = chat_input.model_copy(deep=True)
intrinsics_input.model = intrinsic_name

# The entries of `intrinsics_kwargs` (here, the document text) are passed to
# the rewriter as keyword arguments alongside the request.
intrinsics_request = intrinsics_rewriter.transform(
    intrinsics_input, **intrinsics_kwargs
)
# Show the rewritten request as indented JSON.
print(intrinsics_request.model_dump_json(indent=2))

## Run inference

In [None]:
# Create a client for the OpenAI-compatible inference server, using the URL
# and API key defined in the constants cell.
client = openai.OpenAI(
    api_key=openai_api_key,
    base_url=openai_base_url,
)

In [None]:
# Pass our rewritten request directly to `chat.completions.create()`:
# the request is dumped with `model_dump()` and unpacked as keyword arguments.
intrinsics_completion = client.chat.completions.create(
    **intrinsics_request.model_dump()
)

# Show the content of the last message in the rewritten request, followed by
# the content of the first choice in the completion, before any
# post-processing.
print(intrinsics_request.messages[-1].content)
print(intrinsics_completion.choices[0].message.content)

## Post-process inference results

In [None]:
# Run the completion through output processing. The rewritten request that
# produced it is passed along as the second argument.
processed_chat_completion = intrinsincs_result_processor.transform(
    intrinsics_completion, intrinsics_request
)

# Display only the first choice of the processed completion, as indented JSON.
print("After post-processing, first completion is:")
print(processed_chat_completion.choices[0].model_dump_json(indent=2))

## Partially Relevant Context Relevance Check