# Demo - Query Rewrite

This notebook demonstrates some examples of using the Granite query rewrite intrinsic. It uses the shared IO processing code for intrinsics when performing model inference with an OpenAI-compatible backend such as vLLM.

In [None]:
# Imports go in this cell
import openai
import granite_common
from granite_common import ChatCompletion
from granite_common.intrinsics.constants import BASE_MODEL_TO_CANONICAL_NAME

## Constants

In [None]:
# Where the OpenAI-compatible inference server lives and the key used to talk
# to it. Adjust both values to match your own deployment.
# NOTE: if your server expects a real secret, load it from the environment
# rather than committing it to the notebook.
openai_base_url = "http://localhost:55555/v1"
openai_api_key = "rag_intrinsics_1234"

# Which intrinsic this notebook demonstrates, and the base model whose name is
# used to look up the matching IO configuration.
base_model_name = "ibm-granite/granite-3.3-8b-instruct"
intrinsic_name = "query_rewrite"

## Instantiate IO processing classes

In [None]:
# Translate the base model name into the canonical name that the IO
# configuration lookup expects.
canonical_model_name = BASE_MODEL_TO_CANONICAL_NAME[base_model_name]

# Obtain the IO configuration (YAML) for this intrinsic / base model pair.
io_yaml_file = granite_common.intrinsics.util.obtain_io_yaml(
    intrinsic_name,
    canonical_model_name,
)

print(
    f"Instantiating input and output processing from configuration file:\n"
    f"{io_yaml_file}"
)

# Both processors are built from the same configuration file: the rewriter
# handles requests going in, the result processor handles completions coming out.
intrinsincs_result_processor = granite_common.IntrinsicsResultProcessor(
    config_file=io_yaml_file
)
intrinsics_rewriter = granite_common.IntrinsicsRewriter(config_file=io_yaml_file)

## Perform input processing

In [None]:
# Create an example multi-turn chat completion request. No documents are
# attached; the request consists only of the conversation below.
# The final user message refers to one of the pets with a pronoun ("he") whose
# referent only appears in earlier turns of the conversation.
chat_input = ChatCompletion.model_validate(
    {
        "messages": [
            {"role": "assistant", "content": "Welcome to pet questions!"},
            {
                "role": "user",
                "content": "I have two pets, a dog named Rex and a cat named Lucy.",
            },
            {
                "role": "assistant",
                "content": "Great, what would you like to share about them?",
            },
            {
                "role": "user",
                # The cat is called "Lucy" everywhere else in this conversation,
                # so the same name is used here to keep the example consistent.
                "content": "Rex spends a lot of time in the backyard and outdoors, "
                "and Lucy is always inside.",
            },
            {
                "role": "assistant",
                "content": "Sounds good! Rex must love exploring outside, while Lucy "
                "probably enjoys her cozy indoor life.",
            },
            {
                "role": "user",
                "content": "But is he more likely to get fleas because of that?",
            },
        ],
        "model": base_model_name,
        "temperature": 0.0,
    }
)

# Show the validated request as indented JSON.
print(chat_input.model_dump_json(indent=2))

In [None]:
# Run request through input processing.
# Work on a deep copy so that `chat_input` itself is left unmodified.
intrinsics_input = chat_input.model_copy(deep=True)
# In the copy, replace the base model name with the name of the intrinsic.
intrinsics_input.model = intrinsic_name

# Apply the intrinsic's input processing, then show the resulting request as
# indented JSON.
intrinsics_request = intrinsics_rewriter.transform(intrinsics_input)
print(intrinsics_request.model_dump_json(indent=2))

## Run inference

In [None]:
# Build the client for the inference server from the endpoint settings defined
# in the constants cell.
client = openai.OpenAI(
    api_key=openai_api_key,
    base_url=openai_base_url,
)

In [None]:
# Pass our rewritten request directly to `chat.completions.create()`.
# `model_dump()` turns the request object into the keyword arguments of the call.
intrinsics_completion = client.chat.completions.create(
    **intrinsics_request.model_dump()
)

# Show the content of the last message in the rewritten request, followed by
# the raw (not yet post-processed) content of the first completion choice.
print(intrinsics_request.messages[-1].content)
print(intrinsics_completion.choices[0].message.content)

## Post-process inference results

In [None]:
# Run the raw completion through the intrinsic's output processing. The request
# that produced the completion is passed along as the second argument.
processed_chat_completion = intrinsincs_result_processor.transform(
    intrinsics_completion, intrinsics_request
)

# Show the first choice of the post-processed completion as indented JSON.
print(processed_chat_completion.choices[0].model_dump_json(indent=2))