Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions docs/examples/intrinsics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ Detects when model outputs contain hallucinated information.
### query_rewrite.py
Rewrites queries for better retrieval or understanding.

### uncertainty.py
Estimates the model's certainty about answering a question.

### requirement_check.py
Detects whether text adheres to provided requirements.

## Concepts Demonstrated

- **Intrinsic Functions**: Specialized model capabilities beyond text generation
Expand All @@ -48,7 +54,7 @@ from mellea.stdlib.components import Intrinsic
import mellea.stdlib.functional as mfuncs

# Create backend and adapter
backend = LocalHFBackend(model_id="ibm-granite/granite-3.3-8b-instruct")
backend = LocalHFBackend(model_id="ibm-granite/granite-4.0-micro")
adapter = IntrinsicAdapter("requirement_check",
base_model_name=backend.base_model_name)
backend.add_adapter(adapter)
Expand All @@ -71,9 +77,10 @@ out, new_ctx = mfuncs.act(
- **context_relevance**: Assess context-query relevance
- **hallucination_detection**: Detect hallucinated content
- **query_rewrite**: Improve query formulation
- **uncertainty**: Estimate certainty about answering a question

## Related Documentation

- See `mellea/stdlib/components/intrinsic/` for intrinsic implementations
- See `mellea/backends/adapters/` for adapter system
- See `docs/dev/intrinsics_and_adapters.md` for architecture details
- See `docs/dev/intrinsics_and_adapters.md` for architecture details
51 changes: 51 additions & 0 deletions docs/examples/intrinsics/requirement_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# pytest: huggingface, requires_heavy_ram, llm

"""Example usage of the requirement check intrinsic.

Scores whether an assistant response satisfies a stated requirement, using a
chat context that holds one user turn followed by one assistant turn.

To run this script from the root of the Mellea source tree, use the command:
```
uv run python docs/examples/intrinsics/requirement_check.py
```
"""

from mellea.backends.huggingface import LocalHFBackend
from mellea.stdlib.components import Message
from mellea.stdlib.components.intrinsic import core
from mellea.stdlib.context import ChatContext

# Conversation under evaluation: a short request and the drafted reply.
user_text = "Invite for an IBM office party."
response_text = """
Dear Team,

To celebrate our recent successes and take a well-deserved moment to recharge,
you are cordially invited to a team social. Please join us for an evening of
live music, appetizers, and drinks as we recognize our collective wins.

Event Details
* **Date:** Saturday, April 25, 2026
* **Time:** 6:00 PM
* **Location:** Ryan’s Bar, Chelsea, NY
* **Highlights:** Live entertainment and refreshments

RSVP
To ensure we have an accurate headcount for catering, please confirm your
attendance by **Friday, April 10, 2026**.

We look forward to seeing everyone there and celebrating our hard work together.

**Best regards,**
[Your Name/Management Team]
"""

# The requirement the assistant's reply is checked against.
requirement = "Use a professional tone."

backend = LocalHFBackend(model_id="ibm-granite/granite-4.0-micro")

# Build the two-turn chat context incrementally.
context = ChatContext()
context = context.add(Message("user", user_text))
context = context.add(Message("assistant", response_text))

# The score is a float in [0.0, 1.0]; higher means the requirement is met.
result = core.requirement_check(context, backend, requirement)
print(f"Requirements Satisfied: {result}")
27 changes: 27 additions & 0 deletions docs/examples/intrinsics/uncertainty.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# pytest: huggingface, requires_heavy_ram, llm

"""Example usage of the uncertainty/certainty intrinsic.

Evaluates how certain the model is about its response to a user question.
The context should contain a user question followed by an assistant answer.

To run this script from the root of the Mellea source tree, use the command:
```
uv run python docs/examples/intrinsics/uncertainty.py
```
"""

from mellea.backends.huggingface import LocalHFBackend
from mellea.stdlib.components import Message
from mellea.stdlib.components.intrinsic import core
from mellea.stdlib.context import ChatContext

# A short question/answer exchange for the model to judge its certainty on.
question = Message("user", "What is the square root of 4?")
answer = Message("assistant", "The square root of 4 is 2.")

backend = LocalHFBackend(model_id="ibm-granite/granite-4.0-micro")
context = ChatContext().add(question).add(answer)

# Higher scores indicate greater confidence in the assistant's answer.
result = core.check_certainty(context, backend)
print(f"Certainty score: {result}")
7 changes: 5 additions & 2 deletions mellea/backends/adapters/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,19 @@ class IntriniscsCatalogEntry(pydantic.BaseModel):
)


# Mellea will update these repository links as new repositories come online.
# The original repos use an older layout that will be changed.
_RAG_REPO = "ibm-granite/granite-lib-rag-r1.0"
_CORE_REPO = "ibm-granite/rag-intrinsics-lib"
_CORE_R1_REPO = "ibm-granite/granitelib-core-r1.0"


_INTRINSICS_CATALOG_ENTRIES = [
############################################
# Core Intrinsics
############################################
IntriniscsCatalogEntry(name="requirement_check", repo_id=_CORE_REPO),
IntriniscsCatalogEntry(name="uncertainty", repo_id=_CORE_REPO),
IntriniscsCatalogEntry(name="requirement-check", repo_id=_CORE_R1_REPO),
IntriniscsCatalogEntry(name="uncertainty", repo_id=_CORE_R1_REPO),
############################################
# RAG Intrinsics
############################################
Expand Down
57 changes: 57 additions & 0 deletions mellea/stdlib/components/intrinsic/_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Shared utilities for intrinsic convenience wrappers."""

import json

from ....backends import ModelOption
from ....backends.adapters import AdapterMixin, AdapterType, IntrinsicAdapter
from ....stdlib import functional as mfuncs
from ...context import ChatContext
from .intrinsic import Intrinsic


def call_intrinsic(
    intrinsic_name: str,
    context: ChatContext,
    backend: AdapterMixin,
    /,
    kwargs: dict | None = None,
):
    """Shared code for invoking intrinsics.

    :param intrinsic_name: Name of the intrinsic (and of its LoRA adapter).
    :param context: Chat context the intrinsic operates over.
    :param backend: Backend instance that supports LoRA/aLoRA adapters.
    :param kwargs: Optional extra arguments forwarded to the intrinsic.

    :returns: Result of the call in JSON format.
    """
    # The adapter must be registered with the backend before it can be
    # invoked. Build the Adapter object up front so its qualified name can be
    # checked against the backend's registry.
    base_model_name = backend.base_model_name
    if base_model_name is None:
        raise ValueError("Backend has no model ID")
    adapter = IntrinsicAdapter(
        intrinsic_name, adapter_type=AdapterType.LORA, base_model_name=base_model_name
    )
    if adapter.qualified_name not in backend.list_adapters():
        backend.add_adapter(adapter)

    # Build the AST node for the requested action and execute it.
    action = Intrinsic(intrinsic_name, intrinsic_kwargs=kwargs)
    output_thunk, _ = mfuncs.act(
        action,
        context,
        backend,
        # Deterministic decoding for classification-style intrinsics.
        model_options={ModelOption.TEMPERATURE: 0.0},
        # No rejection sampling, please
        strategy=None,
    )

    # act() can return a future. Don't know how to handle one from non-async code.
    assert output_thunk.is_computed()

    # An Intrinsic action yields its result serialized as a JSON string;
    # decode it before handing it back to the caller.
    raw_output = output_thunk.value
    if raw_output is None:
        raise ValueError("Model output is None.")
    return json.loads(raw_output)
52 changes: 52 additions & 0 deletions mellea/stdlib/components/intrinsic/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""Intrinsic functions for core model capabilities."""

from ....backends.adapters import AdapterMixin
from ...components import Message
from ...context import ChatContext
from ._util import call_intrinsic


def check_certainty(context: ChatContext, backend: AdapterMixin) -> float:
    """Estimate the model's certainty about its last response.

    Intrinsic function that evaluates how certain the model is about the
    assistant's response to a user's question. The context is expected to
    end with a user question followed by an assistant answer.

    :param context: Chat context containing user question and assistant answer.
    :param backend: Backend instance that supports LoRA/aLoRA adapters.

    :return: Certainty score as a float (higher = more certain).
    """
    payload = call_intrinsic("uncertainty", context, backend)
    return payload["certainty"]


# Evaluation prompt appended to the chat context by ``requirement_check``.
# It instructs the adapter to emit a binary yes/no JSON verdict; the wording
# follows the format expected by the requirement checker model (see the
# ``requirement_check`` docstring). This text is sent to the model verbatim —
# do not reword it casually.
_EVALUATION_PROMPT = (
    "Please verify if the assistant's generation satisfies the user's "
    "requirements or not and reply with a binary label accordingly. "
    'Respond with a json {"score": "yes"} if the constraints are '
    'satisfied or respond with {"score": "no"} if the constraints are not '
    "satisfied."
)


def requirement_check(
    context: ChatContext, backend: AdapterMixin, requirement: str
) -> float:
    """Detect whether text adheres to provided requirements.

    Intrinsic function that determines if the text satisfies the given
    requirements. An evaluation prompt is appended to the context following
    the format specified by the Granite Guardian requirement checker model card.

    :param context: Chat context containing user question and assistant answer.
    :param backend: Backend instance that supports LoRA/aLoRA adapters.
    :param requirement: Set of requirements to satisfy.

    :return: Score as a float between 0.0 and 1.0 (higher = more likely satisfied).
    """
    prompt = f"<requirements>: {requirement}\n{_EVALUATION_PROMPT}"
    extended_context = context.add(Message("user", prompt))
    payload = call_intrinsic("requirement-check", extended_context, backend)
    return payload["requirement_check"]["score"]
71 changes: 10 additions & 61 deletions mellea/stdlib/components/intrinsic/rag.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
"""Intrinsic functions related to retrieval-augmented generation."""

import collections.abc
import json

from ....backends import ModelOption
from ....backends.adapters import AdapterMixin, AdapterType, IntrinsicAdapter
from ....stdlib import functional as mfuncs
from ....backends.adapters import AdapterMixin
from ...components import Document
from ...context import ChatContext
from ..chat import Message
from .intrinsic import Intrinsic
from ._util import call_intrinsic

_ANSWER_RELEVANCE_CORRECTION_METHODS = {
"Excessive unnecessary information": "removing the excessive information from the "
Expand All @@ -30,54 +27,6 @@
so it's important to stick to in-domain prompts."""


def _call_intrinsic(
intrinsic_name: str,
context: ChatContext,
backend: AdapterMixin,
/,
kwargs: dict | None = None,
):
"""Shared code for invoking intrinsics.

:returns: Result of the call in JSON format.
"""
# Adapter needs to be present in the backend before it can be invoked.
# We must create the Adapter object in order to determine whether we need to create
# the Adapter object.
base_model_name = backend.base_model_name
if base_model_name is None:
raise ValueError("Backend has no model ID")
adapter = IntrinsicAdapter(
intrinsic_name, adapter_type=AdapterType.LORA, base_model_name=base_model_name
)
if adapter.qualified_name not in backend.list_adapters():
backend.add_adapter(adapter)

# Create the AST node for the action we wish to perform.
intrinsic = Intrinsic(intrinsic_name, intrinsic_kwargs=kwargs)

# Execute the AST node.
model_output_thunk, _ = mfuncs.act(
intrinsic,
context,
backend,
model_options={ModelOption.TEMPERATURE: 0.0},
# No rejection sampling, please
strategy=None,
)

# act() can return a future. Don't know how to handle one from non-async code.
assert model_output_thunk.is_computed()

# Output of an Intrinsic action is the string representation of the output of the
# intrinsic. Parse the string.
result_str = model_output_thunk.value
if result_str is None:
raise ValueError("Model output is None.")
result_json = json.loads(result_str)
return result_json


def check_answerability(
question: str,
documents: collections.abc.Iterable[Document],
Expand All @@ -101,7 +50,7 @@ def check_answerability(
Returns:
Answerability score as a floating-point value from 0 to 1.
"""
result_json = _call_intrinsic(
result_json = call_intrinsic(
"answerability",
context.add(Message("user", question, documents=list(documents))),
backend,
Expand All @@ -126,7 +75,7 @@ def rewrite_question(
Returns:
Rewritten version of ``question``.
"""
result_json = _call_intrinsic(
result_json = call_intrinsic(
"query_rewrite", context.add(Message("user", question)), backend
)
return result_json["rewritten_question"]
Expand Down Expand Up @@ -155,7 +104,7 @@ def clarify_query(
Clarification question string (e.g., "Do you mean A or B?"), or
the string "CLEAR" if no clarification is needed.
"""
result_json = _call_intrinsic(
result_json = call_intrinsic(
"query_clarification",
context.add(Message("user", question, documents=list(documents))),
backend,
Expand Down Expand Up @@ -190,7 +139,7 @@ def find_citations(
``citation_end``, ``citation_text``. Begin and end offsets are character
offsets into their respective UTF-8 strings.
"""
result_json = _call_intrinsic(
result_json = call_intrinsic(
"citations",
context.add(Message("assistant", response, documents=list(documents))),
backend,
Expand All @@ -217,7 +166,7 @@ def check_context_relevance(
Returns:
Context relevance score as a floating-point value from 0 to 1.
"""
result_json = _call_intrinsic(
result_json = call_intrinsic(
"context_relevance",
context.add(Message("user", question)),
backend,
Expand Down Expand Up @@ -252,7 +201,7 @@ def flag_hallucinated_content(
``response_end``, ``response_text``, ``faithfulness_likelihood``,
``explanation``.
"""
result_json = _call_intrinsic(
result_json = call_intrinsic(
"hallucination_detection",
context.add(Message("assistant", response, documents=list(documents))),
backend,
Expand Down Expand Up @@ -289,7 +238,7 @@ def rewrite_answer_for_relevance(
# * answer_relevance_analysis
# * answer_relevance_category
# * answer_relevance_likelihood
result_json = _call_intrinsic(
result_json = call_intrinsic(
"answer_relevance_classifier",
context.add(Message("assistant", response, documents=list(documents))),
backend,
Expand All @@ -305,7 +254,7 @@ def rewrite_answer_for_relevance(
result_json["answer_relevance_category"]
]

result_json = _call_intrinsic(
result_json = call_intrinsic(
"answer_relevance_rewriter",
context.add(Message("assistant", response, documents=list(documents))),
backend,
Expand Down
Loading
Loading