In [1]:
# GraphUQ Demo - Bipartite Graph-based Uncertainty Quantification
# This notebook demonstrates the GraphUQScorer for claim-level uncertainty quantification

from uqlm.longform.black_box.graphuq import GraphUQScorer
from langchain_google_vertexai import ChatVertexAI
import logging

# Logging setup: the root logger stays at WARNING, while the GraphUQ module
# logger is lowered to DEBUG so its progress messages appear in cell output.
logging.basicConfig(format="%(name)s - %(levelname)s - %(message)s", level=logging.WARNING)
graphuq_logger = logging.getLogger("uqlm.longform.black_box.graphuq")
graphuq_logger.setLevel(logging.DEBUG)

In [2]:
# Build the two Vertex AI chat models and hand them to the GraphUQ scorer.
# Both models share the same model name and temperature; the NLI model is
# additionally created with logprobs=True.
VERTEX_LLM_KWARGS = {"model": "gemini-2.5-flash", "temperature": 0}

judge_llm = ChatVertexAI(**VERTEX_LLM_KWARGS)
nli_llm = ChatVertexAI(**VERTEX_LLM_KWARGS, logprobs=True)

graphuq_scorer = GraphUQScorer(nli_llm=nli_llm, judge_llm=judge_llm)

uqlm.longform.black_box.graphuq - INFO - Initialized GraphUQScorer


In [None]:
# Define test data (2 queries)
# Each query contributes three parallel pieces: its list of responses, the
# claims of the original response, and one claim list per sampled response.

# --- Query 1: colour statements ---
color_responses = [
    "The sky is blue. The grass is green.",
    "The sky is blue. The grass is red.",
    "The sky is blue.",
    "The grass is red. The ocean is pink.",
]
color_original_claims = ["The sky is blue.", "The grass is green."]
# NOTE(review): these claim lists appear in a different order than the
# corresponding responses above — confirm whether the scorer relies on
# index alignment between the two.
color_sampled_claims = [
    ["The ocean is pink.", "The grass is red."],
    ["The sky is blue.", "The grass is red."],
    ["The sky is blue."],
]

# --- Query 2: sports statements ---
sports_responses = [
    "She likes to play basketball and soccer.",
    "She likes to play basketball and tennis.",
    "She likes to play basketball and soccer.",
]
sports_original_claims = ["She likes to play basketball.", "She likes to play soccer."]
sports_sampled_claims = [
    ["She likes to play basketball.", "She likes to play tennis."],
    ["She likes to play basketball.", "She likes to play soccer."],
]

# Assemble the per-query pieces into the batch-level inputs.
responses = [color_responses, sports_responses]
original_claim_set = [color_original_claims, sports_original_claims]
sampled_claim_sets = [color_sampled_claims, sports_sampled_claims]

# Lower-cased claim text mapped to a list of ten 0/1 values.
# NOTE(review): only one dict is given although there are two queries, and
# this variable is not passed to any call visible in this part of the
# notebook — confirm whether it is still needed.
entailment_score_sets = [
    {
        "the sky is blue.": [1, 1, 1, 1, 1, 0, 1, 1, 1, 1],
        "the ocean is pink.": [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
    }
]

In [4]:
# Run GraphUQ evaluation with probability-weighted edges (0.0 to 1.0)

result_prob = await graphuq_scorer.a_evaluate(
    responses,
    original_claim_set,
    sampled_claim_sets,
    use_entailment_prob=True,  # Use entailment probabilities as edge weights; requires nli model to return probs
    show_graph=True,
    # save_graph_path="graphuq_probability.html"
)

print("Probability mode results:")
for res in result_prob:
    print(res.model_dump_json(indent=4))

uqlm.longform.black_box.graphuq - DEBUG - Starting evaluation for 2 response sets.


TypeError: 'NoneType' object is not iterable