In [6]:
from vertexai import generative_models
from vertexai.generative_models import GenerativeModel
from google.cloud import aiplatform

In [11]:
# Step 1: Initialize the Vertex AI project and region.
PROJECT_ID = "vertex-ai-test-465220"
LOCATION = "us-central1"  # a region where Gemini is supported
MODEL_NAME = "gemini-2.5-pro"

aiplatform.init(project=PROJECT_ID, location=LOCATION)

# Step 2: Create the Gemini model handle (no tools are configured here).
generation_model = GenerativeModel(model_name=MODEL_NAME)

In [18]:
# ========== 1. Prepare the questions and their reference answers ==========
# Each entry keeps a question next to its ground-truth answer so the two
# lists built below can never drift out of alignment.
qa_pairs = [
    (
        "Who developed the Gemini model?",
        "Gemini was developed by Google DeepMind.",
    ),
    (
        "What is the capital of Canada?",
        "Ottawa is the capital of Canada.",
    ),
    (
        "Does Gemini support multi-modal input?",
        "Yes, Gemini supports multi-modal input including text and images.",
    ),
    (
        "What is the largest animal on Earth?",
        "The blue whale is the largest animal on Earth.",
    ),
    (
        "Is Saturn the closest planet to the Sun?",
        "No, Mercury is the closest planet to the Sun.",
    ),
]

questions = [question for question, _ in qa_pairs]
ground_truths = [answer for _, answer in qa_pairs]



In [19]:
# ========== 2. Generate an answer for every question ==========
# Instruction appended to each question to keep the replies short.
answer_style_suffix = " Please answer in one concise sentence."

generated_answers = []
for question in questions:
    reply = generation_model.generate_content(question + answer_style_suffix)
    # Store only the trimmed text of the reply.
    generated_answers.append(reply.text.strip())

In [22]:
# Display the collected answers for manual inspection.
generated_answers

['The Gemini model was developed by Google DeepMind in collaboration with other teams across Google.',
 'The capital of Canada is Ottawa.',
 'Yes, Gemini is a natively multimodal model, capable of understanding and combining different types of information like text, images, audio, and video.',
 'The largest animal on Earth is the blue whale.',
 'No, Mercury is the closest planet to the Sun.']

In [20]:
# ========== 3. Use the model to judge whether an answer hallucinates ==========
def check_consistency(generated: str, ground_truth: str, model=None) -> bool:
    """
    Ask a Gemini model whether a generated answer agrees with the ground truth.

    Args:
        generated: The answer produced by the model under evaluation.
        ground_truth: The reference answer to compare against.
        model: Optional judge object exposing ``generate_content(prompt)`` whose
            result has a ``.text`` attribute. When omitted, the notebook-level
            ``generation_model`` is used.

    Returns:
        True when the judge's verdict is "No" (no hallucination or
        contradiction). False when the verdict is "Yes", or when no yes/no
        verdict can be found in the reply.
    """
    prompt = f"""You are a factual correctness evaluator.

Compare the following generated answer with the ground truth.

Generated Answer:
{generated}

Ground Truth:
{ground_truth}

Does the generated answer contradict or hallucinate compared to the ground truth? 
Answer "Yes" if it contains hallucination or contradiction, otherwise answer "No".

Your answer (Yes/No):"""

    judge = generation_model if model is None else model
    judge_response = judge.generate_content(prompt)
    reply = judge_response.text.strip().lower()

    # A plain substring test ("no" in reply) also matches "not", "unknown",
    # "none", ... so a reply like "Yes, it does not match" would be read as
    # consistent. Split on every non-letter instead (this also strips
    # punctuation and markdown such as "**No**.") and take the first
    # whole-word yes/no as the verdict.
    words = "".join(ch if ch.isalpha() else " " for ch in reply).split()
    verdict = next((word for word in words if word in ("yes", "no")), None)
    return verdict == "no"

In [24]:
# ========== 4. Run the judge on every answer and collect hallucinations ==========
hallucinated = []

for idx, question in enumerate(questions):
    answer = generated_answers[idx]
    reference = ground_truths[idx]
    if check_consistency(answer, reference):
        print(f"\n✅ Consistent answer for Q{idx+1}: {question}")
        continue
    # Record the 0-based index of each answer the judge flagged.
    hallucinated.append(idx)
    print(f"\n❌ Hallucination in Q{idx+1}: {question}")
    print(f"Generated: {answer}")
    print(f"Ground Truth: {reference}")

hallucinated


✅ Consistent answer for Q1: Who developed the Gemini model?

✅ Consistent answer for Q2: What is the capital of Canada?

❌ Hallucination in Q3: Does Gemini support multi-modal input?
Generated: Yes, Gemini is a natively multimodal model, capable of understanding and combining different types of information like text, images, audio, and video.
Ground Truth: Yes, Gemini supports multi-modal input including text and images.

✅ Consistent answer for Q4: What is the largest animal on Earth?

✅ Consistent answer for Q5: Is Saturn the closest planet to the Sun?


[2]

In [25]:
# ========== 5. Report the hallucination rate ==========
num_hallucinated = len(hallucinated)
num_questions = len(questions)
# Fraction of questions whose answer the judge flagged.
hallucination_rate = num_hallucinated / num_questions
print(f"\n📊 Hallucination Rate: {hallucination_rate:.2%} ({num_hallucinated}/{num_questions})")


📊 Hallucination Rate: 20.00% (1/5)
