In [1]:
%pip install --upgrade --quiet google-genai nest-asyncio==1.5.9 pytest

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.7/46.7 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m257.3/257.3 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m373.7/373.7 kB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import json
from inspect import cleandoc
from typing import Dict, Iterable, List, Mapping, Optional, Sequence

import pandas as pd
from IPython.display import Markdown

try:
    import vertexai
    from vertexai.generative_models import GenerativeModel, GenerationConfig
    from vertexai.evaluation import (
        MetricPromptTemplateExamples,
        EvalTask,
    )
except ImportError:  # pragma: no cover - handled in test harness
    vertexai = None  # type: ignore[assignment]
    GenerativeModel = None  # type: ignore[assignment]
    GenerationConfig = None  # type: ignore[assignment]
    MetricPromptTemplateExamples = None  # type: ignore[assignment]
    EvalTask = None  # type: ignore[assignment]

pd.set_option("display.max_colwidth", None)

  from google.cloud.aiplatform.utils import gcs_utils


In [None]:
CATEGORIES: Sequence[str] = (
    "Employment",
    "General Information",
    "Emergency Services",
    "Tax Related",
)


def _default_model(model_name: str = "gemini-2.5-flash-lite"):
    """Instantiate a default Gemini model with deterministic configuration."""
    if GenerativeModel is None or GenerationConfig is None:
        raise RuntimeError(
            "Google Vertex AI SDK is unavailable. Provide a configured `model` instance "
            "when calling these helper functions, or install `google-cloud-aiplatform`."
        )
    return GenerativeModel(model_name, generation_config=GenerationConfig(temperature=0))


def classify_question(
    question: str,
    model: Optional[object] = None,
) -> str:
    """Classify a user question into one of the predefined civic categories."""
    if not question or not question.strip():
        raise ValueError("Question must be a non-empty string.")

    prompt = f"""
        Classify the following user question into exactly one category.
        Valid categories are: Employment, General Information, Emergency Services,
        or Tax Related. Respond with a JSON object containing a single key
        "category" whose value is one of the valid categories.\n\n

        Question: {question.strip()}
    """

    model = model or _default_model()

    classification_response_schema = {
        "type": "OBJECT",
        "properties": {
            "category": {
                "type": "STRING"
            },
        }
    }

    response = model.generate_content(
        prompt,
        generation_config=GenerationConfig(
            response_mime_type="application/json",
            response_schema = classification_response_schema,
        ),
    )
    text = getattr(response, "text", "").strip()

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError as exc:
        raise ValueError(f"Unable to parse classification response: {text}") from exc

    category = parsed.get("category")
    if category not in CATEGORIES:
        raise ValueError(f"Model returned unsupported category: {category!r}")
    return category


def generate_announcement_posts(
    topic: str,
    channels: Iterable[str],
    tone: str = "reassuring",
    key_details: Optional[Mapping[str, str]] = None,
    model: Optional[object] = None,
) -> Mapping[str, str]:
    """Generate social media posts tailored for specified communication channels."""
    channels_list: List[str] = [ch.strip() for ch in channels if ch and ch.strip()]
    if not channels_list:
        raise ValueError("At least one channel must be provided.")
    if not topic or not topic.strip():
        raise ValueError("Topic must be a non-empty string.")

    details_lines = ""
    if key_details:
        details_lines = "\n".join(f"- {label}: {value}" for label, value in key_details.items())

    prompt = (
        "You are a communications specialist for a municipal government. "
        "Create concise social media posts for each specified channel. "
        "Follow official tone guidelines: keep the language clear, accessible, "
        "and aligned with public service communication. "
        "Return your answer as JSON with each channel name as a key and the "
        "post text as the value.\n\n"
        f"Topic: {topic.strip()}\n"
        f"Tone: {tone.strip()}\n"
        f"Channels: {', '.join(channels_list)}\n"
    )

    if details_lines:
        prompt += f"Key details:\n{details_lines}\n"

    channel_obj = {}
    for chan in channels_list:
      channel_obj[chan] = {"type": "STRING"}

    announcement_response_schema = {
        "type": "OBJECT",
        "properties": channel_obj
    }

    model = model or _default_model()
    response = model.generate_content(
        prompt,
        generation_config=GenerationConfig(
            response_mime_type="application/json",
            response_schema = announcement_response_schema,
        ),
    )

    text = getattr(response, "text", "").strip()

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError as exc:
        raise ValueError(f"Unable to parse announcement response: {text}") from exc

    missing = [ch for ch in channels_list if ch not in parsed]
    if missing:
        raise ValueError(f"Model response missing channels: {missing}")

    return {ch: parsed[ch] for ch in channels_list}



In [4]:
class DummyModel:
    """Lightweight stand-in for Gemini responses used in notebook assertions."""

    def __init__(self, response_text: str):
        self.response_text = response_text
        self.last_prompt = None

    def generate_content(self, prompt: str):
        self.last_prompt = prompt
        return type("Response", (), {"text": self.response_text})


def run_notebook_sanity_checks() -> str:
    """Execute minimal assertion-based checks for helper functions."""
    results = []

    # classify_question happy path
    classifier_model = DummyModel(json.dumps({"category": "Employment"}))
    category = classify_question("How do I apply for a government job?", model=classifier_model)
    assert category == "Employment"
    assert "How do I apply" in classifier_model.last_prompt
    results.append("classifier_ok")

    # classify_question invalid JSON should raise
    try:
        classify_question("Is there a tax form due?", model=DummyModel("not-json"))
    except ValueError:
        results.append("classifier_invalid_json_ok")
    else:  # pragma: no cover - defensive fallback
        raise AssertionError("classify_question did not raise on invalid JSON")

    # generate_announcement_posts happy path
    channels = ["Twitter", "Facebook"]
    payload = {ch: f"{ch} post content" for ch in channels}
    generator_model = DummyModel(json.dumps(payload))
    generated = generate_announcement_posts(
        topic="Winter storm warning",
        channels=channels,
        tone="urgent, calm",
        key_details={"Shelter Hotline": "555-0100"},
        model=generator_model,
    )
    assert generated == payload
    for ch in channels:
        assert ch in generator_model.last_prompt
    results.append("generator_ok")

    # generate_announcement_posts missing channel should raise
    try:
        generate_announcement_posts(
            topic="Heat advisory",
            channels=["Instagram", "TikTok"],
            model=DummyModel(json.dumps({"Instagram": "Stay safe"})),
        )
    except ValueError:
        results.append("generator_missing_channel_ok")
    else:  # pragma: no cover - defensive fallback
        raise AssertionError("generate_announcement_posts did not raise when channels missing")

    return "Notebook sanity checks passed: " + ", ".join(results)


run_notebook_sanity_checks()


'Notebook sanity checks passed: classifier_ok, classifier_invalid_json_ok, generator_ok, generator_missing_channel_ok'

In [5]:
if vertexai is not None:
    vertexai.init(location="us-central1")
else:  # pragma: no cover - environment without Vertex AI SDK
    print("vertexai SDK not installed; skipping initialization.")

In [6]:
sample_questions = [
    "How do I apply for a city maintenance job?",
    "Is the 311 hotline available for downed power lines?",
    "What documents do I need to submit my quarterly tax payment?",
    "Where can I find the latest community event calendar?",
]

coastal_flood_context = cleandoc(
    """
    Event: Coastal Flood Warning
    Location: Riverfront District and South Harbor
    Effective: April 18, 2025, 6:00 PM – April 20, 2025, 6:00 AM
    Risk Level: Moderate to high flooding along low-lying streets
    Required Actions:
      - Move vehicles to higher ground
      - Avoid driving through standing water
      - Monitor official channels for evacuation updates
    Resources:
      - Sandbag pickup at Public Works Yard (1220 Harbor Ave.)
      - Emergency shelter at Civic Center opens at 6:00 PM
      - Hotline: Dial 311 for non-emergencies; call 911 for life-threatening emergencies
    """
)

announcement_channels = ["Twitter", "Facebook", "SMS Alerts"]
announcement_details = {
    "Sandbag Pickup": "Public Works Yard, 7 AM – 7 PM",
    "Shelter": "Civic Center Gymnasium, opens 6 PM",
    "Hotline": "311",
}

In [9]:
classification_results = pd.DataFrame(
    {
        "question": sample_questions,
        "predicted_category": [classify_question(question) for question in sample_questions],
    }
)

classification_results
CATEGORIES

('Employment', 'General Information', 'Emergency Services', 'Tax Related')

In [12]:
baseline_posts = generate_announcement_posts(
    topic="Coastal flood warning for Riverfront District",
    channels=announcement_channels,
    tone="calm and informative",
    key_details=announcement_details,
)

baseline_posts



In [13]:
candidate_posts = generate_announcement_posts(
    topic="Coastal flood warning for Riverfront District",
    channels=announcement_channels,
    tone="urgent and action-oriented",
    key_details=announcement_details,
)

candidate_posts



In [14]:
def format_posts(posts: Dict[str, str]) -> str:
    return "\n\n".join(f"{channel}: {message}" for channel, message in posts.items())

formatted_baseline = format_posts(baseline_posts)
formatted_candidate = format_posts(candidate_posts)

Markdown(f"**Baseline messaging**\n\n{formatted_baseline}")

**Baseline messaging**

Twitter: Coastal flood warning for Riverfront District. Sandbags: Public Works Yard, 7 AM-7 PM. Shelter: Civic Center Gym, opens 6 PM. Questions? Call 311.

Facebook: The Riverfront District is under a coastal flood warning. Sandbags are available at the Public Works Yard from 7 AM to 7 PM. A shelter will open at the Civic Center Gymnasium at 6 PM. For more information, please call 311.

SMS Alerts: Riverfront Flood Warning. Sandbags: Public Works Yard. Shelter: Civic Center (opens 6 PM). Call 311 for info.

In [15]:
Markdown(f"**Candidate messaging**\n\n{formatted_candidate}")

**Candidate messaging**

Twitter: COASTAL FLOOD WARNING - Riverfront District! Sandbags: Public Works Yard (7 AM-7 PM). Shelter: Civic Center Gym opens 6 PM. Call 311 for info. Act now to stay safe!

Facebook: URGENT: Coastal Flood Warning issued for Riverfront District. Prepare for potential flooding. 

-Sandbag Pickup: Public Works Yard, open 7 AM – 7 PM
-Shelter: Civic Center Gymnasium, opens 6 PM
-For assistance and info, call 311.

Your safety is our priority. Please share this information.

SMS Alerts: Coastal Flood Warning - Riverfront. Sandbags: Public Works Yard. Shelter: Civic Center (6 PM). Call 311 for info.

In [16]:
evaluation_prompt = cleandoc(
    f"""
    <instructions>
    You are evaluating city government social media messaging. Choose the response that best informs residents about the event while maintaining clarity, accuracy, actionable guidance, and a calm civic tone.
    </instructions>
    <context>
    {coastal_flood_context}
    </context>
    """
)

evaluation_prompt



In [17]:
eval_records = {
    "prompt": evaluation_prompt,
    "baseline_model_response": formatted_baseline,
    "response": formatted_candidate,
}

eval_records



In [18]:
eval_dataset = pd.DataFrame(eval_records, index=[0])

eval_dataset

Unnamed: 0,prompt,baseline_model_response,response
0,"<instructions>\n You are evaluating city government social media messaging. Choose the response that best informs residents about the event while maintaining clarity, accuracy, actionable guidance, and a calm civic tone.\n </instructions>\n <context>\n Event: Coastal Flood Warning\nLocation: Riverfront District and South Harbor\nEffective: April 18, 2025, 6:00 PM – April 20, 2025, 6:00 AM\nRisk Level: Moderate to high flooding along low-lying streets\nRequired Actions:\n - Move vehicles to higher ground\n - Avoid driving through standing water\n - Monitor official channels for evacuation updates\nResources:\n - Sandbag pickup at Public Works Yard (1220 Harbor Ave.)\n - Emergency shelter at Civic Center opens at 6:00 PM\n - Hotline: Dial 311 for non-emergencies; call 911 for life-threatening emergencies\n </context>\n","Twitter: Coastal flood warning for Riverfront District. Sandbags: Public Works Yard, 7 AM-7 PM. Shelter: Civic Center Gym, opens 6 PM. Questions? Call 311.\n\nFacebook: The Riverfront District is under a coastal flood warning. Sandbags are available at the Public Works Yard from 7 AM to 7 PM. A shelter will open at the Civic Center Gymnasium at 6 PM. For more information, please call 311.\n\nSMS Alerts: Riverfront Flood Warning. Sandbags: Public Works Yard. Shelter: Civic Center (opens 6 PM). Call 311 for info.","Twitter: COASTAL FLOOD WARNING - Riverfront District! Sandbags: Public Works Yard (7 AM-7 PM). Shelter: Civic Center Gym opens 6 PM. Call 311 for info. Act now to stay safe!\n\nFacebook: URGENT: Coastal Flood Warning issued for Riverfront District. Prepare for potential flooding. \n\n-Sandbag Pickup: Public Works Yard, open 7 AM – 7 PM\n-Shelter: Civic Center Gymnasium, opens 6 PM\n-For assistance and info, call 311.\n\nYour safety is our priority. Please share this information.\n\nSMS Alerts: Coastal Flood Warning - Riverfront. Sandbags: Public Works Yard. Shelter: Civic Center (6 PM). Call 311 for info."


In [19]:
eval_task = EvalTask(
    dataset=eval_dataset,
    metrics=[MetricPromptTemplateExamples.Pairwise.QUESTION_ANSWERING_QUALITY],
    experiment="municipal-communications",
)

In [20]:
import datetime

run_ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
eval_result = eval_task.evaluate(
    experiment_run_name=f"gov-comms-{run_ts}"
)

INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 1 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 1/1 [00:26<00:00, 26.08s/it]
INFO:vertexai.evaluation._evaluation:All 1 metric requests are successfully computed.
INFO:vertexai.evaluation._evaluation:Evaluation Took:26.100216113999977 seconds


In [21]:
eval_result.summary_metrics

{'row_count': 1,
 'pairwise_question_answering_quality/candidate_model_win_rate': np.float64(0.0),
 'pairwise_question_answering_quality/baseline_model_win_rate': np.float64(1.0)}

In [22]:
eval_result.metrics_table

Unnamed: 0,prompt,baseline_model_response,response,pairwise_question_answering_quality/explanation,pairwise_question_answering_quality/pairwise_choice
0,"<instructions>\n You are evaluating city government social media messaging. Choose the response that best informs residents about the event while maintaining clarity, accuracy, actionable guidance, and a calm civic tone.\n </instructions>\n <context>\n Event: Coastal Flood Warning\nLocation: Riverfront District and South Harbor\nEffective: April 18, 2025, 6:00 PM – April 20, 2025, 6:00 AM\nRisk Level: Moderate to high flooding along low-lying streets\nRequired Actions:\n - Move vehicles to higher ground\n - Avoid driving through standing water\n - Monitor official channels for evacuation updates\nResources:\n - Sandbag pickup at Public Works Yard (1220 Harbor Ave.)\n - Emergency shelter at Civic Center opens at 6:00 PM\n - Hotline: Dial 311 for non-emergencies; call 911 for life-threatening emergencies\n </context>\n","Twitter: Coastal flood warning for Riverfront District. Sandbags: Public Works Yard, 7 AM-7 PM. Shelter: Civic Center Gym, opens 6 PM. Questions? Call 311.\n\nFacebook: The Riverfront District is under a coastal flood warning. Sandbags are available at the Public Works Yard from 7 AM to 7 PM. A shelter will open at the Civic Center Gymnasium at 6 PM. For more information, please call 311.\n\nSMS Alerts: Riverfront Flood Warning. Sandbags: Public Works Yard. Shelter: Civic Center (opens 6 PM). Call 311 for info.","Twitter: COASTAL FLOOD WARNING - Riverfront District! Sandbags: Public Works Yard (7 AM-7 PM). Shelter: Civic Center Gym opens 6 PM. Call 311 for info. Act now to stay safe!\n\nFacebook: URGENT: Coastal Flood Warning issued for Riverfront District. Prepare for potential flooding. \n\n-Sandbag Pickup: Public Works Yard, open 7 AM – 7 PM\n-Shelter: Civic Center Gymnasium, opens 6 PM\n-For assistance and info, call 311.\n\nYour safety is our priority. Please share this information.\n\nSMS Alerts: Coastal Flood Warning - Riverfront. Sandbags: Public Works Yard. Shelter: Civic Center (6 PM). Call 311 for info.","Both responses fail to adhere to the groundedness criterion by adding information not present in the context (sandbag pickup hours) and significantly lack completeness and actionable guidance by omitting key details (effective dates, South Harbor location, specific required actions like moving vehicles, avoiding standing water, and monitoring for evacuation). However, BASELINE response is slightly better as it maintains a consistently calmer civic tone across all platforms, whereas CANDIDATE response uses more urgent and less calm language such as 'COASTAL FLOOD WARNING!' and 'URGENT:'.",BASELINE


In [23]:
eval_result.metrics_table["pairwise_question_answering_quality/pairwise_choice"]

Unnamed: 0,pairwise_question_answering_quality/pairwise_choice
0,BASELINE


In [24]:
eval_result.metrics_table["pairwise_question_answering_quality/explanation"]

Unnamed: 0,pairwise_question_answering_quality/explanation
0,"Both responses fail to adhere to the groundedness criterion by adding information not present in the context (sandbag pickup hours) and significantly lack completeness and actionable guidance by omitting key details (effective dates, South Harbor location, specific required actions like moving vehicles, avoiding standing water, and monitoring for evacuation). However, BASELINE response is slightly better as it maintains a consistently calmer civic tone across all platforms, whereas CANDIDATE response uses more urgent and less calm language such as 'COASTAL FLOOD WARNING!' and 'URGENT:'."


In [25]:
!pytest -q


]9;4;3;\]9;4;0;\
[33m[33mno tests ran[0m[33m in 0.00s[0m[0m
