## Install required dependency and SDK

In [104]:
!pip install -q google-cloud-aiplatform pytest

## Create the environment for the notebook
Import Dependencies and Initialize Vertex AI

In [105]:
from vertexai.language_models import ChatModel, InputOutputTextPair
from vertexai.preview.language_models import TextGenerationModel
import vertexai

# Google Cloud project and location handed to the Vertex AI SDK.
PROJECT_ID = "qwiklabs-gcp-04-d9bb68112d04"
LOCATION = "global"

# Initialize the SDK; a later cell calls vertexai.init() again with "us-central1".
vertexai.init(project=PROJECT_ID, location=LOCATION)

## Function 1: Classify the Topic

1. `classify_government_message` classifies a message into one of the following categories:
  1. Employment
  2. General Information
  3. Emergency Services
  4. Tax Related
  5. Non-Government Questions

In [106]:
%%writefile gov_functions.py
from vertexai.preview.generative_models import GenerativeModel
chat_model = GenerativeModel("gemini-2.0-flash-001")

# Canonical labels, spelled exactly as the prompt asks the model to return them.
_GOV_CATEGORIES = (
    "Employment",
    "General Information",
    "Emergency Services",
    "Tax Related",
    "Non-Government questions",
)


def _normalize_category(raw: str) -> str:
    """Map a raw model reply onto a canonical label, or return it stripped if none matches."""
    stripped = raw.strip()
    # Models sometimes wrap the label in markdown bold, quotes or a trailing period.
    candidate = stripped.strip("*\"'`.").strip().casefold()
    for label in _GOV_CATEGORIES:
        if candidate == label.casefold():
            return label
    return stripped


def classify_government_message(question: str) -> str:
    """Classify a question into one of the government-service categories.

    Args:
        question: Free-text question to classify.

    Returns:
        One of "Employment", "General Information", "Emergency Services",
        "Tax Related" or "Non-Government questions" when the model's reply
        matches a label (ignoring case and surrounding ``*``, quotes,
        backticks or periods). Otherwise the reply is returned with only
        leading/trailing whitespace removed.
    """
    prompt = f"""You are an AI assistant trained to classify questions related to **government services** into one of the following categories:

      - Employment
      - General Information
      - Emergency Services
      - Tax Related

      If the question is not about a government-related topic, respond with: **Non-Government questions**

      Respond **only with one of the following**: "Employment", "General Information", "Emergency Services", "Tax Related", or "Non-Government questions".

      Question: {question}"""

    response = chat_model.generate_content(prompt)
    return _normalize_category(response.text)

Overwriting gov_functions.py


# Test case for Classify the Topic -  Function 1

In [107]:
import pytest
from gov_functions import classify_government_message

def test_employment():
    """A public-sector job question must be labelled "Employment"."""
    question = "How can I apply for a job in the public sector?"
    category = classify_government_message(question)
    assert category == "Employment"
    print("test_employment passed.")

def test_emergency_services():
    """A report of a neighborhood fire must be labelled "Emergency Services"."""
    question = "There's a fire in my neighborhood. What do I do?"
    category = classify_government_message(question)
    assert category == "Emergency Services"
    print("test_emergency_services passed.")

def test_tax_related():
    """An income-tax filing question must be labelled "Tax Related"."""
    question = "When do I need to file my income taxes?"
    category = classify_government_message(question)
    assert category == "Tax Related"
    print("test_tax_related passed.")

def test_general_information():
    """A DMV office location question must be labelled "General Information"."""
    question = "Where is my nearest DMV office?"
    category = classify_government_message(question)
    assert category == "General Information"
    print("test_general_information passed.")

def test_non_government():
    """A shopping question must be labelled "Non-Government questions"."""
    question = "What’s the best laptop under $1000?"
    category = classify_government_message(question)
    assert category == "Non-Government questions"
    print("test_non_government passed.")

# Run every classifier check in order; the first failed assertion stops the cell.
for run_check in (
    test_employment,
    test_emergency_services,
    test_tax_related,
    test_general_information,
    test_non_government,
):
    run_check()


test_employment passed.
test_emergency_services passed.
test_tax_related passed.
test_general_information passed.
test_non_government passed.


## Function 2: Create a Government Post

1. generate_social_post - will generate a post based on the event specified.
2. evaluate_generated_post - asks the model to score the generated post against the provided reference post (from the dataset) on a scale of 1 to 5.

In [108]:
%%writefile gov_posting_functions.py
from vertexai.preview.generative_models import GenerativeModel

# Initialize the Gemini model once
chat_model = GenerativeModel("gemini-2.0-flash-001")

def generate_social_post(event: str, max_chars: int = 280) -> str:
    """Ask the Gemini model for a short social media post announcing ``event``.

    Args:
        event: Plain-text description of the announcement.
        max_chars: Character budget stated in the prompt. The default of 280
            reproduces the original prompt exactly. The budget is only
            requested from the model; the returned text is not truncated.

    Returns:
        The model's reply with leading/trailing whitespace removed.

    Raises:
        ValueError: If ``max_chars`` is not positive.
    """
    if max_chars <= 0:
        raise ValueError(f"max_chars must be positive, got {max_chars!r}")

    prompt = f"""
    You are a helpful government assistant. Write a short, friendly, informative social media post (under {max_chars} characters) about this announcement:
    Event: {event}
    """
    response = chat_model.generate_content(prompt)
    return response.text.strip()

def evaluate_generated_post(event: str, generated: str, reference: str) -> str:
    """Have the Gemini model rate ``generated`` against ``reference`` on a 1-5 scale.

    Args:
        event: The announcement the post is about.
        generated: The post being judged.
        reference: A reference post for the same event.

    Returns:
        The rating as a string. When the reply contains exactly one number and
        it is between 1 and 5 (for example ``"**4**"`` or ``"Score: 4."``),
        only that digit is returned so callers can use ``isdigit()``/``int()``.
        Any other reply is returned with leading/trailing whitespace removed.
    """
    import re  # local import keeps this block self-contained in the written module

    prompt = f"""
    You are an impartial evaluator.

    Event: {event}

    Generated Post:
    {generated}

    Reference Post:
    {reference}

    Rate the generated post on a scale of 1 to 5 for clarity, relevance, tone, and informativeness.
    Just return the number only.
    """
    response = chat_model.generate_content(prompt)
    reply = response.text.strip()

    # Only unwrap the rating when it is unambiguous: a reply such as "4/5"
    # has two numbers and is passed through unchanged.
    numbers = re.findall(r"[0-9]+", reply)
    if len(numbers) == 1 and numbers[0] in {"1", "2", "3", "4", "5"}:
        return numbers[0]
    return reply


Overwriting gov_posting_functions.py


# Test case for Create a Government Post  -  Function 2

1. Here an LLM-as-judge (`evaluate_generated_post`, a second Gemini call) scores the generated post against the reference post on a 1–5 scale; good posts must score at least 4 and the deliberately poor post must score below 4.

In [109]:
import pytest
from gov_posting_functions import generate_social_post, evaluate_generated_post

def test_generate_social_post_with_evaluation():
    """Generate a City Hall closure post and require an LLM-judged score of at least 4."""
    announcement = "City Hall will be closed on July 4th"
    expected_post = "Reminder: City Hall will be closed on July 4th in observance of Independence Day"

    post = generate_social_post(announcement)
    print("Generated:", post)

    rating = evaluate_generated_post(announcement, post, expected_post)
    print("LLM-Evaluated Score:", rating)

    # The generated text must be a string within the 280-character budget.
    assert post is not None
    assert isinstance(post, str)
    assert len(post) <= 280

    # The judge must reply with a bare integer in 1..5, and at least 4 here.
    assert rating.isdigit()
    rating_value = int(rating)
    assert 1 <= rating_value <= 5
    assert rating_value >= 4

def test_generate_social_post_weather_alert():
    """Generate a Dallas thunderstorm post and require an LLM-judged score of at least 4."""
    announcement = "Severe thunderstorm expected in Dallas tomorrow evening"
    expected_post = "Severe thunderstorm alert: Dallas residents should stay indoors tomorrow evening."

    post = generate_social_post(announcement)
    print("Generated:", post)

    rating = evaluate_generated_post(announcement, post, expected_post)
    print("LLM-Evaluated Score:", rating)

    # The generated text must be a string within the 280-character budget.
    assert post is not None
    assert isinstance(post, str)
    assert len(post) <= 280
    # The judge must reply with a bare integer in 1..5, and at least 4 here.
    assert rating.isdigit()
    rating_value = int(rating)
    assert 1 <= rating_value <= 5
    assert rating_value >= 4


def test_evaluate_poor_quality_post():
    """A terse, low-quality post must be rated below 4 by the LLM judge."""
    announcement = "Power outage scheduled for maintenance on Sunday"
    weak_post = "power. no lights. sunday."
    expected_post = "Scheduled power outage this Sunday for maintenance. Please plan accordingly."

    rating = evaluate_generated_post(announcement, weak_post, expected_post)
    print("Bad Post:", weak_post)
    print("LLM-Evaluated Score:", rating)

    # The judge must reply with a bare integer in 1..5, and below 4 here.
    assert rating.isdigit()
    rating_value = int(rating)
    assert 1 <= rating_value <= 5
    assert rating_value < 4

# Run the LLM-judge checks in order; the first failed assertion stops the cell.
for run_check in (
    test_generate_social_post_with_evaluation,
    test_generate_social_post_weather_alert,
    test_evaluate_poor_quality_post,
):
    run_check()


Generated: Heads up! 🇺🇸 City Hall will be closed on July 4th for Independence Day. Have a safe and happy holiday! ✨
LLM-Evaluated Score: 4
Generated: Heads up, Dallas! ⛈️ A severe thunderstorm is expected tomorrow evening. Stay safe, monitor weather updates, and have a plan in case of power outages! #DallasWeather #SevereWeather
LLM-Evaluated Score: 4
Bad Post: power. no lights. sunday.
LLM-Evaluated Score: 2


# Test case for Create a Government Post  -  Function 2

1. Here I have used the Vertex AI Gen AI evaluation service (`EvalTask`) to score the LLM-generated response against the reference post; the tests below assert on the fluency score. See
https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/evaluation

In [110]:
import pytest
import vertexai
from vertexai.preview.generative_models import GenerativeModel
from vertexai.evaluation import EvalTask, PointwiseMetric, MetricPromptTemplateExamples
import pandas as pd

from gov_posting_functions import generate_social_post


# Re-initialize the SDK with a regional location (the first cell used "global").
# PROJECT_ID is defined in that earlier cell.
# NOTE(review): presumably the evaluation service needs a regional endpoint — confirm.
LOCATION = "us-central1"
vertexai.init(project=PROJECT_ID, location=LOCATION)


# NOTE(review): chat_model is not referenced by the code in this cell — confirm it is still needed.
chat_model = GenerativeModel("gemini-2.0-flash-001")

def run_vertex_evaluation(event, prediction, reference, score_metric="fluency"):
    """Evaluate one response with an ``EvalTask`` and return a single metric's score.

    A one-row dataset (``prompt``, ``reference``, ``response``) is evaluated
    against five pointwise metrics and four ROUGE metrics. Every summary
    metric is printed, then the row-level score for ``score_metric`` is
    printed and returned.

    Args:
        event: Text stored in the ``prompt`` column.
        prediction: Text stored in the ``response`` column (the post being judged).
        reference: Text stored in the ``reference`` column.
        score_metric: Metric whose ``"<metric>/score"`` column is returned.
            Defaults to ``"fluency"``, the original behavior.

    Returns:
        The selected score for the single dataset row, as a float.

    Raises:
        KeyError: If the metrics table has no ``"<score_metric>/score"`` column.
    """
    dataset = pd.DataFrame([{
        "prompt": event,
        "reference": reference,
        "response": prediction
    }])

    eval_task = EvalTask(
        dataset=dataset,
        metrics=[
            MetricPromptTemplateExamples.Pointwise.FLUENCY,
            MetricPromptTemplateExamples.Pointwise.SUMMARIZATION_QUALITY,
            MetricPromptTemplateExamples.Pointwise.GROUNDEDNESS,
            MetricPromptTemplateExamples.Pointwise.VERBOSITY,
            MetricPromptTemplateExamples.Pointwise.INSTRUCTION_FOLLOWING,
            "rouge_1",
            "rouge_2",
            "rouge_l",
            "rouge_l_sum",
        ]
    )

    # model=None: the responses are already in the dataset, so no model is passed.
    result = eval_task.evaluate(model=None)
    for key, value in result.summary_metrics.items():
        print(f"{key}: \t{value}")

    score_column = f"{score_metric}/score"
    if score_column not in result.metrics_table.columns:
        raise KeyError(
            f"{score_column!r} not found in metrics table; "
            f"available columns: {list(result.metrics_table.columns)}"
        )
    score = float(result.metrics_table.loc[0, score_column])
    print(f"Evaluation score for '{event}': {score}")
    return score

def test_generate_social_post_city_hall_closure():
    """Generate a City Hall closure post and require an evaluation score of at least 4."""
    announcement = "City Hall will be closed on July 4th"
    expected_post = "Reminder: City Hall is closed on July 4th in observance of Independence Day 🇺🇸"
    post = generate_social_post(announcement)

    rating = run_vertex_evaluation(announcement, post, expected_post)

    # The post must be a string within the 280-character budget.
    assert isinstance(post, str)
    assert len(post) <= 280
    # The score must be in range and high.
    assert 1 <= rating <= 5
    assert rating >= 4.0


def test_generate_social_post_weather_alert():
    """Generate a Dallas thunderstorm post and require an evaluation score of at least 4.

    Note: this definition replaces the same-named test defined in the earlier
    LLM-judge cell.
    """
    announcement = "Severe thunderstorm expected in Dallas tomorrow evening"
    expected_post = "Severe thunderstorm alert: Dallas residents should stay indoors tomorrow evening."
    post = generate_social_post(announcement)

    rating = run_vertex_evaluation(announcement, post, expected_post)

    # The post must be a string within the 280-character budget.
    assert isinstance(post, str)
    assert len(post) <= 280
    # The score must be in range and high.
    assert 1 <= rating <= 5
    assert rating >= 4.0


def test_evaluate_poor_quality_post():
    """A terse, low-quality post must receive an evaluation score below 4.

    Note: this definition replaces the same-named test defined in the earlier
    LLM-judge cell.
    """
    announcement = "Power outage scheduled for maintenance on Sunday"
    weak_post = "power. no lights. sunday."
    expected_post = "Scheduled power outage this Sunday for maintenance. Please plan accordingly."

    rating = run_vertex_evaluation(announcement, weak_post, expected_post)

    # The score must be in range and low.
    assert 1 <= rating <= 5
    assert rating < 4.0


# Run the evaluation-service checks in order; the first failed assertion stops the cell.
for run_check in (
    test_generate_social_post_city_hall_closure,
    test_generate_social_post_weather_alert,
    test_evaluate_poor_quality_post,
):
    run_check()


INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 9 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 9/9 [00:01<00:00,  8.61it/s]
INFO:vertexai.evaluation._evaluation:All 9 metric requests are successfully computed.
INFO:vertexai.evaluation._evaluation:Evaluation Took:1.060842547001812 seconds


row_count: 	1
fluency/mean: 	5.0
fluency/std: 	nan
summarization_quality/mean: 	4.0
summarization_quality/std: 	nan
groundedness/mean: 	1.0
groundedness/std: 	nan
verbosity/mean: 	0.0
verbosity/std: 	nan
instruction_following/mean: 	5.0
instruction_following/std: 	nan
rouge_1/mean: 	0.6111111
rouge_1/std: 	nan
rouge_2/mean: 	0.47058824
rouge_2/std: 	nan
rouge_l/mean: 	0.6111111
rouge_l/std: 	nan
rouge_l_sum/mean: 	0.6111111
rouge_l_sum/std: 	nan
Evaluation score for 'City Hall will be closed on July 4th': 5.0


INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 9 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 9/9 [00:01<00:00,  8.88it/s]
INFO:vertexai.evaluation._evaluation:All 9 metric requests are successfully computed.
INFO:vertexai.evaluation._evaluation:Evaluation Took:1.0269343810032296 seconds


INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 9 Vertex Gen AI Evaluation Service API requests.


row_count: 	1
fluency/mean: 	5.0
fluency/std: 	nan
summarization_quality/mean: 	4.0
summarization_quality/std: 	nan
groundedness/mean: 	1.0
groundedness/std: 	nan
verbosity/mean: 	0.0
verbosity/std: 	nan
instruction_following/mean: 	5.0
instruction_following/std: 	nan
rouge_1/mean: 	0.375
rouge_1/std: 	nan
rouge_2/mean: 	0.13333334
rouge_2/std: 	nan
rouge_l/mean: 	0.25
rouge_l/std: 	nan
rouge_l_sum/mean: 	0.25
rouge_l_sum/std: 	nan
Evaluation score for 'Severe thunderstorm expected in Dallas tomorrow evening': 5.0


100%|██████████| 9/9 [00:01<00:00,  7.79it/s]
INFO:vertexai.evaluation._evaluation:All 9 metric requests are successfully computed.
INFO:vertexai.evaluation._evaluation:Evaluation Took:1.1756987430017034 seconds


row_count: 	1
fluency/mean: 	1.0
fluency/std: 	nan
summarization_quality/mean: 	4.0
summarization_quality/std: 	nan
groundedness/mean: 	1.0
groundedness/std: 	nan
verbosity/mean: 	-2.0
verbosity/std: 	nan
instruction_following/mean: 	1.0
instruction_following/std: 	nan
rouge_1/mean: 	0.2857143
rouge_1/std: 	nan
rouge_2/mean: 	0.0
rouge_2/std: 	nan
rouge_l/mean: 	0.2857143
rouge_l/std: 	nan
rouge_l_sum/mean: 	0.2857143
rouge_l_sum/std: 	nan
Evaluation score for 'Power outage scheduled for maintenance on Sunday': 1.0
