In [2]:
import unittest
from google import genai
from google.genai import types
import base64

In [4]:
# Gen AI client: created with vertexai=True, so requests are routed via Vertex AI
# under the project/location given here.
genai_client = genai.Client(
    vertexai=True,
    location="global",
    project="qwiklabs-gcp-00-171b5867e51b",
)

In [5]:
# Gen AI model: model ID that `generate` passes to `generate_content`.
# NOTE(review): this looks like a dated preview ID — confirm it is still served before re-running.
model = "gemini-2.5-pro-preview-06-05"

In [22]:
# Gen AI content generation helper
def generate(system_prompt, user_input):
  """Send a single user message to the configured model and return the reply text.

  Args:
    system_prompt: Text wrapped in a Part and supplied as the system instruction.
    user_input: Text wrapped in a Part and sent as the only "user" turn.

  Returns:
    The `text` attribute of the object returned by
    `genai_client.models.generate_content`.
    NOTE(review): `text` may be None when the response carries no text part —
    confirm against the SDK docs before relying on a str here.
  """
  user_turn = types.Content(
    role="user",
    parts=[types.Part.from_text(text=user_input)],
  )

  request_config = types.GenerateContentConfig(
    temperature=0.7,  # controls how creative/varied the output is
    top_p=1,  # controls the probability mass considered for token selection
    max_output_tokens=65535,
    system_instruction=[types.Part.from_text(text=system_prompt)],
    # thinking_budget=-1: presumably lets the model pick its own budget — TODO confirm
    thinking_config=types.ThinkingConfig(thinking_budget=-1),
  )

  return genai_client.models.generate_content(
    model=model,
    contents=[user_turn],
    config=request_config,
  ).text

# Classification Unit Test

In [23]:
def classify_question(question):
  """Ask the model to label a question with one of four fixed categories.

  The system prompt lists the categories (Employment, General Information,
  Emergency Services, Tax Related) and instructs the model to answer with
  the category name only.

  Args:
    question: The user's question text, sent as the user turn.

  Returns:
    The model's answer with leading/trailing whitespace removed. An empty or
    None answer from `generate` is passed through unchanged.
  """
  system_prompt = """
  You are a helpful AI Assistant. Your task is to classify user questions into one of the following categories:

  1. Employment
  2. General Information
  3. Emergency Services
  4. Tax Related

  If a specific category cannot be determined, assign the category: 'General Information'.

  Your response must include **only** the category name — no additional text.

  Example:
  Question: What is the ambulance contact number?
  Answer: Emergency Services
  """
  label = generate(system_prompt, question)
  # Callers compare the label by exact equality, so a stray newline or space
  # around the category name must not leak through.
  return label.strip() if label else label

In [24]:
class TestQuestionClassifier(unittest.TestCase):

    def test_employment_category(self):
        question = "How do I apply for a government job?"
        category = classify_question(question)
        self.assertEqual(category, "Employment")

    def test_emergency_services_category(self):
        question = "What is the ambulance contact number?"
        category = classify_question(question)
        self.assertEqual(category, "Emergency Services")

    def test_tax_related_category(self):
        question = "How do I file my income tax returns?"
        category = classify_question(question)
        self.assertEqual(category, "Tax Related")

    def test_general_info_category(self):
        question = "What are the office hours?"
        category = classify_question(question)
        self.assertEqual(category, "General Information")

    def test_default_to_general_information(self):
        question = "Can you help me with this?"
        category = classify_question(question)
        self.assertEqual(category, "General Information")

# Run in a notebook: argv=[''] stops unittest from parsing the kernel's own
# command-line arguments, and exit=False stops it from calling sys.exit()
# once the tests finish.
unittest.main(argv=[''], verbosity=2, exit=False)


test_default_to_general_information (__main__.TestQuestionClassifier.test_default_to_general_information) ... ok
test_emergency_services_category (__main__.TestQuestionClassifier.test_emergency_services_category) ... ok
test_employment_category (__main__.TestQuestionClassifier.test_employment_category) ... ok
test_general_info_category (__main__.TestQuestionClassifier.test_general_info_category) ... ok
test_tax_related_category (__main__.TestQuestionClassifier.test_tax_related_category) ... ok

----------------------------------------------------------------------
Ran 5 tests in 16.744s

OK


<unittest.main.TestProgram at 0x7bbc329d6550>

# Social Media Post Generation Unit Test

In [34]:
# System instruction for drafting official announcement posts: sets the tone,
# the 200-word limit, the required content and the hashtag rules.
post_system_prompt = """
  You are a communications assistant for a government agency. Your task is to draft clear, concise, and polite social media posts for official public announcements. These may include weather emergencies, public holidays, school closings, or other alerts affecting citizens.

  ### Instructions:
  1. Keep the message under **200 words**.
  2. Use a **calm, professional, and reassuring** tone — no slang or informal phrasing.
  3. Clearly state:
    - What the announcement is about
    - Who it affects (e.g., residents, students, commuters)
    - When it is in effect and for how long
  4. Include **basic safety tips or recommended actions**, if applicable.
  5. Use **simple, accessible language** — avoid technical or bureaucratic terms.
  6. End the post with **relevant hashtags** (e.g., #WeatherAlert, #SchoolClosure, #PublicNotice). Include 2–4 concise, meaningful hashtags that help categorize the message.
  7. Do not use emojis.
  8. Avoid adding contact numbers, links, or excessive formatting unless instructed.

  ### Output format:
  Only return the post text with hashtags at the end. Do not include any commentary, metadata, or instructions.
  """
def generate_post(news):
  """Draft a social media post for an announcement.

  Args:
    news: The announcement text; sent to the model as the user turn.

  Returns:
    Whatever `generate` returns for `post_system_prompt` and `news`.
  """
  # `news` must be forwarded as the user input; without it the call to
  # `generate` is missing a required argument and raises TypeError.
  return generate(post_system_prompt, news)

In [27]:
# Smoke check: draft a post for a sample announcement; print() so line breaks render.
print(generate_post("Due to flight accident, all schools & colleges in Delhi are closed"))

Public Announcement:

Due to an ongoing emergency response following a flight accident, all public and private schools and colleges in Delhi will be closed tomorrow.

This measure is to ensure the safety of students and staff and to allow emergency services to operate without obstruction. We ask all residents to please stay home if possible and avoid the vicinity of the incident to give first responders clear access.

We appreciate your cooperation during this time. Further updates will be provided as they become available.

#PublicNotice #Delhi #SchoolClosure


In [28]:
def evaluate_post(post):
  """Ask the model whether a post meets the announcement criteria.

  The reviewer prompt lists six criteria and instructs the model to reply
  with a single word, "Yes" or "No".

  Args:
    post: The social media post text to review; sent as the user turn.

  Returns:
    The model's reply with leading/trailing whitespace removed. An empty or
    None reply from `generate` is passed through unchanged.
  """
  evaluation_prompt = """
  You are a communications quality reviewer. Your task is to evaluate whether a social media post for a government announcement meets all the required criteria.

  Criteria:
  1. The message is clear and easy to understand.
  2. The message is within 200 words.
  3. The tone is polite, professional, and reassuring.
  4. It clearly states what the announcement is about, who it affects, and when it applies.
  5. It includes 2 to 4 relevant hashtags placed at the end.
  6. The language is accessible, without jargon or overly complex terms.

  Respond with **only one word**:
  - **Yes** — if the post satisfies **all** of the above criteria.
  - **No** — if the post fails to satisfy **any** of the criteria.

  Do not provide any explanation or additional text.

  """
  verdict = generate(evaluation_prompt, post)
  # Callers compare the verdict by exact equality with "Yes"/"No", so
  # surrounding whitespace or a trailing newline must not leak through.
  return verdict.strip() if verdict else verdict

In [29]:
import unittest

class TestPostEvaluation(unittest.TestCase):

    def test_valid_post(self):
        post = (
            "Due to heavy rainfall, all public schools will remain closed on Monday, July 10th. "
            "This applies to all grades in the city limits. Stay safe and avoid unnecessary travel. "
            "#SchoolClosure #WeatherAlert #StaySafe"
        )
        result = evaluate_post(post)
        self.assertEqual(result, "Yes")

    def test_missing_hashtags(self):
        post = (
            "Due to heavy rainfall, all public schools will remain closed on Monday, July 10th. "
            "This applies to all grades in the city limits. Stay safe and avoid unnecessary travel."
        )
        result = evaluate_post(post)
        self.assertEqual(result, "No")

    def test_unclear_message(self):
        post = (
            "Important change to facilities schedule, more info soon."
            " #Notice"
        )
        result = evaluate_post(post)
        self.assertEqual(result, "No")

    def test_informal_tone(self):
        post = (
            "OMG guys! Schools are out Monday due to rain ☔️ Stay dry!! #RainDay #SchoolClosed"
        )
        result = evaluate_post(post)
        self.assertEqual(result, "No")

    def test_too_many_hashtags(self):
        post = (
            "Public offices will be closed on January 26th in observance of Republic Day. "
            "We wish everyone a safe and respectful holiday. #RepublicDay #HolidayNotice #GovtClosed #StaySafe #Announcement"
        )
        result = evaluate_post(post)
        self.assertEqual(result, "No")

# Run tests in notebook: argv=[''] stops unittest from parsing the kernel's own
# command-line arguments, and exit=False stops it from calling sys.exit().
# NOTE(review): with no test names given, this runs every TestCase defined in
# the notebook session so far, not only TestPostEvaluation.
unittest.main(argv=[''], verbosity=2, exit=False)


test_informal_tone (__main__.TestPostEvaluation.test_informal_tone) ... ok
test_missing_hashtags (__main__.TestPostEvaluation.test_missing_hashtags) ... ok
test_too_many_hashtags (__main__.TestPostEvaluation.test_too_many_hashtags) ... ok
test_unclear_message (__main__.TestPostEvaluation.test_unclear_message) ... ok
test_valid_post (__main__.TestPostEvaluation.test_valid_post) ... ok
test_default_to_general_information (__main__.TestQuestionClassifier.test_default_to_general_information) ... ok
test_emergency_services_category (__main__.TestQuestionClassifier.test_emergency_services_category) ... ok
test_employment_category (__main__.TestQuestionClassifier.test_employment_category) ... ok
test_general_info_category (__main__.TestQuestionClassifier.test_general_info_category) ... ok
test_tax_related_category (__main__.TestQuestionClassifier.test_tax_related_category) ... ok

----------------------------------------------------------------------
Ran 10 tests in 56.682s

OK


<unittest.main.TestProgram at 0x7bbc31aa3b90>

# Google Evaluation API

In [31]:
import vertexai
from vertexai.evaluation import (
    EvalTask,
    PointwiseMetric,
    PairwiseMetric,
    PointwiseMetricPromptTemplate,
    PairwiseMetricPromptTemplate,
    MetricPromptTemplateExamples,
)
from vertexai.generative_models import (
    GenerativeModel,
    HarmCategory,
    HarmBlockThreshold,
)
import pandas as pd
import plotly.graph_objects as go
from IPython.display import HTML, Markdown, display
import datetime

# Initialise the Vertex AI SDK (project + region) before using the evaluation classes imported above.
vertexai.init(project="qwiklabs-gcp-00-171b5867e51b", location="us-central1")

In [32]:
# Hand-written evaluation samples: each entry pairs an announcement ("prompt")
# with a candidate social media post ("post") to be scored.
posts = [
    {
        "prompt": "Flood warnings have been issued for low-lying areas in Mumbai from July 5 to July 7. Residents are advised to stay alert and avoid unnecessary travel.",
        "post": "Flood warnings in Mumbai from July 5-7. Please stay alert and avoid travel unless necessary. Stay safe everyone! #MumbaiFloods #StaySafe"
    },
    {
        "prompt": "City hospitals will offer free vaccination camps on August 1 and August 2. Citizens are encouraged to participate.",
        "post": "Free vaccines at city hospitals on Aug 1 & 2. Don’t miss it! Protect yourself and loved ones. #VaccinationDrive #HealthFirst"
    },
    {
        "prompt": "All markets in Hyderabad will be closed on September 10 due to the Ganesh Chaturthi festival.",
        "post": "Markets closed Sept 10 for Ganesh Chaturthi. Plan your shopping ahead. #GaneshChaturthi #Hyderabad"
    },
    {
        "prompt": "Severe air pollution levels are expected in Delhi over the next three days. People are advised to limit outdoor activities.",
        "post": "Air pollution bad in Delhi for next 3 days. Don’t go outside much. #DelhiPollution"
    },
    {
        "prompt": "Due to ongoing construction, Main Street in Pune will be closed from June 15 to June 20. Use alternative routes.",
        "post": "Main Street Pune closed June 15-20. Find other roads. #PuneTraffic"
    },
    {
        "prompt": "An outbreak of seasonal flu has been reported in several schools across Kolkata. Students with symptoms should stay home and seek medical advice.",
        "post": "Flu outbreak in Kolkata schools. Sick students must stay home and see a doctor. #KolkataFlu #HealthAlert"
    },
    {
        "prompt": "Power outage scheduled in Sector 5 and 6 of Chandigarh on July 18 from 9 AM to 5 PM for maintenance.",
        "post": "POWER OUTAGE IN SECTOR 5 & 6 ON JULY 18. PREPARE ACCORDINGLY!!! #PowerOutage #Chandigarh"
    },
    {
        "prompt": "The annual city marathon will be held on October 10. Roads around the city center will be closed from 6 AM to 12 PM.",
        "post": "City marathon on Oct 10. Road closures 6 AM-12 PM downtown. Plan alternate routes. #CityMarathon #RoadClosure"
    }
]


# Dataset Creation

In [36]:
# One row per sample post: the shared system prompt as "instruction", the
# announcement as "context", and the candidate post as "response".
eval_dataset = pd.DataFrame(
    [
        dict(
            instruction=post_system_prompt,
            context="announcement: {}".format(sample["prompt"]),
            response=sample["post"],
        )
        for sample in posts
    ]
)

In [37]:
# Timestamp suffix so each run gets its own experiment name.
run_ts = f"{datetime.datetime.now():%Y%m%d-%H%M%S}"

# Evaluation task over the sample dataset, scored with four of the SDK's
# ready-made pointwise metrics.
eval_task = EvalTask(
    dataset=eval_dataset,
    metrics=[
        MetricPromptTemplateExamples.Pointwise.GROUNDEDNESS,
        MetricPromptTemplateExamples.Pointwise.VERBOSITY,
        MetricPromptTemplateExamples.Pointwise.INSTRUCTION_FOLLOWING,
        MetricPromptTemplateExamples.Pointwise.SAFETY,
    ],
    experiment="social-media-post-" + run_ts,
)

# Evaluation

In [38]:
# Template whose placeholders are the dataset's column names; the evaluation
# fills it in per row to assemble the prompt.
prompt_template = "Instruction: {instruction}. Prompt: {context}. Post: {response}"

result = eval_task.evaluate(
    prompt_template=prompt_template,
    experiment_run_name="social-media-post-" + run_ts,
)

# Results are collected in a list, starting with this run.
evaluation_results = [result]

INFO:vertexai.evaluation.eval_task:Logging Eval Experiment metadata: {'prompt_template': 'Instruction: {instruction}. Prompt: {context}. Post: {response}'}
INFO:vertexai.evaluation._evaluation:Assembling prompts from the `prompt_template`. The `prompt` column in the `EvalResult.metrics_table` has the assembled prompts used for model response generation.
INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 32 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 32/32 [00:05<00:00,  5.65it/s]
INFO:vertexai.evaluation._evaluation:All 32 metric requests are successfully computed.
INFO:vertexai.evaluation._evaluation:Evaluation Took:5.687203375000536 seconds


# Comparison

In [39]:
# Render the evaluation result (summary metrics and per-row metrics) in the notebook.
# NOTE(review): this helper comes from `vertexai.preview.evaluation`, while the
# rest of the notebook imports from `vertexai.evaluation` — confirm the mix is intended.
from vertexai.preview.evaluation import notebook_utils
notebook_utils.display_eval_result(eval_result=result)

### Summary Metrics

Unnamed: 0,row_count,groundedness/mean,groundedness/std,verbosity/mean,verbosity/std,instruction_following/mean,instruction_following/std,safety/mean,safety/std
0,8.0,1.0,0.0,-0.75,1.035098,4.25,1.38873,1.0,0.0


### Row-based Metrics

Unnamed: 0,instruction,context,response,prompt,groundedness/explanation,groundedness/score,verbosity/explanation,verbosity/score,instruction_following/explanation,instruction_following/score,safety/explanation,safety/score
0,\n You are a communications assistant for a g...,announcement: Flood warnings have been issued ...,Flood warnings in Mumbai from July 5-7. Please...,Instruction: \n You are a communications assi...,The response is fully grounded in the user pro...,1.0,"The response is perfectly concise, providing a...",0.0,The response followed all the instructions by ...,5.0,The response is safe because it does not conta...,1.0
1,\n You are a communications assistant for a g...,announcement: City hospitals will offer free v...,Free vaccines at city hospitals on Aug 1 & 2. ...,Instruction: \n You are a communications assi...,The response is fully grounded in the provided...,1.0,"The response is appropriately concise, providi...",0.0,The AI's response is consistent with the promp...,5.0,The response is safe as it does not contain an...,1.0
2,\n You are a communications assistant for a g...,announcement: All markets in Hyderabad will be...,Markets closed Sept 10 for Ganesh Chaturthi. P...,Instruction: \n You are a communications assi...,The response is completely grounded in the pro...,1.0,The response is perfectly concise and provides...,0.0,"The model completely follows all instructions,...",5.0,The response is safe because it does not conta...,1.0
3,\n You are a communications assistant for a g...,announcement: Severe air pollution levels are ...,Air pollution bad in Delhi for next 3 days. Do...,Instruction: \n You are a communications assi...,The response is fully grounded because all asp...,1.0,The response is too short and lacks crucial in...,-2.0,The response misses key requirements such as t...,2.0,"The response does not contain any hate speech,...",1.0
4,\n You are a communications assistant for a g...,"announcement: Due to ongoing construction, Mai...",Main Street Pune closed June 15-20. Find other...,Instruction: \n You are a communications assi...,The response is fully grounded in the provided...,1.0,The response is excessively brief as it does n...,-2.0,The response follows all the instructions: it ...,5.0,"The response does not contain any hate speech,...",1.0
5,\n You are a communications assistant for a g...,announcement: An outbreak of seasonal flu has ...,Flu outbreak in Kolkata schools. Sick students...,Instruction: \n You are a communications assi...,"The response is fully grounded in the prompt, ...",1.0,"The response is perfectly concise, providing a...",0.0,The response completely fulfills the instructi...,5.0,The response is safe because it does not conta...,1.0
6,\n You are a communications assistant for a g...,announcement: Power outage scheduled in Sector...,POWER OUTAGE IN SECTOR 5 & 6 ON JULY 18. PREPA...,Instruction: \n You are a communications assi...,The response is fully grounded in the user pro...,1.0,"The response is far too short, it does not men...",-2.0,The response does not adhere to most of the in...,2.0,"The response does not contain any hate speech,...",1.0
7,\n You are a communications assistant for a g...,announcement: The annual city marathon will be...,City marathon on Oct 10. Road closures 6 AM-12...,Instruction: \n You are a communications assi...,The response is fully grounded in the user pro...,1.0,The response is concise and provides all the n...,0.0,The response follows all instructions: staying...,5.0,The response is safe as it does not contain an...,1.0
