In [3]:
## Step 1: Install Dependencies

!pip install --upgrade --quiet "google-cloud-aiplatform"

In [5]:
## Step 2: Configuration & Initialization

import vertexai
from vertexai.generative_models import GenerativeModel
import re
import os
import unittest

# Project and region used by every Vertex AI call in this notebook.
PROJECT_ID = "qwiklabs-gcp-03-c72749959822"
LOCATION = "us-central1"

# Point the SDK at the project/region above. A failure is reported instead of
# raised, so the cell always runs to completion.
try:
    vertexai.init(location=LOCATION, project=PROJECT_ID)
    print(f"‚úÖ Vertex AI initialized for project: {PROJECT_ID}")
except Exception as e:
    print(f"Error initializing Vertex AI: {e}")

# Export the project ID; the unit-test cell reads it to decide whether to run.
os.environ.update({"GCP_PROJECT_ID": PROJECT_ID})

‚úÖ Vertex AI initialized for project: qwiklabs-gcp-03-c72749959822


In [6]:
## Step 3: Define Classification function - build prompt using Gemini as the classifier

def _match_category(raw_text: str, categories: list):
    """
    Map a raw model reply onto one of ``categories``.

    Surrounding whitespace, quotes, backticks, asterisks and trailing punctuation
    are ignored and the comparison is case-insensitive, so replies such as
    ``"employment."`` or ``**Tax Related**`` still resolve to the canonical name.

    Returns:
        The canonical category name from ``categories``, or None when nothing matches.
    """
    cleaned = raw_text.strip().strip("\"'`*.:! ").casefold()
    for name in categories:
        if name.casefold() == cleaned:
            return name
    return None


def classify_question_with_gemini(user_question: str) -> str:
    """
    (Production) Classifies a user question into one of four categories.

    Args:
        user_question: Free-text question from the user.

    Returns:
        One of "Employment", "General Information", "Emergency Services" or
        "Tax Related". "General Information" is the fallback when the question
        is blank, the model reply matches no category, or the model call fails
        (the error is printed, not raised).
    """
    categories = ["Employment", "General Information", "Emergency Services", "Tax Related"]
    fallback = "General Information"

    # Nothing to classify: skip the model call entirely.
    if user_question is None or not str(user_question).strip():
        return fallback

    # Build the bullet list from `categories` so the prompt and the validation
    # below can never drift apart.
    category_lines = "\n".join(f"    - {name}" for name in categories)
    prompt = f"""
    You are a text classification assistant. Your task is to classify the user's question
    into one of the following categories:
{category_lines}
    Respond with *only* the category name and nothing else.
    User Question:
    "{user_question}"
    Category:
    """
    try:
        model = GenerativeModel(model_name="gemini-2.5-flash")
        response = model.generate_content(prompt)
        # Tolerant matching: a correct answer that differs only in case, quoting
        # or trailing punctuation must not be discarded as "unknown".
        matched = _match_category(response.text, categories)
        return matched if matched is not None else fallback
    except Exception as e:
        print(f"Error during classification: {e}")
        return fallback

In [7]:
## Step 4: Define Generate Social Media Post function with instructions to return creative text for posts

def generate_social_media_post(topic: str, platform: str) -> str:
    """
    (Production) Generates a government social media post.

    Args:
        topic: The announcement to write about.
        platform: Target platform name. "twitter" (case-insensitive, surrounding
            whitespace ignored) gets the 280-character instruction; any other
            value gets the 2-3 paragraph instruction.

    Returns:
        The post text with surrounding whitespace removed and, if the model
        wrapped the whole post in one matching pair of quotes, those quotes
        removed. Returns "Error: Could not generate post." when the model call
        fails (the error is printed, not raised).
    """
    constraints = "The post must be under 280 characters." if platform.strip().lower() == "twitter" else "The post can be 2-3 short paragraphs."
    prompt = f"""
    You are a social media manager for a local government agency.
    Target Platform: {platform}
    Announcement Topic: "{topic}"
    Instructions:
    1.  Tone is authoritative, yet calm.
    2.  Use 1-3 relevant emojis.
    3.  Include 2-3 relevant hashtags.
    4.  Constraint: {constraints}
    5.  Respond with *only* the post content.
    Post:
    """
    try:
        model = GenerativeModel(model_name="gemini-2.5-flash")
        response = model.generate_content(prompt)
        post = response.text.strip()
        # Unwrap only when the SAME quote character both opens and closes the
        # post and does not occur in between. Stripping a lone leading or
        # trailing quote would corrupt posts that begin or end with a quotation.
        if (
            len(post) >= 2
            and post[0] in "\"'"
            and post[0] == post[-1]
            and post[0] not in post[1:-1]
        ):
            post = post[1:-1].strip()
        return post
    except Exception as e:
        print(f"Error during post generation: {e}")
        return "Error: Could not generate post."

In [8]:
## Step 5: Define the Unit Test class to build test cases

# The tests below are skipped unless GCP_PROJECT_ID is present in the environment
# and differs from the template placeholder value.
PROJECT_ID_SET = os.environ.get("GCP_PROJECT_ID", "your-gcp-project-id") != "your-gcp-project-id"

@unittest.skipUnless(PROJECT_ID_SET, "GCP_PROJECT_ID not set in Cell 2. Skipping tests.")
class TestProductionFunctions(unittest.TestCase):
    """Checks for the production classification and post-generation functions."""

    def print_test_name(self):
        """Print a banner naming the test method that is about to run."""
        print(f"\n--- RUNNING: {self._testMethodName} ---")

    def setUp(self):
        # Emit the banner once per test instead of repeating the call in every method.
        self.print_test_name()

    def test_classify_employment(self):
        self.assertEqual(classify_question_with_gemini("I need help finding a new job"), "Employment")

    def test_classify_tax(self):
        self.assertEqual(classify_question_with_gemini("How do I file my 1040?"), "Tax Related")

    def test_classify_emergency(self):
        self.assertEqual(classify_question_with_gemini("My house is on fire, please help!"), "Emergency Services")

    def test_generate_post_twitter(self):
        post = generate_social_media_post("All schools closed tomorrow due to heavy snow", "Twitter")
        print(f"Generated Post:\n{post}")

        # Structural checks only: a hashtag, the topic keyword, and a length
        # ceiling of 300 (the 280 requested in the prompt plus a buffer).
        self.assertIn("#", post)
        self.assertIn("school", post.lower())
        self.assertLessEqual(len(post), 300)

In [9]:
## Step 6: Run Unit Tests

# argv is a one-element dummy list (its first element is ignored), exit=False is
# passed so unittest.main returns instead of exiting, and verbosity=2 prints one
# line per test.
if __name__ == '__main__':
    unittest.main(verbosity=2, exit=False, argv=['first-arg-is-ignored'])

test_classify_emergency (__main__.TestProductionFunctions.test_classify_emergency) ... 


--- RUNNING: test_classify_emergency ---


ok
test_classify_employment (__main__.TestProductionFunctions.test_classify_employment) ... 


--- RUNNING: test_classify_employment ---


ok
test_classify_tax (__main__.TestProductionFunctions.test_classify_tax) ... 


--- RUNNING: test_classify_tax ---


ok
test_generate_post_twitter (__main__.TestProductionFunctions.test_generate_post_twitter) ... 


--- RUNNING: test_generate_post_twitter ---


ok

----------------------------------------------------------------------
Ran 4 tests in 6.593s

OK


Generated Post:
PUBLIC ANNOUNCEMENT: All schools will be CLOSED tomorrow due to heavy snow. Please prioritize safety & stay warm. üå®Ô∏è‚ùÑÔ∏è #SchoolClosures #SnowDay #WinterWeather


In [10]:
## Step 7: Define the two prompt versions to compare: the original prompts (V1) and the new prompts (V2)

# --- V1 FUNCTIONS (Your Originals) ---

def classify_question_v1(user_question: str) -> str:
    """V1 (baseline): delegates to classify_question_with_gemini and returns its result."""
    return classify_question_with_gemini(user_question=user_question)

def generate_post_v1(topic: str, platform: str) -> str:
    """V1 (baseline): delegates to generate_social_media_post and returns its result."""
    return generate_social_media_post(topic=topic, platform=platform)

# --- V2 FUNCTIONS (New Prompts to Test) ---

def classify_question_v2(user_question: str) -> str:
    """
    V2: A shorter, more direct classification prompt.

    Args:
        user_question: Free-text question from the user.

    Returns:
        One of "Employment", "General Information", "Emergency Services" or
        "Tax Related". "General Information" is the fallback when the reply
        matches no category or the model call fails (the error is printed, not
        raised).
    """
    categories = ["Employment", "General Information", "Emergency Services", "Tax Related"]
    fallback = "General Information"

    prompt = f"""Classify: "{user_question}"
Categories: {', '.join(categories)}
Category:"""

    try:
        model = GenerativeModel(model_name="gemini-2.5-flash")
        response = model.generate_content(prompt)
        # Ignore case, surrounding quotes/markdown and trailing punctuation so a
        # correct answer is not scored as a miss because of formatting.
        cleaned = response.text.strip().strip("\"'`*.:! ").casefold()
        canonical_by_folded = {name.casefold(): name for name in categories}
        return canonical_by_folded.get(cleaned, fallback)
    except Exception as e:
        # Report the failure: a silent fallback here would be indistinguishable
        # from a real "General Information" answer in the A/B score.
        print(f"Error during classification (V2): {e}")
        return fallback

def generate_post_v2(topic: str, platform: str) -> str:
    """
    V2: A more 'engaging' tone for social media.

    Args:
        topic: The announcement to write about.
        platform: Target platform name. "twitter" (case-insensitive, surrounding
            whitespace ignored) gets the 280-character instruction; any other
            value gets the 2-3 paragraph instruction.

    Returns:
        The post text with surrounding whitespace removed and, if the model
        wrapped the whole post in one matching pair of quotes, those quotes
        removed. Returns "Error: Could not generate post." when the model call
        fails (the error is printed, not raised).
    """
    constraints = "The post must be under 280 characters." if platform.strip().lower() == "twitter" else "The post can be 2-3 short paragraphs."
    prompt = f"""
    You are a witty, engaging social media manager for a local government.
    Target Platform: {platform}
    Announcement Topic: "{topic}"
    Instructions:
    1.  Tone is engaging and clear.
    2.  Use emojis to make it friendly.
    3.  Include 2-3 relevant hashtags.
    4.  Constraint: {constraints}
    5.  Respond with *only* the post content.
    Post:
    """
    try:
        model = GenerativeModel(model_name="gemini-2.5-flash")
        response = model.generate_content(prompt)
        post = response.text.strip()
        # Unwrap only when the SAME quote character both opens and closes the
        # post and does not occur in between. Stripping a lone leading or
        # trailing quote would corrupt posts that begin or end with a quotation.
        if (
            len(post) >= 2
            and post[0] in "\"'"
            and post[0] == post[-1]
            and post[0] not in post[1:-1]
        ):
            post = post[1:-1].strip()
        return post
    except Exception as e:
        # Report the failure so an API problem is visible in the A/B output
        # instead of appearing only as the generic error string.
        print(f"Error during post generation (V2): {e}")
        return "Error: Could not generate post."

# Progress marker: printed once this cell's V1/V2 definitions have all executed.
print("‚úÖ V1 and V2 functions for evaluation are now defined.")

‚úÖ V1 and V2 functions for evaluation are now defined.


In [11]:
## Step 8: Run the classification A/B test: run both the V1 and V2 functions on the same questions, then compare their scores.

print("--- üìä STARTING CLASSIFICATION A/B TEST ---")

# 1. Golden dataset: each question is paired with the category a correct classifier returns
classification_dataset = [
    {"user_question": "I need help finding a new job", "ground_truth": "Employment"},
    {"user_question": "How do I file my 1040?", "ground_truth": "Tax Related"},
    {"user_question": "My house is on fire!", "ground_truth": "Emergency Services"},
    {"user_question": "What's the capital of France?", "ground_truth": "General Information"},
]

# Running tallies of exact matches per prompt version
v1_correct = 0
v2_correct = 0

# 2. Send every question through both prompt versions and score each answer
for item in classification_dataset:
    question, expected = item["user_question"], item["ground_truth"]

    print(f"\nTesting Question: '{question}' (Expected: {expected})")

    # V1: the original prompt
    v1_result = classify_question_v1(question)
    v1_matches = v1_result == expected
    v1_correct += int(v1_matches)
    print(f"  V1 (Original) Result: {v1_result}", end="")
    print(" (‚úÖ Correct)" if v1_matches else " (‚ùå Incorrect)")

    # V2: the shorter prompt
    v2_result = classify_question_v2(question)
    v2_matches = v2_result == expected
    v2_correct += int(v2_matches)
    print(f"  V2 (Short) Result:    {v2_result}", end="")
    print(" (‚úÖ Correct)" if v2_matches else " (‚ùå Incorrect)")
# 3. Print the final scores
print("\n--- üèÅ FINAL CLASSIFICATION SCORES ---")
print(f"V1 (Original) Score: {v1_correct} / {len(classification_dataset)}")
print(f"V2 (Short) Score:    {v2_correct} / {len(classification_dataset)}")

--- üìä STARTING CLASSIFICATION A/B TEST ---

Testing Question: 'I need help finding a new job' (Expected: Employment)
  V1 (Original) Result: Employment (‚úÖ Correct)
  V2 (Short) Result:    Employment (‚úÖ Correct)

Testing Question: 'How do I file my 1040?' (Expected: Tax Related)
  V1 (Original) Result: Tax Related (‚úÖ Correct)
  V2 (Short) Result:    Tax Related (‚úÖ Correct)

Testing Question: 'My house is on fire!' (Expected: Emergency Services)
  V1 (Original) Result: Emergency Services (‚úÖ Correct)
  V2 (Short) Result:    Emergency Services (‚úÖ Correct)

Testing Question: 'What's the capital of France?' (Expected: General Information)
  V1 (Original) Result: General Information (‚úÖ Correct)
  V2 (Short) Result:    General Information (‚úÖ Correct)

--- üèÅ FINAL CLASSIFICATION SCORES ---
V1 (Original) Score: 4 / 4
V2 (Short) Score:    4 / 4


In [12]:
## Step 9: Run Generation A/B test, showing each created post side by side with different tones.

print("--- üìä STARTING GENERATION A/B TEST ---")

# 1. Announcements to generate, each paired with the platform it is written for
generation_dataset = [
    {"topic": "All schools closed tomorrow due to heavy snow", "platform": "Twitter"},
    {"topic": "City Hall is closed Monday for the holiday", "platform": "Facebook"},
    {"topic": "Flash flood warning for downtown", "platform": "Twitter"},
]

# 2. For each announcement, print the V1 post followed by the V2 post
for i, item in enumerate(generation_dataset):
    print(f"\n--- TEST {i+1}: {item['topic']} ({item['platform']}) ---")

    # V1: the dataset keys match the generator's parameter names, so the
    # record can be unpacked straight into the call.
    print("\n  --- V1 (Authoritative) Post ---")
    v1_post = generate_post_v1(**item)
    print(v1_post)

    # V2: same record, engaging-tone prompt
    print("\n  --- V2 (Engaging) Post ---")
    v2_post = generate_post_v2(**item)
    print(v2_post)
    print("---------------------------------")

print("\n--- üèÅ GENERATION TEST COMPLETE ---")

--- üìä STARTING GENERATION A/B TEST ---

--- TEST 1: All schools closed tomorrow due to heavy snow (Twitter) ---

  --- V1 (Authoritative) Post ---
ATTENTION: All schools will be closed tomorrow, [Date], due to heavy snow and hazardous travel conditions. Your safety is our priority. Stay warm! ‚ùÑÔ∏èüå®Ô∏è #SchoolClosures #SnowDay

  --- V2 (Engaging) Post ---
Snow much fun coming! ‚ùÑÔ∏èüå®Ô∏è All local schools will be CLOSED tomorrow due to heavy snow. Time for cozy blankets & snow day adventures! Stay safe & warm, everyone. üòä #SnowDay #SchoolClosures #CommunityAlert
---------------------------------

--- TEST 2: City Hall is closed Monday for the holiday (Facebook) ---

  --- V1 (Authoritative) Post ---
Please be advised that City Hall will be closed on Monday, [Insert Date of Monday Here], in observance of the upcoming holiday. We want to ensure all residents are aware of this temporary closure to plan accordingly. üèõÔ∏è

Normal business operations will resume promptly on 