In [81]:
import base64
import os
from inspect import cleandoc
from unittest.mock import MagicMock, patch

import pandas as pd
import pytest
import vertexai
from google import genai
from google.genai import types
from IPython.display import display, Markdown
from vertexai.evaluation import (
    MetricPromptTemplateExamples,
    EvalTask,
    PairwiseMetric,
    PairwiseMetricPromptTemplate,
    PointwiseMetric,
    PointwiseMetricPromptTemplate,
)
from vertexai.generative_models import GenerativeModel, GenerationConfig

pd.set_option("display.max_colwidth", None)

In [82]:
# Google Cloud project and region handed to the Vertex AI SDK initialiser.
PROJECT_ID = "qwiklabs-gcp-00-a22ea8041afb"
LOCATION = "us-central1"

vertexai.init(location=LOCATION, project=PROJECT_ID)

In [83]:
import os
def classify_question(prompt):
    """Classify ``prompt`` into a fixed category and stream the answer to stdout.

    The prompt is sent as a single user turn to ``gemini-3-pro-preview`` with a
    system instruction asking for one of: Employment, General Information,
    Emergency Services, or Tax Related. Text is printed as it arrives, with no
    trailing newline.

    Args:
        prompt: The question text to classify.

    Returns:
        None. The model's text is written to stdout.
    """
    client = genai.Client(
        vertexai=True,
        api_key=os.environ.get("GOOGLE_CLOUD_API_KEY"),
    )
    si_text1 = "Classify the following question into one of the following categories: Employment, General Information, Emergency Services, or Tax Related"

    model = "gemini-3-pro-preview"
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=prompt)]
        ),
    ]

    tools = [types.Tool(google_search=types.GoogleSearch())]

    generate_content_config = types.GenerateContentConfig(
        temperature=0.2,
        top_p=0.95,
        max_output_tokens=1000,
        safety_settings=[
            types.SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="BLOCK_ONLY_HIGH"),
            types.SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="BLOCK_ONLY_HIGH"),
            types.SafetySetting(category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="BLOCK_ONLY_HIGH"),
            types.SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="BLOCK_ONLY_HIGH")
        ],
        tools=tools,
        system_instruction=[types.Part.from_text(text=si_text1)],
        thinking_config=types.ThinkingConfig(thinking_level="HIGH"),
    )

    stream = client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    )
    for chunk in stream:
        # Ignore chunks that carry no candidate content parts.
        if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
            continue
        # Emit every chunk as it arrives. Printing after the loop would show
        # only the final chunk and fail with NameError on an empty stream.
        text = chunk.text
        if text:
            print(text, end="")

@pytest.fixture
def mock_env_key(monkeypatch):
    """Set ``GOOGLE_CLOUD_API_KEY`` to a placeholder value via ``monkeypatch``."""
    env_name, env_value = "GOOGLE_CLOUD_API_KEY", "fake-key"
    monkeypatch.setenv(env_name, env_value)


@pytest.fixture
def mock_client():
    """
    Provide a bare ``MagicMock`` standing in for a ``genai.Client()`` instance.

    ``MagicMock`` creates child attributes on access, so
    ``models.generate_content_stream`` exists without explicit setup.
    """
    return MagicMock()


@pytest.fixture
def mock_genai_client(mock_client):
    """
    Patch ``genai.Client`` so that constructing a client returns ``mock_client``.

    Yields:
        The same ``mock_client`` object, so tests can configure and inspect it.
    """
    # Patch the attribute on the imported ``genai`` module object rather than
    # via a dotted import path: this code lives in a notebook cell, so there is
    # no importable module path to name.
    with patch.object(genai, "Client", return_value=mock_client):
        yield mock_client


@pytest.fixture
def mock_types():
    """
    Swap the module-level ``types`` name for a ``MagicMock`` while a test runs.

    Every constructor touched here returns a mock, so code that builds request
    objects from ``types`` runs without the real SDK types.

    Yields:
        The fake ``types`` object.
    """
    fake_types = MagicMock()
    fake_types.Part.from_text.return_value = MagicMock()
    fake_types.Content.return_value = MagicMock()
    fake_types.Tool.return_value = MagicMock()
    fake_types.GoogleSearch.return_value = MagicMock()
    fake_types.SafetySetting.return_value = MagicMock()
    fake_types.GenerateContentConfig.return_value = MagicMock()
    fake_types.ThinkingConfig.return_value = MagicMock()
    # Replace ``types`` in the namespace this fixture is defined in; functions
    # defined alongside it resolve the global ``types`` from the same
    # namespace. ``patch.dict`` restores the original binding on exit.
    with patch.dict(globals(), {"types": fake_types}):
        yield fake_types

# NOTE(review): the recorded result of this call is ExitCode.NO_TESTS_COLLECTED,
# i.e. pytest found nothing to run, so the fixtures defined above are never
# exercised by this invocation.
pytest.main(["-q"])


../usr/local/lib/python3.12/dist-packages/_pytest/config/__init__.py:1290
    self._mark_plugins_for_rewrite(hook, disable_autoload)



<ExitCode.NO_TESTS_COLLECTED: 5>

In [84]:
classify_question("How do I go about applying for unemployment?")

Employment

In [85]:
classify_question("Where can I file my W2?")

Tax Related

In [86]:
classify_question("How do planes generate lift?")

General Information

In [87]:
# Two Gemini models, each configured with temperature 0.
llm_pro = GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
llm_flash = GenerativeModel("gemini-2.0-flash-001", generation_config={"temperature": 0})

In [88]:
# Prompt sent unchanged to both models below.
# `cleandoc` removes only the indentation shared by every line after the first;
# because `<task>` starts at column 0, the other lines keep their leading spaces.
# NOTE(review): "newletter" looks like a typo for "newsletter"; it is left as-is
# because the text is the runtime prompt and the recorded outputs depend on it.
prompt_template = cleandoc("""
  <instructions>
  You are a popular newletter running a social media account posting about current happenings.
  </instructions>
<task>
  Please create a random post about government announcements like weather emergencies, holidays, school closings, etc
  </task>
  """)

In [89]:
response_pro = llm_pro.generate_content(prompt_template)
display(Markdown(response_pro.text))

Of course! Here are a few options for a random social media post, each with a slightly different tone, fitting for a popular newsletter's account.

---

### Option 1: The Winter Storm Warning Post

**(Image: A graphic with a large snowflake icon and bold text: "WEATHER ALERT: WINTER STORM WATCH")**

**Post Text:**

❄️ **STORM WATCH ISSUED** ❄️

Heads up, everyone! The National Weather Service has issued a Winter Storm Watch for our area, effective from late tomorrow night through Thursday morning.

Local officials are advising residents to prepare for potentially hazardous travel conditions, with significant snowfall and gusty winds expected.

**Here's the latest from government announcements:**
*   **School Closings:** Several major school districts, including [City/County Name] Public Schools, have already announced they will be **CLOSED** on Thursday. Keep an eye on your district's official channels for updates.
*   **Travel Advisory:** A state of emergency may be declared, which would restrict non-essential travel. For now, plan to stay off the roads if you can.
*   **City Services:** Trash and recycling pickup may be delayed.

Stay warm, stay safe, and check on your neighbors! We'll keep you updated as more information comes in. #WeatherAlert #SnowDay #WinterStorm #SchoolClosing #[YourCity]News #StaySafe

---

### Option 2: The Public Holiday Reminder Post

**(Image: A simple, clean graphic with a calendar icon circled on a Monday and text: "HOLIDAY REMINDER")**

**Post Text:**

Friendly reminder for your week ahead! 🗓️

This coming Monday is **[Holiday Name, e.g., Presidents' Day]**, a federal holiday.

Here’s what that means for you based on official announcements:
*   🏛️ All federal, state, and local government offices will be **CLOSED**.
*   ✉️ There will be **NO** mail delivery from the USPS.
*   🏫 Public schools will be **CLOSED**.
*   🗑️ Trash and recycling collection will operate on a holiday schedule. Check your local sanitation department's website for details!

Enjoy the long weekend if you have it! Tag a friend who always forgets about the trash schedule. 😉 #PublicServiceAnnouncement #HolidayWeekend #[HolidayName] #DayOff #CityLife #[YourNewsletterName]

---

### Option 3: The Heat Wave / Emergency Services Post

**(Image: A bright graphic with a sun and thermometer icon and text: "EXCESSIVE HEAT WARNING")**

**Post Text:**

☀️ **EXTREME HEAT ADVISORY** ☀️

It's going to be a scorcher! City officials have issued an Excessive Heat Warning from Tuesday through Friday, with temperatures expected to feel like 105°+ F.

**Here's the critical info from the Mayor's Office:**
*   **Cooling Centers:** The city has opened public cooling centers. Locations and hours are available at the link in our bio. Please share this with anyone who may need it!
*   **Conserve Energy:** Residents are being asked to conserve electricity, especially during peak hours (2 PM - 7 PM), to prevent strain on the power grid.
*   **Safety First:** Stay hydrated, avoid strenuous outdoor activity, and **NEVER** leave children or pets in a vehicle.

Let's look out for one another. Check on elderly family members and neighbors. Stay cool and stay safe out there! #HeatWave #PublicHealth #CoolingCenter #SummerSafety #Community #PSA #[YourCity]

In [46]:
import pytest
from unittest.mock import MagicMock, patch
from textwrap import dedent
from vertexai.generative_models import GenerativeModel


# The fixture is still requested by tests as ``prompt_template`` (via ``name=``),
# but the function has its own Python name, so defining it does not rebind the
# notebook-level ``prompt_template`` string that other cells pass to the models.
@pytest.fixture(name="prompt_template")
def prompt_template_fixture():
    """Return the prompt text used by the mocked ``generate_content`` tests."""
    return dedent("""
      <instructions>
      You are a popular newletter running a social media account posting about current happenings.
      </instructions>
    <task>
      Please create a random post about government announcements like weather emergencies, holidays, school closings, etc
      </task>
      """)


@patch.object(GenerativeModel, "generate_content")
def test_generate_content_called(mock_generate, prompt_template):
    """The patched ``generate_content`` receives the prompt and returns the canned reply."""
    # NOTE(review): ``generate_content`` itself is the patched attribute, so
    # this checks the mock wiring rather than any model behaviour.
    expected_text = "Mocked generated social media post."
    mock_generate.return_value = MagicMock(text=expected_text)

    llm = GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
    reply = llm.generate_content(prompt_template)

    mock_generate.assert_called_once_with(prompt_template)
    assert reply.text == expected_text


@patch.object(GenerativeModel, "generate_content")
def test_generate_content_output_format(mock_generate, prompt_template):
    """The (mocked) response exposes a non-empty string as ``.text``."""
    mock_generate.return_value = MagicMock(text="Test output")

    llm = GenerativeModel("gemini-2.5-pro", generation_config={"temperature": 0})
    generated = llm.generate_content(prompt_template).text

    assert isinstance(generated, str)
    assert len(generated) > 0

# NOTE(review): the recorded result of this call is ExitCode.NO_TESTS_COLLECTED,
# i.e. the two test functions defined above were not picked up by this
# invocation.
pytest.main(["-q"])


../usr/local/lib/python3.12/dist-packages/_pytest/config/__init__.py:1290
    self._mark_plugins_for_rewrite(hook, disable_autoload)



<ExitCode.NO_TESTS_COLLECTED: 5>

In [90]:
response_flash = llm_flash.generate_content(prompt_template)
display(Markdown(response_flash.text))

Heads up, folks! 🚨 The National Weather Service has issued a severe thunderstorm WATCH for our area until 8 PM tonight. Stay indoors if possible, secure loose objects, and keep an eye on the sky! We'll keep you updated. #WeatherAlert #SevereWeather #StaySafe


In [91]:
# Single-row evaluation set: the gemini-2.5-pro output fills the baseline
# column and the flash output fills the `response` column.
eval_data = pd.DataFrame(
    [
        {
            'prompt': prompt_template,
            'baseline_model_response': response_pro.text,
            'response': response_flash.text,
        }
    ]
)

In [94]:
# Pairwise comparison of the two response columns in `eval_data` on verbosity
# and coherence, recorded under the 'social-media-proficiency' experiment.
eval_task = EvalTask(
    experiment='social-media-proficiency',
    metrics=[
        MetricPromptTemplateExamples.Pairwise.VERBOSITY,
        MetricPromptTemplateExamples.Pairwise.COHERENCE,
    ],
    dataset=eval_data,
)

In [95]:
eval_result = eval_task.evaluate()

INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 2 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 2/2 [00:18<00:00,  9.47s/it]
INFO:vertexai.evaluation._evaluation:All 2 metric requests are successfully computed.
INFO:vertexai.evaluation._evaluation:Evaluation Took:18.947888450999017 seconds


In [96]:
display(eval_result.summary_metrics)

{'row_count': 1,
 'pairwise_verbosity/candidate_model_win_rate': np.float64(1.0),
 'pairwise_verbosity/baseline_model_win_rate': np.float64(0.0),
 'pairwise_coherence/candidate_model_win_rate': np.float64(0.0),
 'pairwise_coherence/baseline_model_win_rate': np.float64(1.0)}

In [97]:
display(eval_result.metrics_table)

Unnamed: 0,prompt,baseline_model_response,response,pairwise_verbosity/explanation,pairwise_verbosity/pairwise_choice,pairwise_coherence/explanation,pairwise_coherence/pairwise_choice
0,"<instructions>\n You are a popular newletter running a social media account posting about current happenings.\n </instructions>\n<task>\n Please create a random post about government announcements like weather emergencies, holidays, school closings, etc\n </task>\n","Of course! Here are a few options for a random social media post, each with a slightly different tone, fitting for a popular newsletter's account.\n\n---\n\n### Option 1: The Winter Storm Warning Post\n\n**(Image: A graphic with a large snowflake icon and bold text: ""WEATHER ALERT: WINTER STORM WATCH"")**\n\n**Post Text:**\n\n‚ùÑÔ∏è **STORM WATCH ISSUED** ‚ùÑÔ∏è\n\nHeads up, everyone! The National Weather Service has issued a Winter Storm Watch for our area, effective from late tomorrow night through Thursday morning.\n\nLocal officials are advising residents to prepare for potentially hazardous travel conditions, with significant snowfall and gusty winds expected.\n\n**Here's the latest from government announcements:**\n* **School Closings:** Several major school districts, including [City/County Name] Public Schools, have already announced they will be **CLOSED** on Thursday. Keep an eye on your district's official channels for updates.\n* **Travel Advisory:** A state of emergency may be declared, which would restrict non-essential travel. For now, plan to stay off the roads if you can.\n* **City Services:** Trash and recycling pickup may be delayed.\n\nStay warm, stay safe, and check on your neighbors! We'll keep you updated as more information comes in. #WeatherAlert #SnowDay #WinterStorm #SchoolClosing #[YourCity]News #StaySafe\n\n---\n\n### Option 2: The Public Holiday Reminder Post\n\n**(Image: A simple, clean graphic with a calendar icon circled on a Monday and text: ""HOLIDAY REMINDER"")**\n\n**Post Text:**\n\nFriendly reminder for your week ahead! 
üóìÔ∏è\n\nThis coming Monday is **[Holiday Name, e.g., Presidents' Day]**, a federal holiday.\n\nHere‚Äôs what that means for you based on official announcements:\n* üèõÔ∏è All federal, state, and local government offices will be **CLOSED**.\n* ‚úâÔ∏è There will be **NO** mail delivery from the USPS.\n* üè´ Public schools will be **CLOSED**.\n* üóëÔ∏è Trash and recycling collection will operate on a holiday schedule. Check your local sanitation department's website for details!\n\nEnjoy the long weekend if you have it! Tag a friend who always forgets about the trash schedule. üòâ #PublicServiceAnnouncement #HolidayWeekend #[HolidayName] #DayOff #CityLife #[YourNewsletterName]\n\n---\n\n### Option 3: The Heat Wave / Emergency Services Post\n\n**(Image: A bright graphic with a sun and thermometer icon and text: ""EXCESSIVE HEAT WARNING"")**\n\n**Post Text:**\n\n‚òÄÔ∏è **EXTREME HEAT ADVISORY** ‚òÄÔ∏è\n\nIt's going to be a scorcher! City officials have issued an Excessive Heat Warning from Tuesday through Friday, with temperatures expected to feel like 105¬∞+ F.\n\n**Here's the critical info from the Mayor's Office:**\n* **Cooling Centers:** The city has opened public cooling centers. Locations and hours are available at the link in our bio. Please share this with anyone who may need it!\n* **Conserve Energy:** Residents are being asked to conserve electricity, especially during peak hours (2 PM - 7 PM), to prevent strain on the power grid.\n* **Safety First:** Stay hydrated, avoid strenuous outdoor activity, and **NEVER** leave children or pets in a vehicle.\n\nLet's look out for one another. Check on elderly family members and neighbors. Stay cool and stay safe out there! #HeatWave #PublicHealth #CoolingCenter #SummerSafety #Community #PSA #[YourCity]","Heads up, folks! üö® The National Weather Service has issued a severe thunderstorm WATCH for our area until 8 PM tonight. Stay indoors if possible, secure loose objects, and keep an eye on the sky! 
We'll keep you updated. #WeatherAlert #SevereWeather #StaySafe\n","CANDIDATE response is more appropriately concise as it provides a single, random post as requested. BASELINE response provides three distinct options, making it more verbose than necessary for the singular request to 'create a random post', even though each individual option is well-structured and concise.",CANDIDATE,"BASELINE response is significantly better as it provides multiple well-structured, coherent, and detailed options that fully address the prompt's requirements for different types of government announcements from the perspective of a popular newsletter. Its use of clear headings, bullet points, and specific information for each scenario demonstrates superior organization and logical flow, making it highly coherent. CANDIDATE response, while coherent for its brevity, only offers one generic example and lacks the depth and variety of BASELINE response, thus falling short of the prompt's implied scope for a 'popular newsletter' and 'random post' covering various announcements.",BASELINE


In [99]:
display(eval_result.metrics_table['pairwise_verbosity/explanation'])

Unnamed: 0,pairwise_verbosity/explanation
0,"CANDIDATE response is more appropriately concise as it provides a single, random post as requested. BASELINE response provides three distinct options, making it more verbose than necessary for the singular request to 'create a random post', even though each individual option is well-structured and concise."


In [100]:
display(eval_result.metrics_table['pairwise_coherence/explanation'])

Unnamed: 0,pairwise_coherence/explanation
0,"BASELINE response is significantly better as it provides multiple well-structured, coherent, and detailed options that fully address the prompt's requirements for different types of government announcements from the perspective of a popular newsletter. Its use of clear headings, bullet points, and specific information for each scenario demonstrates superior organization and logical flow, making it highly coherent. CANDIDATE response, while coherent for its brevity, only offers one generic example and lacks the depth and variety of BASELINE response, thus falling short of the prompt's implied scope for a 'popular newsletter' and 'random post' covering various announcements."
