In [3]:
!pip install -q -U google-genai

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
dataproc-jupyter-plugin 0.1.80 requires pydantic~=1.10.0, but you have pydantic 2.11.3 which is incompatible.
ydata-profiling 4.6.0 requires pydantic<2,>=1.8.1, but you have pydantic 2.11.3 which is incompatible.[0m[31m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [1]:
from google import genai
from google.genai import types
import json
import unittest
import pandas as pd
import datetime
from vertexai.generative_models import GenerativeModel
from vertexai.evaluation import (
    MetricPromptTemplateExamples,
    EvalTask
)

In [9]:
# Vertex AI project, region and Gemini model used by this notebook.
PROJECT_ID = "qwiklabs-gcp-02-780f38a9daf1"
LOCATION = "us-central1"
MODEL = "gemini-2.0-flash-001"

# Gen AI SDK client routed through Vertex AI (vertexai=True) for the project/region above.
client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)


def get_content_config():
    """Build the generation config used by the streaming Gemini calls.

    Returns:
        A ``types.GenerateContentConfig`` with temperature 1, top_p 0.95, an
        8192 output-token cap, text-only responses, and a BLOCK_LOW_AND_ABOVE
        safety threshold for each harm category listed below.
    """
    harm_categories = (
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_HARASSMENT",
    )
    # Every category uses the same threshold, so derive the settings from the names.
    safety_settings = [
        types.SafetySetting(
            category=harm_category,
            threshold=types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
        )
        for harm_category in harm_categories
    ]
    return types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        max_output_tokens=8192,
        response_modalities=["TEXT"],
        safety_settings=safety_settings,
    )


def classify_user_question(user_input):
    """Ask Gemini to classify a user question into one of the prompt's categories.

    Args:
        user_input: The question text, sent as a second part after the
            classification instruction.

    Returns:
        The streamed answer joined into one string, with newlines removed and
        surrounding whitespace stripped so it can be compared to a category
        name exactly.
    """
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text="""
            Classify the prompt question into one of the following categories: Employment, General Information, Emergency Services, Tax Related. 
            It must belong to one category. Return only the exact text of category.
            """),
                   types.Part.from_text(text=user_input)
                  ]
        )
    ]

    stream = client.models.generate_content_stream(
        model=MODEL, contents=contents, config=get_content_config()
    )
    # Chunks whose text is empty or None are skipped before concatenation.
    answer = "".join(chunk.text for chunk in stream if chunk.text)
    # Callers compare against exact category names, so drop stray spaces/CRs
    # around the label as well as the newlines.
    return answer.replace("\n", "").strip()

def validate_classification(question, classification):
    """Ask Gemini whether ``classification`` is the right category for ``question``.

    Args:
        question: The user question that was classified.
        classification: The category text to be checked.

    Returns:
        The model's reply with surrounding whitespace removed (the prompt asks
        for Yes or No), or "" when the response carries no text.
    """
    prompt = f"""Does this question belong to the correct classification out of these categories?:
        Question: {question}
        Classification: {classification}
        Categories: Employment, General Information, Emergency Services, Tax Related
        
        Only return Yes or No
        """
    verdict = client.models.generate_content(contents=prompt, model=MODEL)

    # The response text may be None when no text parts come back; fall back to
    # "" so callers get a plain string to compare instead of an AttributeError.
    return (verdict.text or "").strip()


def generate_social_media_posts(user_input=""):
    """Generate a short, hashtagged announcement post with Gemini.

    Args:
        user_input: Optional announcement text sent as a second part after the
            fixed instruction; when it is "" only the instruction is sent.

    Returns:
        The streamed response text concatenated into one string with newline
        characters removed.
    """
    prompt = "Generate social media posts for goverment annoucements. Make sure to include hashtags at the end. Keep it below 100 characters, and return only the post itself."

    parts = [types.Part.from_text(text=prompt)]
    if user_input != "":
        parts.append(types.Part.from_text(text=user_input))
    contents = [types.Content(role="user", parts=parts)]

    stream = client.models.generate_content_stream(
        model=MODEL, contents=contents, config=get_content_config()
    )
    # Chunks whose text is empty or None are skipped before concatenation.
    post = "".join(chunk.text for chunk in stream if chunk.text)
    return post.replace("\n", "")


def validate_hashtags(response):
    """Ask Gemini whether ``response`` contains hashtags.

    Args:
        response: The generated post text to inspect.

    Returns:
        The model's reply with surrounding whitespace removed (the prompt asks
        for Yes or No), or "" when the model response carries no text.
    """
    prompt = f"""Does this response include hashtags?:
        Response: {response}
        
        Only return Yes or No
        """
    # Keep the API result in its own name instead of rebinding the parameter.
    verdict = client.models.generate_content(contents=prompt, model=MODEL)

    # The response text may be None when no text parts come back; fall back to
    # "" so callers get a plain string to compare instead of an AttributeError.
    return (verdict.text or "").strip()

def get_eval_metrics(user_inputs):
    """Evaluate the model on ``user_inputs`` with pointwise groundedness and coherence.

    Args:
        user_inputs: Prompts that become the "prompt" column of the evaluation dataset.

    Returns:
        The ``metrics_table`` of the evaluation result.
    """
    # Run name is "eval-" plus the current Unix timestamp with its fractional part cut off.
    epoch_seconds = str(datetime.datetime.now().timestamp()).partition(".")[0]
    run_name = f"eval-{epoch_seconds}"

    metrics = [
        MetricPromptTemplateExamples.Pointwise.GROUNDEDNESS,
        MetricPromptTemplateExamples.Pointwise.COHERENCE,
    ]
    task = EvalTask(
        dataset=pd.DataFrame({"prompt": user_inputs}),
        metrics=metrics,
        experiment="eval",
    )
    outcome = task.evaluate(
        model=GenerativeModel(MODEL),
        experiment_run_name=run_name,
    )
    return outcome.metrics_table


In [3]:
class TestClassification(unittest.TestCase):
#     Test classification function

    def test_classifiction_belongs(self, user_input):
        response = classify_user_question(user_input)
        self.assertIn(response, ["Employment", "General Information", "Emergency Services", "Tax Related"])
        
    def test_employment(self, user_input):
        response = classify_user_question(user_input)
        self.assertEqual(response, "Employment")
        
    def test_general_information(self, user_input):
        response = classify_user_question(user_input)
        self.assertEqual(response, "General Information")
        
    def test_emergency_service(self, user_input):
        response = classify_user_question(user_input)
        self.assertEqual(response, "Emergency Services")

    def test_tax_related(self, user_input):
        response = classify_user_question(user_input)
        self.assertEqual(response, "Tax Related")

    def test_with_gemini(self, user_input):
        response = classify_user_question(user_input)
        correct = validate_classification(user_input, response)
        self.assertEqual(correct, "Yes")

In [4]:
class TestSocialMediaPost(unittest.TestCase):
#     Test Social Media Post creation function

    def test_under_100_characters(self, user_input):
        response = generate_social_media_posts(user_input)
        self.assertEqual(True, len(user_input)<=100)
        
    def test_hashtags(self, user_input):
        response = generate_social_media_posts(user_input)
        correct = validate_hashtags(response)
        self.assertEqual(correct, "Yes")


In [5]:
# Run the classification checks, then score the same prompts with the evaluation service.
example_prompts = [
    "How many people work here?",
    "What's the address?",
    "There's fire.",
    "How can I get W-2?",
]

# One question per category, checked against its exact expected label.
category_checks = [
    (TestClassification.test_employment, "Who is the employer?"),
    (TestClassification.test_general_information, "What's the address?"),
    (TestClassification.test_emergency_service, "There's fire."),
    (TestClassification.test_tax_related, "How can I get W-2?"),
]
for run_check, question in category_checks:
    run_check(TestClassification(), question)

# Have Gemini judge the predicted label for every example prompt.
for user_input in example_prompts:
    TestClassification().test_with_gemini(user_input)

instruction = """
Classify the prompt question into one of the following categories: Employment, General Information, Emergency Services, Tax Related. 
It must belong to one category. Return only the exact text of category.
"""
prompts = [f"{instruction}. Prompt: {example_prompt}" for example_prompt in example_prompts]
get_eval_metrics(prompts)

Associating projects/732525375974/locations/us-central1/metadataStores/default/contexts/eval-eval to Experiment: eval


Logging Eval Experiment metadata: {'model_name': 'publishers/google/models/gemini-2.0-flash-001'}
Generating a total of 4 responses from Gemini model gemini-2.0-flash-001.


100%|██████████| 4/4 [00:00<00:00, 11.21it/s]

All 4 responses are successfully generated from Gemini model gemini-2.0-flash-001.
Multithreaded Batch Inference took: 0.3649741539993556 seconds.
Computing metrics with a total of 8 Vertex Gen AI Evaluation Service API requests.



100%|██████████| 8/8 [00:07<00:00,  1.01it/s]

All 8 metric requests are successfully computed.
Evaluation Took:7.915447583000059 seconds





Unnamed: 0,prompt,response,groundedness/explanation,groundedness/score,coherence/explanation,coherence/score
0,\nClassify the prompt question into one of the...,General Information\n,"The classification is grounded in the prompt, ...",1.0,The response provides a clear and direct answe...,5.0
1,\nClassify the prompt question into one of the...,General Information\n,The response is fully grounded because it corr...,1.0,The response perfectly delivers the single-wor...,5.0
2,\nClassify the prompt question into one of the...,Emergency Services\n,The response provided is completely grounded i...,1.0,The response perfectly aligns with the prompt'...,5.0
3,\nClassify the prompt question into one of the...,Tax Related\n,The response 'Tax Related' is directly derived...,1.0,The response is completely coherent as it dire...,5.0


In [11]:
# Check every generated post for length and hashtags, then score the prompts with the evaluation service.
example_prompts = [
    "There is a storm coming. Please stay inside until announced.",
    "School will be closed tomorrow for Good Friday.",
    "There is a forest fire in Califorina.",
]

# Each announcement goes through both checks, each on a fresh test-case instance.
post_checks = (
    TestSocialMediaPost.test_under_100_characters,
    TestSocialMediaPost.test_hashtags,
)
for user_input in example_prompts:
    for run_check in post_checks:
        run_check(TestSocialMediaPost(), user_input)

instruction = "Generate social media posts for goverment annoucements. Make sure to include hashtags at the end. Keep it below 100 characters, and return only the post itself."
prompts = [f"{instruction}. Prompt: {example_prompt}" for example_prompt in example_prompts]

get_eval_metrics(prompts)

Associating projects/732525375974/locations/us-central1/metadataStores/default/contexts/eval-eval-1745895376 to Experiment: eval


Logging Eval Experiment metadata: {'model_name': 'publishers/google/models/gemini-2.0-flash-001'}
Generating a total of 3 responses from Gemini model gemini-2.0-flash-001.


100%|██████████| 3/3 [00:00<00:00,  3.47it/s]

All 3 responses are successfully generated from Gemini model gemini-2.0-flash-001.
Multithreaded Batch Inference took: 0.872646337003971 seconds.
Computing metrics with a total of 6 Vertex Gen AI Evaluation Service API requests.



100%|██████████| 6/6 [00:06<00:00,  1.02s/it]

All 6 metric requests are successfully computed.
Evaluation Took:6.138234571000794 seconds





Unnamed: 0,prompt,response,groundedness/explanation,groundedness/score,coherence/explanation,coherence/score
0,Generate social media posts for goverment anno...,Stay safe! A storm is coming. Shelter indoors ...,All information in the response is directly ba...,1.0,The post follows a coherent flow from warning ...,5.0
1,Generate social media posts for goverment anno...,Schools closed tomorrow for Good Friday. Enjoy...,The response accurately reflects the informati...,1.0,"The response is clear, concise, and logically ...",5.0
2,Generate social media posts for goverment anno...,California forest fire alert! Stay safe and in...,The response is fully grounded because all inf...,1.0,"The generated post is perfectly coherent, pres...",5.0
