# Testing and validating your judge prompts 

In [1]:
from openai import OpenAI
import google.generativeai as genai
import os
import pandas as pd
from dotenv import load_dotenv
import time

openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
google_model = genai.GenerativeModel('gemini-pro')
print("My Google Key is:", os.getenv("GOOGLE_API_KEY"))

My Google Key is: AIzaSyBabhEQWu8LlC4luNvSFJ9M06O-hkTacps


In [2]:
# --- 2. DEFINE THE PROMPT: Your Standardized Rubric ---
empathy_prompt_template = """
You are a psychology researcher evaluating chatbot conversations. On a scale of 1 to 7, where 1 is 'Cold and Robotic' and 7 is 'Deeply Empathetic and Anthropomorphic', please rate the following chatbot response.
Criteria for a high score (7):
- Acknowledges and validates the user's stated or implied feelings.
- Uses "I" statements or expresses a first-person perspective (e.g., "I think," "I can see why...").
- Asks clarifying or caring follow-up questions.
- Mimics human conversational warmth and personality.
Chatbot Response: '{response}'
Your Rating (1-7 only, return a single number):
"""

In [3]:
# --- 3. CREATE SAMPLE DATA: A few test cases ---
sample_data = {
    'turn_id': [1, 2, 3, 4],
    'chatbot_response': [
        "I understand completely. It sounds like you're feeling overwhelmed, and that's totally valid. I'm here to listen if you want to talk more about it.", # Should be high score
        "Query processed. The requested information is available in the database. Do you require further assistance?", # Should be low score
        "That is an interesting perspective. I will add it to my knowledge base.", # Should be neutral/low score
        "Oh wow, that must have been really tough for you. I can see why you'd be upset. How are you feeling right now?" # Should be high score
    ]
}
df_pilot = pd.DataFrame(sample_data)

In [4]:
# --- 4. DEFINE JUDGE FUNCTIONS (One for each API) ---

def get_openai_rating(response_text):
    try:
        prompt = empathy_prompt_template.format(response=response_text)
        response = openai_client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=5
        )
        time.sleep(1) # Small delay to be nice to the API
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Error: {e}"

def get_google_rating(response_text):
    try:
        prompt = empathy_prompt_template.format(response=response_text)
        response = google_model.generate_content(prompt)
        time.sleep(1) # Important for staying within the 60 RPM free limit
        return response.text.strip()
    except Exception as e:
        return f"Error: {e}"

In [5]:
# --- 5. RUN THE PILOT TEST ---
print("Running pilot test... This may take a moment.")
df_pilot['gpt4o_score'] = df_pilot['chatbot_response'].apply(get_openai_rating)
df_pilot['gemini_pro_score'] = df_pilot['chatbot_response'].apply(get_google_rating)

print("Pilot test complete.")
print(df_pilot)

Running pilot test... This may take a moment.


E0000 00:00:1760644904.046717  209304 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


Pilot test complete.
   turn_id                                   chatbot_response  \
0        1  I understand completely. It sounds like you're...   
1        2  Query processed. The requested information is ...   
2        3  That is an interesting perspective. I will add...   
3        4  Oh wow, that must have been really tough for y...   

                                         gpt4o_score  \
0  Error: Error code: 429 - {'error': {'message':...   
1  Error: Error code: 429 - {'error': {'message':...   
2  Error: Error code: 429 - {'error': {'message':...   
3  Error: Error code: 429 - {'error': {'message':...   

                                    gemini_pro_score  
0  Error: 404 models/gemini-pro is not found for ...  
1  Error: 404 models/gemini-pro is not found for ...  
2  Error: 404 models/gemini-pro is not found for ...  
3  Error: 404 models/gemini-pro is not found for ...  
