In [2]:
import os
import json
from pydantic import BaseModel, Field
from typing import List, Union

# -----------------
# 1. SETUP
# -----------------

# Resolve the Gemini API key and configure the client.
# Inside Colab the key is read from the notebook's secret store (`userdata`).
# Outside Colab `google.colab` cannot be imported, so fall back to the
# GEMINI_API_KEY environment variable instead of failing on the import.
import google.generativeai as genai

try:
    from google.colab import userdata
except ImportError:
    # Not running in Colab: use the environment variable (None if it is unset).
    GOOGLE_API_KEY = os.environ.get('GEMINI_API_KEY')
else:
    GOOGLE_API_KEY = userdata.get('GEMINI_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

# Model identifiers. In the cells visible here they are only interpolated into
# printed headers; the model responses themselves are simulated, not fetched.
JUDGE_MODEL = 'gemini-3-pro'
STUDENT_MODEL = 'gemini-2.5-flash-lite'

# -----------------
# 2. DEFINITION: TASK & EVALUATION CRITERIA
# -----------------

# NOTE: Pydantic exposes the class docstring and each Field description in the
# model's schema metadata, so their wording is kept exactly as written.
class Product(BaseModel):
    """A single product to be extracted from the HTML."""

    # All four fields are required (default=... means "no default").
    product_id: str = Field(
        default=...,
        description="The unique identifier from the HTML's 'id' attribute.",
    )
    product_name: str = Field(
        default=...,
        description="The full, descriptive name of the product.",
    )
    # Declared as float: a value that still carries "$" or "USD" is not a valid price.
    price_usd: float = Field(
        default=...,
        description="The price of the product as a clean numerical float. MUST strip any currency symbols (like $ or USD).",
    )
    is_in_stock: bool = Field(
        default=...,
        description="True if the stock status text is 'In Stock', otherwise False.",
    )

# Top-level container that the model's JSON output is checked against
# (evaluation criterion 1): a single "products" array of Product entries.
class ProductList(BaseModel):
    """The root object containing a list of all extracted products."""
    # Every extracted product; each element must satisfy the Product schema.
    products: List[Product]

# Evaluation Criteria:
# 1. Schema Compliance: Output must be valid JSON and successfully parsed into the ProductList Pydantic object. (Strict Pass/Fail)
# 2. Accuracy: Extracted values (product_name, price_usd, is_in_stock) must be correct.

In [3]:
# -----------------
# 3. INITIAL SYSTEM PROMPT (VERSION 1)
# -----------------

# Version 1 of the system prompt: a generic extraction instruction with no
# field-specific cleaning rules. Written one sentence per fragment; adjacent
# literals are concatenated, and the trailing space is part of the value.
INITIAL_SYSTEM_PROMPT = (
    "You are a basic JSON data extraction tool. "
    "Extract product information from the provided HTML snippet. "
    "Return the result as a single JSON object that strictly conforms "
    "to the provided schema. "
    "Do not output any extra text, markdown wrappers (like ```json), "
    "or explanations. "
)

# Test case: two product cards whose prices are formatted inconsistently
# ("$199.99" with a leading symbol, "45.00 USD" with a trailing currency code)
# and whose stock statuses differ ("In Stock" vs. "Out of Stock").
# Each card's `id` attribute carries the product identifier.
TEST_HTML_SNIPPET = """
<div class="product-grid">
  <div id="P-1001" class="item-card">
    <h3 class="name">Smartwatch X-Series</h3>
    <p class="price">$199.99</p>
    <p class="status">In Stock</p>
  </div>
  <div id="P-1002" class="item-card">
    <h3 class="name">Mini Bluetooth Speaker</h3>
    <p class="price">45.00 USD</p>
    <p class="status">Out of Stock</p>
  </div>
</div>
"""

# Simulated response of the student model for run 1. It reproduces a common
# first-attempt failure: `price_usd` is emitted as a string that still carries
# the currency marker ("$199.99", "45.00 USD") instead of a bare number.
LITE_MODEL_OUTPUT_V1 = """
{
  "products": [
    {
      "product_id": "P-1001",
      "product_name": "Smartwatch X-Series",
      "price_usd": "$199.99",
      "is_in_stock": true
    },
    {
      "product_id": "P-1002",
      "product_name": "Mini Bluetooth Speaker",
      "price_usd": "45.00 USD",
      "is_in_stock": false
    }
  ]
}
"""

# Report for run 1: header, the prompt that was used, then the simulated output.
# The HTML input (TEST_HTML_SNIPPET) is not printed in this report.
print(
    f"--- Student Model ({STUDENT_MODEL}) Run 1 ---",
    f"**Initial System Prompt:**\n{INITIAL_SYSTEM_PROMPT}",
    f"**Lite Model Output:**\n{LITE_MODEL_OUTPUT_V1}",
    sep="\n",
)

--- Student Model (gemini-2.5-flash-lite) Run 1 ---
**Initial System Prompt:**
You are a basic JSON data extraction tool. Extract product information from the provided HTML snippet. Return the result as a single JSON object that strictly conforms to the provided schema. Do not output any extra text, markdown wrappers (like ```json), or explanations. 
**Lite Model Output:**

{
  "products": [
    {
      "product_id": "P-1001",
      "product_name": "Smartwatch X-Series",
      "price_usd": "$199.99",
      "is_in_stock": true
    },
    {
      "product_id": "P-1002",
      "product_name": "Mini Bluetooth Speaker",
      "price_usd": "45.00 USD",
      "is_in_stock": false
    }
  ]
}



In [4]:
# -----------------
# 4. GEMINI 3 PRO ANALYSIS OF FAILURE
# -----------------

# Simulated judge turn. The judge model would receive a request along these lines:
#   "The model gemini-2.5-flash-lite failed to parse the output below against
#    the Pydantic schema because the 'price_usd' field was returned as a string
#    ('$199.99', '45.00 USD') instead of a float. The initial prompt was: '...'.
#    Rewrite the System Prompt to fix this specific failure."

# What went wrong, as the judge would state it.
FEEDBACK_V1 = (
    "Schema Compliance Failure: The 'price_usd' field in the output is a string, "
    "which violates the schema's requirement for a float. "
    "The model failed to perform the necessary data cleaning to remove currency symbols."
)

# Why it went wrong and what the prompt rewrite should add.
RATIONALE_V1 = (
    "The initial zero-shot prompt was too generic. "
    "While the Pydantic schema's description explicitly states "
    "'MUST strip any currency symbols', the smaller `gemini-2.5-flash-lite` model "
    "requires more direct instruction to perform data type conversion/cleaning "
    "on noisy inputs. "
    "We need to introduce a **Crucial Constraint** to the system prompt "
    "to ensure this step is executed."
)

# Section header (preceded by a blank line), then feedback, then rationale.
print(
    f"\n--- Judge Model ({JUDGE_MODEL}) Analysis & Optimization ---",
    f"**Feedback (Judge):**\n{FEEDBACK_V1}",
    f"**Rationale (Judge):**\n{RATIONALE_V1}",
    sep="\n",
)

# -----------------
# 5. GEMINI 3 PRO REWRITES THE PROMPT (VERSION 2)
# -----------------

# Version 2 of the system prompt (simulated judge rewrite). It keeps the
# "JSON only" instruction and adds an explicit rule for `price_usd`: strip
# currency markers and emit a numeric value.
# Fix: the role line previously read "STRICTURE", a typo for "STRICT".
OPTIMIZED_SYSTEM_PROMPT_V2 = (
    "You are a STRICT DATA EXTRACTION ENGINE. Your *only* output must be a single JSON object "
    "conforming precisely to the provided schema. Do not include any extra text or formatting (e.g., ```json). "
    "**CRITICAL CONSTRAINT**: When extracting the `price_usd` field from the HTML, you **MUST** "
    "clean the value by stripping all currency symbols ($/USD) and any descriptive text. "
    "The final output for `price_usd` must be a clean numerical float type. Failure to do so is a critical error."
)

print(f"**New Optimized System Prompt for next turn (Judge Output):**\n{OPTIMIZED_SYSTEM_PROMPT_V2}")


--- Judge Model (gemini-3-pro) Analysis & Optimization ---
**Feedback (Judge):**
Schema Compliance Failure: The 'price_usd' field in the output is a string, which violates the schema's requirement for a float. The model failed to perform the necessary data cleaning to remove currency symbols.
**Rationale (Judge):**
The initial zero-shot prompt was too generic. While the Pydantic schema's description explicitly states 'MUST strip any currency symbols', the smaller `gemini-2.5-flash-lite` model requires more direct instruction to perform data type conversion/cleaning on noisy inputs. We need to introduce a **Crucial Constraint** to the system prompt to ensure this step is executed.
**New Optimized System Prompt for next turn (Judge Output):**
You are a STRICTURE DATA EXTRACTION ENGINE. Your *only* output must be a single JSON object conforming precisely to the provided schema. Do not include any extra text or formatting (e.g., ```json). **CRITICAL CONSTRAINT**: When extracting the `pric