In [20]:
# Assignment 6 – Prompt Engineering for Performance Improvement (Gemini Version)

!pip install -q google-genai

import os

from google import genai

# ---------------------------------------------------
# 🔑 CONFIGURE YOUR GEMINI API KEY
# ---------------------------------------------------
# ⛔ IMPORTANT:
# - Preferred: set the environment variable GEMINI_API_KEY before starting the
#   kernel; it is picked up automatically below, so no key lives in the notebook.
# - Fallback for quick local testing: replace the placeholder string below with
#   your key while you work.
# - BEFORE pushing to GitHub, REMOVE the real key and leave the placeholder
#   "<YOUR_API_KEY_HERE>".

# The environment variable wins when it is set and non-empty; otherwise the
# placeholder (or a key pasted in its place) is used.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or "<YOUR_API_KEY_HERE>"  # don't commit a real key

# Single shared client used by every prompt cell below.
client = genai.Client(api_key=GEMINI_API_KEY)

# ---------------------------------------------------
# CONSTANT INPUT (same text for all prompts)
# ---------------------------------------------------
# The apostrophe in "Joe’s" is a real right single quotation mark (U+2019);
# it was previously stored as mis-decoded bytes, which sent garbage to the model.
input_text = """
I went to Joe’s Pizza last Friday. The slice was $3.50 and honestly it tasted burnt.
My friend Sarah paid $4.25 for a soda. I might go back on March 5th, 2025.
"""

# Helper function to run a prompt and print the output nicely
def run_prompt(label: str, prompt: str) -> str:
    """
    Send `prompt` followed by the shared `input_text` to Gemini and print the reply.

    Args:
        label: Heading printed above the model output (e.g. "BASELINE").
        prompt: Instruction text. The module-level `input_text` is appended
            after an "Input text:" marker, so every technique sees the same input.

    Returns:
        The text of the model response, or "" when the response carries no text.
    """
    full_input = f"{prompt}\n\nInput text:\n{input_text}"

    response = client.models.generate_content(
        model="gemini-2.5-flash",  # fast + cheap model per Google docs
        contents=full_input,
    )

    # `response.text` may be None when the response has no text parts;
    # normalise to "" so the declared `-> str` return type always holds.
    text = response.text or ""

    banner = "=" * 80
    print(f"\n{banner}")
    print(f"{label} OUTPUT")
    print(banner)
    print(text)
    print("\n")
    return text


In [15]:
# -------------------------
# 1. BASELINE TEST
# -------------------------
# Deliberately minimal instruction: the reference point the later
# techniques are compared against.

baseline_prompt = "Extract the data."

baseline_output = run_prompt(label="BASELINE", prompt=baseline_prompt)



BASELINE OUTPUT
Here's the extracted data:

*   **Business Name:** Joe’s Pizza
*   **Items & Prices:**
    *   Pizza Slice: $3.50
    *   Soda: $4.25
*   **People Involved:**
    *   Narrator (bought slice)
    *   Sarah (friend, bought soda)
*   **Dates:**
    *   Last Visit: Last Friday
    *   Potential Future Visit: March 5th, 2025
*   **Observations:**
    *   Pizza slice tasted burnt.




In [16]:
# -------------------------
# 2. TECHNIQUE 1 – Role Prompting
# -------------------------
# Gives the model a persona and an explicit list of fields, but leaves the
# output format free-form (natural language).

tech1_prompt = """
You are a Senior Data Analyst who specializes in converting messy natural language
into accurate structured summaries.

From the input text, identify:
- The restaurant name
- All purchased items
- Each item’s price
- Any dates mentioned

Return a clear description in natural language.
"""

tech1_output = run_prompt("TECHNIQUE 1 – ROLE PROMPTING", tech1_prompt)



TECHNIQUE 1 – ROLE PROMPTING OUTPUT
Here is a structured summary of the information from the input text:

The restaurant mentioned is **Joe’s Pizza**. Purchased items include a **slice** for **$3.50** and a **soda** for **$4.25**. The dates mentioned are **last Friday** (when the visit occurred) and **March 5th, 2025** (a potential future visit date).




In [17]:
# -------------------------
# 3. TECHNIQUE 2 – Output Formatting (JSON)
# -------------------------
# Same persona as technique 1, plus a fixed JSON schema and explicit rules
# so the reply can be consumed programmatically.

tech2_prompt = """
You are a Senior Data Analyst.

Extract the restaurant name, all purchased items (with names and prices),
and the date mentioned in the text.

Return ONLY valid JSON using the following exact structure:

{
  "restaurant": "",
  "items": [
    {"name": "", "price": ""},
    {"name": "", "price": ""}
  ],
  "date": ""
}

Rules:
- Do NOT add extra fields.
- Do NOT include any explanation or commentary.
- Prices should be strings exactly as they appear in the text.
- If something is missing, use an empty string.
"""

tech2_output = run_prompt("TECHNIQUE 2 – OUTPUT FORMATTING (JSON)", tech2_prompt)



TECHNIQUE 2 – OUTPUT FORMATTING (JSON) OUTPUT
```json
{
  "restaurant": "Joe’s Pizza",
  "items": [
    {"name": "slice", "price": "$3.50"},
    {"name": "soda", "price": "$4.25"}
  ],
  "date": "March 5th, 2025"
}
```




In [18]:
# -------------------------
# 4. TECHNIQUE 3 – CHAIN-OF-THOUGHT
# -------------------------
# Asks the model to reason step by step in the visible reply before emitting
# the same JSON structure used in technique 2.

tech3_prompt = """
You are a Senior Data Analyst.

First, think step-by-step to extract:
1. The restaurant name.
2. Each purchased item and its price.
3. The date of visit or planned visit.

Explain your reasoning briefly in natural language.
Then, at the end, output the extracted information in the following JSON structure:

{
  "restaurant": "",
  "items": [
    {"name": "", "price": ""},
    {"name": "", "price": ""}
  ],
  "date": ""
}
"""

tech3_output = run_prompt("TECHNIQUE 3 – CHAIN-OF-THOUGHT", tech3_prompt)



TECHNIQUE 3 – CHAIN-OF-THOUGHT OUTPUT
Here's my step-by-step extraction and reasoning:

1.  **Restaurant Name**: I identified "Joe’s Pizza" as the restaurant because the text explicitly states "I went to Joe’s Pizza".
2.  **Purchased Items and Prices**: I looked for nouns associated with monetary values. "The slice was $3.50" clearly links "slice" to "$3.50". Similarly, "Sarah paid $4.25 for a soda" links "soda" to "$4.25".
3.  **Date of visit or planned visit**: The phrase "I might go back on March 5th, 2025" provides a specific, unambiguous date for a planned future visit, which fits the criteria. While "last Friday" refers to a past visit, "March 5th, 2025" is a concrete, explicit date.

```json
{
  "restaurant": "Joe’s Pizza",
  "items": [
    {"name": "slice", "price": "$3.50"},
    {"name": "soda", "price": "$4.25"}
  ],
  "date": "March 5th, 2025"
}
```




In [19]:
# -------------------------
# 5. FINAL OPTIMIZED PROMPT
# -------------------------
# Combines the earlier techniques: a persona, an explicit field list, a
# reason-silently step, and a JSON-only reply in the shared structure.

final_prompt = """
You are a Senior Data Analyst with expertise in structured-data extraction.

Your task is to extract:
- The restaurant name.
- All purchased items, each with an item name and price.
- The date of the visit or planned visit.

Follow this two-step process:

Step 1 (Reason silently):
- Internally, parse the text and identify each required field.
- Ensure that prices match exactly what appears in the text.
- Ensure that the date is in the same textual format as in the input.

Step 2 (Output):
Return ONLY the final JSON, with no explanation or commentary, using this exact structure:

{
  "restaurant": "",
  "items": [
    {"name": "", "price": ""},
    {"name": "", "price": ""}
  ],
  "date": ""
}
"""

final_output = run_prompt(label="FINAL OPTIMIZED PROMPT", prompt=final_prompt)



FINAL OPTIMIZED PROMPT OUTPUT
```json
{
  "restaurant": "Joe’s Pizza",
  "items": [
    {"name": "slice", "price": "$3.50"},
    {"name": "soda", "price": "$4.25"}
  ],
  "date": "March 5th, 2025"
}
```


