In [None]:
# 📌 0. Imports & client setup
import os
from openai import OpenAI
import pandas as pd

# Module-level client; `run_prompt` sends its requests through it.
# NOTE(review): no API key is passed here, so credentials presumably come from
# the environment (e.g. OPENAI_API_KEY) — confirm before running.
client = OpenAI()  

In [3]:
# 📌 1. Constant input text
# The same review is interpolated into every prompt variant. The curly quotes
# and the apostrophe below are real Unicode characters (U+201C, U+201D, U+2019),
# not mis-decoded byte sequences, so the model sees the text as a human would.
REVIEW_TEXT = """
I finally bought the “Logitech G502 HERO gaming mouse” last Saturday at TechWorld downtown. 
They had it on sale for $59.99 plus tax, which I think is a decent price. 
I’d give it a solid 4.5 out of 5 so far. The receipt says 2025-11-15, which I guess is the official purchase date. 
I might go back to TechWorld for a keyboard later.
"""

In [4]:
# 📌 2. Define all prompts
# Maps a prompt-variant name to the full prompt text. Every variant embeds the
# same REVIEW_TEXT and asks for the same five fields; only the prompting
# technique differs.
prompts = {
    # Baseline: a single plain instruction with no role, schema, or output rules.
    "baseline": f"""
Extract the product name, price, purchase date, store, and rating from the following review and put them into JSON.

Review:
"{REVIEW_TEXT}"
""",
    # Technique 1: assigns a role persona and names the exact JSON keys.
    "technique_1_role_prompting": f"""
You are a Senior Data Analyst who specializes in extracting structured data from noisy text.

Your task:
- Read the customer review.
- Find the product_name, price, purchase_date, store, and rating.

Output:
- Return ONLY a valid JSON object with these exact keys:
  product_name, price, purchase_date, store, rating.

Review:
"{REVIEW_TEXT}"
""",
    # Technique 2: spells out a type/format per field, explicit rules, and a
    # JSON template. `{{` / `}}` are escaped braces so the f-string emits
    # literal `{` / `}`.
    "technique_2_output_formatting": f"""
You are an information extraction engine.

Extract the following fields from the review:
- product_name (string)
- price (string, exactly as shown with the $ sign if present)
- purchase_date (string in YYYY-MM-DD format)
- store (string)
- rating (string, as written in the text, e.g., "4.5 out of 5")

Rules:
- Return ONLY a JSON object.
- Use these EXACT keys: product_name, price, purchase_date, store, rating.
- If a field is missing, set its value to null.
- Do NOT include any explanations.

Template:
{{
  "product_name": "...",
  "price": "...",
  "purchase_date": "...",
  "store": "...",
  "rating": "..."
}}

Review:
"{REVIEW_TEXT}"
""",
    # Technique 3: asks the model to list its findings first, then emit the
    # JSON on the line after a `FINAL_JSON:` marker (so the reply is not pure JSON).
    "technique_3_chain_of_thought": f"""
You are a careful data extraction system.

Step 1: List the key pieces of information you find in the review (product name, price, purchase date, store, rating).
Step 2: Based on Step 1, build a final JSON object.

Rules:
- At the end, include a line that says: FINAL_JSON:
- On the line after FINAL_JSON:, output ONLY the JSON object with keys:
  product_name, price, purchase_date, store, rating.

Review:
"{REVIEW_TEXT}"
""",
    # Final: combines the role persona, per-field formats, and strict output
    # rules, and tells the model to keep its checking process hidden.
    "final_optimized_prompt": f"""
You are a Senior Data Analyst that extracts structured data from customer reviews.

Task:
- Read the review.
- Identify:
  - product_name (string)
  - price (string, exactly as shown with the $ sign)
  - purchase_date (string in YYYY-MM-DD format if possible)
  - store (string)
  - rating (string, e.g., "4.5 out of 5")

Process (internal thinking allowed, but DO NOT show it):
- Carefully locate each field in the text.
- Double-check that each value is correct.

Output rules:
- Return ONLY a JSON object.
- Use exactly these keys:
  product_name, price, purchase_date, store, rating.
- If a field is missing, set it to null.
- Do NOT include any explanation before or after the JSON.

Review:
"{REVIEW_TEXT}"
"""
}

In [5]:
# 📌 3. Helper: call the model
def run_prompt(prompt: str, model: str = "gpt-4.1-mini") -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text; sent as the only message in the conversation.
        model: Name of the chat model to query.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or an empty string when the reply carries no text content.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": prompt}
        ]
    )
    # `content` is nullable in the API (e.g. when the model refuses), and
    # calling `.strip()` on None would raise AttributeError.
    content = response.choices[0].message.content
    return content.strip() if content is not None else ""

In [6]:
# 📌 4. Run all prompts and collect results
# One record per prompt variant; the score and observation columns start out
# blank and are filled in by hand after reading the outputs.
results = []

for name in prompts:
    prompt = prompts[name]
    print(f"Running prompt: {name}")
    output = run_prompt(prompt)
    # Model reply followed by an 80-character rule between variants.
    print(output, "-" * 80, sep="\n")

    results.append(
        dict(
            prompt_name=name,
            output=output,
            accuracy_score_1_10=None,  # fill after reviewing
            observation="",            # e.g., "JSON correct but rating missing"
        )
    )

Running prompt: baseline
```json
{
  "product_name": "Logitech G502 HERO gaming mouse",
  "price": 59.99,
  "purchase_date": "2025-11-15",
  "store": "TechWorld",
  "rating": 4.5
}
```
--------------------------------------------------------------------------------
Running prompt: technique_1_role_prompting
{
  "product_name": "Logitech G502 HERO gaming mouse",
  "price": 59.99,
  "purchase_date": "2025-11-15",
  "store": "TechWorld",
  "rating": 4.5
}
--------------------------------------------------------------------------------
Running prompt: technique_2_output_formatting
{
  "product_name": "Logitech G502 HERO gaming mouse",
  "price": "$59.99",
  "purchase_date": "2025-11-15",
  "store": "TechWorld",
  "rating": "4.5 out of 5"
}
--------------------------------------------------------------------------------
Running prompt: technique_3_chain_of_thought
Step 1: Key pieces of information:
- Product name: Logitech G502 HERO gaming mouse
- Price: $59.99
- Purchase date: 2025-11-15
-

In [11]:
# Tabulate the per-prompt records so the outputs can be reviewed side by side.
df_results = pd.DataFrame(data=results)
df_results

Unnamed: 0,prompt_name,output,accuracy_score_1_10,observation
0,baseline,"```json\n{\n ""product_name"": ""Logitech G502 H...",,
1,technique_1_role_prompting,"{\n ""product_name"": ""Logitech G502 HERO gamin...",,
2,technique_2_output_formatting,"{\n ""product_name"": ""Logitech G502 HERO gamin...",,
3,technique_3_chain_of_thought,Step 1: Key pieces of information:\n- Product ...,,
4,final_optimized_prompt,"```json\n{\n ""product_name"": ""Logitech G502 H...",,


## Summary Table

| Prompt Name                    | Accuracy (1–10) | Observation                                  |
|--------------------------------|-----------------|----------------------------------------------|
| baseline                       | 5               | Correct fields, but JSON format inconsistent (code block + wrong price and rating format).   |
| technique_1_role_prompting     | 7               | Cleaner JSON and better accuracy, but price and rating formats still incorrect.  |
| technique_2_output_formatting  | 9               | Accurate JSON with correct formatting; nearly perfect.      |
| technique_3_chain_of_thought   | 8               | Good reasoning, but extraneous text and minor field inaccuracies (“TechWorld downtown”, numeric price).  |
| final_optimized_prompt         | 10              | Accurate, clean JSON with full format adherence and no noise.      |