In [1]:

# ----------------------------
# 1. Install Dependencies
# ----------------------------
!pip install kagglehub pandas requests tqdm numpy

# ----------------------------
# 2. Imports
# ----------------------------
import kagglehub
import pandas as pd
import requests
import json
import numpy as np
from tqdm import tqdm
import os

# ----------------------------
# 3. OpenRouter API Key
# ----------------------------
# SECURITY: credentials must never be hard-coded in a notebook or script,
# because they end up in version control, exports and shared copies. The key
# is read from the environment instead. Any key that was previously embedded
# in this file should be treated as leaked and revoked.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "").strip()

# Fail fast: without a key every request would be rejected and the
# evaluation below would silently record nothing but invalid predictions.
if not OPENROUTER_API_KEY:
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set. Define it in the environment "
        "before running this notebook."
    )

# ----------------------------
# 4. Download Dataset
# ----------------------------
path = kagglehub.dataset_download("omkarsabnis/yelp-reviews-dataset")
print("Dataset path:", path)

# List the directory once and sort it: os.listdir() returns entries in
# arbitrary order, so sorting makes the CSV selection below reproducible
# when the dataset ships more than one CSV file.
dataset_files = sorted(os.listdir(path))

print("\nFiles in dataset directory:")
print(dataset_files)

# ----------------------------
# 5. Load CSV Safely
# ----------------------------
# Take the first CSV in sorted order; the extension is matched
# case-insensitively so "DATA.CSV" is accepted as well.
csv_file = next(
    (entry for entry in dataset_files if entry.lower().endswith(".csv")),
    None,
)

if csv_file is None:
    raise FileNotFoundError("No CSV file found in dataset directory.")

print("\nUsing CSV file:", csv_file)

df = pd.read_csv(os.path.join(path, csv_file))
print("\nOriginal columns:", df.columns.tolist())

# ----------------------------
# 6. Normalize Column Names
# ----------------------------
# Common variations handled safely: every key is an accepted source column
# name, every value is the canonical name used by the rest of the notebook.
column_map = {
    "review_text": "review_text",
    "text": "review_text",
    "review": "review_text",
    "stars": "stars",
    "rating": "stars",
}

# Upper bound on the number of reviews sent to the LLM per run.
SAMPLE_SIZE = 200

# Build the rename mapping so that each canonical name is assigned at most
# once. Renaming two aliases (e.g. "stars" and "rating") to the same target
# would create duplicate column labels and break the selection below.
# Canonical names already present in the frame take precedence.
taken = set(df.columns) & set(column_map.values())
rename_map = {}
for col in df.columns:
    # Match aliases case-insensitively and ignore surrounding whitespace.
    target = column_map.get(str(col).strip().lower())
    if target is None or target in taken:
        continue
    rename_map[col] = target
    taken.add(target)

df = df.rename(columns=rename_map)

if "review_text" not in df.columns or "stars" not in df.columns:
    raise KeyError("Required columns not found. Expected review_text and stars.")

df = df[["review_text", "stars"]].dropna()

if df.empty:
    raise ValueError(
        "No rows with both review_text and stars remain after dropping "
        "missing values."
    )

# Sample up to SAMPLE_SIZE reviews. DataFrame.sample raises when asked for
# more rows than exist (without replacement), so cap the request at the
# number of available rows. The fixed seed keeps the sample reproducible.
df_sample = df.sample(
    n=min(SAMPLE_SIZE, len(df)), random_state=42
).reset_index(drop=True)

print("\nSample size:", len(df_sample))

# ----------------------------
# 7. OpenRouter API Call
# ----------------------------
def query_llm(prompt, model="openai/gpt-4o-mini", temperature=0, timeout=30):
    """Send a single-turn chat prompt to OpenRouter and return the decoded reply.

    Args:
        prompt: Text sent as the only ``user`` message of the conversation.
        model: OpenRouter model identifier to query.
        temperature: Sampling temperature forwarded to the API.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The JSON body of the HTTP response decoded into Python objects.
        When the request cannot be completed (timeout, connection failure)
        or the body is not valid JSON, a dict of the form
        ``{"error": {"message": "<details>"}}`` is returned instead. It has
        no ``"choices"`` key, so callers that look up the completion text
        treat it like any other unusable response rather than crashing in
        the middle of a long batch of requests.
    """
    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
            },
            json={
                "model": model,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": temperature,
            },
            timeout=timeout,
        )
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        # RequestException covers transport failures; ValueError covers a
        # body that cannot be decoded as JSON (e.g. an HTML gateway error).
        return {"error": {"message": f"{type(exc).__name__}: {exc}"}}

# ----------------------------
# 8. Prompt Templates
# ----------------------------
# Each template below is filled in with str.format(review_text=...).
# "{review_text}" is therefore the only substitution field; the doubled
# braces "{{" / "}}" are escapes that render as literal "{" / "}" in the
# final prompt, so the JSON skeleton shown to the model stays intact.

# Strategy 1: bare task description with a JSON output skeleton.
PROMPT_1_ZERO_SHOT = """
You are an AI assistant.

Given the following Yelp review, predict the star rating from 1 to 5.

Review:
"{review_text}"

Return the result in valid JSON format:
{{
  "predicted_stars": integer,
  "explanation": "brief reason"
}}
"""

# Strategy 2: adds an explicit description of what each star level means.
PROMPT_2_SENTIMENT_ANCHORED = """
You are an expert sentiment analysis system trained on Yelp reviews.

Classification rules:
1 star = very negative experience
2 stars = negative
3 stars = neutral or mixed
4 stars = positive
5 stars = extremely positive

Review:
"{review_text}"

Respond ONLY in valid JSON:
{{
  "predicted_stars": number,
  "explanation": "short justification"
}}
"""

# Strategy 3: frames the model as a JSON-only API with explicit output rules.
PROMPT_3_STRICT_JSON = """
You are a strict JSON-only API.

Task:
Predict Yelp review rating from 1 to 5 stars.

Rules:
- Output MUST be valid JSON
- No extra text
- Use integers only
- Double-check sentiment before responding

Review:
"{review_text}"

Output format:
{{
  "predicted_stars": 1-5,
  "explanation": "one sentence"
}}
"""

# Display name -> prompt template. The names are used as row labels in the
# results table; dict insertion order determines the evaluation order.
PROMPTS = {
    "Zero-Shot": PROMPT_1_ZERO_SHOT,
    "Sentiment-Anchored": PROMPT_2_SENTIMENT_ANCHORED,
    "Strict-JSON": PROMPT_3_STRICT_JSON
}

# ----------------------------
# 9. Evaluation Function
# ----------------------------
def _parse_prediction(response):
    """Return the 1-5 star rating found in an LLM response, or None if unusable.

    The completion text is read from ``response["choices"][0]["message"]
    ["content"]``, parsed as JSON, and its ``"predicted_stars"`` entry is
    converted with ``int``. Any missing key, malformed JSON, non-numeric
    value or rating outside 1-5 yields ``None``.
    """
    try:
        content = response["choices"][0]["message"]["content"]
        parsed = json.loads(content)
        pred = int(parsed["predicted_stars"])
    except (KeyError, IndexError, TypeError, ValueError, OverflowError):
        # json.JSONDecodeError is a ValueError; OverflowError covers
        # int(float("inf")), which json.loads can produce from "Infinity".
        return None
    return pred if 1 <= pred <= 5 else None


def _mean_cross_run_std(all_predictions):
    """Return the mean per-review standard deviation of predictions across runs.

    Only reviews with at least two valid predictions are considered: a
    review with no valid prediction has no defined spread (and makes
    ``np.nanstd`` emit a RuntimeWarning), while a review with a single valid
    prediction would always contribute a meaningless 0. Returns NaN when no
    review can be compared.
    """
    arr = np.array(all_predictions, dtype=float)  # None becomes NaN
    valid_per_review = np.count_nonzero(~np.isnan(arr), axis=0)
    comparable = arr[:, valid_per_review >= 2]
    if comparable.shape[1] == 0:
        return float("nan")
    return float(np.nanmean(np.nanstd(comparable, axis=0)))


def evaluate_prompt(prompt_template, runs=2):
    """Evaluate one prompt template on ``df_sample`` and return its metrics.

    Every review in the module-level ``df_sample`` is formatted into
    ``prompt_template`` and sent through ``query_llm`` once per run.

    Args:
        prompt_template: Format string containing a ``{review_text}`` field.
        runs: Number of full passes over the sample; must be at least 1.

    Returns:
        A tuple ``(accuracy, json_validity, consistency)``:

        * ``accuracy`` - share of exact matches with ``df_sample["stars"]``
          among the valid predictions of the FIRST run only (0 if none).
        * ``json_validity`` - share of all requests, over all runs, that
          produced parseable JSON with an in-range ``predicted_stars``.
        * ``consistency`` - mean standard deviation of the predictions for
          the same review across runs (lower is more consistent); ``0.0``
          when ``runs`` is 1, NaN when no review has two valid predictions.

    Raises:
        ValueError: If ``runs`` is smaller than 1.
    """
    if runs < 1:
        raise ValueError("runs must be at least 1")

    all_predictions = []
    for _ in range(runs):
        predictions = []
        for review in tqdm(df_sample["review_text"], total=len(df_sample)):
            prompt = prompt_template.format(review_text=review)
            try:
                response = query_llm(prompt)
            except (requests.RequestException, ValueError):
                # A timeout, connection failure or undecodable body for one
                # review must not abort the whole batch; record it as an
                # invalid prediction and move on.
                predictions.append(None)
                continue
            predictions.append(_parse_prediction(response))
        all_predictions.append(predictions)

    # Accuracy: scored on the first run, ignoring reviews without a
    # usable prediction.
    scored = [
        (pred, actual)
        for pred, actual in zip(all_predictions[0], df_sample["stars"])
        if pred is not None
    ]
    correct = sum(1 for pred, actual in scored if pred == actual)
    accuracy = correct / len(scored) if scored else 0

    # Consistency (std deviation across runs); undefined for a single run.
    consistency = _mean_cross_run_std(all_predictions) if runs > 1 else 0.0

    valid_json_count = sum(
        pred is not None for run in all_predictions for pred in run
    )
    total_requests = len(df_sample) * runs
    json_validity = valid_json_count / total_requests if total_requests else 0.0

    return accuracy, json_validity, consistency

# ----------------------------
# 10. Run Experiments
# ----------------------------
# Column labels for the three metrics, in the order evaluate_prompt
# returns them: accuracy, JSON validity rate, consistency.
metric_columns = ("Accuracy", "JSON Validity Rate", "Consistency (↓ better)")

# One row of rounded metrics per prompt strategy, in PROMPTS order.
results = []

for name, prompt in PROMPTS.items():
    print(f"\nEvaluating Prompt Strategy: {name}")
    scores = evaluate_prompt(prompt, runs=2)

    row = {"Prompt Strategy": name}
    for column, score in zip(metric_columns, scores):
        row[column] = round(score, 3)
    results.append(row)

# ----------------------------
# 11. Results Table
# ----------------------------
# Collect the per-strategy rows into a single comparison table.
results_df = pd.DataFrame(results)

print("\n================ FINAL COMPARISON TABLE ================\n")
print(results_df)

# ----------------------------
# 12. Save Results
# ----------------------------
# Persist the table next to the notebook, without the index column.
output_csv = "yelp_prompting_results.csv"
results_df.to_csv(output_csv, index=False)
print("\nResults saved as yelp_prompting_results.csv")


Downloading from https://www.kaggle.com/api/v1/datasets/download/omkarsabnis/yelp-reviews-dataset?dataset_version_number=1...


100%|██████████| 3.49M/3.49M [00:00<00:00, 59.1MB/s]

Extracting files...





Dataset path: /root/.cache/kagglehub/datasets/omkarsabnis/yelp-reviews-dataset/versions/1

Files in dataset directory:
['yelp.csv']

Using CSV file: yelp.csv

Original columns: ['business_id', 'date', 'review_id', 'stars', 'text', 'type', 'user_id', 'cool', 'useful', 'funny']

Sample size: 200

Evaluating Prompt Strategy: Zero-Shot


100%|██████████| 200/200 [05:06<00:00,  1.53s/it]
100%|██████████| 200/200 [04:52<00:00,  1.46s/it]
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,



Evaluating Prompt Strategy: Sentiment-Anchored


100%|██████████| 200/200 [04:31<00:00,  1.36s/it]
100%|██████████| 200/200 [05:20<00:00,  1.60s/it]



Evaluating Prompt Strategy: Strict-JSON


100%|██████████| 200/200 [05:02<00:00,  1.51s/it]
100%|██████████| 200/200 [04:17<00:00,  1.29s/it]



      Prompt Strategy  Accuracy  JSON Validity Rate  Consistency (↓ better)
0           Zero-Shot     0.684                0.89                   0.000
1  Sentiment-Anchored     0.680                1.00                   0.005
2         Strict-JSON     0.630                1.00                   0.005

Results saved as yelp_prompting_results.csv



