In [30]:
import pandas as pd
import numpy as np
import requests
import os
import json
import re

In [31]:
df = pd.read_csv('yelp.csv')
df.head()

Unnamed: 0,business_id,date,review_id,stars,text,type,user_id,cool,useful,funny
0,9yKzy9PApeiPPOUJEtnvkg,2011-01-26,fWKvX83p0-ka4JS3dc6E5A,5,My wife took me here on my birthday for breakf...,review,rLtl8ZkDX5vH5nAx9C3q5Q,2,5,0
1,ZRJwVLyzEJq1VAihDhYiow,2011-07-27,IjZ33sJrzXqU-0X6U8NwyA,5,I have no idea why some people give bad review...,review,0a2KyEL0d3Yb1V6aivbIuQ,0,0,0
2,6oRAC4uyJCsJl1X0WZpVSA,2012-06-14,IESLBzqUCLdSzSqm0eCSxQ,4,love the gyro plate. Rice is so good and I als...,review,0hT2KtfLiobPvh6cDC8JQg,0,1,0
3,_1QQZuf4zZOyFCvXc0o6Vg,2010-05-27,G-WvGaISbqqaMHlNnByodA,5,"Rosie, Dakota, and I LOVE Chaparral Dog Park!!...",review,uZetl9T0NcROGOyFfughhg,1,2,0
4,6ozycU1RpktNG2-1BroVtw,2012-01-05,1uJFq2r5QfJG_6ExMRCaGw,5,General Manager Scott Petello is a good egg!!!...,review,vYmM4KTsC8ZfQBg-j5MWkw,0,0,0


In [32]:
df.columns

Index(['business_id', 'date', 'review_id', 'stars', 'text', 'type', 'user_id',
       'cool', 'useful', 'funny'],
      dtype='object')

In [33]:
df = df[["text", "stars"]].dropna()
df.shape

(10000, 2)

In [34]:
df_sample = df.sample(n=200, random_state=42).reset_index(drop=True)
df_sample.head()

Unnamed: 0,text,stars
0,We got here around midnight last Friday... the...,4
1,Brought a friend from Louisiana here. She say...,5
2,"Every friday, my dad and I eat here. We order ...",3
3,"My husband and I were really, really disappoin...",1
4,Love this place! Was in phoenix 3 weeks for w...,5


In [35]:
!pip install -q python-dotenv google-generativeai



[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [36]:
from dotenv import load_dotenv
# Force reload of env
import importlib
load_dotenv(override=True)

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
print(f"Loaded API Key: {GOOGLE_API_KEY[:20] if GOOGLE_API_KEY else 'None'}...")
GOOGLE_API_KEY is not None


Loaded API Key: AIzaSyDLj1WslWAE9Tvq...


True

In [37]:
import google.generativeai as genai
import time

genai.configure(api_key=GOOGLE_API_KEY)

def call_llm(prompt, review_text):
    final_prompt = prompt.replace("{{REVIEW_TEXT}}", review_text[:2000])
    
    try:
        model = genai.GenerativeModel('gemini-2.5-flash')
        response = model.generate_content(final_prompt)
        
        content = response.text
        print(f"LLM Response: {content[:1000]}...")

        match = re.search(r"\{.*\}", content, re.DOTALL)
        if not match:
            print(f"Error: No JSON found in response: {content}")
            raise ValueError("No JSON found")

        parsed = json.loads(match.group())
        return parsed, True

    except Exception as e:
        print(f"Error in call_llm: {str(e)}")
        return {
            "predicted_stars": None,
            "explanation": str(e)
        }, False


def call_llm_batch(prompt, reviews_list, max_retries=3, timeout=60, batch_size=10):
    """Evaluate reviews in smaller batches with retry logic and timeout"""
    all_outputs = []
    total_batches = (len(reviews_list) + batch_size - 1) // batch_size
    
    for batch_idx, batch_start in enumerate(range(0, len(reviews_list), batch_size)):
        batch_end = min(batch_start + batch_size, len(reviews_list))
        batch_reviews = reviews_list[batch_start:batch_end]
        
        reviews_text = "\n---\n".join([f"Review {i+1}:\n{r[:1000]}" for i, r in enumerate(batch_reviews)])
        final_prompt = prompt.replace("{{REVIEW_TEXT}}", reviews_text)
        
        # Retry logic
        for attempt in range(max_retries):
            try:
                print(f"  Batch {batch_idx+1}/{total_batches}: Processing reviews {batch_start+1}-{batch_end} (attempt {attempt+1}/{max_retries})")
                model = genai.GenerativeModel('gemini-2.5-flash')
                response = model.generate_content(final_prompt, request_options={"timeout": timeout})
                
                content = response.text
                print(f"    ✓ Received response ({len(content)} chars)")

                # Try to parse response - handle both array and individual JSON objects in code blocks
                parsed = []
                
                # First, try to find a JSON array [...]
                match = re.search(r"\[.*\]", content, re.DOTALL)
                if match:
                    parsed = json.loads(match.group())
                else:
                    # If no array, extract individual JSON objects from code blocks
                    # Pattern: ```json\n{...}\n```
                    json_blocks = re.findall(r"```json\s*(\{.*?\})\s*```", content, re.DOTALL)
                    if json_blocks:
                        for block in json_blocks:
                            parsed.append(json.loads(block))
                    else:
                        # Last resort: try to find any JSON objects
                        json_objects = re.findall(r"\{.*?\}", content, re.DOTALL)
                        if json_objects:
                            for obj_str in json_objects:
                                try:
                                    parsed.append(json.loads(obj_str))
                                except:
                                    pass
                
                if not parsed:
                    print(f"    Error: No JSON found in response")
                    raise ValueError("No JSON found")

                all_outputs.extend(parsed)
                
                # Rate limiting: Add delay between requests to avoid hitting API limits
                if batch_idx < total_batches - 1:  # Don't delay after the last batch
                    time.sleep(1)  # 1 second delay between batches
                
                break  # Success, move to next batch
                
            except KeyboardInterrupt:
                print("    Interrupted by user")
                raise
            except Exception as e:
                print(f"    ✗ Error: {str(e)}")
                if attempt < max_retries - 1:
                    wait_time = 2 ** attempt  # Exponential backoff: 1s, 2s, 4s
                    print(f"    Retrying in {wait_time}s...")
                    time.sleep(wait_time)
                else:
                    print(f"    Failed after {max_retries} attempts")
                    all_outputs.extend([{"predicted_stars": None, "explanation": str(e)}] * len(batch_reviews))
    
    return all_outputs, True


In [38]:
# Load prompts
prompts = {}

with open("prompts/v1_baseline.txt", "r") as f:
    prompts["baseline"] = f.read()

with open("prompts/v2_rubric.txt", "r") as f:
    prompts["rubric"] = f.read()

with open("prompts/v3_reason_then_classify.txt", "r") as f:
    prompts["reason_then_classify"] = f.read()

# Display prompts to verify
for key in prompts:
    print(f"Prompt '{key}' loaded: {len(prompts[key])} characters")


Prompt 'baseline' loaded: 300 characters
Prompt 'rubric' loaded: 557 characters
Prompt 'reason_then_classify' loaded: 441 characters


In [39]:
results = []

# Process reviews with improved batch handling and retry logic
reviews_list = df_sample["text"].tolist()

for prompt_name, prompt_text in prompts.items():
    print(f"\n{'='*60}")
    print(f"Processing prompt: {prompt_name}")
    print(f"{'='*60}")
    outputs, valid = call_llm_batch(prompt_text, reviews_list, max_retries=3, timeout=60)
    
    for i, output in enumerate(outputs):
        results.append({
            "prompt": prompt_name,
            "actual_stars": df_sample.iloc[i]["stars"] if i < len(df_sample) else None,
            "predicted_stars": output.get("predicted_stars") if isinstance(output, dict) else output,
            "json_valid": valid
        })

print(f"\n{'='*60}")
print(f"Total results collected: {len(results)}")
print(f"{'='*60}")



Processing prompt: baseline
  Batch 1/20: Processing reviews 1-10 (attempt 1/3)


    ✓ Received response (3073 chars)
  Batch 2/20: Processing reviews 11-20 (attempt 1/3)
    ✓ Received response (2524 chars)
  Batch 3/20: Processing reviews 21-30 (attempt 1/3)
    ✓ Received response (2769 chars)
  Batch 4/20: Processing reviews 31-40 (attempt 1/3)
    ✓ Received response (2557 chars)
  Batch 5/20: Processing reviews 41-50 (attempt 1/3)
    ✓ Received response (3194 chars)
  Batch 6/20: Processing reviews 51-60 (attempt 1/3)
    ✓ Received response (2300 chars)
  Batch 7/20: Processing reviews 61-70 (attempt 1/3)
    ✗ Error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 20, model: gemini-2.5-flash
Please retry in 25.528576295s. [links {
  description: "L

  summary = results_df.groupby("prompt").apply(


Unnamed: 0,prompt,accuracy,json_validity_rate
0,baseline,0.137363,1.0
1,reason_then_classify,0.0,1.0
2,rubric,0.0,1.0


In [41]:
summary.to_csv("results.csv", index=False)


PermissionError: [Errno 13] Permission denied: 'results.csv'