In [6]:
import pandas as pd
import json
from dotenv import load_dotenv
import os
from groq import Groq


In [13]:
from dotenv import load_dotenv
import os
from groq import Groq

load_dotenv()

groq_api_key = os.getenv("GROQ_API_KEY")
print("Groq key loaded:", groq_api_key is not None)

client = Groq(api_key=groq_api_key)

def call_llm(prompt: str) -> str:
    """
    Call Groq's Llama 3.3 70B Versatile model and return the text response.
    """
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=0.1,
    )
    return response.choices[0].message.content





Groq key loaded: True


In [14]:
print(call_llm("Say hello in one short sentence."))




Hello, it's nice to meet you.


In [15]:
import pandas as pd

df = pd.read_csv("yelp.csv")   # adjust name/path if needed
df = df[['text', 'stars']].dropna()
df_sample = df.sample(200, random_state=42).reset_index(drop=True)

df_sample.head()


Unnamed: 0,text,stars
0,We got here around midnight last Friday... the...,4
1,Brought a friend from Louisiana here. She say...,5
2,"Every friday, my dad and I eat here. We order ...",3
3,"My husband and I were really, really disappoin...",1
4,Love this place! Was in phoenix 3 weeks for w...,5


In [16]:
import json

def parse_json_safely(text: str):
    cleaned = text.strip()

    if cleaned.startswith("```"):
        cleaned = cleaned.strip("`")
        cleaned = cleaned.replace("json", "", 1).strip()

    start = cleaned.find("{")
    end = cleaned.rfind("}")
    if start != -1 and end != -1:
        cleaned = cleaned[start:end+1]

    return json.loads(cleaned)


In [17]:
def build_prompt_v1(review_text: str) -> str:
    return f"""
You are an assistant that predicts restaurant star ratings (1 to 5) based on reviews.

Return ONLY valid JSON:

{{
  "predicted_stars": <integer 1-5>,
  "explanation": "<short reason>"
}}

Review:
"{review_text}"
"""



In [19]:
sample = df_sample.iloc[0]
prompt = build_prompt_v1(sample["text"])

print("PROMPT:\n", prompt)

response = call_llm(prompt)
print("\nRAW RESPONSE:\n", response)

try:
    parsed = parse_json_safely(response)
    print("\nPARSED JSON:", parsed)
except Exception as e:
    print("JSON ERROR:", e)




PROMPT:
 
You are an assistant that predicts restaurant star ratings (1 to 5) based on reviews.

Return ONLY valid JSON:

{
  "predicted_stars": <integer 1-5>,
  "explanation": "<short reason>"
}

Review:
"We got here around midnight last Friday... the place was dead. However, they were still serving food and we enjoyed some well made pub grub. Service was friendly, quality cocktails were served, and the atmosphere is derived from an old Uno's, which certainly works for a sports bar. It being located in a somewhat commercial area, I can see why it's empty so late on a Friday. From what my friends tell me - this is a great spot for happy hour, and it stays relatively busy thru 10pm.

*UPDATE - Great patio for day-drinking on the weekends!"


RAW RESPONSE:
 {
  "predicted_stars": 4,
  "explanation": "Friendly service, well-made food, and quality cocktails"
}

PARSED JSON: {'predicted_stars': 4, 'explanation': 'Friendly service, well-made food, and quality cocktails'}


In [20]:
results_v1 = []

for _, row in df_sample.iterrows():
    prompt = build_prompt_v1(row["text"])
    
    try:
        resp = call_llm(prompt)
        data = parse_json_safely(resp)
        pred = int(data["predicted_stars"])
        json_valid = True
    except:
        pred = None
        json_valid = False

    results_v1.append({
        "actual_stars": row["stars"],
        "predicted_stars": pred,
        "json_valid": json_valid,
        "prompt_version": "v1"
    })

df_v1 = pd.DataFrame(results_v1)
df_v1.head()



Unnamed: 0,actual_stars,predicted_stars,json_valid,prompt_version
0,4,4,True,v1
1,5,5,True,v1
2,3,4,True,v1
3,1,1,True,v1
4,5,5,True,v1


In [21]:

valid = df_v1["predicted_stars"].notna()

accuracy_v1 = (df_v1.loc[valid, "actual_stars"] 
               == df_v1.loc[valid, "predicted_stars"]).mean()

json_valid_rate_v1 = df_v1["json_valid"].mean()

accuracy_v1, json_valid_rate_v1



(np.float64(0.635), np.float64(1.0))

In [22]:
def build_prompt_v2(review_text: str) -> str:
    return f"""
You are a strict JSON API that predicts restaurant review ratings.

TASK:
- Read the review.
- Decide a star rating from 1 to 5 (integer).
- Explain briefly why.

OUTPUT RULES:
- Output ONLY valid JSON.
- No extra words, no markdown, no explanations outside JSON.
- The "predicted_stars" must be an integer 1–5.

FORMAT:
{{
  "predicted_stars": <integer 1-5>,
  "explanation": "<short reason>"
}}

Review:
"{review_text}"
"""


In [23]:
results_v2 = []

for _, row in df_sample.iterrows():
    prompt = build_prompt_v2(row["text"])
    
    try:
        resp = call_llm(prompt)
        data = parse_json_safely(resp)
        pred = int(data["predicted_stars"])
        json_valid = True
    except:
        pred = None
        json_valid = False

    results_v2.append({
        "actual_stars": row["stars"],
        "predicted_stars": pred,
        "json_valid": json_valid,
        "prompt_version": "v2"
    })

df_v2 = pd.DataFrame(results_v2)
df_v2.head()


Unnamed: 0,actual_stars,predicted_stars,json_valid,prompt_version
0,4,4.0,True,v2
1,5,5.0,True,v2
2,3,4.0,True,v2
3,1,1.0,True,v2
4,5,5.0,True,v2


In [24]:
valid2 = df_v2["predicted_stars"].notna()

accuracy_v2 = (df_v2.loc[valid2, "actual_stars"] 
               == df_v2.loc[valid2, "predicted_stars"]).mean()

json_valid_rate_v2 = df_v2["json_valid"].mean()

accuracy_v2, json_valid_rate_v2


(np.float64(0.5954198473282443), np.float64(0.655))

In [25]:
def build_prompt_v3(review_text: str) -> str:
    return f"""
You are a strict JSON API that predicts restaurant star ratings from reviews.

Here are examples:

Review: "Amazing food and great service. I will come again."
Rating: 5

Review: "Food was okay, nothing special."
Rating: 3

Review: "Terrible experience, very rude staff."
Rating: 1

Now follow these rules:

- Read the new review.
- Decide a rating (1–5) that matches the tone and sentiment.
- Explain briefly.
- Output ONLY valid JSON, no markdown, no extra text.

FORMAT:
{{
  "predicted_stars": <integer 1-5>,
  "explanation": "<short reason>"
}}

New review:
"{review_text}"
"""


In [26]:
results_v3 = []

for _, row in df_sample.iterrows():
    prompt = build_prompt_v3(row["text"])
    
    try:
        resp = call_llm(prompt)
        data = parse_json_safely(resp)
        pred = int(data["predicted_stars"])
        json_valid = True
    except:
        pred = None
        json_valid = False

    results_v3.append({
        "actual_stars": row["stars"],
        "predicted_stars": pred,
        "json_valid": json_valid,
        "prompt_version": "v3"
    })

df_v3 = pd.DataFrame(results_v3)
df_v3.head()


Unnamed: 0,actual_stars,predicted_stars,json_valid,prompt_version
0,4,,False,v3
1,5,5.0,True,v3
2,3,,False,v3
3,1,,False,v3
4,5,,False,v3


In [27]:
valid3 = df_v3["predicted_stars"].notna()

accuracy_v3 = (df_v3.loc[valid3, "actual_stars"] 
               == df_v3.loc[valid3, "predicted_stars"]).mean()

json_valid_rate_v3 = df_v3["json_valid"].mean()

accuracy_v3, json_valid_rate_v3


(np.float64(1.0), np.float64(0.005))

In [28]:
all_results = pd.concat([df_v1, df_v2, df_v3], ignore_index=True)

summary = all_results.groupby("prompt_version").agg(
    accuracy=("predicted_stars", 
              lambda s: (s == all_results.loc[s.index, "actual_stars"]).mean()),
    json_valid_rate=("json_valid", "mean")
)

summary


Unnamed: 0_level_0,accuracy,json_valid_rate
prompt_version,Unnamed: 1_level_1,Unnamed: 2_level_1
v1,0.635,1.0
v2,0.39,0.655
v3,0.005,0.005
