In [None]:

import pandas as pd
import os
import json
import re
import time
from tqdm import tqdm
from groq import Groq

# --- CONFIGURATION ---
# Resolve the Groq API key. Lookup order:
#   1. GROQ_API_KEY in .streamlit/secrets.toml
#   2. GOOGLE_API_KEY in the same file (kept for users who reused that variable name)
#   3. GROQ_API_KEY environment variable
api_key = None
try:
    # toml is optional: without it (or without the secrets file) we fall through to the environment.
    import toml
    secrets = toml.load(".streamlit/secrets.toml")
    api_key = secrets.get("GROQ_API_KEY") or secrets.get("GOOGLE_API_KEY") # Fallback if user reused var
except (ImportError, OSError, ValueError) as e:
    # ImportError: toml not installed; OSError: file missing/unreadable;
    # ValueError: malformed TOML (toml.TomlDecodeError subclasses ValueError).
    print(f"Could not read .streamlit/secrets.toml ({type(e).__name__}); falling back to environment.")

# Consult the environment whenever the secrets file did not yield a key,
# not only when loading the file failed outright.
if not api_key:
    api_key = os.environ.get("GROQ_API_KEY")

if not api_key:
    raise ValueError(
        "Please set GROQ_API_KEY in .streamlit/secrets.toml or as an environment variable"
    )

client = Groq(api_key=api_key)
MODEL_NAME = "llama-3.1-8b-instant"

print(f"Initialized Groq Client with model: {MODEL_NAME}")

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Initialized Groq Client with model: llama-3.1-8b-instant


In [None]:
#Data Loading
# Reads the Yelp review CSV and keeps only the review text and star rating.
# At most SAMPLE_SIZE rows are kept (seeded sample, so re-runs are reproducible).
FILENAME = "yelp.csv"
SAMPLE_SIZE = 200

if os.path.exists(FILENAME):
    reviews_raw = pd.read_csv(FILENAME)
    df = reviews_raw[['text', 'stars']].dropna()

    if len(df) > SAMPLE_SIZE:
        df = df.sample(SAMPLE_SIZE, random_state=42)

    # Always hand downstream cells a clean 0..n-1 index, even when no sampling
    # happened and dropna() left gaps.
    df = df.reset_index(drop=True)
else:
    # Make the fallback loud: every metric computed from these placeholder
    # rows is meaningless as an evaluation of the real dataset.
    print(f"WARNING: {FILENAME} not found - using synthetic placeholder reviews.")
    df = pd.DataFrame({
        "text": ["Great food", "Bad service"] * (SAMPLE_SIZE // 2),
        "stars": [5, 1] * (SAMPLE_SIZE // 2)
    })

print("Dataset size:", len(df))


Dataset size: 200


In [None]:
#Prompts

def get_prompt_v1(review):
    """Build the zero-shot prompt for a single review.

    The review is embedded verbatim between double quotes, and the model is
    asked to answer with a JSON object holding `predicted_stars` and
    `explanation`.

    Args:
        review: Review text to classify.

    Returns:
        The full prompt string.
    """
    zero_shot_prompt = f"""
You are a rating classifier.

RULES:
- Output ONLY valid JSON
- No extra text

Review: "{review}"

Output JSON:
{{ "predicted_stars": <int>, "explanation": "<brief reasoning>" }}
"""
    return zero_shot_prompt


def get_prompt_v2(review):
    """Build the few-shot prompt for a single review.

    Uses the same rules and output schema as the zero-shot prompt, preceded by
    three worked examples (4-star, 1-star and 3-star) before the target review.

    Args:
        review: Review text to classify.

    Returns:
        The full prompt string.
    """
    few_shot_prompt = f"""
You are a rating classifier.

RULES:
- Output ONLY valid JSON
- No extra text

Examples:
Review: "The service was slow but the food was excellent."
Output: {{ "predicted_stars": 4, "explanation": "Good food outweighed slow service." }}

Review: "Worst place ever. Dirty and disgusting."
Output: {{ "predicted_stars": 1, "explanation": "Strong negative sentiment." }}

Review: "It was okay. Nothing special."
Output: {{ "predicted_stars": 3, "explanation": "Neutral experience." }}

Review: "{review}"

Output JSON:
{{ "predicted_stars": <int>, "explanation": "<brief reasoning>" }}
"""
    return few_shot_prompt


def get_prompt_v3(review):
    """Build the "think internally" prompt for a single review.

    The model is told to reason privately, not to output its reasoning steps,
    and to emit only the JSON object with `predicted_stars` and `explanation`.

    Args:
        review: Review text to classify.

    Returns:
        The full prompt string.
    """
    silent_reasoning_prompt = f"""
You are an expert sentiment analyst.

RULES:
- Think internally
- Do NOT output reasoning steps
- Output ONLY valid JSON

Review: "{review}"

Output JSON:
{{ "predicted_stars": <int>, "explanation": "<concise reasoning>" }}
"""
    return silent_reasoning_prompt


In [None]:
#Execution loop
def extract_text(response):
    """Return the message text of the first choice in a chat completion.

    Args:
        response: Object exposing `choices[0].message.content`.

    Returns:
        The content string, or "" when the response does not have the expected
        shape or its content is None. Never raises.
    """
    try:
        content = response.choices[0].message.content
    except Exception as e:
        print(f"DEBUG: Extraction Error: {e}")
        return ""
    # A present-but-None content must also come back as "" so callers can
    # always treat the result as a string.
    return "" if content is None else content

def parse_response(text):
    """Extract the first decodable JSON object embedded in `text`.

    Every `{` is tried as a possible start of a JSON object, and the first one
    that decodes wins. Surrounding prose, or stray braces after the object, do
    not break parsing the way a single first-`{`-to-last-`}` match does.

    Args:
        text: Raw model output. Non-string input (e.g. None) is treated as
            containing no JSON.

    Returns:
        The decoded dict, or None when nothing decodable is found. Never raises.
    """
    if not isinstance(text, str):
        print(f"DEBUG: Parsing Error - No JSON found in text: {text}")
        return None

    decoder = json.JSONDecoder()
    last_error = None
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete value, so
            # trailing text after the object is ignored.
            parsed, _ = decoder.raw_decode(text, start)
            return parsed
        except (ValueError, RecursionError) as e:
            last_error = e
            start = text.find("{", start + 1)

    if last_error is None:
        # No opening brace anywhere in the text.
        print(f"DEBUG: Parsing Error - No JSON found in text: {text}")
    else:
        print(f"DEBUG: Parsing Exception: {last_error}")
    return None

# One record per review: the true rating plus, for each prompt strategy,
# the predicted rating and whether a usable prediction was obtained.
results = []

strategies = {
    "V1_ZeroShot": get_prompt_v1,
    "V2_FewShot": get_prompt_v2,
    "V3_CoT": get_prompt_v3
}

# Number of leading rows to evaluate. Kept small while debugging, because
# every row costs one API call per strategy.
N_DEBUG_ROWS = 5

print("Running experiments via Groq (With Debugging)...")

# Run on a subset first for debugging if things fail
subset_df = df.head(N_DEBUG_ROWS)

for _, row in tqdm(subset_df.iterrows(), total=len(subset_df)):
    review = row["text"]
    record = {"actual_stars": int(row["stars"])}

    for name, func in strategies.items():
        # Assume failure; only a parsed integer prediction flips these.
        pred, is_valid = None, False
        try:
            # Groq API Call
            completion = client.chat.completions.create(
                model=MODEL_NAME,
                messages=[{"role": "user", "content": func(review)}],
                temperature=0,
                response_format={"type": "json_object"}, # Force JSON
                stop=None
            )

            # Use the shared helper rather than indexing into the response inline.
            data = parse_response(extract_text(completion))

            if data and "predicted_stars" in data:
                # int() may raise on values like "4 stars" or None; the except
                # below records that as an invalid prediction.
                pred = int(data["predicted_stars"])
                is_valid = True
        except Exception as e:
            print(f"ERROR ({name}): {e}")
            pred, is_valid = None, False

        record[f"{name}_pred"] = pred
        record[f"{name}_valid"] = is_valid

    results.append(record)

results_df = pd.DataFrame(results)
results_df.head()

Running experiments via Groq (With Debugging)...


100%|██████████| 5/5 [00:03<00:00,  1.59it/s]


Unnamed: 0,actual_stars,V1_ZeroShot_pred,V1_ZeroShot_valid,V2_FewShot_pred,V2_FewShot_valid,V3_CoT_pred,V3_CoT_valid
0,4,4,True,4,True,4,True
1,5,5,True,5,True,5,True
2,3,4,True,4,True,4,True
3,1,1,True,1,True,1,True
4,5,5,True,5,True,5,True


In [None]:
#Metrics & Evaluation
# Per-strategy scorecard:
#   Validity (%) - share of all evaluated rows flagged valid for the strategy
#   Accuracy (%) - share of those valid rows whose prediction equals the
#                  actual rating (0 when the strategy has no valid rows)
summary = []

for strat in strategies:
    pred_col = f"{strat}_pred"
    valid_col = f"{strat}_valid"

    valid = results_df.loc[results_df[valid_col].eq(True)]
    validity = (len(valid) / len(results_df)) * 100

    if len(valid) > 0:
        accuracy = valid[pred_col].eq(valid["actual_stars"]).mean() * 100
    else:
        accuracy = 0

    row = {
        "Strategy": strat,
        "Validity (%)": round(validity, 1),
        "Accuracy (%)": round(accuracy, 1),
    }
    summary.append(row)

comparison_table = pd.DataFrame(summary)
comparison_table


Unnamed: 0,Strategy,Validity (%),Accuracy (%)
0,V1_ZeroShot,100.0,80.0
1,V2_FewShot,100.0,80.0
2,V3_CoT,100.0,80.0
