In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [104]:
import time
import json, time, concurrent.futures as cf
import polars as pl
from tqdm import tqdm

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage, ChatCompletions
from azure.core.credentials import AzureKeyCredential

In [24]:
df = pl.read_parquet('../data/test_yelp_review.parquet')
working_df = df.group_by('label').agg(pl.all().sample(n=1000, with_replacement=False))
df_obj = {"label": [], "text": []}
few_shot_examples = ""
for row in working_df.iter_rows():
    for text in row[1]:
        df_obj["label"].append(row[0] + 1)
        df_obj["text"].append(text)
df = pl.DataFrame(df_obj)
df = df.with_row_index(name="row_id")

In [25]:
df.shape

(5000, 3)

In [26]:
df.head()

row_id,label,text
u32,i64,str
0,4,"""The name is as good as the foo…"
1,4,"""I travel to Las Vegas monthly …"
2,4,"""My mate and I just checked out…"
3,4,"""The food is excellent. I had t…"
4,4,"""these guys have really stepped…"


In [27]:
# For Serverless API or Managed Compute endpoints
from os import getenv


client = ChatCompletionsClient(
    endpoint="https://portfolio-resource.services.ai.azure.com/models",
    credential=AzureKeyCredential(getenv("AZURE_AI_INFERENCE_KEY")),
    api_version="2024-05-01-preview"
)
model_name = "llama-31_8b"

In [59]:
DEFAULT_ERROR_MESSAGE = "I am facing issue with performing the task. Kindly try in some time."
def call_llm(messages, response_format: str = "text"):
    max_retries = 3
    for try_i in range(max_retries):
        response = None
        try:
            response: ChatCompletions = client.complete(
                messages=messages,
                temperature=0.1,
                top_p=0.95,
                model=model_name,
                max_tokens=1024,
                response_format=response_format
            )
            output = response.choices[0].message.content
            return output
        except Exception as ex:
            print(str(ex))
            try_i += 1
            if try_i == max_retries:
                return DEFAULT_ERROR_MESSAGE
                


In [60]:
COMPLAINT_COMPLIMENT_EXTRATOR_PROMPT = """You are an expert in extracting complaints/compliments from Yelp reviews.
Extract these in the form of JSON object with key as complaint/complement and the value as one or more complain/complement.

Response format:
{{
    "complaint": ["complaint1", "complaint2", ...],
    "compliment": ["compliment1", "compliment2", ...]
}}
"""

COMPLAINT_COMPLIMENT_JUDGE_PROMPT = """You are an evaluator checking whether an assistant correctly extracted the main complaint or compliment from a Yelp review.
Your Task:
Evaluate how faithful the KEY_POINT is to the REVIEW content.  Justification is a must for your response.

Scoring (0-2):
- 2 = Faithful and central: captures the main complaint or compliment accurately and succinctly.
- 1 = Partially faithful: somewhat related but incomplete, vague, or misses the main issue/praise.
- 0 = Incorrect/off-topic: does not match the content or invents something not in the review.

Output ONLY valid JSON:
{
  "faithfulness": 0|1|2,
  "justification": "<one concise sentence explaining score>"
}
"""

In [61]:
def extract_key_points(review_text: str):
    key_point_extractor_messages = [
        SystemMessage(content=COMPLAINT_COMPLIMENT_EXTRATOR_PROMPT),
        UserMessage(content=f"""Review:
\"\"\"{review_text}\"\"\"""")
    ]
    key_point_extractor_response = call_llm(messages=key_point_extractor_messages, response_format="json_object")

    judge_messages = [
        SystemMessage(content=COMPLAINT_COMPLIMENT_JUDGE_PROMPT),
        UserMessage(content=f"""Review:
\"\"\"{review_text}\"\"\"\n
Key Point:
\"\"\"{key_point_extractor_response}\"\"\"""")
    ]
    judge_response = call_llm(messages=judge_messages, response_format="json_object")
    judge_response = json.loads(judge_response)
    key_points = json.loads(key_point_extractor_response)
    return {
        "complaint": key_points.get("complaint", ""),
        "compliment": key_points.get("compliment", ""),
        "faithfulness": judge_response.get("faithfulness", 0),
        "key_point_justification": judge_response.get("justification", "")
    }
    

In [98]:
STAR_PROMPT = """You are a review expert from Yelp reviews.
Your task:
Rate the review using the following rubric.

Rubric for rating:
1 = very negative, would not return
2 = negative with some redeeming detail
3 = mixed/neutral
4 = positive with minor issues
5 = very positive, enthusiastic

Response with only the rating int integer. Do not provide justification
"""

In [99]:
def get_rating(review_text: str):
    rating_messages = [
        SystemMessage(content=STAR_PROMPT),
        UserMessage(content=f"""Review:
\"\"\"{review_text}\"\"\"""")
    ]
    rating_response = call_llm(messages=rating_messages)
    if rating_response == DEFAULT_ERROR_MESSAGE:
        return { "rating": 0 }
    return { "rating": rating_response }
    

In [100]:
BUSINESS_RESPONSE_PROMPT = """You are you an employee of the business whose reviews have been posted on yelp.
You Task:
Revert back to the customer/user with a polite response addressing the complaint or compliment.

Responsd with only the response. Do not add/provide any justification.
"""

BUSINESS_RESPONSE_JUDGE_PROMPT = """You are an evaluator checking whether a business reply to a Yelp review is appropriate.
Your Task:
Evaluate the quality of the reply on two dimensions. Justification is a must for your response.

Scoring (each 0-2):
- "tone":
   2 = Polite, empathetic, brand-safe
   1 = Neutral but acceptable
   0 = Rude, dismissive, or inappropriate
- "actionability":
   2 = Provides a clear next step, remedy, or thanks
   1 = Minimal acknowledgment but little concrete action
   0 = No action or irrelevant response

Output ONLY valid JSON:
{
  "tone": 0|1|2,
  "actionability": 0|1|2,
  "justification": "<one concise sentence explaining scores>"
}
"""

In [101]:
def provide_business_response(review_text: str):
    business_response_messages = [
        SystemMessage(content=BUSINESS_RESPONSE_PROMPT),
        UserMessage(content=f"""Review:
\"\"\"{review_text}\"\"\"""")
    ]
    business_response = call_llm(messages=business_response_messages)

    judge_messages = [
        SystemMessage(content=BUSINESS_RESPONSE_JUDGE_PROMPT),
        UserMessage(content=f"""Review:
\"\"\"{review_text}\"\"\"\n
Business Reply:
\"\"\"{business_response}\"\"\"""")
    ]
    judge_response = call_llm(messages=judge_messages, response_format="json_object")
    judge_response = json.loads(judge_response)

    return {
        "business_response": business_response,
        "tone": judge_response.get("tone", 0),
        "actionability": judge_response.get("actionability", 0),
        "business_response_justification": judge_response.get("justification", "")
    }
    

In [102]:
def process_row(row):
    """Runs one record end-to-end; returns dict. Designed for ThreadPoolExecutor."""
    start_time = time.time()
    row_id = row["row_id"]
    review = row["text"]

    rating_response = get_rating(review)
    key_points = extract_key_points(review)
    business_response = provide_business_response(review)

    end_time = time.time()
    return {
        "row_id": row_id,
        **rating_response,
        **business_response,
        **key_points,
        "latency_sec": round(end_time - start_time, 3),
    }

In [82]:
MAX_WORKERS = 16

all_response = []
rows = list(df.iter_rows(named=True))  # materialize so we can index safely

with cf.ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
    # Submit all tasks at once
    futures = [ex.submit(process_row, row) for row in rows]

    # Iterate as they complete, but keep results in original order afterwards
    results_by_index = {}
    for i, fut in enumerate(tqdm(cf.as_completed(futures), total=len(futures), desc="Processing")):
        try:
            res = fut.result()
        except Exception as e:
            # Ensure we don't crash the whole run; record the error
            res = {"row_id": rows[i]["row_id"], "error": str(e)}
        results_by_index[i] = res

# Rebuild in original order
all_response = [results_by_index[i] for i in range(len(rows))]

Processing:  23%|██▎       | 1155/5000 [06:01<4:55:11,  4.61s/it]

('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))


Processing:  34%|███▎      | 1683/5000 [08:45<11:38,  4.75it/s]  

('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))


Processing: 100%|██████████| 5000/5000 [30:19<00:00,  2.75it/s]  


In [86]:
out_df = pl.DataFrame(all_response, strict=False)

In [87]:
out_df.head()

row_id,rating,business_response,tone,actionability,business_response_justification,complaint,compliment,faithfulness,key_point_justification,latency_sec
i64,str,str,i64,i64,str,list[str],list[str],i64,str,f64
14,"""5""","""""Dear valued guest, we're thri…",2,2,"""The business reply is polite, …",[],"[""beautiful"", ""beautiful canals"", … ""vanity area made for comfort and convenience""]",2,"""The key point accurately and s…",3.372
9,"""4""","""""Dear valued customer, we're t…",2,2,"""The business reply is polite, …","[""atrocious lines and checkout"", ""unorganized and picked through shelves and racks""]","[""ten times better than before"", ""reorganized and clean checkout lines"", … ""aesthetically pleasing displays""]",2,"""The key point accurately captu…",3.78
6,"""4""","""""Dear valued customer, thank y…",2,2,"""The business reply is polite, …","[""Atmosphere was very noisy""]","[""hamburgers were very good"", ""Service was very good""]",2,"""The key point accurately captu…",3.787
13,"""5""","""""Dear valued guest, We're th…",2,2,"""The business reply is polite, …",[],"[""good for groups"", ""friendly waitress"", … ""Foodie Fight""]",2,"""The key point accurately and s…",3.782
5,"""4""","""""Dear valued customer, thank y…",2,2,"""The business reply is polite, …","[""consistancy in the sauces is not head on""]","[""good selection for us vegitarians"", ""deliver on time"", ""very courteous""]",1,"""The key point partially captur…",3.906


In [85]:
all_df = df.join(out_df, on="row_id", how="left")
all_df.head()

row_id,label,text,rating,business_response,tone,actionability,business_response_justification,complaint,compliment,faithfulness,key_point_justification,latency_sec
u32,i64,str,str,str,i64,i64,str,list[str],list[str],i64,str,f64
0,4,"""The name is as good as the foo…","""4""","""""Thank you so much for your ki…",2,2,"""The business reply is polite, …",[],"[""The name is as good as the food"", ""worth a stop"", … ""a mom and pop Asian food place with all or your favorite taste liker teriyaki and orange chicken""]",0,"""The key point includes multipl…",4.136
1,4,"""I travel to Las Vegas monthly …","""5""","""""Dear valued customer, we're t…",2,2,"""The business reply is polite, …",[],"[""Food was great"", ""Was not sure what to expect ... Very interesting"", … ""The best places to eat are away from the strip and this is one of those places""]",2,"""The KEY_POINT accurately captu…",3.911
2,4,"""My mate and I just checked out…","""4""","""""Dear valued customer, Thank…",2,2,"""The business reply is polite, …","[""difficulty finding the location"", ""poor signage"", … ""overpriced Cinnamon Roasted Almonds ($9.00)""]","[""great food"", ""entertainment"", … ""colorful amusement rides""]",2,"""The key point accurately captu…",5.404
3,4,"""The food is excellent. I had t…","""4""","""""Dear valued customer, Thank…",2,2,"""The business reply is polite, …","[""wait for table can be longer at peak hours"", ""wait for refills"", … ""biscuit could have used some butter or jelly""]","[""food is excellent"", ""chicken and waffles were huge"", … ""server brought drinks right away""]",1,"""The key point partially captur…",5.2
4,4,"""these guys have really stepped…","""4""","""""Thank you so much for taking …",2,2,"""The business reply is polite, …","[""subpar"", ""disgusting"", … ""disgusting $ilver mine $ubs""]","[""really stepped up their game a notch"", ""great"", … ""good job fella's""]",1,"""The key point partially captur…",4.192


In [None]:
null_removed_df = all_df.drop_nulls()

(4995, 13)

In [103]:
null_removed_df = null_removed_df.with_columns(
    null_removed_df["rating"].cast(pl.Int64).alias("rating"),
)
null_removed_df.head()

row_id,label,text,rating,business_response,tone,actionability,business_response_justification,complaint,compliment,faithfulness,key_point_justification,latency_sec
u32,i64,str,i64,str,i64,i64,str,list[str],list[str],i64,str,f64
0,4,"""The name is as good as the foo…",4,"""""Thank you so much for your ki…",2,2,"""The business reply is polite, …",[],"[""The name is as good as the food"", ""worth a stop"", … ""a mom and pop Asian food place with all or your favorite taste liker teriyaki and orange chicken""]",0,"""The key point includes multipl…",4.136
1,4,"""I travel to Las Vegas monthly …",5,"""""Dear valued customer, we're t…",2,2,"""The business reply is polite, …",[],"[""Food was great"", ""Was not sure what to expect ... Very interesting"", … ""The best places to eat are away from the strip and this is one of those places""]",2,"""The KEY_POINT accurately captu…",3.911
2,4,"""My mate and I just checked out…",4,"""""Dear valued customer, Thank…",2,2,"""The business reply is polite, …","[""difficulty finding the location"", ""poor signage"", … ""overpriced Cinnamon Roasted Almonds ($9.00)""]","[""great food"", ""entertainment"", … ""colorful amusement rides""]",2,"""The key point accurately captu…",5.404
3,4,"""The food is excellent. I had t…",4,"""""Dear valued customer, Thank…",2,2,"""The business reply is polite, …","[""wait for table can be longer at peak hours"", ""wait for refills"", … ""biscuit could have used some butter or jelly""]","[""food is excellent"", ""chicken and waffles were huge"", … ""server brought drinks right away""]",1,"""The key point partially captur…",5.2
4,4,"""these guys have really stepped…",4,"""""Thank you so much for taking …",2,2,"""The business reply is polite, …","[""subpar"", ""disgusting"", … ""disgusting $ilver mine $ubs""]","[""really stepped up their game a notch"", ""great"", … ""good job fella's""]",1,"""The key point partially captur…",4.192
