In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
import polars as pl

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.core.credentials import AzureKeyCredential

In [3]:
splits = {'train': 'yelp_review_full/train-00000-of-00001.parquet', 'test': 'yelp_review_full/test-00000-of-00001.parquet'}
train_df = pl.read_parquet('hf://datasets/Yelp/yelp_review_full/' + splits['train'])
test_df = pl.read_parquet('hf://datasets/Yelp/yelp_review_full/' + splits['test'])

In [4]:
train_df.write_parquet("../data/train_yelp_review.parquet")
test_df.write_parquet("../data/test_yelp_review.parquet")

In [5]:
train_df.head()

label,text
i64,str
4,"""dr. goldberg offers everything…"
1,"""Unfortunately, the frustration…"
3,"""Been going to Dr. Goldberg for…"
3,"""Got a letter in the mail last …"
0,"""I don't know what Dr. Goldberg…"


In [6]:
# For Serverless API or Managed Compute endpoints
from os import getenv


client = ChatCompletionsClient(
    endpoint="https://portfolio-resource.services.ai.azure.com/models",
    credential=AzureKeyCredential(getenv("AZURE_AI_INFERENCE_KEY")),
    api_version="2024-05-01-preview"
)
model_name = "mistral-small-2503"

In [7]:
SYSTEM_PROMPT = """You are a precise sentiment rating assistant.
Given a single review, decide the star rating from 1-5 (1=very negative, 3=neutral/mixed, 5=very positive).
Return ONLY a strict JSON object with keys 'stars' (an integer 1-5) and 'explanation' (a brief string).
Do not include any extra keys or text outside JSON.
Return strict JSON: {{\"stars\": <int>, \"explanation\": \"<short reason>\"}}.
"""

ZERO_SHOT_USER_TMPL = """Classify the following review:
\"\"\"{review_text}\"\"\""""

In [8]:
first_review = train_df[0]
first_review

label,text
i64,str
4,"""dr. goldberg offers everything…"


In [9]:
first_review['text'].item()

"dr. goldberg offers everything i look for in a general practitioner.  he's nice and easy to talk to without being patronizing; he's always on time in seeing his patients; he's affiliated with a top-notch hospital (nyu) which my parents have explained to me is very important in case something happens and you need surgery; and you can get referrals to see specialists without having to see him first.  really, what more do you need?  i'm sitting here trying to think of any complaints i have about him, but i'm really drawing a blank."

In [10]:
response = client.complete(
    messages=[
        SystemMessage(content=SYSTEM_PROMPT),
        UserMessage(content=ZERO_SHOT_USER_TMPL.format_map({'review_text': first_review['text'].item()})),
    ],
    max_tokens=128,
    temperature=0.1,
    top_p=0.95,
    model=model_name,
    response_format="json_object"
)

In [11]:
output = response.choices[0].message.content

In [12]:
import json


json.loads(output)

{'stars': 5,
 'explanation': 'The reviewer praises the doctor extensively, highlighting multiple positive attributes and struggles to find any complaints.'}

## Few Shot

In [13]:
unique_entries = train_df.group_by("label").agg(pl.all().sample(2, with_replacement=False))

In [14]:
few_shot_examples = []
for row in unique_entries.iter_rows():
    for review in row[1]:
        few_shot_examples.append({
            "review_text": review,
            "rating": row[0] + 1
        })

In [15]:
for example in few_shot_examples:
    client.complete(
        messages=[
            SystemMessage(content="""You are an expert sentiment analyst.
    Understand the review provided by the user, the rating provided and provide a single line explanation as to why the review could have been provided.
    Respond with only the explantion and nothing else.
    """),
            UserMessage(content=f"Review: \"\"\"{example['review_text']}\"\"\"\nRating: {example['rating']}"),
        ],
        max_tokens=128,
        temperature=0.1,
        top_p=0.95,
        model=model_name,
        response_format="text"
    )
    explanation = response.choices[0].message.content
    example['explanation'] = explanation

In [16]:
few_shot_examples

[{'review_text': 'Ok, we switched rooms twice at HRH towers. My advice, stay away from the smoking rooms, they are tore back. All of the HRH and front staff at the main desk are awesome! They do care a lot about their guests. Remember approach determines response. Treat them well! We tipped housekeeping everyday. We love it here and our room after the second time is immaculate.  The spa is outstanding. I love it here. Oh but the WIFI sucks.',
  'rating': 4,
  'explanation': '{"stars": 5, "explanation": "The reviewer praises the doctor extensively, highlighting multiple positive attributes and struggles to find any complaints."}'},
 {'review_text': '4.5 if I could....\\n\\nSo its my birthday and my wife hunts down \\"Phoenix\'s Best Sushi\\" and comes up with this place near our home in Chandler.  Now I have friends just to the east of this intersection and I NEVER noticed this place before.  In an empty shopping center (basically) and very nondescript.\\n\\nSo we walk in and I was like

In [17]:
system_prompt = SYSTEM_PROMPT + "\nHere are the examples of reviews and their ratings: {examples}"

In [18]:
system_prompt = system_prompt.format_map({'examples': "\n\n".join([f"- Review: \"\"\"{example['review_text']}\"\"\"\nRating: {example['rating']}\nExplanation: {example['explanation']}" for example in few_shot_examples])})
few_shot_example = ZERO_SHOT_USER_TMPL.format_map({'review_text': first_review['text'].item()})

In [19]:
few_shot_example

'Classify the following review:\n"""dr. goldberg offers everything i look for in a general practitioner.  he\'s nice and easy to talk to without being patronizing; he\'s always on time in seeing his patients; he\'s affiliated with a top-notch hospital (nyu) which my parents have explained to me is very important in case something happens and you need surgery; and you can get referrals to see specialists without having to see him first.  really, what more do you need?  i\'m sitting here trying to think of any complaints i have about him, but i\'m really drawing a blank."""'

In [20]:
response = client.complete(
    messages=[
        SystemMessage(content=system_prompt),
        UserMessage(content=few_shot_example),
    ],
    max_tokens=128,
    temperature=0.1,
    top_p=0.95,
    model=model_name,
    response_format="json_object"
)

In [21]:
output = response.choices[0].message.content
json.loads(output)

{'stars': 5,
 'explanation': 'The reviewer praises the doctor extensively, highlighting multiple positive attributes and struggles to find any complaints.'}