In [1]:
import json
import numpy as np
import pandas as pd
import traceback
from dotenv import load_dotenv
from openai import OpenAI
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Load environment variables before the client is built.
# NOTE(review): presumably this reads a local .env file holding the OpenAI
# credentials that OpenAI() picks up from the environment — confirm.
load_dotenv()
# Single shared API client reused by the batch-labelling cell below.
client = OpenAI()

In [2]:
# Load the raw hotel reviews; each row carries a negative and a positive text column.
df = pd.read_csv("../data/Britannia.csv")

# Drop the placeholder entries (presumably written when a guest left that part
# of the review blank — verify against the dataset description).
wrote_negative = df["Negative_Review"] != "No Negative"
wrote_positive = df["Positive_Review"] != "No Positive"
negative_reviews = df["Negative_Review"][wrote_negative]
positive_reviews = df["Positive_Review"][wrote_positive]

# Stack negatives first, then positives, and label each text by the column it
# came from so the two lists stay aligned row-for-row.
all_reviews = pd.concat([negative_reviews, positive_reviews], ignore_index=True)
all_sentiments = ["Negative"] * len(negative_reviews)
all_sentiments = all_sentiments + ["Positive"] * len(positive_reviews)
review_df = pd.DataFrame({"Review": all_reviews, "Sentiment": all_sentiments})
review_df.describe()

Unnamed: 0,Review,Sentiment
count,8361,8361
unique,7689,2
top,Location,Negative
freq,151,4262


In [3]:
# Hold out 20% of the reviews for evaluation. Stratifying on the label keeps
# the Positive/Negative ratio the same in both splits; the fixed seed makes
# the split repeatable.
reviews, sentiments = review_df["Review"], review_df["Sentiment"]
X_train, X_test, y_train, y_test = train_test_split(
    reviews, sentiments, test_size=0.2, random_state=42, stratify=sentiments
)
print(X_train.shape, X_test.shape)

(6688,) (1673,)


In [4]:
def generate_batches(lst, batch_size):
    """Yield consecutive slices of ``lst`` holding at most ``batch_size`` items.

    Args:
        lst: Any sized, sliceable sequence (list, tuple, str, NumPy array, ...).
        batch_size: Maximum number of items per batch; must be positive.

    Yields:
        ``lst[i : i + batch_size]`` for ``i = 0, batch_size, 2 * batch_size, ...``.
        The last batch is shorter when ``len(lst)`` is not a multiple of
        ``batch_size``; an empty ``lst`` yields nothing.

    Raises:
        ValueError: If ``batch_size`` is zero or negative. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop every item,
    # so reject it explicitly instead of returning no batches.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    for start in range(0, len(lst), batch_size):
        yield lst[start : start + batch_size]

In [None]:
# Make sure the directory for the per-batch LLM responses exists
# (-p creates missing parents and does not fail if it is already there).
!mkdir -p ../model/llm/output

In [None]:
# Prompt for the batch sentiment-labelling request. It is rendered with
# str.format(), so the doubled braces are literal JSON braces and
# {input_json} is the only substitution field.
template = """Add a "label" field to each object in the JSON below, based on the sentiment of the "text" field. The "label" must be either "Positive" or "Negative" and should exclude any neutral classification. For ambiguous or mixed sentiments, assign the label that reflects the overall tone. Return the JSON formatted as pretty-printed text.

### Desired JSON structure:
{{
    "output": [
        {{
            "id": <integer>,           // Unique identifier for the object
            "text": <string>,          // A sentence or phrase to analyze
            "label": <"Positive"|"Negative"> // Sentiment of the text, must always be either "Positive" or "Negative", never be "Neutral".
        }},
        ...
    ]
}}

### Input JSON:
{input_json}
"""

# Label the held-out reviews with the LLM, 50 at a time, and save each raw
# response to ../model/llm/output/<batch_id>.json for later evaluation.
# A failing batch is reported and skipped (best effort) rather than aborting
# the whole run; the ids of skipped batches are collected so they can be
# retried before the saved files are read back.
failed_batches = []
for batch_id, batch in enumerate(generate_batches(X_test.values, batch_size=50)):
    try:
        # Ids restart at 1 inside every batch; they only identify a review
        # within its own request.
        records = [{"id": i, "text": text} for i, text in enumerate(batch, start=1)]
        prompt = template.format(input_json=json.dumps({"input": records}, indent=4))

        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="gpt-4o-mini",
            temperature=0,
            response_format={"type": "json_object"},
        )
        response = json.loads(chat_completion.choices[0].message.content)

        # Predictions are later matched to the true labels purely by position,
        # so a response with a missing or extra item would shift every label
        # after it. Reject such a response instead of saving it.
        returned = len(response["output"])
        if returned != len(batch):
            raise ValueError(
                f"batch {batch_id}: sent {len(batch)} reviews but got {returned} results"
            )

        with open(f"../model/llm/output/{batch_id}.json", "w", encoding="utf-8") as f:
            json.dump(response, f, indent=4)
    except Exception:
        failed_batches.append(batch_id)
        print("Caught an exception:", traceback.format_exc())

if failed_batches:
    print("Batches without a saved response (retry these):", failed_batches)

In [5]:
# Read the saved per-batch responses back in batch order and flatten their
# labels into one prediction list that lines up with the test split.
y_pred = []
for batch_id, _ in enumerate(generate_batches(X_test.values, batch_size=50)):
    with open(f"../model/llm/output/{batch_id}.json", "r") as fh:
        batch_labels = [item["label"] for item in json.load(fh)["output"]]
    # Print the id of any batch where the model returned the disallowed
    # "Neutral" label.
    if any(label == "Neutral" for label in batch_labels):
        print(batch_id)
    y_pred.extend(batch_labels)

In [6]:
# Per-class precision / recall / F1 of the LLM labels against the true
# sentiment of the held-out reviews, printed to four decimal places.
print(classification_report(y_test, y_pred, digits=4))

              precision    recall  f1-score   support

    Negative     0.7992    0.9660    0.8747       853
    Positive     0.9548    0.7476    0.8386       820

    accuracy                         0.8589      1673
   macro avg     0.8770    0.8568    0.8567      1673
weighted avg     0.8755    0.8589    0.8570      1673



In [7]:
# Positional indices (not DataFrame index labels) of the test reviews whose
# predicted label differs from the true one.
wrong_idx = (y_test != y_pred).to_numpy().nonzero()[0]

# One row per misclassified review: the text plus predicted and true labels.
# The row index keeps the review's original position in review_df.
wrong_pred_df = (
    X_test.iloc[wrong_idx]
    .to_frame()
    .assign(
        Predicted_Sentiment=np.array(y_pred)[wrong_idx],
        True_Sentiment=y_test.values[wrong_idx],
    )
)

# Save for offline error analysis; the index is deliberately not written.
wrong_pred_df.to_csv("../data/wrong_pred_llm.csv", index=False)
wrong_pred_df

Unnamed: 0,Review,Predicted_Sentiment,True_Sentiment
6966,Coffee,Negative,Positive
5520,Hotel position location of the tube overall look,Negative,Positive
7518,not water,Negative,Positive
5376,The international hotel has very internationa...,Negative,Positive
2205,Complimentary Water in room Free wifi in room,Positive,Negative
...,...,...,...
5369,The foyer spa size of room,Negative,Positive
7720,They are not flexible,Negative,Positive
3067,Swimming pool,Positive,Negative
4951,Not much to be honest Location good for what ...,Negative,Positive
