In [15]:
import os
from typing import Optional

import pandas as pd
import numpy as np

from sklearn.metrics import classification_report, accuracy_score

from openai import OpenAI
from dotenv import load_dotenv

In [16]:
# Load variables from a .env file into the environment before reading them.
load_dotenv()

# --- Credentials ---
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")  # None when the variable is unset

# --- Data ---
DATASET_PATH = "reviews.csv"
TEST_SAMPLE_SIZE = 20  # number of CSV rows loaded for evaluation

# --- Model and prompts ---
MODEL = "gpt-3.5-turbo"

SYS_PROMPT = """
You will be provided a movie review.
Your goal is to classify it as either "positive" or "negative".
Use exactly one word in your answer.
"""
USER_PROMPT = "Review: {}"  # "{}" is filled with the review text via str.format

In [17]:
def translate_model_response(model_response: Optional[str]) -> Optional[str]:
    """Map a raw model reply onto one of the two sentiment labels.

    The check is a case-insensitive substring search, so replies that differ
    in capitalisation or carry trailing punctuation ("Positive", "Negative.")
    are still recognised.

    Args:
        model_response: Text returned by the model. ``None`` is accepted and
            treated as an unrecognised reply.

    Returns:
        ``"positive"`` or ``"negative"``, or ``None`` when neither word occurs
        in the reply. If both occur, ``"positive"`` wins because it is checked
        first.
    """
    # A missing reply cannot be classified; avoid calling .lower() on None.
    if model_response is None:
        return None

    normalized = model_response.lower()

    # Order matters: "positive" takes precedence when both labels appear.
    for label in ("positive", "negative"):
        if label in normalized:
            return label

    return None

def classify_review(client: OpenAI, review: str) -> str:
    """Ask the chat model to classify one review and return its raw reply.

    Args:
        client: OpenAI client used to send the chat completion request.
        review: Review text; it is inserted into ``USER_PROMPT``.

    Returns:
        The content of the first choice's message, unmodified, or an empty
        string when the response carries no text content.
    """
    # Collapse the multi-line system prompt into a single line before sending.
    system_prompt = SYS_PROMPT.strip().replace("\n", " ")

    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": USER_PROMPT.format(review)},
        ],
    )

    # The message content is nullable in the API response; coalesce to "" so
    # the declared `str` return type always holds.
    content = response.choices[0].message.content
    return content if content is not None else ""


In [18]:
# Single client instance, reused for every classification request.
openai_client = OpenAI(api_key=OPENAI_API_KEY)

In [19]:
# Load only the first TEST_SAMPLE_SIZE rows; each loaded review costs one API request.
dataset = pd.read_csv(filepath_or_buffer=DATASET_PATH, nrows=TEST_SAMPLE_SIZE)
dataset

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive
5,"Probably my all-time favorite movie, a story o...",positive
6,I sure would like to see a resurrection of a u...,positive
7,"This show was an amazing, fresh & innovative i...",negative
8,Encouraged by the positive comments about this...,negative
9,If you like original gut wrenching laughter yo...,positive


In [20]:
# One request per review, issued sequentially; replies are kept verbatim.
raw_predictions = np.array(
    list(map(lambda text: classify_review(openai_client, text), dataset["review"]))
)
raw_predictions

array(['Positive', 'Positive', 'positive', 'negative', 'Positive',
       'Positive', 'Positive', 'negative', 'negative', 'Positive',
       'Negative', 'Negative.', 'Negative', 'negative', 'Positive',
       'Negative', 'negative', 'negative', 'negative', 'Negative'],
      dtype='<U9')

In [21]:
# Normalise each raw reply to a sentiment label.
predictions = np.array([translate_model_response(reply) for reply in raw_predictions])
predictions

array(['positive', 'positive', 'positive', 'negative', 'positive',
       'positive', 'positive', 'negative', 'negative', 'positive',
       'negative', 'negative', 'negative', 'negative', 'positive',
       'negative', 'negative', 'negative', 'negative', 'negative'],
      dtype='<U8')

In [22]:
# Accuracy of the normalised predictions against the dataset's sentiment column.
accuracy = accuracy_score(y_true=dataset["sentiment"], y_pred=predictions)
accuracy

0.9

In [23]:
# The report is a multi-line string, so print() is used to render its line breaks.
report = classification_report(y_true=dataset["sentiment"], y_pred=predictions)
print(report)

              precision    recall  f1-score   support

    negative       0.83      1.00      0.91        10
    positive       1.00      0.80      0.89        10

    accuracy                           0.90        20
   macro avg       0.92      0.90      0.90        20
weighted avg       0.92      0.90      0.90        20
