In [None]:
### 0. Install requirements ###

!pip install openai pandas nltk scikit-learn

In [None]:
### 1. Data ####

import pandas as pd

# Shell out to the gdown CLI to fetch the reviews CSV. <file_id> is a
# placeholder and must be replaced with the real file id before running.
# NOTE(review): presumably this saves ./reviews.csv in the working directory,
# since that is the path read below -- confirm.
!gdown <file_id>  # download the csv file

# The frame is expected to carry a 'text' column (read here) and a
# 'sentiment' column (read by later cells).
reviews = pd.read_csv('./reviews.csv')  # load reviews

# Label-based lookup: the row whose index label is 0.
text = reviews['text'][0]  # get first review

# Quick sanity check that the download and parse worked.
print(text)


In [None]:
### 2. Sentiment analysis with NLTK ###

import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# Fetch the VADER lexicon, then build the single analyzer instance that
# sentiment_analysis_nltk() reuses for every review.
nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

def sentiment_analysis_nltk(text):
  """Label ``text`` as 'positive' or 'negative' using the shared analyzer ``sia``.

  The 'compound' entry of ``sia.polarity_scores(text)`` decides the label:
  strictly greater than zero gives 'positive'; zero or below gives 'negative'.
  """
  compound = sia.polarity_scores(text)['compound']
  return 'positive' if compound > 0 else 'negative'


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


In [None]:
### 3. Sentiment analysis with Scikit-Learn ###

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

features = reviews['text']
labels = reviews['sentiment']

# Fixed seed so the train/test split -- and every accuracy derived from it --
# is the same on each Restart & Run All.
SPLIT_SEED = 42

# Keep the raw text splits under their own names instead of overwriting them
# with the TF-IDF matrices; stratify keeps the label ratio equal in both parts.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=SPLIT_SEED)

# Fit the vectorizer on the training text only; the test text is transformed
# with what was learned there, so nothing about the test rows leaks into it.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

classifier = SVC()
classifier.fit(X_train, y_train)

def sentiment_analysis_scikit_learn(text):
  """Label ``text`` as 'positive' or 'negative' with the fitted vectorizer and classifier.

  ``text`` is wrapped in a one-element list, transformed by ``vectorizer``
  and passed to ``classifier.predict``. The result is 'positive' when the
  predicted label contains the substring 'positive', otherwise 'negative'.
  """
  tfidf_row = vectorizer.transform([text])
  predicted_label = classifier.predict(tfidf_row)[0]
  return 'positive' if 'positive' in predicted_label else 'negative'


In [None]:
### 4. Sentiment analysis with ChatGPT ###

from openai import OpenAI


# Supply the key at runtime; never commit a real key to the notebook.
# NOTE(review): with None the client is expected to fall back to the
# OPENAI_API_KEY environment variable (per the openai-python README) --
# confirm for the installed version.
openai_key = None

# One shared client, reused by sentiment_analysis_chatgpt() for every request.
client = OpenAI(api_key=openai_key)

def sentiment_analysis_chatgpt(text):
  """Label ``text`` as 'positive' or 'negative' by asking a chat model.

  Sends one user message through the module-level ``client`` and inspects
  the first choice of the completion.

  Args:
    text: The review to classify.

  Returns:
    'positive' if the reply contains 'positive' (case-insensitive),
    otherwise 'negative'. An empty or missing reply also yields 'negative'.

  Raises:
    Whatever ``client.chat.completions.create`` raises; errors are not
    caught here.
  """
  # Join the sentences with explicit spaces so the model does not receive
  # run-together text such as '..."Sentiment is binary'.
  prompt = ' '.join([
      'Classify the sentiment of this review: "{}"'.format(text),
      'Sentiment is binary, either "positive" or "negative".',
      'Return only the sentiment.',
  ])

  chat_completion = client.chat.completions.create(
      messages=[
          {
              "role": "user",
              "content": prompt,
          }
      ],
      model="gpt-3.5-turbo",
  )

  # The message content may be None; normalise it so .lower() cannot fail
  # and the reply falls through to the 'negative' default below.
  response_ = chat_completion.choices[0].message.content or ''

  if 'positive' in response_.lower():
    return 'positive'
  else:
    return 'negative'

In [None]:
### 5. Compare performances ###

# Score every model on the held-out rows only. The SVC was fitted on the
# training split, so counting those rows would inflate its accuracy relative
# to the two models that never saw the data. y_test keeps the row labels of
# `reviews`, so its index selects exactly the test rows.
eval_reviews = reviews.loc[y_test.index]

score_nltk = 0
score_scikit_learn = 0
score_chatgpt = 0

# Iterate over values rather than integer labels, so the loop does not
# depend on the frame having a 0..n-1 index.
# Note: this makes one ChatGPT API request per evaluated review.
for text, sentiment in zip(eval_reviews['text'], eval_reviews['sentiment']):

  if sentiment_analysis_nltk(text) == sentiment:
    score_nltk += 1

  if sentiment_analysis_scikit_learn(text) == sentiment:
    score_scikit_learn += 1

  if sentiment_analysis_chatgpt(text) == sentiment:
    score_chatgpt += 1


# Convert hit counts to accuracy over the evaluated rows.
score_nltk /= len(eval_reviews)
score_scikit_learn /= len(eval_reviews)
score_chatgpt /= len(eval_reviews)

print('score_nltk', score_nltk)
print('score_scikit_learn', score_scikit_learn)
print('score_chatgpt', score_chatgpt)