In [264]:
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.text import Tokenizer, text_to_word_sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences

import bz2
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score

%matplotlib inline

In [265]:
filename_test = 'reviews_test.bz2'

In [266]:
def read_texts_and_labels(file, max_lines):
  texts = []
  labels = []
  num_lines=0
  for line in bz2.BZ2File(file):
    x = line.decode("utf-8")
    label, text = x.split(" ", 1)
    #print(text)
    # Convert positive sentiment labels to 1 and negative sentiment labels to 0
    if label == "__label__1":
        label = 0
    elif label == "__label__2":
        label = 1
    texts.append(text.strip())
    labels.append(label)
    num_lines += 1
    if(num_lines >= max_lines): break
  #texts = np.array(texts)
  labels = np.array(labels)
  print(num_lines)
  return texts, labels

max_lines_test = 50
test_texts, test_labels = read_texts_and_labels(filename_test, max_lines_test)

50


In [267]:
train_texts = []
with open("train_texts.txt", "r") as f:
  for line in f:
    train_texts.append(line.strip())

In [268]:
num_of_words = 3000 # you may experiment with different numbers
tokenizer = Tokenizer(num_words=num_of_words)
tokenizer.fit_on_texts(train_texts)
test_sequences = tokenizer.texts_to_sequences(test_texts)
max_length_of_sequences = 235 # Obtained while model building

In [269]:
test_sequences = pad_sequences(test_sequences, maxlen=max_length_of_sequences, padding='pre')

In [270]:
model_NN = tf.keras.models.load_model('Model_NN_100000.h5')
model_GRU = tf.keras.models.load_model('Model_GRU_100000.h5')

In [271]:
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification

# Load the pre-trained model
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2ForSequenceClassification.from_pretrained("gpt2")




Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [272]:
def predict_sentiment(text):
    #print("inside")
    # Encode the text
    encoded_text = tokenizer.encode(text, return_tensors="pt")
    # Predict the sentiment
    sentiment = model(encoded_text)[0]
    # Decode the sentiment
    #print(sentiment.argmax().item())
    return sentiment.argmax().item()

def gpt2_predict(test_texts):
# Test the model
    #print("here")
    labels = []
    for text in test_texts:
        #print(text)
        
        sentiment = predict_sentiment(text)
        labels.append(sentiment)
        #sentiment.append(predict_sentiment(text))
        #print("Sentiment: ",sentiment) # 1 for positive, 0 for negative
    return labels

In [273]:
from transformers import pipeline

def gpt3_predict(test_texts):
    score = pipeline("sentiment-analysis")
    labels = []
    for text in test_texts:
        if score(text)[0]['label'] == "POSITIVE":
            labels.append(1)
        else:
            labels.append(0)
    return labels

In [274]:
import math
import openai
import time

openai.api_key = '### USE YOUR OWN OPENAI API KEY'

gpt3p5_labels = []
def analyze_gpt35(text):
    messages = [
        {"role": "system", "content": """You are trained to analyze and detect the sentiment of given text. 
                                        If you're unsure of an answer, you can say "not sure" and recommend users to review manually."""},
        {"role": "user", "content": f"""Analyze the following product review and determine if the sentiment is: positive or negative. 
                                        Return answer in single word as either positive or negative: {text}"""}
        ]
   
    response = openai.ChatCompletion.create(
                      model="gpt-3.5-turbo",
                      messages=messages, 
                      max_tokens=1, 
                      n=1, 
                      stop=None, 
                      temperature=0)

    response_text = response.choices[0].message.content.strip().lower()

    return response_text

def gpt3p5_predict(test_texts):
    for text in test_texts:
        out = analyze_gpt35(text)
        print(out)
        if out == "positive":
            gpt3p5_labels.append(1)
        else:
            gpt3p5_labels.append(0)
        time.sleep(20)

In [275]:
# Implement code here
test_pred_ffn = model_NN.predict(test_sequences)
test_pred_gru = model_GRU.predict(test_sequences)
test_pred_gpt2 = np.array(gpt2_predict(test_texts))
test_pred_gpt3 = np.array(gpt3_predict(test_texts))

gpt3p5_predict(test_texts)
test_pred_gpt3p5 = np.array(gpt3p5_labels)
#print(test_pred_gpt2)

test_pred_labels_ffn = np.round(test_pred_ffn)
test_pred_labels_gru = np.round(test_pred_gru)
test_pred_labels_gpt2 = test_pred_gpt2.reshape(-1,1)
test_pred_labels_gpt3 = test_pred_gpt3.reshape(-1,1)
test_pred_labels_gpt3p5 = test_pred_gpt3p5.reshape(-1,1)

#print(test_pred_labels_ffn)
#print(test_pred_labels_gru)
#print(test_pred_labels_gpt2)
print(test_pred_labels_gpt3p5)




No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


positive
positive
negative
negative
positive
negative
negative
negative
positive
negative
positive
negative
negative
positive
negative
negative
positive
positive
positive
positive
negative
negative
positive
positive
negative
negative
positive
negative
positive
negative
positive
positive
positive
positive
positive
negative
positive
negative
positive
negative
positive
negative
positive
positive
positive
positive
negative
negative
negative
positive
[[1]
 [1]
 [0]
 [0]
 [1]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [1]]


In [276]:
from sklearn.metrics import accuracy_score, f1_score

acc_ffn = accuracy_score(test_labels, test_pred_labels_ffn)
f1_ffn = f1_score(test_labels, test_pred_labels_ffn)

acc_gru = accuracy_score(test_labels, test_pred_labels_gru)
f1_gru = f1_score(test_labels, test_pred_labels_gru)

acc_gpt2 = accuracy_score(test_labels, test_pred_labels_gpt2)
f1_gpt2 = f1_score(test_labels, test_pred_labels_gpt2)

acc_gpt3 = accuracy_score(test_labels, test_pred_labels_gpt3)
f1_gpt3 = f1_score(test_labels, test_pred_labels_gpt3)

acc_gpt3p5 = accuracy_score(test_labels, test_pred_labels_gpt3p5)
f1_gpt3p5 = f1_score(test_labels, test_pred_labels_gpt3p5)

print("FFN model accuracy: {:.3f}".format(acc_ffn))
print("FFN model F1 score: {:.3f}".format(f1_ffn))

print("GRU model accuracy: {:.3f}".format(acc_gru))
print("GRU model F1 score: {:.3f}".format(f1_gru))

print("GPT2 model accuracy: {:.3f}".format(acc_gpt2))
print("GPT2 model F1 score: {:.3f}".format(f1_gpt2))

print("GPT3 model accuracy: {:.3f}".format(acc_gpt3))
print("GPT3 model F1 score: {:.3f}".format(f1_gpt3))

print("GPT3.5 model accuracy: {:.3f}".format(acc_gpt3p5))
print("GPT3.5 model F1 score: {:.3f}".format(f1_gpt3p5))

FFN model accuracy: 0.880
FFN model F1 score: 0.889
GRU model accuracy: 0.900
GRU model F1 score: 0.915
GPT2 model accuracy: 0.440
GPT2 model F1 score: 0.000
GPT3 model accuracy: 0.860
GPT3 model F1 score: 0.877
GPT3.5 model accuracy: 0.980
GPT3.5 model F1 score: 0.982
