In [1]:
# Run the Jupyter notebook from top to bottom. Make sure all of the libraries imported in the
# import cell below have been installed.
# Note: Some code has been commented out because it became unused or was only needed for debugging.
# Some code can be uncommented (e.g. the model-loading code; for more details, read the comments in the project).

In [35]:
import random
import re
from typing import Iterator, Tuple

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_hub as hub
import tensorflow_text as text
from keras.models import load_model
from nltk.tokenize import word_tokenize, sent_tokenize
from official.nlp import optimization
from sklearn import metrics
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [3]:
# Load dataset
# The path is relative, so Reviews.csv must sit in the notebook's working directory.
# Later cells read its "Score" and "Text" columns.
df = pd.read_csv('Reviews.csv') 

In [4]:
# Pull the two columns used downstream out of the DataFrame as plain Python lists.
scores = df["Score"].tolist()
# NOTE(review): this rebinds `text`, which the import cell also uses as the alias for
# tensorflow_text. Re-running the import cell afterwards replaces this list with the module.
text = df["Text"].tolist()

def shuffle_two_lists(a, b):
    '''
    Shuffle two lists in unison so that a[i] and b[i] are still paired together.
    Uses the global `random` state, so call random.seed(...) first for a
    reproducible order.
    a: list a
    b: list b
    returns: a tuple (shuffled_a, shuffled_b) of two new lists; the inputs are
             not modified. If the inputs differ in length, the extra items of
             the longer one are dropped (zip semantics). Two empty inputs give
             two empty lists.
    '''
    paired = list(zip(a, b))
    if not paired:
        # zip(*[]) produces nothing, so unpacking it into two names would raise
        # ValueError; an empty input simply shuffles to an empty output.
        return [], []
    random.shuffle(paired)
    shuffled_a, shuffled_b = zip(*paired)
    return list(shuffled_a), list(shuffled_b)
    
# reference: https://stackoverflow.com/questions/9662346/python-code-to-remove-html-tags-from-a-string
def remove_html_tags(string):
    '''
    Return `string` with every `<...>` span stripped out.
    The match is non-greedy and does not cross newlines, so each tag is removed
    on its own and a `<` whose closing `>` is on another line is left in place.
    string: the text to clean
    '''
    return re.compile('<.*?>').sub('', string)


In [5]:
# Shared preprocessing for both the vader and the Bert evaluations.
# Note: vader scores raw sentences directly, so very little preprocessing is
#       actually common to the two models; stripping HTML tags is all that is done here.
text = [remove_html_tags(sentence) for sentence in text]

In [6]:
# Split data into training and testing sets.
# Note, we didn't need to train the vader model because it is a rule based model. It does not change based
# on the text.
random.seed(42)
# Bind the shuffled lists to new names instead of overwriting `scores`/`text`.
# Overwriting them made this cell non-idempotent: running it a second time reshuffled
# already-shuffled data and produced a different split. On a fresh kernel the result
# is the same as before.
shuffled_scores, shuffled_text = shuffle_two_lists(scores, text)
# 80/20 split; random_state pins the split itself.
X_train, X_test, y_train, y_test = train_test_split(
    shuffled_text, shuffled_scores, test_size=0.20, random_state=42)

In [7]:
# Take every sentence in X_test and evaluate it on vader sentiment.
# One analyzer instance is created here and passed to vader_prediction for each review.
vader = SentimentIntensityAnalyzer()

def vader_prediction(model, sentence, thresholds=(0.2, 0.4, 0.6, 0.8)):
    '''
    Return a score between 1 - 5 based on the vader model's sentiment analysis classification
    model: a vader sentiment intensity analyzer (anything exposing polarity_scores(sentence)
           that returns a dict with a 'compound' entry)
    sentence: a string that represents a review
    thresholds: ascending, inclusive upper bounds of the 'compound' score for classes
                1, 2, ... in order; any score above the last bound gets the top class
                (len(thresholds) + 1). The default reproduces the original cut-offs.
    returns: an int class label in 1 .. len(thresholds) + 1
    '''
    # NOTE(review): VADER documents 'compound' as normalised to [-1, 1]. With the default
    # cut-offs every score <= 0.2 (all negative and neutral reviews) lands in class 1.
    # Confirm this is intended, or pass e.g. thresholds=(-0.6, -0.2, 0.2, 0.6).
    score = model.polarity_scores(sentence)['compound']
    for rating, upper_bound in enumerate(thresholds, start=1):
        if score <= upper_bound:
            return rating
    # Anything above the last bound is the top class. Previously a score above 1.0
    # fell through every branch and the function returned None.
    return len(thresholds) + 1

In [8]:
# Score every held-out review with the shared vader analyzer.
vader_predictions = [vader_prediction(vader, sentence) for sentence in X_test]

In [9]:
# Print calculated recall, precision, and F-1 metrics for the vader model
# (one row per score class plus accuracy and macro/weighted averages, to 5 decimal places).
print(metrics.classification_report(y_test, vader_predictions, digits=5))

              precision    recall  f1-score   support

           1    0.36620   0.54521   0.43813     10572
           2    0.09695   0.07716   0.08593      5923
           3    0.10158   0.09667   0.09907      8555
           4    0.15806   0.17230   0.16488     16024
           5    0.75225   0.70057   0.72549     72617

    accuracy                        0.53374    113691
   macro avg    0.29501   0.31838   0.30270    113691
weighted avg    0.54950   0.53374   0.53930    113691



In [10]:
# Now onto BERT. Most of the following code was modified from:
# https://www.tensorflow.org/text/tutorials/classify_text_with_bert#define_your_model
# TF Hub handles for the encoder and for the preprocessing model that feeds it.
# NOTE(review): "L-4_H-128_A-2" presumably means 4 layers, hidden size 128, 2 attention
# heads - confirm against the TF Hub model page.
tfhub_handle_encoder = "https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-128_A-2/1"
tfhub_handle_preprocess = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"

# One output class per review score (1 - 5).
num_classes = 5

def build_classifier_model(num_classes):
    '''
    Assemble the (uncompiled) Keras classifier:
    raw string -> BERT preprocessing -> BERT encoder -> dropout -> softmax.
    The two hub layers are loaded from the module-level tfhub_handle_* URLs.
    num_classes: number of units in the final softmax layer
    returns: a tf.keras.Model whose input is a batch of strings named 'text' and
             whose output has one softmax value per class
    '''
    raw_text = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')

    # Preprocessing needed for BERT, loaded from TF Hub
    bert_inputs = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')(raw_text)

    # The encoder is loaded with trainable=True, so its weights are updated during training
    bert_outputs = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')(bert_inputs)
    pooled = bert_outputs['pooled_output']

    # Dropout kept from the tutorial this model is based on (rate 0.1)
    regularised = tf.keras.layers.Dropout(0.1)(pooled)

    # Dense output layer that uses softmax as its activation
    classifier_head = tf.keras.layers.Dense(
        num_classes,
        activation=tf.keras.activations.softmax,
        name='classifier',
    )
    class_probabilities = classifier_head(regularised)

    return tf.keras.Model(inputs=raw_text, outputs=class_probabilities)

# CategoricalCrossEntropy requires one-hot encoding of gold labels so we will need to do that when we create a 
# generator
loss = tf.keras.losses.CategoricalCrossentropy()
# Run weighted F1 score as test metric for optimization
# (this is the object handed to compile() as `metrics`; do not confuse it with the
# sklearn `metrics` module imported at the top of the notebook)
model_metrics = tfa.metrics.F1Score(num_classes=num_classes, average="weighted")

In [11]:
# debug code
# text_test = ['this is such an amazing movie!']
# classifier_model = build_classifier_model(num_classes)
# bert_raw_result = classifier_model(tf.constant(text_test))
# print(bert_raw_result)
# print(len(X_train))

In [12]:
# Training schedule and learning rate.
init_lr = 3e-5
epochs = 5
steps_per_epoch = 2000

# Total number of optimizer steps, of which the first 1% are warm-up.
num_train_steps = epochs * steps_per_epoch
num_warmup_steps = int(num_train_steps * 0.01)

optimizer = optimization.create_optimizer(
    init_lr=init_lr,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps,
    optimizer_type='adamw',
)

In [13]:
# Build the 5-class BERT classifier and attach the optimizer, loss and weighted-F1
# metric defined in the cells above.
classifier_model = build_classifier_model(num_classes)
classifier_model.compile(optimizer=optimizer,
                         loss=loss,
                         metrics=model_metrics)

In [14]:
def data_generator(X: list, y: list, num_sequences_per_batch: int,
                   num_classes: int) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
    '''
    Endless generator of random training batches, suitable as the `x` argument
    of classifier_model.fit.
    https://wiki.python.org/moin/Generators
    https://realpython.com/introduction-to-python-generators/

    X: list of input examples
    y: list of integer class labels in 1 .. num_classes, aligned with X
    num_sequences_per_batch: number of (example, label) pairs drawn per batch
    num_classes: width of the one-hot label encoding
    yields: (batch_X, batch_y) where batch_X is an array of the sampled examples
            and batch_y is the one-hot encoding of their labels shifted down by 1
    raises: ValueError if X and y differ in length, or (from random.sample) if
            num_sequences_per_batch is larger than the dataset. Because this is
            a generator, errors surface on the first next() call.

    Each batch is sampled without replacement, independently of earlier batches,
    from the global `random` state. The generator never terminates on its own.
    '''
    if len(X) != len(y):
        # zip() would silently drop the unmatched tail and hide a data-alignment bug.
        raise ValueError(
            f"X and y must have the same length, got {len(X)} and {len(y)}")
    linked_data = list(zip(X, y))
    while True:
        batch = random.sample(linked_data, num_sequences_per_batch)
        batch_X = np.array([example for example, _ in batch])
        # NOTE: The y-class gets shifted by -1 to use to_categorical, we need to +1 to all
        # predicted values later to get the right class!
        batch_y = np.array([label - 1 for _, label in batch])
        yield batch_X, to_categorical(batch_y, num_classes=num_classes)


In [15]:
# batch size
num_sequences_per_batch = 128
# The generator never ends on its own, so steps_per_epoch is what bounds an epoch:
# each epoch consumes steps_per_epoch randomly sampled batches.
train_generator = data_generator(X_train, y_train, num_sequences_per_batch, num_classes)
history = classifier_model.fit(x=train_generator,
                               epochs=epochs,
                               steps_per_epoch=steps_per_epoch)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [22]:
# functions for save and loading models
# Uncomment if needed.
# save current model
# tf.keras.models.save_model(classifier_model, 'saved_weights', save_format='tf', overwrite=True, include_optimizer=False)
# load saved model
# NOTE: my submission includes my saved_weights folder which contain the weights I obtained at the end of the training
# session. If this code is uncommented, all references to the classifier model below should be renamed to loaded_model
# or vice-versa
# loaded_model = load_model('saved_weights')
# loaded_model.compile(optimizer=optimizer,
#                          loss=loss,
#                          metrics=model_metrics)





In [39]:
# Class probabilities -> predicted score. argmax gives a 0-based class index, so add 1
# to undo the -1 shift applied to the labels at train time.
bert_predictions = classifier_model.predict(X_test).argmax(axis=1) + 1

In [40]:
# Print calculated recall, precision, and F-1 metrics for the bert model
# Note: bert_predictions already has 1 added back (done where the argmax is taken), because
# class labels were shifted down by 1 at train time, so it is directly comparable to y_test.
print(metrics.classification_report(y_test, bert_predictions, digits=5))

              precision    recall  f1-score   support

           1    0.63494   0.71746   0.67368     10572
           2    0.37303   0.16343   0.22728      5923
           3    0.40616   0.33139   0.36498      8555
           4    0.47348   0.24008   0.31861     16024
           5    0.81617   0.94461   0.87571     72617

    accuracy                        0.73735    113691
   macro avg    0.54075   0.47939   0.49205    113691
weighted avg    0.69708   0.73735   0.70619    113691

