# Sentiment Analysis with BERT
We carry out sentiment analysis of labelled data by fine-tuning BERT (Bidirectional Encoder Representations from Transformers).

In [1]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 14.9 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 49.4 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 66.4 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.24.0


In [2]:
from transformers import AutoTokenizer, TFBertForSequenceClassification
from transformers import InputExample, InputFeatures
# Load the tokenizer for the "bert-base-uncased" checkpoint; later cells use it
# to encode both the training examples and the test texts.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Build the TensorFlow sequence classifier from the same checkpoint name.
# from_pt=True requests that the weights be loaded from the PyTorch checkpoint.
model = TFBertForSequenceClassification.from_pretrained("bert-base-uncased", from_pt=True)

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
# Print the per-layer parameter counts of the loaded classifier.
model.summary()

Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bert (TFBertMainLayer)      multiple                  109482240 
                                                                 
 dropout_37 (Dropout)        multiple                  0         
                                                                 
 classifier (Dense)          multiple                  1538      
                                                                 
Total params: 109,483,778
Trainable params: 109,483,778
Non-trainable params: 0
_________________________________________________________________


In [4]:
import tensorflow as tf
import pandas as pd

In [6]:
# Load the labelled dataset from the working directory; later cells read its
# 'Text' and 'Sentiment' columns.
df = pd.read_csv('stock_data.csv')

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set, then take 25% of the remaining 80%
# (i.e. 20% of all rows) for validation, leaving 60% for training.
# The fixed random_state makes both splits repeatable.
train, test = train_test_split(df, test_size=0.2, random_state=111)
train, val = train_test_split(train, test_size=0.25, random_state=111)

def change_sent_labels():
  """Recode the negative sentiment label from -1 to 0 in `train` and `val`.

  Operates on the notebook-global `train` and `val` frames in place and
  returns nothing. All other label values are left unchanged. The `test`
  frame is deliberately not touched: the evaluation cells compare its labels
  against -1 and 1.
  """
  for split in (train, val):
    # Address the label column by name and recode it in one vectorized call
    # instead of looking it up by position (x[1]) row by row.
    # NOTE(review): assumes 'Sentiment' is the second column of the frame,
    # which is what the previous positional lookup relied on - confirm.
    split['Sentiment'] = split['Sentiment'].replace(-1, 0)
change_sent_labels()

In [8]:
def convert_dataframes_to_examples():
  """Wrap every row of the global train/val/test frames in an InputExample.

  Each example carries the row's 'Text' as `text_a` and its 'Sentiment' as
  `label`; `guid` and `text_b` are left as None.

  Returns:
    A three-element list `[train_examples, val_examples, test_examples]`,
    each element being the result of a row-wise `DataFrame.apply`.
  """
  def as_examples(frame):
    # One single-text example per row of the given frame.
    return frame.apply(
        lambda row: InputExample(guid=None,
                                 text_a=row['Text'],
                                 text_b=None,
                                 label=row['Sentiment']),
        axis=1)

  return [as_examples(split) for split in (train, val, test)]

In [9]:
def convert_examples_to_tf_dataset(examples, max_length=128):
    """Tokenize examples and expose them as an unbatched tf.data.Dataset.

    Args:
        examples: Iterable of objects exposing `text_a` (the text to encode)
            and `label` attributes.
        max_length: Length every encoded sequence is padded or truncated to.

    Returns:
        A `tf.data.Dataset` yielding `(features, label)` pairs, where
        `features` is a dict with int32 "input_ids", "attention_mask" and
        "token_type_ids" vectors and `label` is an int64 scalar.
    """
    features = []  # InputFeatures, one per example, replayed by the generator

    for e in examples:
        input_dict = tokenizer.encode_plus(
            e.text_a,
            add_special_tokens=True,
            max_length=max_length,
            return_token_type_ids=True,
            return_attention_mask=True,
            # `padding="max_length"` is the supported spelling of the
            # deprecated `pad_to_max_length=True` flag.
            padding="max_length",
            truncation=True,
        )

        features.append(
            InputFeatures(
                input_ids=input_dict["input_ids"],
                attention_mask=input_dict["attention_mask"],
                token_type_ids=input_dict["token_type_ids"],
                label=e.label,
            )
        )

    def gen():
        # Replay the pre-computed features in the (inputs, label) layout
        # declared to from_generator below.
        for f in features:
            yield (
                {
                    "input_ids": f.input_ids,
                    "attention_mask": f.attention_mask,
                    "token_type_ids": f.token_type_ids,
                },
                f.label,
            )

    return tf.data.Dataset.from_generator(
        gen,
        ({"input_ids": tf.int32, "attention_mask": tf.int32, "token_type_ids": tf.int32}, tf.int64),
        (
            {
                "input_ids": tf.TensorShape([None]),
                "attention_mask": tf.TensorShape([None]),
                "token_type_ids": tf.TensorShape([None]),
            },
            tf.TensorShape([]),
        ),
    )

In [10]:
train_examples, val_examples, test_examples = convert_dataframes_to_examples()

# Training pipeline: shuffle within a 100-element buffer, batch by 32, and
# repeat so that one pass over `train_data` covers the training set twice.
train_data = (
    convert_examples_to_tf_dataset(list(train_examples))
    .shuffle(100)
    .batch(32)
    .repeat(2)
)

# Validation pipeline: batched only, no shuffling or repetition.
val_data = convert_examples_to_tf_dataset(list(val_examples)).batch(32)



In [11]:
# NOTE(review): the seed is set only here, after the model and the datasets
# were created in earlier cells - confirm that is early enough for the run to
# be reproducible.
tf.random.set_seed(111)

# The loss is configured with from_logits=True, i.e. the classifier output is
# treated as raw logits; labels are integer class indices.
model.compile(
    optimizer=tf.keras.optimizers.Adam(
        learning_rate=3e-5,
        epsilon=1e-08,
        clipnorm=1.0,
    ),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy')],
)
model.fit(
    train_data,
    epochs=2,
    validation_data=val_data,
)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7effc68b77d0>

In [12]:
# Encode every test text in one batch and run a single forward pass.
text_vals = test['Text'].values.tolist()
test_batch = tokenizer(
    text_vals,
    max_length=128,
    padding=True,
    truncation=True,
    return_tensors='tf',
)
test_outputs = model(test_batch)

# The first element of the model output holds the per-class scores; turn them
# into probabilities and pick the most likely class index per row.
test_predictions = tf.nn.softmax(test_outputs[0], axis=-1)
labels = [-1, 1]
label = tf.argmax(test_predictions, axis=1).numpy()

# Map class index 0 back to -1 (everything else to 1) and store the result as
# the third column of `test`.
# NOTE(review): DataFrame.insert raises if the column already exists, so this
# cell cannot be re-run on the same `test` frame.
test.insert(2, 'BERT Prediction', [-1 if class_idx == 0 else 1 for class_idx in label])

# Sentiment Analysis with Rule-Based VADER

In [13]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
# Fetch the lexicon that SentimentIntensityAnalyzer needs.
nltk.download('vader_lexicon')

vader = SentimentIntensityAnalyzer()

# Label a text 1 when its compound score is non-negative (a score of exactly 0
# therefore counts as positive) and -1 otherwise.
test['VADER Prediction'] = [
    1 if vader.polarity_scores(text)['compound'] >= 0 else -1
    for text in test['Text']
]

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


## Comparison of Statistics for VADER and BERT on Test Set

In [14]:
def create_confusion_matrix(x, col):
  """Classify one row as 'TP', 'FP', 'FN' or 'TN'.

  The true label is the row's second entry (position 1) and the prediction is
  `x[col]`; 1 is the positive class and -1 the negative class.

  Args:
    x: One row, normally the pandas Series that `DataFrame.apply(axis=1)`
      passes in. Any other object indexable by `1` and `col` also works.
    col: Key of the prediction entry in `x`.

  Returns:
    'TP', 'FP', 'FN' or 'TN', or 0 when either value is neither 1 nor -1.
  """
  # Read the true label by position explicitly. Plain `x[1]` on a
  # label-indexed Series depends on pandas' deprecated integer-as-position
  # fallback; non-Series inputs keep the plain lookup.
  actual = x.iloc[1] if hasattr(x, 'iloc') else x[1]
  predicted = x[col]

  if actual == 1 and predicted == 1:
    return 'TP'
  elif actual == -1 and predicted == 1:
    return 'FP'
  elif actual == 1 and predicted == -1:
    return 'FN'
  elif actual == -1 and predicted == -1:
    return 'TN'
  else:
    return 0
def get_statistics(conf_vals, label):
  """Print accuracy, precision, recall and F1 score for a set of counts.

  Args:
    conf_vals: Dict mapping 'TP', 'TN', 'FP' and 'FN' to their counts. A
      category that is absent is counted as 0, and extra keys are ignored.
    label: Heading printed before the counts and the metrics.

  Returns:
    None. The heading, the raw counts and the four metrics (as percentages
    rounded to two decimals) are printed.
  """
  print(label)
  print(conf_vals)

  # A category that never occurred is missing from the counts entirely, so
  # default it to 0 instead of raising KeyError.
  tp, tn, fp, fn = (conf_vals.get(key, 0) for key in ('TP', 'TN', 'FP', 'FN'))

  def _ratio(numerator, denominator):
    # An empty denominator means the metric is undefined; report it as 0.
    return numerator / denominator if denominator else 0.0

  accuracy = _ratio(tp + tn, tp + tn + fp + fn)
  precision = _ratio(tp, tp + fp)
  recall = _ratio(tp, tp + fn)
  f1_score = _ratio(2*precision*recall, precision + recall)
  print('Accuracy: ', round(100 * accuracy, 2),'%',
        '\nPrecision: ', round(100 * precision, 2),'%',
        '\nRecall: ', round(100 * recall, 2),'%',
        '\nF1 Score: ', round(100 * f1_score, 2),'%')

In [15]:
def _score_predictions(model_name):
  """Add the per-row accuracy and confusion columns for one model to `test`.

  Reads the '<model_name> Prediction' column of the global `test` frame and
  writes '<model_name> Accuracy' (1 where the prediction equals the true
  label, else 0) and '<model_name> Confusion Matrix'.

  Returns:
    Dict mapping each confusion category to its row count.
  """
  pred_col = model_name + ' Prediction'
  # Compare against this model's own prediction column by name. Looking the
  # prediction up by position (x[2]) always hit 'BERT Prediction', so the
  # VADER accuracy column used to duplicate BERT's.
  # NOTE(review): assumes the true label column is 'Sentiment' (the second
  # column, which create_confusion_matrix reads by position) - confirm.
  test[model_name + ' Accuracy'] = (test['Sentiment'] == test[pred_col]).astype(int)
  test[model_name + ' Confusion Matrix'] = test.apply(
      lambda row: create_confusion_matrix(row, pred_col), axis=1)
  return test[model_name + ' Confusion Matrix'].value_counts().to_dict()

vader_conf_vals = _score_predictions('VADER')
get_statistics(vader_conf_vals, 'VADER')

bert_conf_vals = _score_predictions('BERT')
get_statistics(bert_conf_vals, 'BERT')

VADER
{'TP': 627, 'FP': 251, 'TN': 160, 'FN': 121}
Accuracy:  67.9 % 
Precision:  71.41 % 
Recall:  83.82 % 
F1 Score:  77.12 %
BERT
{'TP': 652, 'TN': 299, 'FP': 112, 'FN': 96}
Accuracy:  82.05 % 
Precision:  85.34 % 
Recall:  87.17 % 
F1 Score:  86.24 %
