# Sentiment Analysis
* Unsupervised Lexicon-Based Models
    1. Bing Liu's Lexicon
    2. MPQA Subjectivity Lexicon
    3. Pattern Lexicon
    4. TextBlob Lexicon
    5. AFINN Lexicon
    6. SentiWordNet Lexicon
    7. VADER Lexicon
* Classifying Sentiment with Supervised Learning
* Traditional Supervised Machine Learning Models
* Newer Supervised Deep Learning Models
* Advanced Supervised Deep Learning Models
* Analyzing Sentiment Causation
    1. Interpreting Predictive Models
    2. Analyzing Topic Models

## Unsupervised Lexicon-Based Models

In [3]:
import pandas as pd
import numpy as np
import text_normalizer as tn
import model_evaluation_utils as meu

# keep printed numpy arrays compact: 2 decimals, 80-column lines
np.set_printoptions(precision=2, linewidth=80)

# import dataset (expects 'review' and 'sentiment' columns)
dataset = pd.read_csv('data/movie_reviews.csv')
reviews = np.array(dataset['review'])
sentiments = np.array(dataset['sentiment'])

# extract data for model evaluation: every row from position 35000 onwards
test_reviews = reviews[35000:]
test_sentiments = sentiments[35000:]
# positions *within the test slice* (not the full dataset) of the three
# reviews that the lexicon demos below print in detail
sample_review_ids = [7626, 3533, 13010]

In [2]:
# TextBlob Lexicon
import textblob

# show TextBlob's polarity score next to the true label for each sample review
for review, sentiment in zip(test_reviews[sample_review_ids], test_sentiments[sample_review_ids]):
    print('REVIEW:', review)
    print('Actual Sentiment:', sentiment)
    print('Predicted Sentiment polarity:', textblob.TextBlob(review).sentiment.polarity)
    print('-'*60)

REVIEW: no comment - stupid movie, acting average or worse... screenplay - no sense at all... SKIP IT!
Actual Sentiment: negative
Predicted Sentiment polarity: -0.3625
------------------------------------------------------------
REVIEW: I don't care if some people voted this movie to be bad. If you want the Truth this is a Very Good Movie! It has every thing a movie should have. You really should Get this one.
Actual Sentiment: positive
Predicted Sentiment polarity: 0.16666666666666674
------------------------------------------------------------
REVIEW: Worst horror film ever but funniest film ever rolled in one you have got to see this film it is so cheap it is unbeliaveble but you have to see it really!!!! P.s watch the carrot
Actual Sentiment: positive
Predicted Sentiment polarity: -0.037239583333333326
------------------------------------------------------------


In [4]:
from importlib import reload
# re-import the evaluation helpers so edits to the module are picked up
reload(meu)
# score every test review with TextBlob
sentiment_polarity = [textblob.TextBlob(review).sentiment.polarity for review in test_reviews]
# scores of at least 0.1 are labelled positive, everything else negative
predicted_sentiments = ['positive' if score >= 0.1 else 'negative' 
                            for score in sentiment_polarity]
# print the metrics, classification report and confusion matrix
meu.display_model_performance_metrics(true_labels=test_sentiments, predicted_labels=predicted_sentiments, classes=['positive', 'negative'])

Model Performance metrics:
------------------------------
Accuracy: 0.7669
Precision: 0.767
Recall: 0.7669
F1 Score: 0.7668

Model Classification report:
------------------------------
              precision    recall  f1-score   support

    positive       0.76      0.78      0.77      7510
    negative       0.77      0.76      0.76      7490

   micro avg       0.77      0.77      0.77     15000
   macro avg       0.77      0.77      0.77     15000
weighted avg       0.77      0.77      0.77     15000


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive       5835     1675
        negative       1822     5668


In [7]:
## AFINN Lexicon
from afinn import Afinn

# AFINN scorer with emoticon handling switched on
afn = Afinn(emoticons=True)

# compute polarity of the three chosen sample reviews
for review, sentiment in zip(test_reviews[sample_review_ids], test_sentiments[sample_review_ids]):
    print('REVIEW:', review)
    print('Actual Sentiment:', sentiment)
    print('Predicted Sentiment polarity:', afn.score(review))
    print('-'*60)

REVIEW: no comment - stupid movie, acting average or worse... screenplay - no sense at all... SKIP IT!
Actual Sentiment: negative
Predicted Sentiment polarity: -7.0
------------------------------------------------------------
REVIEW: I don't care if some people voted this movie to be bad. If you want the Truth this is a Very Good Movie! It has every thing a movie should have. You really should Get this one.
Actual Sentiment: positive
Predicted Sentiment polarity: 3.0
------------------------------------------------------------
REVIEW: Worst horror film ever but funniest film ever rolled in one you have got to see this film it is so cheap it is unbeliaveble but you have to see it really!!!! P.s watch the carrot
Actual Sentiment: positive
Predicted Sentiment polarity: -3.0
------------------------------------------------------------


In [8]:
# predict sentiment on complete test dataset
sentiment_polarity = [afn.score(review) for review in test_reviews]
# an AFINN score of at least 1.0 counts as positive; lower scores as negative
predicted_sentiments = ['positive' if score >= 1.0 else 'negative' for score in sentiment_polarity]

# evaluate model performance using utility function
meu.display_model_performance_metrics(true_labels=test_sentiments, predicted_labels=predicted_sentiments, classes=['positive', 'negative'])

Model Performance metrics:
------------------------------
Accuracy: 0.7118
Precision: 0.7289
Recall: 0.7118
F1 Score: 0.7062

Model Classification report:
------------------------------
              precision    recall  f1-score   support

    positive       0.67      0.85      0.75      7510
    negative       0.79      0.57      0.67      7490

   micro avg       0.71      0.71      0.71     15000
   macro avg       0.73      0.71      0.71     15000
weighted avg       0.73      0.71      0.71     15000


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive       6376     1134
        negative       3189     4301


In [9]:
## SentiWordNet Lexicon
from nltk.corpus import sentiwordnet as swn

# take the first adjective ('a') senti-synset returned for "awesome"
awesome = list(swn.senti_synsets('awesome', 'a'))[0]
# each senti-synset exposes positive, negative and objective scores
print('Positive Polarity Score:', awesome.pos_score())
print('Negative Polarity Score:', awesome.neg_score())
print('Objective Score:', awesome.obj_score())

Positive Polarity Score: 0.875
Negative Polarity Score: 0.125
Objective Score: 0.0


In [12]:
def analyze_sentiment_sentiwordnet_lexicon(review, verbose=False):
    """Predict the sentiment of a review from SentiWordNet synset scores.

    Every token is POS-tagged with ``tn.nlp``; for tokens whose tag contains
    'NN', 'VB', 'JJ' or 'RB' the first matching noun/verb/adjective/adverb
    senti-synset is looked up and its positive, negative and objective scores
    are accumulated. The overall score is (positive - negative) averaged over
    the tokens that had a synset.

    Parameters
    ----------
    review : str
        Text to analyze.
    verbose : bool, default False
        When True, print a one-row table with the normalized objectivity,
        positive, negative and overall scores.

    Returns
    -------
    str
        'positive' when the normalized overall score is >= 0, else 'negative'.
        A review in which no token has a synset scores 0.0 and is therefore
        reported as 'positive' (it previously raised ZeroDivisionError).
    """
    # tokenize and POS tag text tokens
    tagged_text = [(token.text, token.tag_) for token in tn.nlp(review)]
    # tag fragment -> WordNet POS code, tested in the same order as before
    tag_to_wordnet_pos = (('NN', 'n'), ('VB', 'v'), ('JJ', 'a'), ('RB', 'r'))

    pos_score = neg_score = obj_score = 0.0
    token_count = 0
    for word, tag in tagged_text:
        ss_set = None
        for tag_fragment, wordnet_pos in tag_to_wordnet_pos:
            if tag_fragment in tag:
                # look the synsets up once instead of once to test and once to use
                synsets = list(swn.senti_synsets(word, wordnet_pos))
                if synsets:
                    ss_set = synsets[0]
                    break
        # accumulate scores only when a senti-synset was found
        if ss_set is not None:
            pos_score += ss_set.pos_score()
            neg_score += ss_set.neg_score()
            obj_score += ss_set.obj_score()
            token_count += 1

    def _normalize(total):
        # average over scored tokens; 0.0 when nothing could be scored
        return round(float(total) / token_count, 2) if token_count else 0.0

    # aggregate final score
    norm_final_score = _normalize(pos_score - neg_score)
    final_sentiment = 'positive' if norm_final_score >= 0 else 'negative'
    if verbose:
        norm_obj_score = _normalize(obj_score)
        norm_pos_score = _normalize(pos_score)
        norm_neg_score = _normalize(neg_score)
        # to display results in a nice table; from_product builds the same
        # two-level header without the `labels=` keyword removed in pandas 1.0
        header = pd.MultiIndex.from_product(
            [['SENTIMENT STATS:'],
             ['Predicted Sentiment', 'Objectivity', 'Positive', 'Negative', 'Overall']])
        sentiment_frame = pd.DataFrame([[final_sentiment, norm_obj_score, norm_pos_score,
                                         norm_neg_score, norm_final_score]],
                                       columns=header)
        print(sentiment_frame)
    return final_sentiment

In [13]:
# print the SentiWordNet score table for each of the sample reviews
for review, sentiment in zip(test_reviews[sample_review_ids], test_sentiments[sample_review_ids]):
    print('REVIEW:', review)
    print('Actual Sentiment:', sentiment)
    # verbose=True prints the table; the returned label is not used further here
    pred = analyze_sentiment_sentiwordnet_lexicon(review, verbose=True)
    print('-'*60)

REVIEW: no comment - stupid movie, acting average or worse... screenplay - no sense at all... SKIP IT!
Actual Sentiment: negative
     SENTIMENT STATS:                                      
  Predicted Sentiment Objectivity Positive Negative Overall
0            negative        0.74      0.1     0.17   -0.07
------------------------------------------------------------
REVIEW: I don't care if some people voted this movie to be bad. If you want the Truth this is a Very Good Movie! It has every thing a movie should have. You really should Get this one.
Actual Sentiment: positive
     SENTIMENT STATS:                                      
  Predicted Sentiment Objectivity Positive Negative Overall
0            positive        0.74      0.2     0.06    0.14
------------------------------------------------------------
REVIEW: Worst horror film ever but funniest film ever rolled in one you have got to see this film it is so cheap it is unbeliaveble but you have to see it really!!!! P.s watch 

In [14]:
# unlike the sample cell (which scores raw text), the full evaluation scores
# the output of tn.normalize_corpus
norm_test_reviews = tn.normalize_corpus(test_reviews)
predicted_sentiments = [analyze_sentiment_sentiwordnet_lexicon(review, verbose=False) for review in norm_test_reviews]
# print the metrics, classification report and confusion matrix
meu.display_model_performance_metrics(true_labels=test_sentiments, predicted_labels=predicted_sentiments, classes=['positive', 'negative'])

Model Performance metrics:
------------------------------
Accuracy: 0.6296
Precision: 0.6722
Recall: 0.6296
F1 Score: 0.6049

Model Classification report:
------------------------------
              precision    recall  f1-score   support

    positive       0.59      0.88      0.70      7510
    negative       0.76      0.38      0.51      7490

   micro avg       0.63      0.63      0.63     15000
   macro avg       0.67      0.63      0.60     15000
weighted avg       0.67      0.63      0.60     15000


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive       6601      909
        negative       4647     2843


In [15]:
## VADER Lexicon
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# shared analyzer instance, created on first use by analyze_sentiment_vader_lexicon
_VADER_ANALYZER = None


def analyze_sentiment_vader_lexicon(review, 
                                    threshold=0.1,
                                    verbose=False):
    """Predict the sentiment of a review from VADER's compound score.

    The review is cleaned with the ``tn`` helpers (HTML tags stripped,
    accented characters removed, contractions expanded) and scored with
    ``SentimentIntensityAnalyzer.polarity_scores``.

    Parameters
    ----------
    review : str
        Text to analyze.
    threshold : float, default 0.1
        Compound scores >= threshold are labelled 'positive'.
    verbose : bool, default False
        When True, print a one-row table with the compound score and the
        positive / negative / neutral proportions as percentages.

    Returns
    -------
    str
        'positive' or 'negative'.
    """
    global _VADER_ANALYZER

    # pre-process text
    review = tn.strip_html_tags(review)
    review = tn.remove_accented_chars(review)
    review = tn.expand_contractions(review)

    # analyze the sentiment for review; the analyzer is built once and reused
    # instead of being re-created for every single review
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()
    scores = _VADER_ANALYZER.polarity_scores(review)
    # get aggregate scores and final sentiment
    agg_score = scores['compound']
    final_sentiment = 'positive' if agg_score >= threshold else 'negative'

    if verbose:
        def _as_percent(proportion):
            # fixed one-decimal formatting avoids float noise such as
            # '14.000000000000002%' from str(round(x, 2) * 100)
            return '{:.1f}%'.format(round(proportion, 2) * 100)

        # display detailed sentiment statistics
        positive = _as_percent(scores['pos'])
        final = round(agg_score, 2)
        negative = _as_percent(scores['neg'])
        neutral = _as_percent(scores['neu'])
        # from_product builds the same two-level header without the
        # `labels=` keyword removed in pandas 1.0
        header = pd.MultiIndex.from_product(
            [['SENTIMENT STATS:'],
             ['Predicted Sentiment', 'Polarity Score', 'Positive', 'Negative', 'Neutral']])
        sentiment_frame = pd.DataFrame([[final_sentiment, final, positive, negative, neutral]],
                                       columns=header)
        print(sentiment_frame)

    return final_sentiment

In [16]:
# print the VADER score table for each sample review, using a 0.4 threshold
# instead of the function's 0.1 default
for review, sentiment in zip(test_reviews[sample_review_ids], test_sentiments[sample_review_ids]):
    print('REVIEW:', review)
    print('Actual Sentiment:', sentiment)
    pred = analyze_sentiment_vader_lexicon(review, threshold=0.4, verbose=True)
    print('-'*60)

REVIEW: no comment - stupid movie, acting average or worse... screenplay - no sense at all... SKIP IT!
Actual Sentiment: negative
     SENTIMENT STATS:                                         
  Predicted Sentiment Polarity Score Positive Negative Neutral
0            negative           -0.8     0.0%    40.0%   60.0%
------------------------------------------------------------
REVIEW: I don't care if some people voted this movie to be bad. If you want the Truth this is a Very Good Movie! It has every thing a movie should have. You really should Get this one.
Actual Sentiment: positive
     SENTIMENT STATS:                                                     
  Predicted Sentiment Polarity Score Positive             Negative Neutral
0            negative          -0.16    16.0%  14.000000000000002%   69.0%
------------------------------------------------------------
REVIEW: Worst horror film ever but funniest film ever rolled in one you have got to see this film it is so cheap it is unb

In [18]:
# label every (raw) test review with VADER at the same 0.4 threshold as the sample cell
predicted_sentiments = [analyze_sentiment_vader_lexicon(review, threshold=0.4, verbose=False) 
                            for review in test_reviews]
# print the metrics, classification report and confusion matrix
meu.display_model_performance_metrics(true_labels=test_sentiments, predicted_labels=predicted_sentiments, 
                                      classes=['positive', 'negative'])

Model Performance metrics:
------------------------------
Accuracy: 0.7109
Precision: 0.7235
Recall: 0.7109
F1 Score: 0.7067

Model Classification report:
------------------------------
              precision    recall  f1-score   support

    positive       0.67      0.83      0.74      7510
    negative       0.78      0.59      0.67      7490

   micro avg       0.71      0.71      0.71     15000
   macro avg       0.72      0.71      0.71     15000
weighted avg       0.72      0.71      0.71     15000


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive       6235     1275
        negative       3061     4429


## Classifying Sentiment with Supervised Learning

In [9]:
import pandas as pd
import numpy as np
# NOTE(review): the lexicon section above imports these helpers as top-level
# modules (`import text_normalizer as tn`), while this cell uses the
# `modules.` package path - confirm which layout is current and use one.
import modules.text_normalizer as tn
import modules.model_evaluation_utils as meu
import nltk
# keep printed numpy arrays compact: 2 decimals, 80-column lines
np.set_printoptions(precision=2, linewidth=80)

# import dataset
dataset = pd.read_csv('data/movie_reviews.csv')

# take a peek at the data
print(dataset.head())
# pull the text and label columns out as numpy arrays
reviews = np.array(dataset['review'])
sentiments = np.array(dataset['sentiment'])

review sentiment
0  One of the other reviewers has mentioned that ...  positive
1  A wonderful little production. <br /><br />The...  positive
2  I thought this was a wonderful way to spend ti...  positive
3  Basically there's a family where a little boy ...  negative
4  Petter Mattei's "Love in the Time of Money" is...  positive


In [10]:
# build train and test datasets: the first 35000 rows train, the rest test
# (positional split, no shuffling is done here)
train_reviews = reviews[:35000]
train_sentiments = sentiments[:35000]
test_reviews = reviews[35000:]
test_sentiments = sentiments[35000:]

# normalize datasets
# (left disabled: the next cell loads previously pickled normalized corpora
# instead of recomputing them)
#stop_words = nltk.corpus.stopwords.words('english')
#stop_words.remove('no')
#stop_words.remove('but')
#stop_words.remove('not')

#norm_train_reviews = tn.normalize_corpus(train_reviews)
#norm_test_reviews = tn.normalize_corpus(test_reviews)

In [2]:
import pickle

# save objects (disabled - run once after normalizing to create the caches)
filename1 = 'data/norm_train_reviews.pkl'
#with open(filename1, 'wb') as train_file:
#    pickle.dump(norm_train_reviews, train_file)
filename2 = 'data/norm_test_reviews.pkl'
#with open(filename2, 'wb') as test_file:
#    pickle.dump(norm_test_reviews, test_file)

# load objects; `with` closes each file handle as soon as it has been read
# NOTE: pickle.load executes arbitrary code - only load files you created yourself
with open(filename1, 'rb') as train_file:
    norm_train_reviews = pickle.load(train_file)
with open(filename2, 'rb') as test_file:
    norm_test_reviews = pickle.load(test_file)

## Traditional Supervised Machine Learning Models

In [7]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# build BOW features on train reviews (raw counts of unigrams and bigrams;
# min_df=0.0 / max_df=1.0 mean no term is dropped for document frequency)
cv = CountVectorizer(binary=False, min_df=0.0, max_df=1.0, ngram_range=(1,2))
cv_train_features = cv.fit_transform(norm_train_reviews)

# build TFIDF features on train reviews (same n-gram range, sublinear tf scaling)
tv = TfidfVectorizer(use_idf=True, min_df=0.0, max_df=1.0, ngram_range=(1,2), sublinear_tf=True)
tv_train_features = tv.fit_transform(norm_train_reviews)

# transform test reviews into features
# (transform only: the vocabularies were fitted on the training reviews)
cv_test_features = cv.transform(norm_test_reviews)
tv_test_features = tv.transform(norm_test_reviews)

# sanity check: train and test matrices must have the same number of columns
print('BOW model:> Train features shape:', cv_train_features.shape, 
      ' Test features shape:', cv_test_features.shape)
print('TFIDF model:> Train features shape:', tv_train_features.shape, 
      ' Test features shape:', tv_test_features.shape)

BOW model:> Train features shape: (35000, 2116271)  Test features shape: (15000, 2116271)
TFIDF model:> Train features shape: (35000, 2116271)  Test features shape: (15000, 2116271)


In [8]:
# initialize logistic regression and support vector machines
from sklearn.linear_model import SGDClassifier, LogisticRegression

# L2-regularised logistic regression
lr = LogisticRegression(penalty='l2', max_iter=100, C=1)
# SGD-trained classifier with hinge loss, bound to the name `svm`
svm = SGDClassifier(loss='hinge', max_iter=100)

In [20]:
# Logistic Regression model on BOW features
lr_bow_predictions = meu.train_predict_model(classifier=lr, train_features=cv_train_features, 
                                             train_labels=train_sentiments, test_features=cv_test_features, 
                                             test_labels=test_sentiments)
# print the metrics, classification report and confusion matrix
meu.display_model_performance_metrics(true_labels=test_sentiments, predicted_labels=lr_bow_predictions, 
                                      classes=['positive', 'negative'])

Model Performance metrics:
------------------------------
Accuracy: 0.898
Precision: 0.898
Recall: 0.898
F1 Score: 0.898

Model Classification report:
------------------------------
              precision    recall  f1-score   support

    positive       0.89      0.90      0.90      7510
    negative       0.90      0.89      0.90      7490

    accuracy                           0.90     15000
   macro avg       0.90      0.90      0.90     15000
weighted avg       0.90      0.90      0.90     15000


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive       6780      730
        negative        800     6690


In [21]:
# Logistic Regression model on TF-IDF features
# (passes the same `lr` object that the BOW cell passed to train_predict_model)
lr_tfidf_predictions = meu.train_predict_model(classifier=lr, train_features=tv_train_features, 
                                               train_labels=train_sentiments, test_features=tv_test_features, 
                                               test_labels=test_sentiments)
# print the metrics, classification report and confusion matrix
meu.display_model_performance_metrics(true_labels=test_sentiments, predicted_labels=lr_tfidf_predictions, 
                                      classes=['positive', 'negative'])

Model Performance metrics:
------------------------------
Accuracy: 0.8887
Precision: 0.889
Recall: 0.8887
F1 Score: 0.8886

Model Classification report:
------------------------------
              precision    recall  f1-score   support

    positive       0.88      0.90      0.89      7510
    negative       0.90      0.87      0.89      7490

    accuracy                           0.89     15000
   macro avg       0.89      0.89      0.89     15000
weighted avg       0.89      0.89      0.89     15000


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive       6778      732
        negative        938     6552


## Newer Supervised Deep Learning Models

In [22]:
import gensim
import keras
from keras.models import Sequential
from keras.layers import Dropout, Activation, Dense
from keras.layers.normalization import BatchNormalization
from sklearn.preprocessing import LabelEncoder

In [23]:
le = LabelEncoder()
num_classes = 2
# tokenize train reviews & encode train labels
tokenized_train = [tn.tokenizer.tokenize(text) for text in norm_train_reviews]
# the label -> integer mapping is learned from the training labels only
y_tr = le.fit_transform(train_sentiments)
y_train = keras.utils.to_categorical(y_tr, num_classes)
# tokenize test reviews & encode test labels
tokenized_test = [tn.tokenizer.tokenize(text) for text in norm_test_reviews]
# reuse the fitted mapping (transform, not fit_transform) so the test labels
# can never be encoded differently from the training labels
y_ts = le.transform(test_sentiments)
y_test = keras.utils.to_categorical(y_ts, num_classes)

# print class label encoding map and encoded labels
print('Sentiment class label map:', dict(zip(le.classes_, le.transform(le.classes_))))
print('Sample test label transformation:\n'+'-'*35, 
      '\nActual Labels:', test_sentiments[:3], 
      '\nEncoded Labels:', y_ts[:3], 
      '\nOne hot encoded Labels:\n', y_test[:3])

Sentiment class label map: {'negative': 0, 'positive': 1}
Sample test label transformation:
----------------------------------- 
Actual Labels: ['negative' 'positive' 'negative'] 
Encoded Labels: [0 1 0] 
One hot encoded Labels:
 [[1. 0.]
 [0. 1.]
 [1. 0.]]


In [24]:
# build word2vec model on the tokenized training reviews:
# 512-dimensional vectors, context window of 150 tokens, words seen fewer
# than 10 times are ignored, frequent words are down-sampled (sample=1e-3)
w2v_num_features = 512
w2v_model = gensim.models.Word2Vec(tokenized_train, size=w2v_num_features, window=150, min_count=10, sample=1e-3)

In [25]:
# function to help compute averaged word vector representations for any corpus of text documents
def averaged_word2vec_vectorizer(corpus, model, num_features):
    """Represent each tokenized document as the mean of its word vectors.

    Parameters
    ----------
    corpus : iterable of list of str
        Tokenized documents.
    model : trained word2vec model
        Must expose its keyed vectors as ``model.wv``.
    num_features : int
        Dimensionality of the word vectors.

    Returns
    -------
    numpy.ndarray
        Array of shape (number of documents, num_features). Documents with no
        in-vocabulary word get an all-zero row; an empty corpus yields an
        array of shape (0, num_features).
    """
    word_vectors = model.wv
    # gensim >= 4 names the vocabulary list `index_to_key`; older releases
    # call it `index2word` - support both
    vocab_words = getattr(word_vectors, 'index_to_key', None)
    if vocab_words is None:
        vocab_words = word_vectors.index2word
    vocabulary = set(vocab_words)

    def average_word_vectors(words, model, vocabulary, num_features):
        feature_vector = np.zeros((num_features,), dtype='float64')
        nwords = 0.
        for word in words:
            if word in vocabulary:
                nwords = nwords + 1.
                # look vectors up through model.wv: indexing the model object
                # itself is deprecated and was removed in gensim 4
                feature_vector = np.add(feature_vector, model.wv[word])
        # leave the zero vector untouched when no word was found
        if nwords:
            feature_vector = np.divide(feature_vector, nwords)
        return feature_vector

    features = [average_word_vectors(tokenized_sentence, model, vocabulary, num_features)
                for tokenized_sentence in corpus]
    if not features:
        # keep the result two-dimensional even when there are no documents
        return np.zeros((0, num_features), dtype='float64')
    return np.array(features)

In [26]:
# generate averaged word vector features from word2vec model
# (one w2v_num_features-long row per review, for both train and test)
avg_wv_train_features = averaged_word2vec_vectorizer(corpus=tokenized_train, model=w2v_model, num_features=w2v_num_features)
avg_wv_test_features = averaged_word2vec_vectorizer(corpus=tokenized_test, model=w2v_model, num_features=w2v_num_features)

In [27]:
# feature engineering with GloVe model
# each normalized review is passed through tn.nlp_vec and the `.vector`
# attribute of the result is used as its feature row
train_nlp = [tn.nlp_vec(item) for item in norm_train_reviews]
train_glove_features = np.array([item.vector for item in train_nlp])

# same procedure for the test reviews
test_nlp = [tn.nlp_vec(item) for item in norm_test_reviews]
test_glove_features = np.array([item.vector for item in test_nlp])

In [28]:
# check feature vector dimensions: rows should equal the number of reviews,
# columns the embedding size of the respective model
print('Word2Vec model:> Train features shape:', avg_wv_train_features.shape, 
      ' Test features shape:', avg_wv_test_features.shape)
print('GloVe model:> Train features shape:', train_glove_features.shape, 
      ' Test features shape:', test_glove_features.shape)

Word2Vec model:> Train features shape: (35000, 512)  Test features shape: (15000, 512)
GloVe model:> Train features shape: (35000, 300)  Test features shape: (15000, 300)


In [29]:
# function to build desired DNN model
def construct_deepnn_architecture(num_input_features):
    """Build and compile the fully connected sentiment classifier.

    The network stacks three identical hidden blocks
    (Dense(512) -> BatchNormalization -> ReLU -> Dropout(0.2)) followed by a
    two-unit softmax output. It is compiled with categorical cross-entropy,
    the Adam optimizer and accuracy as the tracked metric.

    Parameters
    ----------
    num_input_features : int
        Length of each input feature vector.

    Returns
    -------
    The compiled Sequential model.
    """
    network = Sequential()

    # only the first Dense layer declares the input shape
    per_block_dense_kwargs = ({'input_shape': (num_input_features,)}, {}, {})
    for dense_kwargs in per_block_dense_kwargs:
        network.add(Dense(512, kernel_initializer='glorot_uniform', **dense_kwargs))
        network.add(BatchNormalization())
        network.add(Activation('relu'))
        network.add(Dropout(0.2))

    # output layer: one unit per class, turned into probabilities by softmax
    network.add(Dense(2))
    network.add(Activation('softmax'))

    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

In [32]:
# build DNN model based on Word2Vec input feature representations
# (this creates a fresh model; no training happens in this cell)
w2v_dnn = construct_deepnn_architecture(num_input_features=w2v_num_features)

# visualize DNN model architecture (optional, left disabled)
#from IPython.display import SVG
#from keras.utils.vis_utils import model_to_dot

#SVG(model_to_dot(w2v_dnn, show_shapes=True, show_layer_names=False, rankdir='TB').create(prog='dot', format='svg'))

In [33]:
batch_size = 100
# uncomment to run again - else load pickle object in following cell
# NOTE(review): with the fit call disabled, `w2v_dnn` is still the freshly
# built, unfitted model when the next cells predict with it and pickle it,
# unless a trained model is loaded first; the only load statement in this
# notebook is commented out and sits after the evaluation cell.
# w2v_dnn.fit(avg_wv_train_features, y_train, epochs=10, batch_size=batch_size, shuffle=True, validation_split=0.1, verbose=1)

Train on 31500 samples, validate on 3500 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x19ef1fe6688>

In [34]:
# predict integer class ids for the averaged word2vec test features
y_pred = w2v_dnn.predict_classes(avg_wv_test_features)
# map the integer ids back to the original sentiment labels
predictions = le.inverse_transform(y_pred)
# print the metrics, classification report and confusion matrix
meu.display_model_performance_metrics(true_labels=test_sentiments, predicted_labels=predictions, classes=['positive', 'negative'])

Model Performance metrics:
------------------------------
Accuracy: 0.8751
Precision: 0.8752
Recall: 0.8751
F1 Score: 0.8751

Model Classification report:
------------------------------
              precision    recall  f1-score   support

    positive       0.88      0.86      0.87      7510
    negative       0.87      0.89      0.88      7490

    accuracy                           0.88     15000
   macro avg       0.88      0.88      0.88     15000
weighted avg       0.88      0.88      0.88     15000


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive       6494     1016
        negative        858     6632


In [38]:
import pickle
# save object
# NOTE(review): this overwrites models/w2v_dnn.pkl on every run. While the
# fit call above stays commented out, the model written here is unfitted -
# consider disabling this dump (as the GloVe cell does) when not retraining.
filename = 'models/w2v_dnn.pkl'
# `with` guarantees the file is flushed and closed once the dump finishes
with open(filename, 'wb') as model_file:
    pickle.dump(w2v_dnn, model_file)

# load object
# with open(filename, 'rb') as model_file:
#     w2v_dnn = pickle.load(model_file)

In [39]:
# build DNN model (300 input features - must match the width of the GloVe
# feature matrices built earlier)
glove_dnn = construct_deepnn_architecture(num_input_features=300)
# train DNN model on GloVe training features
batch_size = 100
# uncomment to run again - else load pickle object in following cell
# NOTE(review): with the fit call disabled and the load in the following cell
# also commented out, the predictions below come from an unfitted model.
# glove_dnn.fit(train_glove_features, y_train, epochs=5, batch_size=batch_size, shuffle=True, validation_split=0.1, verbose=1)
# get predictions on test reviews
y_pred = glove_dnn.predict_classes(test_glove_features)
# map the integer class ids back to the original sentiment labels
predictions = le.inverse_transform(y_pred)
# evaluate model performance
meu.display_model_performance_metrics(true_labels=test_sentiments, predicted_labels=predictions, classes=['positive', 'negative'])

Train on 31500 samples, validate on 3500 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model Performance metrics:
------------------------------
Accuracy: 0.8301
Precision: 0.8424
Recall: 0.8301
F1 Score: 0.8286

Model Classification report:
------------------------------
              precision    recall  f1-score   support

    positive       0.91      0.74      0.81      7510
    negative       0.78      0.92      0.84      7490

    accuracy                           0.83     15000
   macro avg       0.84      0.83      0.83     15000
weighted avg       0.84      0.83      0.83     15000


Prediction Confusion Matrix:
------------------------------
                 Predicted:         
                   positive negative
Actual: positive       5526     1984
        negative        565     6925


In [40]:
import pickle
# save object (both the dump and the load are disabled; only the path is set)
filename = 'models/glove_dnn.pkl'
# pickle.dump(glove_dnn, open(filename, 'wb'))

# load object
# pickle.dump / pickle.load here would open the file without closing it;
# wrap the call in `with open(...)` when re-enabling either line
# glove_dnn = pickle.load(open(filename, 'rb'))

## Advanced Supervised Deep Learning Models

In [41]:
# tokenize datasets such that each text review is decomposed into corresponding tokens
# (same expressions as in the earlier label-encoding cell; repeated so this
# section can be run on its own)
tokenized_train = [tn.tokenizer.tokenize(text) for text in norm_train_reviews]
tokenized_test = [tn.tokenizer.tokenize(text) for text in norm_test_reviews]

In [42]:
# create vocabulary
from collections import Counter

# count every token that occurs in the training reviews
token_counter = Counter(token for tokens in tokenized_train for token in tokens)
# give each distinct token an integer id in first-seen order, starting at 1
vocab_map = {token: position for position, token in enumerate(token_counter, start=1)}
max_index = np.max(list(vocab_map.values()))
# two reserved entries: 0 for padding, one past the largest id for unseen tokens
vocab_map['PAD_INDEX'] = 0
vocab_map['NOT_FOUND_INDEX'] = max_index + 1
vocab_size = len(vocab_map)

# report the vocabulary size and show ten of the mappings
print('Vocabulary Size:', vocab_size)
print('Sample slice of vocabulary map:', dict(list(vocab_map.items())[10:20]))

Vocabulary Size: 84660
Sample slice of vocabulary map: {'first': 11, 'thing': 12, 'strike': 13, 'brutality': 14, 'unflinche': 15, 'scene': 16, 'violence': 17, 'set': 18, 'word': 19, 'go': 20}


In [43]:
# encode text sentiment class labels into numeric representations
from keras.preprocessing import sequence
from sklearn.preprocessing import LabelEncoder

# get max length of train corpus and initialize label encoder
le = LabelEncoder()
num_classes = 2 # positive:1, negative:0
max_len = np.max([len(review) for review in tokenized_train])

## train reviews data corpus
# convert tokenized text reviews to numeric vectors
# (plain dict lookup: every training token is a key, since the vocabulary
# was built from tokenized_train)
train_X = [[vocab_map[token] for token in tokenized_review] for tokenized_review in tokenized_train]
train_X = sequence.pad_sequences(train_X, maxlen=max_len) # pad

## train prediction class labels
# convert text sentiment labels (negative/positive) to binary encodings (0/1)
train_y = le.fit_transform(train_sentiments)

## test reviews data corpus
# convert tokenized text reviews to numeric vectors
# tokens missing from the vocabulary map to NOT_FOUND_INDEX; because the test
# is on the truthiness of .get(), a token whose id is 0 is treated the same way
test_X = [[vocab_map[token] if vocab_map.get(token) else vocab_map['NOT_FOUND_INDEX'] for token in tokenized_review] for tokenized_review in tokenized_test]
# test sequences use the max length taken from the training corpus
test_X = sequence.pad_sequences(test_X, maxlen=max_len)

## test prediction class labels
# convert text sentiment labels (negative/positive) to binary encodings (0/1)
# (transform only - the encoder was fitted on the training labels above)
test_y = le.transform(test_sentiments)

# view vector shapes
print('Max length of train review vectors:', max_len)
print('Train review vectors shape:', train_X.shape, ' Test review vectors shape:', test_X.shape)

Max length of train review vectors: 1419
Train review vectors shape: (35000, 1419)  Test review vectors shape: (15000, 1419)


In [44]:
# construct model architecture
from keras.models import Sequential
from keras.layers import Dense, Embedding, Dropout, SpatialDropout1D
from keras.layers import LSTM

EMBEDDING_DIM = 128 # dimension for dense embeddings for each token
LSTM_DIM = 64 # total LSTM units

model = Sequential()
# one embedding row per vocabulary id; inputs are sequences of length max_len
model.add(Embedding(input_dim=vocab_size, output_dim=EMBEDDING_DIM, input_length=max_len))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(LSTM_DIM, dropout=0.2, recurrent_dropout=0.2))
# single sigmoid unit paired with binary cross-entropy: the targets are the
# 0/1 labels produced by the label encoder
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [45]:
batch_size = 100
# training is disabled; the recorded output below shows a run that was
# interrupted during the first epoch (KeyboardInterrupt)
# model.fit(train_X, train_y, epochs=5, batch_size=batch_size, shuffle=True, validation_split=0.1, verbose=1)

Train on 31500 samples, validate on 3500 samples
Epoch 1/5
 1800/31500 [>.............................] - ETA: 3:06:52 - loss: 0.6903 - accuracy: 0.5294

KeyboardInterrupt: 

In [47]:
# import pickle
# save object
# filename = 'models/embedding.pkl'
# pickle.dump(model, open(filename, 'wb'))

# load object
# model = pickle.load(open(filename, 'rb'))

In [48]:
# predict sentiments on test data
# pred_test = model.predict_classes(test_X)
# predictions = le.inverse_transform(pred_test.flatten())
# evaluate model performance
# meu.display_model_performance_metrics(true_labels=test_sentiments, predicted_labels=predictions, classes=['positive', 'negative'])

## Analyzing Sentiment Causation

In [11]:
## Interpreting Predictive Models
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

# build BOW features on train reviews
cv = CountVectorizer(binary=False, min_df=0.0, max_df=1.0, ngram_range=(1,2))
cv_train_features = cv.fit_transform(norm_train_reviews)
# build Logistic Regression model
lr = LogisticRegression()
lr.fit(cv_train_features, train_sentiments)

# build Text Classification Pipeline
lr_pipeline = make_pipeline(cv, lr)

# save the list of prediction classes (positive, negative)
classes = list(lr_pipeline.classes_)