In [31]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.naive_bayes import BernoulliNB, MultinomialNB
from sklearn.model_selection import train_test_split

In [2]:
yelp_df = pd.read_csv('yelp_labelled.txt', delimiter='\n')

In [3]:
yelp_df['sentiment'] = yelp_df['review'].str[-1]
yelp_df['sentiment'] = pd.to_numeric(yelp_df['sentiment'])
yelp_df['review'] = yelp_df['review'].str[:-3]

In [4]:
yelp_df.head()

Unnamed: 0,review,sentiment
0,Wow... Loved this place,1
1,Crust is not good,0
2,Not tasty and the texture was just nasty,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


# Version 1 (original) - Positive, neutral, and negative keywords

In [5]:
sentiment_keywords = ['the', 'not', "don't", 'good', 'would', 'never', 'time', 'ever', 'badly',
                      'minutes', "won't", 'bad', 'much', 'again', 'worst', 'disappointed', 
                      'really', 'slow', 'waited', 'wait', 'bland', 'flavor', 'experience', 'best', 
                      'terrible', 'rude', 'cold', 'taste', 'overpriced', 'poor', 'mediocre', 
                      'management', 'off', 'impressed', 'money', 'horrible', 'sick', 'waiter', 'tasted', 'nasty', 
                      'great', 'angry', 'honestly', 'care', 'disgusted', 'recommended', 'not worth', 'amazing', 
                      'liked', 'hour', 'dirty', 'clean', 'unfortunately', 'worse', 'friendly', 'love', 'loved', 
                      'absolutely', 'excellent', 'recommend', 'wonderful', 'delicious', 'fantastic', 'incredible', 
                      'nice', 'disappointment', 'tasteless', 'enjoyed', 'waste', 'authentic', 'homemade', 'worse',
                     'pleasant', 'pleased', 'outstanding', 'generous', 'insulted', 'soggy', 'lacking', 'sad',
                     'stale', 'helpful', 'sucks', 'beautiful', 'i', 'very', 'food', 'rotten', 'spoiled', 'stomach',
                     'only', 'fresh']
sentiment_phrases_2word = ['be back', 'i would', "wont' be", 'very good', 'i love', 'would not', 
                    'so good', 'not good', 'here again', 'will never', 'good food', 'great place', 'great food', 
                     'really good', 'customer service', '5 stars', 'at best', 'not like', 'at all', 'thumbs up',
                    'great service', 'definitely not', 'definitely will', 'highly recommend', 'highly recommended',
                          'very disappointing', 'not pleasant', 'not pleased', 'not helpful', 'good quality',
                          'bad quality', 'not fresh', 'not nice']

sentiment_phrases_3word = ["won't be back", 'will be back', "won't be going", 'will never ever', 
                           "won't be disappointed", 'back anytime soon', 'would not recommend', 'service was slow', ]

In [6]:
for word in sentiment_keywords:
    yelp_df[word] = yelp_df['review'].str.contains(word, case=False)

for phrase in sentiment_phrases_2word:
    yelp_df[phrase] = yelp_df['review'].str.contains(phrase, case=False)

for phrase in sentiment_phrases_3word:
    yelp_df[phrase] = yelp_df['review'].str.contains(phrase, case=False)

In [12]:
data = yelp_df[sentiment_keywords + sentiment_phrases_2word + sentiment_phrases_3word]
target = yelp_df['sentiment']

X = data
Y = target

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2, random_state=200)

bnb = BernoulliNB()

bnb.fit(X_train, Y_train)

training_pred = bnb.predict(X_train)
test_pred = bnb.predict(X_test)

print('This model correctly classified {}% of the training set.'.format(100*round((Y_train == training_pred).sum()/800,2)))
print('This model correctly classified {}% of the test set.'.format(100*round((Y_test == test_pred).sum()/200,2)))

training_confusion_matrix = pd.crosstab(Y_train, training_pred)
training_negative_reviews = training_confusion_matrix.iloc[0,0]
training_false_negative_reviews = training_confusion_matrix.iloc[1,0]
training_positive_reviews = training_confusion_matrix.iloc[1,1]
training_false_positive_reviews = training_confusion_matrix.iloc[0,1]

training_sensitivity = training_negative_reviews/(training_negative_reviews + training_false_negative_reviews)
training_specificity = training_positive_reviews/(training_positive_reviews + training_false_positive_reviews)

print('-----------------------------------------------------------------------')
print('{}% of the negative review predictions were correct in the training set.'.format(round(training_sensitivity*100, 0)))
print('{}% of the positive review predictions were correct in the training set.'.format(round(training_specificity*100, 0)))

test_confusion_matrix = pd.crosstab(Y_test, test_pred)
test_negative_reviews = test_confusion_matrix.iloc[0,0]
test_false_negative_reviews = test_confusion_matrix.iloc[1,0]
test_positive_reviews = test_confusion_matrix.iloc[1,1]
test_false_positive_reviews = test_confusion_matrix.iloc[0,1]

test_sensitivity = test_negative_reviews/(test_negative_reviews + test_false_negative_reviews)
test_specificity = test_positive_reviews/(test_positive_reviews + test_false_positive_reviews)
print('-----------------------------------------------------------------------')
print('{}% of the negative review predictions were correct in the test set.'.format(round(test_sensitivity*100, 0)))
print('{}% of the positive review predictions were correct in the test set.'.format(round(test_specificity*100, 0)))

This model correctly classified 82.0% of the training set.
This model correctly classified 76.0% of the test set.
-----------------------------------------------------------------------
88.0% of the negative review predictions were correct in the training set.
77.0% of the positive review predictions were correct in the training set.
-----------------------------------------------------------------------
83.0% of the negative review predictions were correct in the test set.
71.0% of the positive review predictions were correct in the test set.


The model seems to be slightly overfit

# Version 2 - only negative keywords

In [13]:
sentiment_keywords = ['bad', 'worst', 'disappointed', 'slow', 'waited', 'wait', 'bland', 'slow', 'waited', 'wait',
                      'bland', 'terrible', 'rude', 'cold', 'overpriced', 'poor', 'mediocre', 'horrible', 'sick',
                      'nasty', 'angry', 'disgusted', 'amazing', 'hour', 'dirty', 'clean', 'unfortunately', 'worse']
                        
sentiment_phrases_2word = ['not worth', 'bad service', 'not good', 'not great', 'not tasty', 'no good', 'really bad', 'very bad'
                          'not good', 'bad experience', 'horrible experience', 'not great', 'very disappointed',
                          'not impressed']

sentiment_phrases_3word = ["won't be back", "won't be going", 'will never ever', 'not very tasty'
                           "wont' be back anytime soon", 'would not recommend', 'service was slow']

In [14]:
for word in sentiment_keywords:
    yelp_df[word] = yelp_df['review'].str.contains(word, case=False)

for phrase in sentiment_phrases_2word:
    yelp_df[phrase] = yelp_df['review'].str.contains(phrase, case=False)

for phrase in sentiment_phrases_3word:
    yelp_df[phrase] = yelp_df['review'].str.contains(phrase, case=False)

In [15]:
data = yelp_df[sentiment_keywords + sentiment_phrases_2word + sentiment_phrases_3word]
target = yelp_df['sentiment']

In [16]:
X = data
Y = target

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2, random_state=200)

bnb = BernoulliNB()

bnb.fit(X_train, Y_train)

training_pred = bnb.predict(X_train)
test_pred = bnb.predict(X_test)

print('This model correctly classified {}% of the training set.'.format(100*round((Y_train == training_pred).sum()/800,2)))
print('This model correctly classified {}% of the test set.'.format(100*round((Y_test == test_pred).sum()/200,2)))
print('---------------------------------------------------------')

training_confusion_matrix = pd.crosstab(Y_train, training_pred)
training_negative_reviews = training_confusion_matrix.iloc[0,0]
training_false_negative_reviews = training_confusion_matrix.iloc[1,0]
training_positive_reviews = training_confusion_matrix.iloc[1,1]
training_false_positive_reviews = training_confusion_matrix.iloc[0,1]

training_sensitivity = training_negative_reviews/(training_negative_reviews + training_false_negative_reviews)
training_specificity = training_positive_reviews/(training_positive_reviews + training_false_positive_reviews)

print('{}% of the negative review predictions were correct in the training set.'.format(round(training_sensitivity*100, 0)))
print('{}% of the positive review predictions were correct in the training set.'.format(round(training_specificity*100, 0)))

test_confusion_matrix = pd.crosstab(Y_test, test_pred)
test_negative_reviews = test_confusion_matrix.iloc[0,0]
test_false_negative_reviews = test_confusion_matrix.iloc[1,0]
test_positive_reviews = test_confusion_matrix.iloc[1,1]
test_false_positive_reviews = test_confusion_matrix.iloc[0,1]

test_sensitivity = test_negative_reviews/(test_negative_reviews + test_false_negative_reviews)
test_specificity = test_positive_reviews/(test_positive_reviews + test_false_positive_reviews)
print('-----------------------------------------------------------------------')
print('{}% of the negative review predictions were correct in the test set.'.format(round(test_sensitivity*100, 0)))
print('{}% of the positive review predictions were correct in the test set.'.format(round(test_specificity*100, 0)))

This model correctly classified 66.0% of the training set.
This model correctly classified 68.0% of the test set.
---------------------------------------------------------
89.0% of the negative review predictions were correct in the training set.
61.0% of the positive review predictions were correct in the training set.
-----------------------------------------------------------------------
98.0% of the negative review predictions were correct in the test set.
59.0% of the positive review predictions were correct in the test set.


## Results of version 2 model compared to original model
- The model is no longer overfit
- overall accuracy declined to 68% from 76%.
- sensitivity increased to 91% from 84%. This means the negative keywords helped identify the negative reviews.
- specificity decreased tp 60% from 74%. This means the negative keywords hindered indentifying the positive reviews.

# Version 3 - Only positive keywords

In [17]:
sentiment_keywords = ['good', 'best', 'impressive', 'great', 'amazing', 'liked', 'clean', 'friendly', 'love', 'loved', 
                      'absolutely', 'excellent', 'recommend', 'wonderful', 'delicious', 'fantastic', 'incredible', 
                      'nice']
sentiment_phrases_2word = ['best ever', 'so good', 'really great', 'very good', 'i love', 'would not', 
                           'good food', 'great place', 'great food', 'really good', 'customer service', '5 stars',
                           'great service', 'definitely will', 'highly recommend', 'highly recommended']

sentiment_phrases_3word = ['will be back', "won't be disappointed"]

In [18]:
for word in sentiment_keywords:
    yelp_df[word] = yelp_df['review'].str.contains(word, case=False)

for phrase in sentiment_phrases_2word:
    yelp_df[phrase] = yelp_df['review'].str.contains(phrase, case=False)

for phrase in sentiment_phrases_3word:
    yelp_df[phrase] = yelp_df['review'].str.contains(phrase, case=False)

In [19]:
data = yelp_df[sentiment_keywords + sentiment_phrases_2word + sentiment_phrases_3word]
target = yelp_df['sentiment']

In [20]:
X = data
Y = target

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2, random_state=200)

bnb = BernoulliNB()

bnb.fit(X_train, Y_train)

training_pred = bnb.predict(X_train)
test_pred = bnb.predict(X_test)

print('This model correctly classified {}% of the training set.'.format(100*round((Y_train == training_pred).sum()/800,2)))
print('This model correctly classified {}% of the test set.'.format(100*round((Y_test == test_pred).sum()/200,2)))
print('---------------------------------------------------------')

training_confusion_matrix = pd.crosstab(Y_train, training_pred)
training_negative_reviews = training_confusion_matrix.iloc[0,0]
training_false_negative_reviews = training_confusion_matrix.iloc[1,0]
training_positive_reviews = training_confusion_matrix.iloc[1,1]
training_false_positive_reviews = training_confusion_matrix.iloc[0,1]

training_sensitivity = training_negative_reviews/(training_negative_reviews + training_false_negative_reviews)
training_specificity = training_positive_reviews/(training_positive_reviews + training_false_positive_reviews)

print('{}% of the negative review predictions were correct in the training set.'.format(round(training_sensitivity*100, 0)))
print('{}% of the positive review predictions were correct in the training set.'.format(round(training_specificity*100, 0)))

test_confusion_matrix = pd.crosstab(Y_test, test_pred)
test_negative_reviews = test_confusion_matrix.iloc[0,0]
test_false_negative_reviews = test_confusion_matrix.iloc[1,0]
test_positive_reviews = test_confusion_matrix.iloc[1,1]
test_false_positive_reviews = test_confusion_matrix.iloc[0,1]

test_sensitivity = test_negative_reviews/(test_negative_reviews + test_false_negative_reviews)
test_specificity = test_positive_reviews/(test_positive_reviews + test_false_positive_reviews)
print('-----------------------------------------------------------------------')
print('{}% of the negative review predictions were correct in the test set.'.format(round(test_sensitivity*100, 0)))
print('{}% of the positive review predictions were correct in the test set.'.format(round(test_specificity*100, 0)))

This model correctly classified 75.0% of the training set.
This model correctly classified 72.0% of the test set.
---------------------------------------------------------
68.0% of the negative review predictions were correct in the training set.
87.0% of the positive review predictions were correct in the training set.
-----------------------------------------------------------------------
68.0% of the negative review predictions were correct in the test set.
85.0% of the positive review predictions were correct in the test set.


## Results of version 3 model compared to original model
- The model is less overfit than the original
- overall accuracy declined to 72% from 76%.
- sensitivity decreased to 68% from 84%. This means the positive keywords hindered identifying the negative reviews.
- specificity increased to 87% from 74%. This means the positive keywords helped in identifying the positive reviews.

# Version 4 - Only phrases  (both positive and negative)

In [21]:
sentiment_phrases_2word = ['be back', 'i would', "wont' be", 'very good', 'i love', 'would not', 
                    'so good', 'not good', 'here again', 'will never', 'good food', 'great place', 'great food', 
                     'really good', 'customer service', '5 stars', 'at best', 'not like', 'at all', 'no good',
                    'great service', 'definitely not', 'definitely will', 'highly recommend', 'highly recommended',
                          "definitely won't"]

sentiment_phrases_3word = ['not very good', "won't be back", 'will be back', "won't be going", 'will never ever', 
                           "won't be disappointed", 'back anytime soon', 'would not recommend', 'service was slow',
                          'not so good', 'not good food', 'really great food', 'not great food', 'not be back',
                          'not to like', 'bad customer service', 'great customer service']

In [22]:
for phrase in sentiment_phrases_2word:
    yelp_df[phrase] = yelp_df['review'].str.contains(phrase, case=False)

for phrase in sentiment_phrases_3word:
    yelp_df[phrase] = yelp_df['review'].str.contains(phrase, case=False)

In [23]:
data = yelp_df[sentiment_phrases_2word + sentiment_phrases_3word]
target = yelp_df['sentiment']

In [24]:
X = data
Y = target

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2, random_state=200)

bnb = BernoulliNB()

bnb.fit(X_train, Y_train)

training_pred = bnb.predict(X_train)
test_pred = bnb.predict(X_test)

print('This model correctly classified {}% of the training set.'.format(100*round((Y_train == training_pred).sum()/800,2)))
print('This model correctly classified {}% of the test set.'.format(100*round((Y_test == test_pred).sum()/200,2)))
print('---------------------------------------------------------')

training_confusion_matrix = pd.crosstab(Y_train, training_pred)
training_negative_reviews = training_confusion_matrix.iloc[0,0]
training_false_negative_reviews = training_confusion_matrix.iloc[1,0]
training_positive_reviews = training_confusion_matrix.iloc[1,1]
training_false_positive_reviews = training_confusion_matrix.iloc[0,1]

training_sensitivity = training_negative_reviews/(training_negative_reviews + training_false_negative_reviews)
training_specificity = training_positive_reviews/(training_positive_reviews + training_false_positive_reviews)

print('{}% of the negative review predictions were correct in the training set.'.format(round(training_sensitivity*100, 0)))
print('{}% of the positive review predictions were correct in the training set.'.format(round(training_specificity*100, 0)))

test_confusion_matrix = pd.crosstab(Y_test, test_pred)
test_negative_reviews = test_confusion_matrix.iloc[0,0]
test_false_negative_reviews = test_confusion_matrix.iloc[1,0]
test_positive_reviews = test_confusion_matrix.iloc[1,1]
test_false_positive_reviews = test_confusion_matrix.iloc[0,1]

test_sensitivity = test_negative_reviews/(test_negative_reviews + test_false_negative_reviews)
test_specificity = test_positive_reviews/(test_positive_reviews + test_false_positive_reviews)
print('-----------------------------------------------------------------------')
print('{}% of the negative review predictions were correct in the test set.'.format(round(test_sensitivity*100, 0)))
print('{}% of the positive review predictions were correct in the test set.'.format(round(test_specificity*100, 0)))

This model correctly classified 56.00000000000001% of the training set.
This model correctly classified 53.0% of the test set.
---------------------------------------------------------
86.0% of the negative review predictions were correct in the training set.
54.0% of the positive review predictions were correct in the training set.
-----------------------------------------------------------------------
93.0% of the negative review predictions were correct in the test set.
50.0% of the positive review predictions were correct in the test set.


## Results of version 4 model compared to original model
- overall accuracy declined to 56% from 76%.
- sensitivity increased to 87% from 84%. This means the phrases helped in identifying the negative reviews.
- specificity decreased to 53% from 74%. This means the phrases hinder identifying the positive reviews.

# Version 5 - Multinomial instead of Bernoulli parameter

In [25]:
sentiment_keywords = ['the', 'not', "don't", 'good', 'would', 'never', 'time', 'ever', 'badly',
                      'minutes', "won't", 'bad', 'much', 'again', 'worst', 'disappointed', 
                      'really', 'slow', 'waited', 'wait', 'bland', 'flavor', 'experience', 'best', 
                      'terrible', 'rude', 'cold', 'taste', 'overpriced', 'poor', 'mediocre', 
                      'management', 'off', 'impressed', 'money', 'horrible', 'sick', 'waiter', 'tasted', 'nasty', 
                      'great', 'angry', 'honestly', 'care', 'disgusted', 'recommended', 'not worth', 'amazing', 
                      'liked', 'hour', 'dirty', 'clean', 'unfortunately', 'worse', 'friendly', 'love', 'loved', 
                      'absolutely', 'excellent', 'recommend', 'wonderful', 'delicious', 'fantastic', 'incredible', 
                      'nice', 'disappointment', 'tasteless', 'enjoyed', 'waste', 'authentic', 'homemade', 'worse',
                     'pleasant', 'pleased', 'outstanding', 'generous', 'insulted', 'soggy', 'lacking', 'sad',
                     'stale', 'helpful', 'sucks', 'beautiful', 'i', 'very', 'food', 'rotten', 'spoiled', 'stomach',
                     'only', 'fresh']
sentiment_phrases_2word = ['be back', 'i would', "wont' be", 'very good', 'i love', 'would not', 
                    'so good', 'not good', 'here again', 'will never', 'good food', 'great place', 'great food', 
                     'really good', 'customer service', '5 stars', 'at best', 'not like', 'at all', 'thumbs up',
                    'great service', 'definitely not', 'definitely will', 'highly recommend', 'highly recommended',
                          'very disappointing', 'not pleasant', 'not pleased', 'not helpful', 'good quality',
                          'bad quality', 'not fresh', 'not nice']

sentiment_phrases_3word = ["won't be back", 'will be back', "won't be going", 'will never ever', 
                           "won't be disappointed", 'back anytime soon', 'would not recommend', 'service was slow', ]

In [26]:
for word in sentiment_keywords:
    yelp_df[word] = yelp_df['review'].str.contains(word, case=False)

for phrase in sentiment_phrases_2word:
    yelp_df[phrase] = yelp_df['review'].str.contains(phrase, case=False)

for phrase in sentiment_phrases_3word:
    yelp_df[phrase] = yelp_df['review'].str.contains(phrase, case=False)

In [27]:
data = yelp_df[sentiment_keywords + sentiment_phrases_2word + sentiment_phrases_3word]
target = yelp_df['sentiment']

In [32]:
X = data
Y = target

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2, random_state=200)

bnb = MultinomialNB()

bnb.fit(X_train, Y_train)

training_pred = bnb.predict(X_train)
test_pred = bnb.predict(X_test)

print('This model correctly classified {}% of the training set.'.format(100*round((Y_train == training_pred).sum()/800,2)))
print('This model correctly classified {}% of the test set.'.format(100*round((Y_test == test_pred).sum()/200,2)))
print('---------------------------------------------------------')

training_confusion_matrix = pd.crosstab(Y_train, training_pred)
training_negative_reviews = training_confusion_matrix.iloc[0,0]
training_false_negative_reviews = training_confusion_matrix.iloc[1,0]
training_positive_reviews = training_confusion_matrix.iloc[1,1]
training_false_positive_reviews = training_confusion_matrix.iloc[0,1]

training_sensitivity = training_negative_reviews/(training_negative_reviews + training_false_negative_reviews)
training_specificity = training_positive_reviews/(training_positive_reviews + training_false_positive_reviews)

print('{}% of the negative review predictions were correct in the training set.'.format(round(training_sensitivity*100, 0)))
print('{}% of the positive review predictions were correct in the training set.'.format(round(training_specificity*100, 0)))

test_confusion_matrix = pd.crosstab(Y_test, test_pred)
test_negative_reviews = test_confusion_matrix.iloc[0,0]
test_false_negative_reviews = test_confusion_matrix.iloc[1,0]
test_positive_reviews = test_confusion_matrix.iloc[1,1]
test_false_positive_reviews = test_confusion_matrix.iloc[0,1]

test_sensitivity = test_negative_reviews/(test_negative_reviews + test_false_negative_reviews)
test_specificity = test_positive_reviews/(test_positive_reviews + test_false_positive_reviews)
print('-----------------------------------------------------------------------')
print('{}% of the negative review predictions were correct in the test set.'.format(round(test_sensitivity*100, 0)))
print('{}% of the positive review predictions were correct in the test set.'.format(round(test_specificity*100, 0)))

This model correctly classified 81.0% of the training set.
This model correctly classified 76.0% of the test set.
---------------------------------------------------------
88.0% of the negative review predictions were correct in the training set.
76.0% of the positive review predictions were correct in the training set.
-----------------------------------------------------------------------
83.0% of the negative review predictions were correct in the test set.
71.0% of the positive review predictions were correct in the test set.


The results are very similar to the original model

# Version 6 (using the vaderSentiment library)

In [33]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

vader_pos_sentiment = []
vader_neg_sentiment = []
vader_compound_sentiment = []

analyzer = SentimentIntensityAnalyzer()

for review in yelp_df['review']:
    vader_pos_sentiment.append(analyzer.polarity_scores(review)['pos'])
    vader_neg_sentiment.append(analyzer.polarity_scores(review)['neg'])
    vader_compound_sentiment.append(analyzer.polarity_scores(review)['compound'])

yelp_df['vader_pos_sentiment'] = vader_pos_sentiment
yelp_df['vader_neg_sentiment'] = vader_neg_sentiment
yelp_df['vader_compound_sentiment'] = vader_compound_sentiment

In [35]:
yelp_df['pos_minus_neg'] = yelp_df['vader_pos_sentiment'] - yelp_df['vader_neg_sentiment']

data = yelp_df[['pos_minus_neg']]
target = yelp_df['sentiment']

X = data
Y = target

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2, random_state=200)

bnb = BernoulliNB()

bnb.fit(X_train, Y_train)

training_pred = bnb.predict(X_train)
test_pred = bnb.predict(X_test)

print('This model correctly classified {}% of the training set.'.format(100*round((Y_train == training_pred).sum()/800,2)))
print('This model correctly classified {}% of the test set.'.format(100*round((Y_test == test_pred).sum()/200,2)))
print('---------------------------------------------------------')

training_confusion_matrix = pd.crosstab(Y_train, training_pred)
training_negative_reviews = training_confusion_matrix.iloc[0,0]
training_false_negative_reviews = training_confusion_matrix.iloc[1,0]
training_positive_reviews = training_confusion_matrix.iloc[1,1]
training_false_positive_reviews = training_confusion_matrix.iloc[0,1]

training_sensitivity = training_negative_reviews/(training_negative_reviews + training_false_negative_reviews)
training_specificity = training_positive_reviews/(training_positive_reviews + training_false_positive_reviews)

print('{}% of the negative review predictions were correct in the training set.'.format(round(training_sensitivity*100, 0)))
print('{}% of the positive review predictions were correct in the training set.'.format(round(training_specificity*100, 0)))

test_confusion_matrix = pd.crosstab(Y_test, test_pred)
test_negative_reviews = test_confusion_matrix.iloc[0,0]
test_false_negative_reviews = test_confusion_matrix.iloc[1,0]
test_positive_reviews = test_confusion_matrix.iloc[1,1]
test_false_positive_reviews = test_confusion_matrix.iloc[0,1]

test_sensitivity = test_negative_reviews/(test_negative_reviews + test_false_negative_reviews)
test_specificity = test_positive_reviews/(test_positive_reviews + test_false_positive_reviews)
print('-----------------------------------------------------------------------')
print('{}% of the negative review predictions were correct in the test set.'.format(round(test_sensitivity*100, 0)))
print('{}% of the positive review predictions were correct in the test set.'.format(round(test_specificity*100, 0)))

This model correctly classified 81.0% of the training set.
This model correctly classified 82.0% of the test set.
---------------------------------------------------------
81.0% of the negative review predictions were correct in the training set.
81.0% of the positive review predictions were correct in the training set.
-----------------------------------------------------------------------
82.0% of the negative review predictions were correct in the test set.
82.0% of the positive review predictions were correct in the test set.


## Results of version 6 model compared to original model
- The model is no longer overfit
- overall accuracy increased to 82% from 76%.
- sensitivity decreased to 81% from 84%. This means the negative keywords helped identify the negative reviews.
- specificity increased tp 82% from 74%. This means the negative keywords hindered indentifying the positive reviews.

Version 6 was the best performing model. It appears the vader sentiment analyzer is the best feature for predicting sentiment.