In [1]:
# Run this every time you open the notebook
%load_ext autoreload
%autoreload 2
from collections import Counter
import lib
import random

# Load and inspect the data

In [2]:
# Load the data.
# lib.read_data() returns two lists of reviews: `reviews` is used below to
# train the classifier, and `test_reviews` is the set the classifier is
# evaluated on.
reviews, test_reviews = lib.read_data()

# Train a Naive Bayes classifier

In [3]:
# Estimate the model parameters from the training reviews, one call per
# sentiment label. Each call returns a pair: the first element is used below
# as the prior for that label, the second as its per-token probabilities
# (presumably P(token | label) -- confirm in lib.calc_probs).
prob_positive, token_probs_positive = lib.calc_probs(reviews, "positive")
prob_negative, token_probs_negative = lib.calc_probs(reviews, "negative")

### Most discriminative words

In [4]:
# Collect the per-sentiment estimates into dicts keyed by label; the
# classification cells further down reuse both dicts.
prior_probs = {
    'positive': prob_positive,
    'negative': prob_negative,
}
token_probs = {
    'positive': token_probs_positive,
    'negative': token_probs_negative,
}

# For every sentiment c, report the tokens with the highest P(c|token).
lib.most_discriminative(reviews, token_probs, prior_probs)

MOST DISCRIMINATIVE TOKEN: 

TOKEN                P(positive|token)
pleaser.             1.0000
excellant.           1.0000
bacterium.           1.0000
sync.                1.0000
dependence.          1.0000
b.f.f.               1.0000
hitch.               1.0000
chang's.             1.0000
antioxidents.        1.0000
stingers.            1.0000

TOKEN                P(negative|token)
nonreturnable.       1.0000
3-stars.             1.0000
inexcusable.         1.0000
crone's.             1.0000
martek.              1.0000
2-stars.             1.0000
spiritually.         1.0000
uninspiring.         1.0000
thirstier.           1.0000
unemployment.        1.0000



# Test the Naive Bayes classifier

### Prediction Results

In [5]:
# Pair every test review with the label the Naive Bayes classifier assigns it,
# giving a list of (review, prediction) tuples.
predictions = [
    (test_review, lib.classify_nb(test_review, prior_probs, token_probs))
    for test_review in test_reviews
]

In [6]:
# Compare true labels and predicted labels in a table.
# Each displayed row holds a review's content, its true sentiment and the
# sentiment predicted by the classifier.
lib.show_predictions(predictions)

Unnamed: 0,Content,True sentiment,Predicted sentiment
0,"it took me a while to find treats that my cats would eat . these go beyond that for my cats . they actually demand them . i give them several treats in the morning and again in the evening . they keep count , and if they are shorted one or two , they follow me around the house until the issue is resolved . it is nice that they are good dental treats as well ( good checkups at the vet -- maybe these are the reason ) . i switch up the flavors from time to time , all of which they love . i highly recommend them to anyone wanting to treat their pet to a little something .",positive,positive
1,very cute . the dogs loved the taste and humans love the funny dog fortunes . this is made by sojos . i feed my dogs their food which is also great .,positive,positive
2,"great taffy at a great price . there was a wide assortment of yummy taffy . delivery was very quick . if your a taffy lover , this is a deal .",positive,positive
3,"my daughter moved in with her cat . some how , we now have 3 cats , that is , she now has 3 cats . the vet gave us some greenies when we took them in for checkups . i was n't sure what they were and the cats did n't seem to like them at first . that is , until they actually tasted them . that changed things . so far , i have tried the oven roasted chicken , ocean fish , tempting tuna and succulent beef on them . the cats love them and my daughter is angry with me for spoiling the cats .",positive,positive
4,"these candies are flat out delicious ! i 've read all the reviews here and ca n't understand how there could be a 1 star review of this product ! ! just because you personally do n't care for how a product tastes does not lend to a bad review . the fault is your own for purchasing a product do n't even like ! < br / > < br / > as for my review , these candies are awesome ! i personally do n't care for the black raspberry , but i can just give those to my wife ! i 'm really surprised these are n't more expensive as i feel they should be more of a gourmet type candy ! at such a great price for 12 packs , i 'm in candy heaven ! ! < br / > < br / > in fact , i buy 24 at a time to save on shipping to make it even cheaper ! < br / > < br / > if you like raspberry you will love this product . plane and simple .",positive,positive


### F1 score

In [7]:
# Get average F1 score for the test set.
# The output reports precision, recall and F1 for each sentiment, followed by
# the average of the two per-sentiment F1 scores.
lib.evaluate(predictions)

positive
Precision:  97.71392158034132
Recall:  87.57317247287264
F1:  92.3660457980781

negative
Precision:  73.86751735785326
Recall:  94.48871819491119
F1:  82.91521853607162

Average F1:  87.64063216707487


In [8]:
# Get the average F1 score for the TRAINING set as well.
# Compare with the average F1 for the test set above. What's the reason for the difference?

# Each training review is paired with its predicted label.
trainset_predictions = [
    (train_review, lib.classify_nb(train_review, prior_probs, token_probs))
    for train_review in reviews
]
lib.evaluate(trainset_predictions)

positive
Precision:  99.92474440630679
Recall:  96.95674436207396
F1:  98.41837299641374

negative
Precision:  92.36491996291373
Recall:  99.80205190102595
F1:  95.9395722018205

Average F1:  97.17897259911712


### Confusion matrix

In [9]:
# Tabulate the test-set predictions as counts per sentiment pair.
# NOTE(review): judging by the displayed output, rows are true labels and
# columns are predicted labels (49069 / (49069 + 6963) matches the positive
# recall reported by lib.evaluate) -- confirm in lib.show_confusion_matrix.
lib.show_confusion_matrix(predictions)

Unnamed: 0,positive,negative
positive,49069,6963
negative,1148,19682


### Visualizing individual reviews

In [10]:
# Pick one test review at random and visualize it using the trained priors
# and token probabilities.
# NOTE(review): no seed is set in the visible cells, so a different review is
# presumably shown on each run -- seed `random` if a reproducible example is
# needed.
random_review = random.choice(list(test_reviews))
lib.visualize_review(random_review, prior_probs, token_probs)