# Analyze Product Sentiment

In [1]:
import turicreate as tc

# Read some product review data

In [2]:
products = tc.SFrame('amazon_baby.sframe')
products

name,review,rating
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0


# Data Exploration

### Build the word count vector for each review

In [3]:
products['word_count'] = tc.text_analytics.count_words(products['review'])
products

name,review,rating,word_count
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'handles': 1.0, 'stripping': 1.0, ..."
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'recommend': 1.0, 'disappointed': 1.0, ..."
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'quilt': 1.0, 'the': 1.0, 'than': 1.0, 'fu ..."
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'tool': 1.0, 'clever': 1.0, 'binky': 2.0, ..."
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'rock': 1.0, 'many': 1.0, 'headaches': 1.0, ..."
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'thumb': 1.0, 'or': 1.0, 'break': 1.0, 'trying': ..."
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'for': 1.0, 'barnes': 1.0, 'at': 1.0, 'is': ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'right': 1.0, 'because': 1.0, 'questions': 1.0, ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'like': 1.0, 'and': 1.0, 'changes': 1.0, 'the': ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'in': 1.0, 'pages': 1.0, 'out': 1.0, 'run': 1.0, ..."


### Most popular product

In [4]:
products.groupby('name', operations={'count': tc.aggregate.COUNT()}).sort('count', ascending=False)

name,count
Vulli Sophie the Giraffe Teether ...,785
"Simple Wishes Hands-Free Breastpump Bra, Pink, ...",562
Infant Optics DXR-5 2.4 GHz Digital Video Baby ...,561
Baby Einstein Take Along Tunes ...,547
Cloud b Twilight Constellation Night ...,520
"Fisher-Price Booster Seat, Blue/Green/Gray ...",489
Fisher-Price Rainforest Jumperoo ...,450
"Graco Nautilus 3-in-1 Car Seat, Matrix ...",419
Leachco Snoogle Total Body Pillow ...,388
"Regalo Easy Step Walk Thru Gate, White ...",374


### Explore Vulli Sophie

In [5]:
giraffe_reviews = products[products['name'] == 'Vulli Sophie the Giraffe Teether']
giraffe_reviews

name,review,rating,word_count
Vulli Sophie the Giraffe Teether ...,He likes chewing on all the parts especially the ...,5.0,"{'purchase': 1.0, 'teething': 1.0, ..."
Vulli Sophie the Giraffe Teether ...,My son loves this toy and fits great in the diaper ...,5.0,"{'a': 1.0, 'is': 1.0, 'when': 1.0, 'him': 1.0, ..."
Vulli Sophie the Giraffe Teether ...,There really should be a large warning on the ...,1.0,"{'made': 1.0, 'of': 1.0, 'packaging': 1.0, 'no': ..."
Vulli Sophie the Giraffe Teether ...,All the moms in my moms' group got Sophie for ...,5.0,"{'another': 1.0, 'out': 1.0, 'run': 1.0, 'lost': ..."
Vulli Sophie the Giraffe Teether ...,I was a little skeptical on whether Sophie was ...,5.0,"{'disappointed': 1.0, 'will': 1.0, 'take': ..."
Vulli Sophie the Giraffe Teether ...,I have been reading about Sophie and was going ...,5.0,"{'late': 1.0, 'perfect': 1.0, 'pack': 1.0, 'on ..."
Vulli Sophie the Giraffe Teether ...,My neice loves her sophie and has spent hours ...,5.0,"{'delight': 1.0, 'in': 1.0, 'other': 1.0, ..."
Vulli Sophie the Giraffe Teether ...,What a friendly face! And those mesmerizing ...,5.0,"{'inside': 1.0, 'water': 1.0, 'don': 1.0, 'up': ..."
Vulli Sophie the Giraffe Teether ...,We got this just for my son to chew on instea ...,5.0,"{'its': 1.0, 'fine': 1.0, 'is': 1.0, 'which': 1.0, ..."
Vulli Sophie the Giraffe Teether ...,"My baby seems to like this toy, but I could ...",3.0,"{'off': 1.0, 'have': 2.0, 'of': 1.0, 'some': 1.0, ..."


In [6]:
giraffe_reviews['rating'].show()

# Build a sentiment classifier

### Define positive and negative sentiment

In [7]:
# ignore all 3-star reviews
products = products[products['rating'] != 3]

# positive sentiment = 4-star or 5-star reviews
products['sentiment'] = products['rating'] >= 4

### Build word count vector

In [8]:
products['word_count'] = tc.text_analytics.count_words(products['review'])
products

name,review,rating,word_count,sentiment
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'recommend': 1.0, 'disappointed': 1.0, ...",1
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'quilt': 1.0, 'the': 1.0, 'than': 1.0, 'fu ...",1
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'tool': 1.0, 'clever': 1.0, 'binky': 2.0, ...",1
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'rock': 1.0, 'many': 1.0, 'headaches': 1.0, ...",1
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'thumb': 1.0, 'or': 1.0, 'break': 1.0, 'trying': ...",1
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'for': 1.0, 'barnes': 1.0, 'at': 1.0, 'is': ...",1
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'right': 1.0, 'because': 1.0, 'questions': 1.0, ...",1
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'like': 1.0, 'and': 1.0, 'changes': 1.0, 'the': ...",1
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'in': 1.0, 'pages': 1.0, 'out': 1.0, 'run': 1.0, ...",1
"Baby Tracker&reg; - Daily Childcare Journal, ...",I love this journal and our nanny uses it ...,4.0,"{'tracker': 1.0, 'now': 1.0, 'its': 1.0, 'sti ...",1


### Let's train the sentiment classifier

In [9]:
train_data, test_data = products.random_split(.8, seed=0)

In [10]:
sentiment_model = tc.logistic_classifier.create(train_data, 
                                                target='sentiment', 
                                                features=['word_count'],
                                                validation_set=test_data)

# Evaluate the sentiment model

In [11]:
sentiment_model.evaluate(test_data, metric='roc_curve')

{'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 1001
 
 Data:
 +-----------+--------------------+--------------------+-------+------+
 | threshold |        fpr         |        tpr         |   p   |  n   |
 +-----------+--------------------+--------------------+-------+------+
 |    0.0    |        1.0         |        1.0         | 27976 | 5328 |
 |   0.001   | 0.698948948948949  | 0.9933514440949385 | 27976 | 5328 |
 |   0.002   | 0.6619744744744744 | 0.9922076065198742 | 27976 | 5328 |
 |   0.003   | 0.6385135135135135 | 0.9909922790963683 | 27976 | 5328 |
 |   0.004   |  0.62256006006006  | 0.9903846153846154 | 27976 | 5328 |
 |   0.005   | 0.6112987987987988 | 0.989848441521304  | 27976 | 5328 |
 |   0.006   | 0.6002252252252253 | 0.9894195024306548 | 27976 | 5328 |
 |   0.007   | 0.589527027027027  | 0.9888475836431226 | 27976 | 5328 |
 |   0.008   | 0.5816441441441441 | 0.9883828996282528 | 27976 | 5328 |
 |   0.009   | 0.574512012012012

# Applying the learned model to understand sentiment for Giraffe

In [12]:
giraffe_reviews['predicted_sentiment'] = sentiment_model.predict(giraffe_reviews, output_type='probability')

In [13]:
giraffe_reviews.head()

name,review,rating,word_count,predicted_sentiment
Vulli Sophie the Giraffe Teether ...,He likes chewing on all the parts especially the ...,5.0,"{'purchase': 1.0, 'teething': 1.0, ...",0.9993655365682544
Vulli Sophie the Giraffe Teether ...,My son loves this toy and fits great in the diaper ...,5.0,"{'a': 1.0, 'is': 1.0, 'when': 1.0, 'him': 1.0, ...",0.9998633791689672
Vulli Sophie the Giraffe Teether ...,There really should be a large warning on the ...,1.0,"{'made': 1.0, 'of': 1.0, 'packaging': 1.0, 'no': ...",0.2545268197490874
Vulli Sophie the Giraffe Teether ...,All the moms in my moms' group got Sophie for ...,5.0,"{'another': 1.0, 'out': 1.0, 'run': 1.0, 'lost': ...",0.9165688083895838
Vulli Sophie the Giraffe Teether ...,I was a little skeptical on whether Sophie was ...,5.0,"{'disappointed': 1.0, 'will': 1.0, 'take': ...",0.6855768205778352
Vulli Sophie the Giraffe Teether ...,I have been reading about Sophie and was going ...,5.0,"{'late': 1.0, 'perfect': 1.0, 'pack': 1.0, 'on ...",0.99999994452112
Vulli Sophie the Giraffe Teether ...,My neice loves her sophie and has spent hours ...,5.0,"{'delight': 1.0, 'in': 1.0, 'other': 1.0, ...",0.9979351181092758
Vulli Sophie the Giraffe Teether ...,What a friendly face! And those mesmerizing ...,5.0,"{'inside': 1.0, 'water': 1.0, 'don': 1.0, 'up': ...",0.9999745004834412
Vulli Sophie the Giraffe Teether ...,We got this just for my son to chew on instea ...,5.0,"{'its': 1.0, 'fine': 1.0, 'is': 1.0, 'which': 1.0, ...",0.9460144428329624
Vulli Sophie the Giraffe Teether ...,"My baby seems to like this toy, but I could ...",3.0,"{'off': 1.0, 'have': 2.0, 'of': 1.0, 'some': 1.0, ...",0.3830113614133147


# Sort the reviews based on the predicted sentiment and explore

In [14]:
giraffe_reviews = giraffe_reviews.sort('predicted_sentiment', ascending=False)
giraffe_reviews.head()

name,review,rating,word_count,predicted_sentiment
Vulli Sophie the Giraffe Teether ...,I'll be honest...I bought this toy because all the ...,4.0,"{'around': 1.0, 'explore': 1.0, 'they': ...",1.0
Vulli Sophie the Giraffe Teether ...,As a mother of 16month old twins; I bought ...,5.0,"{'will': 1.0, '15months': 1.0, 'would': 2.0, ...",1.0
Vulli Sophie the Giraffe Teether ...,"Sophie, oh Sophie, your time has come. My ...",5.0,"{'11': 1.0, 'prisrob': 1.0, '12': 1.0, 'who': ...",1.0
Vulli Sophie the Giraffe Teether ...,We got this little giraffe as a gift from a ...,5.0,"{'out': 1.0, 've': 1.0, 'would': 1.0, 'enough': ...",0.9999999999998376
Vulli Sophie the Giraffe Teether ...,"As every mom knows, you always want to give your ...",5.0,"{'whether': 1.0, 'neutral': 1.0, 'gend ...",0.9999999999998284
Vulli Sophie the Giraffe Teether ...,My Mom-in-Law bought Sophie for my son whe ...,5.0,"{'penny': 1.0, 'little': 1.0, 'perfect': 1.0, ...",0.9999999999997958
Vulli Sophie the Giraffe Teether ...,"My 4 month old son is teething, and I've tried ...",4.0,"{'worth': 1.0, 'works': 1.0, 'teether': 1.0, ...",0.9999999999994914
Vulli Sophie the Giraffe Teether ...,Let me just start off by addressing the choking ...,5.0,"{'question': 1.0, 'must': 1.0, 'overall': 1.0, ...",0.9999999999941254
Vulli Sophie the Giraffe Teether ...,I'm not sure why Sophie is such a hit with the ...,4.0,"{'makers': 1.0, 'or': 1.0, 'take': 1.0, 'can': ...",0.999999999987423
Vulli Sophie the Giraffe Teether ...,"I admit, I didn't get Sophie the Giraffe at ...",4.0,"{'dye': 1.0, 'of': 1.0, 'cause': 1.0, 'fade': ...",0.9999999999829476


In [15]:
giraffe_reviews[1]['review']

"As a mother of 16month old twins; I bought Sophie [1 for each, of course] when they were 4 months old after careful reading of all reviews. I heard great things about Sophie and wanted to give her a try. At 4 months babies can't do much more than grasp and semi gnaw on Sophie. For many months I had to squeeze Sophie myself [which I personally enjoyed] and set it on their laps. They LOVED Sophie. The squeak is LOUD and sounds exactly like a dog's squeaky chew toy, just for those who are wondering.As they grew and their motor skills developed to each milestone, Sophie gained more and more individual babytime. The twins were able to squeeze her themselves and chew on her around the clock. They love to throw her, stretch her, squeeze her, chew on her, drool on her... you name it, they have done it. One of the two Sophie's took an extended vacation out in the back yard [unbeknownst to me] and once found, a little water had her looking like a champ again... ready to face another day of play

In [16]:
giraffe_reviews[-1]['review']

"This children's toy is nostalgic and very cute. However, there is a distinct rubber smell and a very odd taste, yes I tried it, that my baby did not enjoy. Also, if it is soiled it is extremely difficult to clean as the rubber is a kind of porus material and does not clean well. The final thing is the squeaking device inside which stopped working after the first couple of days. I returned this item feeling I had overpaid for a toy that was defective and did not meet my expectations. Please do not be swayed by the cute packaging and hype surounding it as I was. One more thing, I was given a full refund from Amazon without any problem."

# Quiz

In [17]:
products = tc.SFrame('amazon_baby.sframe')
products['sentiment'] = products['rating'] >= 4
selected_words = ['awesome', 'great', 'fantastic', 'amazing', 'love', 'horrible', 'bad', 'terrible', 'awful', 'wow', 'hate']

In [18]:
def count_word(review, key):
    count = 0
    for word in review.split(' '):
        if word == key:
            count += 1
    return count

for key in selected_words:
    products[key] = products['review'].apply(lambda review: count_word(review, key))
    
products

name,review,rating,sentiment,awesome,great,fantastic,amazing
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,0,0,0,0,0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,1,0,0,0,0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,1,0,0,0,0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,1,0,0,0,0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,1,0,1,0,0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,1,0,1,0,0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,1,0,0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,1,0,0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,1,0,0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,1,0,0,0,0

love,horrible,bad,terrible,awful,wow,hate
0,0,0,0,0,0,0
1,0,0,0,0,0,0
0,0,0,0,0,0,0
2,0,0,0,0,0,0
0,0,0,0,0,0,0
0,0,0,0,0,0,0
0,0,0,0,0,0,0
0,0,0,0,0,0,0
0,0,0,0,0,0,0
0,0,0,0,0,0,0


In [19]:
train_data, test_data = products.random_split(.8, seed=0)

In [20]:
selected_words_model = tc.logistic_classifier.create(train_data,
                                                target='sentiment',
                                                features=selected_words,
                                                validation_set=test_data)

In [21]:
selected_words_model.coefficients
                                                

name,index,class,value,stderr
(intercept),,1,0.98060077603295,0.0069997921618984
awesome,,1,0.8597672625274816,0.0898892741591317
great,,1,0.5671612346628275,0.0165890922057549
fantastic,,1,0.6183110330162787,0.1189229293812324
amazing,,1,0.8894556897621998,0.1099420760843797
love,,1,1.0740643189855907,0.021695384733323
horrible,,1,-1.6557192293910517,0.0977867733719431
bad,,1,-0.8696413682945263,0.0384457745332442
terrible,,1,-1.5919667397191406,0.0940640518769197
awful,,1,-1.3503262418878448,0.1314515997598642


In [22]:
selected_words_model.evaluate(test_data)

{'accuracy': 0.7669757294791383,
 'auc': 0.6143080582943469,
 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 4
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      0       |        0        |  266  |
 |      1       |        0        |  146  |
 |      0       |        1        |  8399 |
 |      1       |        1        | 27859 |
 +--------------+-----------------+-------+
 [4 rows x 3 columns],
 'f1_score': 0.8670307953254595,
 'log_loss': 0.5233959542578556,
 'precision': 0.7683545700259253,
 'recall': 0.9947866452419211,
 'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 1001
 
 Data:
 +-----------+-----+-----+-------+------+
 | threshold | fpr | tpr |   p   |  n   |
 +-----------+-----+-----+-------+------+
 |    0.0    | 1.0 | 1.0 | 28005 | 8665 |
 |   0.001   | 1.0 | 1.0 | 28005 | 8665 |
 |   0.002   | 1

In [23]:
count = {}
for key in selected_words:
    count[key] = sum(products[key])
    

In [24]:
max_count = 0
max_key = ""
for key, val in count.items():
    if val > max_count:
        max_count = val
        max_key = key
print(max_key, max_count)

great 37056


In [25]:
min_count = 99999999999
min_key = ""
for key, val in count.items():
    if val < min_count:
        min_count = val
        min_key = key
print(min_key, min_count)

wow 54


In [26]:
selected_words_model.coefficients.sort('value', ascending=False).print_rows(num_rows=20)

+-------------+-------+-------+---------------------+----------------------+
|     name    | index | class |        value        |        stderr        |
+-------------+-------+-------+---------------------+----------------------+
|     love    |  None |   1   |  1.0740643189855907 | 0.02169538473332301  |
| (intercept) |  None |   1   |   0.98060077603295  | 0.006999792161898424 |
|   amazing   |  None |   1   |  0.8894556897621998 | 0.10994207608437971  |
|   awesome   |  None |   1   |  0.8597672625274816 | 0.08988927415913178  |
|  fantastic  |  None |   1   |  0.6183110330162787 | 0.11892292938123247  |
|    great    |  None |   1   |  0.5671612346628275 | 0.01658909220575496  |
|     wow     |  None |   1   | 0.19604191867674337 | 0.39796988190066035  |
|     bad     |  None |   1   | -0.8696413682945263 | 0.03844577453324423  |
|     hate    |  None |   1   | -1.2004160084332827 | 0.07162960956147725  |
|    awful    |  None |   1   | -1.3503262418878448 |  0.1314515997598642  |

In [27]:
products = products[products['rating'] != 3]
products['word_count'] = tc.text_analytics.count_words(products['review'])
train_data, test_data = products.random_split(.8, seed=0)
sentiment_model = tc.logistic_classifier.create(train_data, 
                                                target='sentiment', 
                                                features=['word_count'],
                                                validation_set=test_data)
selected_words_model = tc.logistic_classifier.create(train_data,
                                                target='sentiment',
                                                features=selected_words,
                                                validation_set=test_data)


In [28]:
print(sentiment_model.evaluate(test_data)['accuracy'])
print(selected_words_model.evaluate(test_data)['accuracy'])

0.9176975738650012
0.8418208023060293


In [29]:
sentiment_model.coefficients.sort('value', ascending=True).print_rows(num_rows=20)

+------------+---------------+-------+---------------------+--------+
|    name    |     index     | class |        value        | stderr |
+------------+---------------+-------+---------------------+--------+
| word_count |   transpired  |   1   |  -30.43220274792726 |  None  |
| word_count |    themobi    |   1   | -29.792418298033866 |  None  |
| word_count |    cobbbler   |   1   | -28.871157357028306 |  None  |
| word_count |    hahaaaa    |   1   | -28.370984587413886 |  None  |
| word_count |  wheelbarrow  |   1   | -28.291157373142198 |  None  |
| word_count |   athlectic   |   1   |  -28.0682080953371  |  None  |
| word_count |     than1     |   1   | -26.799285044557525 |  None  |
| word_count |    resewing   |   1   | -26.694738766222038 |  None  |
| word_count |     anyow     |   1   | -26.627965022738735 |  None  |
| word_count |   swadddlers  |   1   | -26.591800031830868 |  None  |
| word_count |     strage    |   1   | -25.347384615193604 |  None  |
| word_count |     s

In [30]:
def readFile(fileName):
        fileObj = open(fileName, "r") #opens the file in read mode
        words = fileObj.read().splitlines() #puts the file into an array
        fileObj.close()
        return dict.fromkeys(words)

In [31]:
positive_words = readFile('positive-words.txt')
negative_words = readFile('negative-words.txt')

In [32]:
def count_words(r, d):
    count = 0
    for w in r.split(' '):
        if w in d:
            count += 1
    return count

products['count_pos'] = products['review'].apply(lambda x: count_words(x, positive_words))
products['count_neg'] = products['review'].apply(lambda x: count_words(x, negative_words))

In [33]:

products

name,review,rating,sentiment,awesome,great,fantastic,amazing
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,1,0,0,0,0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,1,0,0,0,0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,1,0,0,0,0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,1,0,1,0,0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,1,0,1,0,0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,1,0,0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,1,0,0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,1,0,0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,1,0,0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",I love this journal and our nanny uses it ...,4.0,1,0,0,0,0

love,horrible,bad,terrible,awful,wow,hate,word_count,count_pos,count_neg
1,0,0,0,0,0,0,"{'recommend': 1.0, 'disappointed': 1.0, ...",3,0
0,0,0,0,0,0,0,"{'quilt': 1.0, 'the': 1.0, 'than': 1.0, 'fu ...",4,0
2,0,0,0,0,0,0,"{'tool': 1.0, 'clever': 1.0, 'binky': 2.0, ...",9,1
0,0,0,0,0,0,0,"{'rock': 1.0, 'many': 1.0, 'headaches': 1.0, ...",3,0
0,0,0,0,0,0,0,"{'thumb': 1.0, 'or': 1.0, 'break': 1.0, 'trying': ...",6,2
0,0,0,0,0,0,0,"{'for': 1.0, 'barnes': 1.0, 'at': 1.0, 'is': ...",0,0
0,0,0,0,0,0,0,"{'right': 1.0, 'because': 1.0, 'questions': 1.0, ...",2,0
0,0,0,0,0,0,0,"{'like': 1.0, 'and': 1.0, 'changes': 1.0, 'the': ...",0,0
0,0,0,0,0,0,0,"{'in': 1.0, 'pages': 1.0, 'out': 1.0, 'run': 1.0, ...",2,0
2,0,0,0,0,0,0,"{'tracker': 1.0, 'now': 1.0, 'its': 1.0, 'sti ...",6,1


In [34]:
products['baseline'] = products.apply(lambda x: x['count_pos'] > x['count_neg'])

In [35]:
print(products[products['baseline'] == products['sentiment']].num_rows()/products.num_rows())

0.7693101132220304


In [36]:
diaper_reviews = products[products['name'] == 'Baby Trend Diaper Champ']
diaper_reviews['predicted_sentiment'] = sentiment_model.predict(diaper_reviews, output_type='probability')
diaper_reviews.sort('predicted_sentiment', ascending=False).print_rows(num_rows=5)

+-------------------------+-------------------------------+--------+-----------+
|           name          |             review            | rating | sentiment |
+-------------------------+-------------------------------+--------+-----------+
| Baby Trend Diaper Champ | I read a review below that... |  4.0   |     1     |
| Baby Trend Diaper Champ | I have never written a rev... |  5.0   |     1     |
| Baby Trend Diaper Champ | I originally put this item... |  5.0   |     1     |
| Baby Trend Diaper Champ | Baby Luke can turn a clean... |  5.0   |     1     |
| Baby Trend Diaper Champ | Diaper Champ or Diaper Gen... |  5.0   |     1     |
+-------------------------+-------------------------------+--------+-----------+
+---------+-------+-----------+---------+------+----------+-----+----------+-------+
| awesome | great | fantastic | amazing | love | horrible | bad | terrible | awful |
+---------+-------+-----------+---------+------+----------+-----+----------+-------+
|    0    |   0 

In [37]:
diaper_reviews = products[products['name'] == 'Baby Trend Diaper Champ']
diaper_reviews['predicted_sentiment'] = selected_words_model.predict(diaper_reviews, output_type='probability')
diaper_reviews.sort('predicted_sentiment', ascending=False).print_rows(num_rows=5)


+-------------------------+-------------------------------+--------+-----------+
|           name          |             review            | rating | sentiment |
+-------------------------+-------------------------------+--------+-----------+
| Baby Trend Diaper Champ | I received my Diaper Champ... |  5.0   |     1     |
| Baby Trend Diaper Champ | I love this diaper pale an... |  5.0   |     1     |
| Baby Trend Diaper Champ | I love this diaper pail. I... |  4.0   |     1     |
| Baby Trend Diaper Champ | I've worked with kids more... |  5.0   |     1     |
| Baby Trend Diaper Champ | I have a two-year-old son ... |  5.0   |     1     |
+-------------------------+-------------------------------+--------+-----------+
+---------+-------+-----------+---------+------+----------+-----+----------+-------+
| awesome | great | fantastic | amazing | love | horrible | bad | terrible | awful |
+---------+-------+-----------+---------+------+----------+-----+----------+-------+
|    0    |   0 

In [38]:
selected_words_model.coefficients.print_rows(num_rows=20)

+-------------+-------+-------+----------------------+----------------------+
|     name    | index | class |        value         |        stderr        |
+-------------+-------+-------+----------------------+----------------------+
| (intercept) |  None |   1   |  1.4508720017816668  |  0.0084384785855882  |
|   awesome   |  None |   1   |  0.8752716575545818  | 0.11388296333725201  |
|    great    |  None |   1   |  0.7428343911447416  | 0.022531705253868255 |
|  fantastic  |  None |   1   |  0.7484721044249082  | 0.15500133746022535  |
|   amazing   |  None |   1   |  0.8774493635313587  |  0.1359880711494705  |
|     love    |  None |   1   |   1.3159105199755    | 0.030766002897234822 |
|   horrible  |  None |   1   | -1.8649709697226837  | 0.10145679016304061  |
|     bad     |  None |   1   |  -0.945237661836772  | 0.04355094018526647  |
|   terrible  |  None |   1   | -1.9359996801606387  |  0.0994189529807115  |
|    awful    |  None |   1   | -1.6819776853094814  |  0.138671

In [39]:
diaper_reviews.sort('predicted_sentiment', ascending=True)[3]['review']

"I chose the Diaper Champ because you are able to use standard trash bags.  I hate to buy replacement items such as bags ( i usually forget).  I works great.  The only thing is when it is full it is full.  It is very difficult to squeeze anything else into it.  I have not noticed any lingering oder in the baby's room and would recommend this item."

In [44]:
print(selected_words_model.predict(diaper_reviews, output_type='probability').sort(ascending=False))

[0.995497471975126, 0.9859703218937612, 0.9834176176408542, 0.9834176176408542, 0.9834176176408542, 0.9834176176408542, 0.9834176176408542, 0.9834176176408542, 0.9834176176408542, 0.9709601643878867, 0.9709601643878867, 0.9709601643878867, 0.9709601643878867, 0.9709601643878867, 0.9709601643878867, 0.9709601643878867, 0.9709601643878867, 0.9709601643878867, 0.9709601643878867, 0.949623499536843, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.940854195420995, 0.9408541954