# Analyze Product Sentiment

In [30]:
import turicreate
import os

## Read product review data

In [31]:
# Load the review SFrame; the path is relative to the current working directory.
products = turicreate.SFrame('../datasets/amazon_baby.sframe')

## Identify the most and least used selected words

In [32]:
# Add a 'word_count' column built from each review's text: one {word: count}
# dict per review.
products['word_count'] = turicreate.text_analytics.count_words(products['review'])

In [33]:
# Sentiment-bearing words that each get their own per-review count column.
selected_words = ['awesome', 'great', 'fantastic', 'amazing', 'love', 'horrible', 'bad', 'terrible', 'awful', 'wow', 'hate']

for word in selected_words:
    # One column per selected word: how often it occurs in each review
    # (0 when the review does not contain it).
    #
    # * `word=word` binds the current word to the function itself, so the
    #   function never depends on the value of the loop variable at the time
    #   it is actually evaluated.
    # * `dtype=float` makes every count column the same type. Without it the
    #   type is guessed from the first rows only, so rare words (whose first
    #   rows are all the integer 0) became int columns while common words
    #   became float columns.
    products[word] = products['word_count'].apply(
        lambda word_counts, word=word: word_counts.get(word, 0),
        dtype=float,
    )

In [34]:
# Total occurrences of every selected word across all reviews, as
# (total, word) pairs in the same order as selected_words.
word_totals = [(products[word].sum(), word) for word in selected_words]

# Compare on the total only, so ties resolve to the earliest word in
# selected_words. Taking max/min over the real totals avoids fixed sentinel
# bounds (0 and 1000000), which returned an empty word whenever no total
# crossed them.
most_used = max(word_totals, key=lambda pair: pair[0])    # most used
least_used = min(word_totals, key=lambda pair: pair[0])   # least used
print(most_used)
print(least_used)

(59536.0, 'great')
(461, 'wow')


## Train sentiment classifier

In [35]:
# Prepare labelled data for the sentiment classifier.
# Drop 3-star reviews, then label ratings of 4 and above as positive sentiment.
products = products[products['rating']!= 3]
products['sentiment'] = products['rating'] >= 4

# Random train/test split with a 0.8 fraction for training; the fixed seed
# keeps the split repeatable between runs.
train_data, test_data = products.random_split(.8, seed=0)

In [36]:
# Fit a logistic classifier that predicts 'sentiment' from the selected-word
# count columns only; the test split is passed as the validation set.
selected_words_model = turicreate.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=selected_words,
    validation_set=test_data,
)

## Identify the most positive and negative words based on model coefficients

In [41]:
# (coefficient, word) pairs for the selected words only. The coefficients
# table also carries the model's intercept row (see the Turi Create docs);
# it is not a word, so it must not compete for "most positive/negative word".
word_coefficients = [
    (row['value'], row['name'])
    for row in selected_words_model.coefficients
    if row['name'] in selected_words
]

# Compare on the coefficient only; ties resolve to the first row encountered.
# max/min over the real values replaces the +/-1000000 sentinel bounds.
most_positive = max(word_coefficients, key=lambda pair: pair[0])
most_negative = min(word_coefficients, key=lambda pair: pair[0])

print(most_positive)
print(most_negative)
        

(1.359268866922504, 'love')
(-2.251335236759102, 'horrible')


## Evaluate accuracy of selected_words_model

In [42]:
# Accuracy of the selected-words model on the held-out test split.
selected_words_model.evaluate(test_data)['accuracy']

0.8463848186404036

## Accuracy of majority class classifier

In [43]:
# Baseline: the fraction of test reviews rated above 3, i.e. the accuracy of
# always predicting positive sentiment. float() forces true division.
test_data[test_data['rating'] > 3].num_rows() / float(test_data.num_rows())

0.8400192169108815

## Apply sentiment classifier

In [52]:
# Score every review with the selected-words model. output_type='probability'
# requests a probability rather than a class label, stored in a new
# 'predicted_sentiment' column.
products['predicted_sentiment'] = selected_words_model.predict(products, output_type = 'probability')
# Display the SFrame with the new column.
products






name,review,rating,word_count,awesome,great
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'and': 3.0, 'love': 1.0, 'it': 3.0, 'highly': ...",0.0,0.0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'and': 2.0, 'quilt': 1.0, 'it': 1.0, ...",0.0,0.0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'and': 3.0, 'ingenious': 1.0, 'love': 2.0, 'is': ...",0.0,0.0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'and': 2.0, 'this': 2.0, 'all': 2.0, 'love': 1.0, ...",0.0,1.0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'and': 2.0, 'cute': 1.0, 'help': 2.0, 'habit': ...",0.0,1.0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'shop': 1.0, 'be': 1.0, 'is': 1.0, 'bound': 1.0, ...",0.0,0.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'and': 2.0, 'all': 1.0, 'right': 1.0, 'able': ...",0.0,0.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'and': 1.0, 'fantastic': 1.0, 'help': 1.0, 'gi ...",0.0,0.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'pre': 1.0, 'all': 1.0, 'standarad': 1.0, ...",0.0,0.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",I love this journal and our nanny uses it ...,4.0,"{'all': 2.0, 'forget': 1.0, 'just': 1.0, 'fo ...",0.0,0.0

fantastic,amazing,love,horrible,bad,terrible,awful,wow,hate,sentiment,predicted_sentiment
0.0,0.0,1.0,0,0.0,0,0,0,0,1,0.93678192448
0.0,0.0,0.0,0,0.0,0,0,0,0,1,0.791928837062
0.0,0.0,2.0,0,0.0,0,0,0,0,1,0.982962080391
0.0,0.0,1.0,0,0.0,0,0,0,0,1,0.972318678886
0.0,0.0,0.0,0,0.0,0,0,0,0,1,0.900218694809
0.0,0.0,0.0,0,0.0,0,0,0,0,1,0.791928837062
0.0,0.0,0.0,0,0.0,0,0,0,0,1,0.791928837062
1.0,0.0,0.0,0,0.0,0,0,0,0,1,0.902242741062
0.0,0.0,0.0,0,0.0,0,0,0,0,1,0.791928837062
0.0,0.0,2.0,0,0.0,0,0,0,0,1,0.982962080391


In [53]:
# Reviews of a single product, ordered from the highest to the lowest
# predicted sentiment.
baby_reviews = (
    products[products['name'] == 'Baby Trend Diaper Champ']
    .sort('predicted_sentiment', ascending=False)
)

# First row = the review the model scores as most positive.
baby_reviews[0]

{'amazing': 0.0,
 'awesome': 0.0,
 'awful': 0,
 'bad': 0.0,
 'fantastic': 0.0,
 'great': 1.0,
 'hate': 0,
 'horrible': 0,
 'love': 3.0,
 'name': 'Baby Trend Diaper Champ',
 'predicted_sentiment': 0.9981253623335121,
 'rating': 4.0,
 'review': 'I LOVE LOVE LOVE this product! It is SO much easier to use than the Diaper Genie, (you need a PHD in poopy to figure out how to use the darn thing!) and it even takes the same bags as my kitchen trash can, shich is super convenient, and cost efficient as I can buy them in bulk.The only reason for not rating it a 5 star was that I did have one small problem with it. The foam gasket in the barrell which keeps the poopy smell inside the unit ripped somehow, and it got VERY stinky. HOWEVER, I contacted the manufacturer though their website, and received an email back the same day stating that this was unusual, and that replacement gaskets were on their way to me. They arrived inside of a week and after replacing, it works great again! (They even sent