# Predicting sentiment from product reviews

# Fire up GraphLab Create
(See [Getting Started with SFrames](/notebooks/Week%201/Getting%20Started%20with%20SFrames.ipynb) for setup instructions)

In [21]:
import graphlab

In [22]:
# Limit number of worker processes. This preserves system memory, which prevents hosted notebooks from crashing.
graphlab.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 4)

# Read some product review data

Loading reviews for a set of baby products. 

In [23]:
# Load the product-review SFrame stored at the relative path 'amazon_baby.gl/'.
products = graphlab.SFrame('amazon_baby.gl/')
# len(products)

183531

# Let's explore this data together

Data includes the product name, the review text and the rating of the review. 

In [24]:
products.head()
# products.tail()

name,review,rating
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0


# Build the word count vector for each review

In [25]:
# Add a 'word_count' column: for each review, a dict mapping every token of the
# review text to the number of times it occurs.
# NOTE(review): the displayed dicts contain keys such as 'journal.' and
# 'parents!!', so punctuation appears to stay attached to tokens -- e.g.
# 'great!' would presumably not be counted under 'great'. Confirm if exact
# word matching matters.
products['word_count'] = graphlab.text_analytics.count_words(products['review'])

In [26]:
products.head()


name,review,rating,word_count
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'and': 5, '6': 1, 'stink': 1, 'because' ..."
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ..."
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ..."
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'ingenious': 1, 'and': 3, 'love': 2, ..."
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ..."
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ..."
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ..."
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ..."


In [8]:
# Hand-picked words: each one gets its own per-review count column and is used
# as an input feature of the selected-words classifier.
selected_words = [
    'awesome',
    'great',
    'fantastic',
    'amazing',
    'love',
    'horrible',
    'bad',
    'terrible',
    'awful',
    'wow',
    'hate',
]

In [54]:
def awesome_count(word_count):
    """Return how many times 'awesome' occurs in one review.

    word_count -- dict mapping each word of the review to its count.
    Returns the stored count, or 0 when the word is not a key.
    """
    return word_count.get('awesome', 0)

def great_count(word_count):
    """Return how many times 'great' occurs in one review.

    word_count -- dict mapping each word of the review to its count.
    Returns the stored count, or 0 when the word is not a key.
    """
    return word_count.get('great', 0)
    
def fantastic_count(word_count):
    """Return how many times 'fantastic' occurs in one review.

    word_count -- dict mapping each word of the review to its count.
    Returns the stored count, or 0 when the word is not a key.
    """
    return word_count.get('fantastic', 0)
    
def amazing_count(word_count):
    """Return how many times 'amazing' occurs in one review.

    word_count -- dict mapping each word of the review to its count.
    Returns the stored count, or 0 when the word is not a key.
    """
    return word_count.get('amazing', 0)
    
def love_count(word_count):
    """Return how many times 'love' occurs in one review.

    word_count -- dict mapping each word of the review to its count.
    Returns the stored count, or 0 when the word is not a key.
    """
    return word_count.get('love', 0)
    
def horrible_count(word_count):
    """Return how many times 'horrible' occurs in one review.

    word_count -- dict mapping each word of the review to its count.
    Returns the stored count, or 0 when the word is not a key.
    """
    return word_count.get('horrible', 0)
    
def bad_count(word_count):
    """Return how many times 'bad' occurs in one review.

    word_count -- dict mapping each word of the review to its count.
    Returns the stored count, or 0 when the word is not a key.
    """
    return word_count.get('bad', 0)
    
def terrible_count(word_count):
    """Return how many times 'terrible' occurs in one review.

    word_count -- dict mapping each word of the review to its count.
    Returns the stored count, or 0 when the word is not a key.
    """
    return word_count.get('terrible', 0)
    
def awful_count(word_count):
    """Return how many times 'awful' occurs in one review.

    word_count -- dict mapping each word of the review to its count.
    Returns the stored count, or 0 when the word is not a key.
    """
    return word_count.get('awful', 0)
    
def wow_count(word_count):
    """Return how many times 'wow' occurs in one review.

    word_count -- dict mapping each word of the review to its count.
    Returns the stored count, or 0 when the word is not a key.
    """
    return word_count.get('wow', 0)
    
def hate_count(word_count):
    """Return how many times 'hate' occurs in one review.

    word_count -- dict mapping each word of the review to its count.
    Returns the stored count, or 0 when the word is not a key.
    """
    return word_count.get('hate', 0)
    
def the_count(word_count):
    """Return how many times 'the' occurs in one review.

    word_count -- dict mapping each word of the review to its count.
    Returns the stored count, or 0 when the word is not a key.
    """
    return word_count.get('the', 0)

In [60]:

# Add one integer column per tracked word, named after the word itself, holding
# that word's count in each review (0 when the word is absent).  'the' is added
# on top of selected_words because it is the only feature of the baseline model.
# A single loop replaces twelve copy-pasted assignments, so changing
# selected_words automatically changes the columns; order is unchanged.
for word in selected_words + ['the']:
    # Bind `word` as a default argument so each lambda keeps its own word
    # instead of whatever the loop variable holds when the lambda is evaluated.
    products[word] = products['word_count'].apply(
        lambda counts, word=word: counts.get(word, 0))

In [29]:
# Point GraphLab Canvas (used by the .show() calls in this notebook) at the
# 'ipynb' target.
graphlab.canvas.set_target('ipynb')

In [61]:
products.head()

name,review,rating,word_count,awesome,great,fantastic
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ...",0,0,0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ...",0,0,0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'ingenious': 1, 'and': 3, 'love': 2, ...",0,0,0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ...",0,1,0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ...",0,1,0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",I love this journal and our nanny uses it ...,4.0,"{'all': 1, 'forget': 1, 'just': 1, 'food': 1, ...",0,0,0

amazing,love,horrible,bad,terrible,awful,wow,hate,sentiment,the
0,1,0,0,0,0,0,0,1,0
0,0,0,0,0,0,0,0,1,1
0,2,0,0,0,0,0,0,1,7
0,0,0,0,0,0,0,0,1,0
0,0,0,0,0,0,0,0,1,6
0,0,0,0,0,0,0,0,1,2
0,0,0,0,0,0,0,0,1,2
0,0,0,0,0,0,0,0,1,1
0,0,0,0,0,0,0,0,1,1
0,2,0,0,0,0,0,0,1,9


In [32]:
products.head()

name,review,rating,word_count,awesome,great,fantastic
Planetwise Flannel Wipes,"These flannel wipes are OK, but in my opinion ...",3.0,"{'and': 5, '6': 1, 'stink': 1, 'because' ...",0,0,0
Planetwise Wipe Pouch,it came early and was not disappointed. i love ...,5.0,"{'and': 3, 'love': 1, 'it': 2, 'highly': 1, ...",0,0,0
Annas Dream Full Quilt with 2 Shams ...,Very soft and comfortable and warmer than it ...,5.0,"{'and': 2, 'quilt': 1, 'it': 1, 'comfortable': ...",0,0,0
Stop Pacifier Sucking without tears with ...,This is a product well worth the purchase. I ...,5.0,"{'ingenious': 1, 'and': 3, 'love': 2, ...",0,0,0
Stop Pacifier Sucking without tears with ...,All of my kids have cried non-stop when I tried to ...,5.0,"{'and': 2, 'parents!!': 1, 'all': 2, 'puppet.': ...",0,1,0
Stop Pacifier Sucking without tears with ...,"When the Binky Fairy came to our house, we didn't ...",5.0,"{'and': 2, 'this': 2, 'her': 1, 'help': 2, ...",0,1,0
A Tale of Baby's Days with Peter Rabbit ...,"Lovely book, it's bound tightly so you may no ...",4.0,"{'shop': 1, 'noble': 1, 'is': 1, 'it': 1, 'as': ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",Perfect for new parents. We were able to keep ...,5.0,"{'and': 2, 'all': 1, 'right': 1, 'when': 1, ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",A friend of mine pinned this product on Pinte ...,5.0,"{'and': 1, 'help': 1, 'give': 1, 'is': 1, ' ...",0,0,0
"Baby Tracker&reg; - Daily Childcare Journal, ...",This has been an easy way for my nanny to record ...,4.0,"{'journal.': 1, 'nanny': 1, 'standarad': 1, ...",0,0,0

amazing,love,horrible,bad,terrible,awful,wow,hate
0,0,0,0,0,0,0,0
0,1,0,0,0,0,0,0
0,0,0,0,0,0,0,0
0,2,0,0,0,0,0,0
0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0
0,0,0,0,0,0,0,0


In [34]:
# Print, for every selected word, its total number of occurrences summed over
# all reviews (the sum of that word's count column).
for word in selected_words:
    print("%s : %d" % (word, products[word].sum()))

awesome : 2090
great : 45206
fantastic : 932
amazing : 1363
love : 42065
horrible : 734
bad : 3724
terrible : 748
awful : 383
wow : 144
hate : 1220


# Examining the reviews for the most-sold product: 'Vulli Sophie the Giraffe Teether'

In [72]:
# Boolean-mask filtering: comparing the 'name' column with a string gives one
# true/false value per row, and indexing the SFrame with that mask keeps only
# the rows where it is true.
giraffe_reviews = products[products['name'] == 'Vulli Sophie the Giraffe Teether']
# Same filter for a second product, 'Baby Trend Diaper Champ'.
diaper_champ_reviews = products[products['name'] == 'Baby Trend Diaper Champ']

In [59]:
len(giraffe_reviews)

785

In [73]:
diaper_champ_reviews['rating'].show(view='Categorical')

# Build a sentiment classifier

In [74]:
products['rating'].show(view='Categorical')

## Define what's a positive and a negative sentiment

We will ignore all reviews with a rating of 3, since they tend to have a neutral sentiment. Reviews with a rating of 4 or higher will be considered positive, while those with a rating of 2 or lower will be considered negative.

In [75]:
# Ignore all 3* reviews: keep only the rows whose rating is not exactly 3.
# This rebinds `products`, so any cell run after this one sees the filtered data.
products = products[products['rating'] != 3]

In [76]:
# Positive sentiment = 4* or 5* reviews.
# The comparison is evaluated per row and stored as the new binary 'sentiment'
# column: 1 where rating >= 4, otherwise 0. This column is the target the
# classifiers are trained to predict.
products['sentiment'] = products['rating'] >=4

dtype: int
Rows: 166752
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... ]

## Let's train the sentiment classifier

In [67]:
# Randomly split the rows into two SFrames, with roughly 80% going to
# train_data and the rest to test_data; the fixed seed keeps the split
# repeatable between runs.
train_data,test_data = products.random_split(.8, seed=0)

In [68]:
# Train three logistic classifiers that all predict the 'sentiment' column from
# train_data and differ only in their input features.
# NOTE(review): test_data is passed as validation_set, so the validation
# metrics reported during training are computed on the same rows that are used
# for the final evaluation.

# 1) Bag-of-words model: uses the full per-review word-count dict.
sentiment_model = graphlab.logistic_classifier.create(train_data,
                                                     target='sentiment',
                                                     features=['word_count'],
                                                     validation_set=test_data)
# 2) Selected-words model: uses only the count columns named in selected_words.
selected_words_model = graphlab.logistic_classifier.create(train_data,
                                                     target='sentiment',
                                                     features=selected_words,
                                                     validation_set=test_data)
# 3) Baseline model: uses only the count of the word 'the'.
baseline_model = graphlab.logistic_classifier.create(train_data,
                                                     target='sentiment',
                                                     features=['the'],
                                                     validation_set=test_data)

In [45]:
selected_words_model['coefficients']

name,index,class,value,stderr
(intercept),,1,1.36728315229,0.00861805467824
awesome,,1,1.05800888878,0.110865296265
great,,1,0.883937894898,0.0217379527921
fantastic,,1,0.891303090304,0.154532343591
amazing,,1,0.892802422508,0.127989503231
love,,1,1.39989834302,0.0287147460124
horrible,,1,-1.99651800559,0.0973584169028
bad,,1,-0.985827369929,0.0433603009142
terrible,,1,-2.09049998487,0.0967241912229
awful,,1,-1.76469955631,0.134679803365


# Evaluate the sentiment model

In [69]:
# Evaluate each of the three classifiers on test_data and print the resulting
# metrics, in the order: bag-of-words, selected-words, baseline.
print(sentiment_model.evaluate(test_data))
print(selected_words_model.evaluate(test_data))
print(baseline_model.evaluate(test_data))

{'f1_score': 0.9500349343413533, 'auc': 0.9446492867438502, 'recall': 0.9477766657134686, 'precision': 0.9523039902309378, 'log_loss': 0.26106698432422715, 'roc_curve': Columns:
	threshold	float
	fpr	float
	tpr	float
	p	int
	n	int

Rows: 100001

Data:
+-----------+----------------+----------------+-------+------+
| threshold |      fpr       |      tpr       |   p   |  n   |
+-----------+----------------+----------------+-------+------+
|    0.0    |      1.0       |      1.0       | 27976 | 5328 |
|   1e-05   | 0.909346846847 | 0.998856162425 | 27976 | 5328 |
|   2e-05   | 0.896021021021 | 0.998748927652 | 27976 | 5328 |
|   3e-05   | 0.886448948949 | 0.998462968259 | 27976 | 5328 |
|   4e-05   | 0.879692192192 | 0.998284243637 | 27976 | 5328 |
|   5e-05   | 0.875187687688 | 0.998212753789 | 27976 | 5328 |
|   6e-05   | 0.872184684685 | 0.998177008865 | 27976 | 5328 |
|   7e-05   | 0.868618618619 | 0.998034029168 | 27976 | 5328 |
|   8e-05   | 0.864677177177 | 0.997998284244 | 27976 |

In [46]:
sentiment_model.show(view='Evaluation')

# Applying the learned model to understand sentiment for Giraffe

In [77]:
# Score every review of both products with the bag-of-words model and store the
# result in a new 'predicted_sentiment' column.  output_type='probability'
# yields a float score per review rather than a class label.
giraffe_reviews['predicted_sentiment'] = sentiment_model.predict(
    giraffe_reviews, output_type='probability')
diaper_champ_reviews['predicted_sentiment'] = sentiment_model.predict(
    diaper_champ_reviews, output_type='probability')

In [73]:
giraffe_reviews.head()

name,review,rating,word_count,predicted_sentiment
Vulli Sophie the Giraffe Teether ...,He likes chewing on all the parts especially the ...,5.0,"{'and': 1, 'all': 1, 'because': 1, 'it': 1, ...",0.999513023521
Vulli Sophie the Giraffe Teether ...,My son loves this toy and fits great in the diaper ...,5.0,"{'and': 1, 'right': 1, 'help': 1, 'just': 1, ...",0.999320678306
Vulli Sophie the Giraffe Teether ...,There really should be a large warning on the ...,1.0,"{'and': 2, 'all': 1, 'would': 1, 'latex.': 1, ...",0.013558811687
Vulli Sophie the Giraffe Teether ...,All the moms in my moms' group got Sophie for ...,5.0,"{'and': 2, 'one!': 1, 'all': 1, 'love': 1, ...",0.995769474148
Vulli Sophie the Giraffe Teether ...,I was a little skeptical on whether Sophie was ...,5.0,"{'and': 3, 'all': 1, 'months': 1, 'old': 1, ...",0.662374415673
Vulli Sophie the Giraffe Teether ...,I have been reading about Sophie and was going ...,5.0,"{'and': 6, 'seven': 1, 'already': 1, 'love': 1, ...",0.999997148186
Vulli Sophie the Giraffe Teether ...,My neice loves her sophie and has spent hours ...,5.0,"{'and': 4, 'drooling,': 1, 'love': 1, ...",0.989190989536
Vulli Sophie the Giraffe Teether ...,What a friendly face! And those mesmerizing ...,5.0,"{'and': 3, 'chew': 1, 'be': 1, 'is': 1, ...",0.999563518413
Vulli Sophie the Giraffe Teether ...,We got this just for my son to chew on instea ...,5.0,"{'chew': 2, 'seemed': 1, 'because': 1, 'about.': ...",0.970160542725
Vulli Sophie the Giraffe Teether ...,"My baby seems to like this toy, but I could ...",3.0,"{'and': 2, 'already': 1, 'some': 1, 'it': 3, ...",0.195367644588


## Sort the reviews based on the predicted sentiment and explore

In [78]:
# Re-order each product's reviews by 'predicted_sentiment' in descending order,
# so the first row has the highest predicted score and the last row the lowest.
giraffe_reviews = giraffe_reviews.sort('predicted_sentiment', ascending=False)
diaper_champ_reviews = diaper_champ_reviews.sort('predicted_sentiment', ascending=False)

In [79]:
diaper_champ_reviews.head()

name,review,rating,word_count,awesome,great,fantastic
Baby Trend Diaper Champ,Baby Luke can turn a clean diaper to a dirty ...,5.0,"{'all': 1, 'less': 1, ""friend's"": 1, '(which': ...",0,0,0
Baby Trend Diaper Champ,I LOOOVE this diaper pail! Its the easies ...,5.0,"{'just': 1, 'over': 1, 'rweek': 1, 'sooo': 1, ...",0,0,0
Baby Trend Diaper Champ,We researched all of the different types of di ...,4.0,"{'all': 2, 'just': 4, ""don't"": 2, 'one,': 1, ...",0,0,0
Baby Trend Diaper Champ,My baby is now 8 months and the can has been ...,5.0,"{""don't"": 1, 'able': 2, 'over': 1, 'soon': 1, ...",0,2,0
Baby Trend Diaper Champ,"This is absolutely, by far, the best diaper ...",5.0,"{'just': 3, 'money': 1, 'still': 3, 'fine': 1, ...",0,0,0
Baby Trend Diaper Champ,Diaper Champ or Diaper Genie? That was my ...,5.0,"{'son': 2, 'all': 1, 'bags.': 1, 'son,': 1, ...",0,0,0
Baby Trend Diaper Champ,Wow! This is fabulous. It was a toss-up between ...,5.0,"{'and': 4, 'this': 3, 'stink': 1, 'garbage' ...",0,0,0
Baby Trend Diaper Champ,I originally put this item on my baby registry ...,5.0,"{'lysol': 1, 'all': 2, 'bags.': 1, 'feedback': ...",0,0,0
Baby Trend Diaper Champ,Two girlfriends and two family members put me ...,5.0,"{'just': 1, '-': 3, 'both': 1, 'results': 1, ...",0,0,0
Baby Trend Diaper Champ,I am one of those super- critical shoppers who ...,5.0,"{'all': 1, 'humid': 1, 'just': 1, 'less': 1, ...",0,0,0

amazing,love,horrible,bad,terrible,awful,wow,hate,sentiment,the,predicted_sentiment
0,0,0,0,0,0,0,0,1,17,0.999999937267
0,1,0,0,0,0,0,0,1,14,0.999999917406
0,0,0,1,0,0,0,0,1,13,0.999999899509
0,0,0,1,0,0,0,0,1,10,0.999999836182
0,2,0,0,0,0,0,0,1,14,0.999999824745
0,0,0,0,0,0,0,0,1,7,0.999999759315
0,0,0,0,0,0,0,0,1,9,0.999999692111
0,0,0,0,0,0,0,0,1,20,0.999999642488
0,0,1,0,0,0,0,0,1,21,0.999999604504
0,1,0,0,0,0,0,0,1,25,0.999999486804


## Most positive review for the Baby Trend Diaper Champ

In [81]:
selected_words_model.predict(diaper_champ_reviews[0:1], output_type = 'probability')

dtype: float
Rows: 1
[0.796940851290673]

In [83]:
diaper_champ_reviews[0:1]['review']

dtype: str
Rows: 1
['Baby Luke can turn a clean diaper to a dirty diaper in 3 seconds flat. The diaper champ turns the smelly diaper into "what diaper smell" in less time than that. I hesitated and wondered what I REALLY needed for the nursery. This is one of the best purchases we made. The champ, the baby bjorn, fluerville diaper bag, and graco pack and play bassinet all vie for the best baby purchase.Great product, easy to use, economical, effective, absolutly fabulous.UpdateI knew that I loved the champ, and useing the diaper genie at a friend's house REALLY reinforced that!! There is no comparison, the chanp is easy and smell free, the genie was difficult to use one handed (which is absolutly vital if you have a little one on a changing pad) and there was a deffinite odor eminating from the genieplus we found that the quick tie garbage bags where the ties are integrated into the bag work really well because there isn't any added bulk around the sealing edge of the champ.']

## Show most negative reviews for giraffe

In [84]:
diaper_champ_reviews[0:1]

name,review,rating,word_count,awesome,great,fantastic
Baby Trend Diaper Champ,Baby Luke can turn a clean diaper to a dirty ...,5.0,"{'all': 1, 'less': 1, ""friend's"": 1, '(which': ...",0,0,0

amazing,love,horrible,bad,terrible,awful,wow,hate,sentiment,the,predicted_sentiment
0,0,0,0,0,0,0,0,1,17,0.999999937267


In [50]:
giraffe_reviews[-2]['review']

"This children's toy is nostalgic and very cute. However, there is a distinct rubber smell and a very odd taste, yes I tried it, that my baby did not enjoy. Also, if it is soiled it is extremely difficult to clean as the rubber is a kind of porus material and does not clean well. The final thing is the squeaking device inside which stopped working after the first couple of days. I returned this item feeling I had overpaid for a toy that was defective and did not meet my expectations. Please do not be swayed by the cute packaging and hype surounding it as I was. One more thing, I was given a full refund from Amazon without any problem."