# Analyze Product Sentiment

In [None]:
import turicreate as tc

# Read product review data

In [None]:
# Load the product review data from the on-disk SFrame directory.
products = tc.SFrame('data/amazon_baby.sframe')

# Explore data

In [None]:
# Preview the loaded data (the notebook displays a cell's final bare expression).
products

In [None]:
# Count the reviews per product name, then list the most-reviewed products first.
products.groupby(
    'name',
    operations={'count': tc.aggregate.COUNT()},
).sort('count', ascending=False)

# Examine the reviews for the most-reviewed product

In [None]:
# Keep only the rows whose product name matches the giraffe teether exactly.
giraffe_reviews = products[
    products['name'] == 'Vulli Sophie the Giraffe Teether'
]

In [None]:
# Preview the reviews selected for this single product.
giraffe_reviews

In [None]:
# Number of reviews selected for this product.
len(giraffe_reviews)

In [None]:
# Visualize the 'rating' column for this product's reviews.
giraffe_reviews['rating'].show()

# Building a sentiment classifier

## Build word count vectors

In [None]:
# Add a 'word_count' column derived from each review's text.
# NOTE(review): presumably one word -> occurrence-count mapping per review;
# confirm against the turicreate count_words documentation.
products['word_count'] = tc.text_analytics.count_words(products['review'])

In [None]:
# Preview the data again; the 'word_count' column added above should now appear.
products

# Define what is positive and negative sentiment

In [None]:
# Visualize the 'rating' column across all products before defining sentiment labels.
products['rating'].show()

In [None]:
# Ignore all 3-star reviews: keep only the rows whose rating is not 3.
products = products[products['rating'] != 3]

In [None]:
# Positive sentiment = 4-star or 5-star reviews: flag a review as positive
# when its rating is at least 4.
products['sentiment'] = products['rating'] >= 4

In [None]:
# Preview the data; the 'sentiment' column added above should now appear.
products

In [None]:
# Visualize the 'sentiment' labels to see how the two classes are balanced.
products['sentiment'].show()

# Train our sentiment classifier

In [None]:
# Randomly split the labeled reviews with a 0.8 fraction; the fixed seed makes
# the split reproducible.
train_data, test_data = products.random_split(
    0.8,
    seed=0,
)

In [None]:
# Fit a logistic classifier that predicts 'sentiment' from the 'word_count'
# feature only.
# NOTE(review): the validation set is the same data that is evaluated later;
# confirm it is used only for progress monitoring and not for model selection.
sentiment_model = tc.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=['word_count'],
    validation_set=test_data,
)

# Evaluate the sentiment model

In [None]:
# Evaluate the trained model on the held-out split; the result is later
# indexed by 'roc_curve' and 'auc' to draw the ROC plot.
ev = sentiment_model.evaluate(test_data)

In [None]:
import matplotlib.pyplot as plt

# Draw the classifier's ROC curve from the evaluation results, with the AUC
# reported in the legend.
roc_points = ev['roc_curve']
auc_label = 'ROC Curve class (area = %0.2f)' % ev['auc']

plt.plot(roc_points['fpr'], roc_points['tpr'],
         color='cornflowerblue', label=auc_label)
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot((0, 1), (0, 1), color='navy', linestyle='--')

plt.title('ROC curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.xlim((0.0, 1.0))
plt.ylim((0.0, 1.05))  # leave a little headroom above 1.0
plt.legend(loc='lower right')
plt.show()

# Apply the sentiment classifier to better understand the Giraffe reviews

In [None]:
# Score every review with the trained model and store the result as a new column.
# NOTE(review): with output_type='probability' this is presumably the
# probability of the positive class; confirm against the turicreate docs.
products['predicted_sentiment'] = sentiment_model.predict(
    products,
    output_type='probability',
)

In [None]:
# Preview the data; the 'predicted_sentiment' column added above should now appear.
products

In [None]:
# Re-select the giraffe reviews so the subset carries the columns added to
# `products` since the first selection (including 'predicted_sentiment') and
# reflects the removal of 3-star reviews.
giraffe_reviews = products[
    products['name'] == 'Vulli Sophie the Giraffe Teether'
]

In [None]:
# Preview the giraffe reviews together with their predicted sentiment.
giraffe_reviews

# Sort the Giraffe reviews according to predicted sentiment

In [None]:
# Order the reviews from highest to lowest predicted sentiment.
giraffe_reviews = giraffe_reviews.sort(
    'predicted_sentiment',
    ascending=False,
)

In [None]:
# Preview the sorted reviews; the highest predicted sentiment comes first.
giraffe_reviews

In [None]:
# Show the last rows; after the descending sort these have the lowest
# predicted sentiment.
giraffe_reviews.tail()

## Show the most positive reviews

In [None]:
# Text of the first row, i.e. the review with the highest predicted sentiment.
giraffe_reviews[0]['review']

In [None]:
# Text of the second row, i.e. the review with the second-highest predicted sentiment.
giraffe_reviews[1]['review']

# Most negative reviews

In [None]:
# Text of the last row, i.e. the review with the lowest predicted sentiment.
giraffe_reviews[-1]['review']

In [None]:
# Text of the second-to-last row, i.e. the review with the second-lowest predicted sentiment.
giraffe_reviews[-2]['review']