# Analyzing product sentiment

In [None]:
import graphlab

# Read some product review data

In [None]:
# Load the product-review dataset stored at 'amazon_baby.gl' into an SFrame.
# NOTE(review): the path is relative, so it is presumably resolved against the
# notebook's working directory -- confirm the data lives there.
products = graphlab.SFrame('amazon_baby.gl')

# Let's explore this data together

In [None]:
# Preview the first rows of the freshly loaded frame.
products.head()

# Build word count vector for each review

In [None]:
# Add a 'word_count' column derived from the 'review' text column.
# Later cells index each entry by word, so each entry is presumably a
# word -> count mapping.
products['word_count'] = graphlab.text_analytics.count_words(products['review'])

In [None]:
# Preview again; the frame now carries the 'word_count' column.
products.head()

In [None]:
# Point GraphLab Canvas at the 'ipynb' target.
# NOTE(review): presumably this makes later .show() calls render inline in the
# notebook -- confirm against the GraphLab Canvas docs.
graphlab.canvas.set_target('ipynb')

In [None]:
# Visualise the 'name' (product name) column.
products['name'].show()

# Explore reviews of the Vulli Sophie the Giraffe Teether

In [None]:
# Keep only the rows whose 'name' is exactly this product.
# This runs before the rating filter below, so reviews of every rating are kept.
giraffe_reviews = products[products['name'] == 'Vulli Sophie the Giraffe Teether']

In [None]:
# Number of reviews for the selected product.
len(giraffe_reviews)

In [None]:
# Show the product's 'rating' column using the 'Categorical' view.
giraffe_reviews['rating'].show(view='Categorical')

# Build a sentiment classifier

In [None]:
# Show the 'rating' column across all products using the 'Categorical' view.
products['rating'].show(view='Categorical')

# Define what counts as positive and negative sentiment

In [None]:
# Drop every review whose rating is exactly 3 and rebind `products`.
# NOTE(review): presumably 3 is treated as neutral and excluded so the label
# defined next is unambiguous -- confirm.
products = products[products['rating'] != 3]

In [None]:
# Label column: truthy when the rating is 4 or higher, falsy otherwise.
# Rows rated exactly 3 were removed above, so no row sits on that boundary.
products['sentiment'] = products['rating'] >= 4

In [None]:
# Preview the frame, which now includes the 'sentiment' label column.
products.head()

# Let's train the sentiment classifier

In [None]:
# Randomly split the rows into two frames, passing .8 as the split fraction
# (the first frame is the training set). seed=0 fixes the random split.
train_data, test_data = products.random_split(.8, seed=0)

In [None]:
# Fit a logistic classifier that predicts 'sentiment' from the 'word_count'
# column alone.
# NOTE(review): test_data is also passed as the validation set, so the
# validation figures reported during training come from the same rows that are
# later used for evaluation.
sentiment_model = graphlab.logistic_classifier.create(
    train_data,
    features=['word_count'],
    target='sentiment',
    validation_set=test_data,
)

# Evaluate the sentiment model

In [None]:
# Evaluate the word-count model on the held-out rows, requesting only the
# 'roc_curve' metric.
sentiment_model.evaluate(test_data, metric='roc_curve')

In [None]:
# Open the model's 'Evaluation' view.
sentiment_model.show(view='Evaluation')

# Apply the model to understand sentiment for the giraffe teether reviews

In [None]:
# Score every giraffe review with the word-count model and store the result in
# a new 'predicted_sentiment' column. output_type='probability' asks for a
# probability rather than a class label.
giraffe_reviews['predicted_sentiment'] = sentiment_model.predict(giraffe_reviews, output_type='probability')

In [None]:
# Preview the giraffe reviews with the new 'predicted_sentiment' column.
giraffe_reviews.head()

# Sort the reviews based on predicted sentiment and explore

In [None]:
# Order the reviews from highest to lowest predicted sentiment.
# The sorted result is assigned back, so positional indexing below
# ([0], [-1], ...) refers to this order.
giraffe_reviews = giraffe_reviews.sort('predicted_sentiment', ascending=False)

In [None]:
# Preview the first rows of giraffe_reviews.
giraffe_reviews.head()

In [None]:
# Review text of the first row of giraffe_reviews.
giraffe_reviews[0]['review']

In [None]:
# Review text of the second row of giraffe_reviews.
giraffe_reviews[1]['review']

# Show most negative reviews

In [None]:
# Review text of the last row of giraffe_reviews.
giraffe_reviews[-1]['review']

In [None]:
# Review text of the second-to-last row of giraffe_reviews.
giraffe_reviews[-2]['review']

In [None]:
# Hand-picked vocabulary of sentiment-bearing words.
selected_words = ['awesome', 'great', 'fantastic', 'amazing', 'love', 'horrible', 'bad', 'terrible', 'awful', 'wow', 'hate']

# For each selected word:
#   * add a column named after the word holding its count in each review
#     (0 when the word does not occur in that review), and
#   * record the column total in total_score[word].
total_score = {}
for word in selected_words:
    # `word=word` binds the current word when the lambda is created, so the
    # function never depends on the loop variable's later value. `.get` with a
    # default replaces the membership test plus lookup.
    # Assumes each 'word_count' entry is a dict (word -> count).
    products[word] = products['word_count'].apply(
        lambda counts, word=word: counts.get(word, 0))
    total_score[word] = products[word].sum()

In [None]:
# Preview the frame, which now has one count column per selected word.
products.head()

In [None]:
# Print the per-word totals. The parenthesised form prints the same text under
# Python 2 and is also valid Python 3 syntax.
print(total_score)

In [None]:
# Split again so that train_data/test_data include the per-word count columns
# added to `products` above. Same fraction (.8) and seed (0) as the earlier
# split.
# NOTE(review): presumably this reproduces the same row partition -- confirm.
train_data,test_data = products.random_split(.8, seed=0)

In [None]:
# Fit a second logistic classifier for 'sentiment', this time using only the
# per-word count columns named in selected_words as features.
# NOTE(review): as with the first model, test_data doubles as the validation
# set.
selected_words_model = graphlab.logistic_classifier.create(
    train_data,
    features=selected_words,
    target='sentiment',
    validation_set=test_data,
)

In [None]:
# Show the fitted coefficients ordered from largest to smallest 'value'.
selected_words_model['coefficients'].sort('value', ascending=False)

In [None]:
# Evaluate the selected-words model on the held-out rows.
selected_words_model.evaluate(test_data)

In [None]:
# Evaluate the word-count model on the same held-out rows, for comparison with
# the selected-words model.
sentiment_model.evaluate(test_data)

In [None]:
# Keep only the rows whose 'name' is exactly this product.
# `products` has already had rating-3 rows removed at this point, so those
# reviews are not included here.
diaper_champ_reviews = products[products['name'] == 'Baby Trend Diaper Champ']

In [None]:
# Score each Diaper Champ review with the word-count model and store the
# probability output in a new 'predicted_sentiment' column.
diaper_champ_reviews['predicted_sentiment'] = sentiment_model.predict(diaper_champ_reviews, output_type='probability')

In [None]:
# Preview the Diaper Champ reviews with the new 'predicted_sentiment' column.
diaper_champ_reviews.head()

In [None]:
# Order the reviews from highest to lowest predicted sentiment.
# SFrame.sort returns a new frame instead of sorting in place, so the result
# must be assigned back; otherwise diaper_champ_reviews[0] is not the
# top-scoring review.
diaper_champ_reviews = diaper_champ_reviews.sort('predicted_sentiment', ascending=False)
# Keep the sorted frame as the cell's displayed output.
diaper_champ_reviews

In [None]:
# 'predicted_sentiment' of row 0 of diaper_champ_reviews, in whatever order the
# frame currently has.
diaper_champ_reviews[0]['predicted_sentiment']

In [None]:
# Score the same reviews with the selected-words model and store the
# probability output in its own column, alongside 'predicted_sentiment'.
diaper_champ_reviews['predicted_sentiment_selected_words'] = selected_words_model.predict(diaper_champ_reviews, output_type='probability')

In [None]:
# Order the reviews from highest to lowest selected-words score.
# SFrame.sort returns a new frame instead of sorting in place, so the result
# is assigned back to make diaper_champ_reviews[0] the top-scoring row.
diaper_champ_reviews = diaper_champ_reviews.sort('predicted_sentiment_selected_words', ascending=False)
# Keep the sorted frame as the cell's displayed output.
diaper_champ_reviews


In [None]:
# Selected-words score of row 0 of diaper_champ_reviews, in whatever order the
# frame currently has.
diaper_champ_reviews[0]['predicted_sentiment_selected_words']

In [None]:
# Full contents of row 0 (all columns), to inspect the review behind the score.
diaper_champ_reviews[0]

In [None]:
# Print the per-word totals again for reference. The parenthesised form prints
# the same text under Python 2 and is also valid Python 3 syntax.
print(total_score)