# Analyze Product Sentiment

In [None]:
import turicreate

# Read product review data

In [None]:
# Load the product review dataset from the SFrame stored next to this notebook.
products = turicreate.SFrame('./amazon_baby.sframe')

# Explore data

In [None]:
products

In [None]:
# Count the reviews per product name and list the most-reviewed products first.
products.groupby(
    'name',
    operations={'count': turicreate.aggregate.COUNT()},
).sort('count', ascending=False)

# Examine the reviews for the most-reviewed product

In [None]:
giraffe_reviews = products[products['name']=='Vulli Sophie the Giraffe Teether']

In [None]:
giraffe_reviews

In [None]:
len(giraffe_reviews)

In [None]:
giraffe_reviews['rating'].show()

# Building a sentiment classifier

## Build word count vectors

In [None]:
# Build a per-review word-count feature from the raw review text and store it
# in a new 'word_count' column.
products['word_count'] = turicreate.text_analytics.count_words(products['review'])

In [None]:
products

# Define what is positive and negative sentiment

In [None]:
products['rating'].show()

In [None]:
# Ignore all 3-star reviews: keep only rows whose rating is not exactly 3.
products = products[products['rating']!= 3]

In [None]:
# Positive sentiment = 4-star or 5-star reviews: the 'sentiment' column is the
# result of the comparison rating >= 4.
products['sentiment'] = products['rating'] >= 4

In [None]:
products

In [None]:
products['sentiment'].show()

# Train our sentiment classifier

In [None]:
# Randomly split the reviews into a training set and a test set (fraction .8
# for the first part); the fixed seed keeps the split reproducible.
train_data,test_data = products.random_split(.8,seed=0)

In [None]:
# Train a logistic classifier that predicts 'sentiment' from the full
# word-count feature, reporting validation metrics on test_data.
sentiment_model = turicreate.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=['word_count'],
    validation_set=test_data,
)

# Apply the sentiment classifier to better understand the Giraffe reviews

In [None]:
# Score every review with the word-count model and keep the predicted
# probability (rather than the class label) in 'predicted_sentiment'.
products['predicted_sentiment'] = sentiment_model.predict(products, output_type = 'probability')

In [None]:
products

In [None]:
giraffe_reviews = products[products['name']== 'Vulli Sophie the Giraffe Teether']

In [None]:
giraffe_reviews

# Sort the Giraffe reviews according to predicted sentiment

In [None]:
giraffe_reviews = giraffe_reviews.sort('predicted_sentiment', ascending=False)

In [None]:
giraffe_reviews

In [None]:
giraffe_reviews.tail()

## Show the most positive reviews

In [None]:
giraffe_reviews[0]['review']

In [None]:
giraffe_reviews[1]['review']

## Show the most negative reviews

In [None]:
giraffe_reviews[-1]['review']

In [None]:
giraffe_reviews[-2]['review']

In [None]:
diaper_champ_reviews = products[products['name'] == 'Baby Trend Diaper Champ']

In [None]:
# Order the Diaper Champ reviews in descending order of the 'review' column.
# NOTE(review): this sorts by the review text itself, not by a score column
# such as 'predicted_sentiment' -- confirm this is the intended sort key.
diaper_champ_reviews = diaper_champ_reviews.sort('review', ascending=False)

In [None]:
diaper_champ_reviews[0:1]['review']

# Test Assignment

## 1. Use .apply() to build a new feature with the counts for each of the selected_words

In [None]:
selected_words = ['awesome', 'great', 'fantastic', 'amazing', 'love', 'horrible', 'bad', 'terrible', 'awful', 'wow', 'hate']

In [None]:
def wordCount(word, words_count):
    """Return how many times *word* occurs according to *words_count*.

    Args:
        word: The word to look up.
        words_count: A mapping from word to its number of occurrences.

    Returns:
        The count stored for *word*, or 0 when the word is not present.
    """
    # A single dict.get lookup replaces the membership test plus indexing.
    return words_count.get(word, 0)

In [None]:
# Add one column per selected word holding that word's count in each review.
for selected_word in selected_words:
    # Bind the current word as a default argument so the lambda does not depend
    # on the loop variable's value at the time it is eventually called
    # (late-binding closure pitfall).
    products[selected_word] = products['word_count'].apply(
        lambda word_count, word=selected_word: wordCount(word, word_count)
    )

In [None]:
# Print the total number of occurrences of each selected word over all reviews.
for selected_word in selected_words:
    occurrences = products[selected_word].sum()
    print(selected_word, ': ', occurrences)

## 2. Create a new sentiment analysis model using only the selected_words as features

In [None]:
train_data, test_data = products.random_split(.8, seed=0)

In [None]:
# Train a second logistic classifier that only sees the per-word count columns
# named in selected_words, reporting validation metrics on test_data.
new_sentiment_model = turicreate.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=selected_words,
    validation_set=test_data,
)

In [None]:
# List every learned coefficient, ordered from the smallest value to the largest.
coefficient_values = new_sentiment_model.coefficients.sort('value')
coefficient_values.print_rows(num_rows=coefficient_values.num_rows())

## 3. Comparing the accuracy of the different sentiment analysis models

In [None]:
new_sentiment_model.evaluate(test_data)

In [None]:
sentiment_model.evaluate(test_data)

In [None]:
# Fraction of test reviews whose sentiment equals +1. This is the accuracy a
# classifier would get on test_data by always predicting the positive class.
print((test_data['sentiment'] == +1).sum() / len(test_data['sentiment']))

## 4. Interpreting the difference in performance between the models

In [None]:
diaper_champ_reviews = diaper_champ_reviews.sort('sentiment', ascending=False)
diaper_champ_reviews

In [None]:
# Rebuild the Diaper Champ subset from the current `products` frame before
# sorting: the earlier subset was filtered out of `products` before the
# selected-word count columns were added, so it lacks the feature columns that
# new_sentiment_model was trained on.
diaper_champ_reviews = products[products['name'] == 'Baby Trend Diaper Champ']
# Most positive reviews (according to the word-count model) come first.
diaper_champ_reviews = diaper_champ_reviews.sort('predicted_sentiment', ascending=False)
diaper_champ_reviews

In [None]:
# Score the first row of diaper_champ_reviews with the selected-words model and
# return its predicted probability.
new_sentiment_model.predict(diaper_champ_reviews[0], output_type='probability')

In [None]:
products['predicted_sentiment_v2'] = new_sentiment_model.predict(products, output_type = 'probability')

In [None]:
products.sort('predicted_sentiment_v2', ascending=False)