### Sentiment analysis of reviews
Run vader package to identify sentiment of review sentences. Categorize these based on matching NMF topics for skincare concerns

In [1]:
from pymongo import MongoClient
from pprint import pprint

import numpy as np
import pandas as pd
import re
import nltk

import pickle
import json

In [2]:
from bson.objectid import ObjectId

In [3]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.feature_extraction.text import CountVectorizer 
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import NMF
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
from nltk.tokenize import sent_tokenize

In [5]:
# stemming
from nltk.stem.lancaster import LancasterStemmer
from nltk.stem.porter import PorterStemmer
from nltk.stem.snowball import SnowballStemmer
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /Users/Jocelyn/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [6]:
# cursor = db.skin_care_face_products.find()
# products = list(cursor)
# products[0]

In [7]:
# cursor = db.skin_care_face_prod_rev.find()
# products = list(cursor)
# products[0]

In [8]:
# import product list
with open('pickles/product_list.pickle', 'rb') as to_read:
    product_list = pickle.load(to_read)

with open('pickles/review_list.pickle', 'rb') as to_read:
    review_list = pickle.load(to_read)
    
with open('pickles/all_review_list.pickle', 'rb') as to_read:
    all_review_list = pickle.load(to_read)
# product_list.head(5)

with open('pickles/topic_words_dict.pickle', 'rb') as to_read:
    topic_words_dict = pickle.load(to_read)

In [9]:
# test with subset
# all_review_list = all_review_list.iloc[0:5].copy()

### tokenize sentences of all product reviews

In [10]:
all_review_list.shape

(2241, 3)

In [11]:
all_review_list.head(5)

Unnamed: 0,asin,title,all_review_text
0,B0000Y3NO6,"DERMAdoctor Calm, Cool & Corrected anti-rednes...",I gave it a shot for a while until the small b...
1,B00012C5RS,"DERMAdoctor Picture Porefect Pore Minimizer, 1...","I haven't splurged on a full-size bottle yet, ..."
2,B0001EKTTC,"Glytone Rejuvenating Mask, 3 oz.",Great product . Gets the job done. Leaves your...
3,B0001EL5Q8,"PCA SKIN Rejuvenating Serum, 1 fl. oz.",I was very happy with the promptness with whic...
4,B0001EL5JA,PCA SKIN Protecting Hydrator Broad Spectrum S...,I bought the Hydrator with SPF after having a ...


In [12]:
#insert spaces in sentences that don't have spaces
all_review_list['all_review_text'] = [re.sub('\.(?![\s,.])','. ',x) for x in all_review_list['all_review_text']]
# tokenize
all_review_list['sentences'] = [sent_tokenize(x) for x in all_review_list['all_review_text']]

In [13]:
all_review_list.iloc[0]['sentences']

["I gave it a shot for a while until the small bottle was completely empty, and it just didn't seem to make a difference for me.",
 "The cream feels cool on the skin, which is nice, but overall I didn't notice a change.",
 'After trying all the drugstore rosacea products to moisturize my skin and calm the redness and itching down, I gave up hope that anything would work.',
 'I have moderate rosacea that sometimes becomes severely inflamed across my cheeks, nose, and above my nose.',
 'Some products would give me relief from symptoms for a week or so, and then stop working.',
 'I tried creams, natural and vitamin supplements, mineral creams, yoga, everything I remotely thought might help.',
 "This product is the only thing that has EVER worked, and it works quickly (drastically reduces redness within an hour usually- two hours at my worst inflammation), leaves no greasiness, and doesn't mess with my allergies.",
 "It is the most expensive beauty product I've ever bought, so it was risky

### assign sentiment composite score for each sentence

In [14]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [15]:
analyzer = SentimentIntensityAnalyzer()
# for sentence in all_review_list.iloc[0]['sentences']:
# #     vs = analyzer.polarity_scores(sentence)
#     print(vs['compound'])
#     print(analyzer.polarity_scores(sentence)['compound'])
#     print("{:-<65} {}".format(sentence, str(vs)))

In [16]:
def compound_score(x):
    score_list = []
    for sentence in x:
#         for sent in sentence:
        score_list.append(analyzer.polarity_scores(sentence)['compound'])
    return score_list

In [17]:
all_review_list['compound_scores'] = all_review_list['sentences'].apply(compound_score)
# all_review_list['compound_scores'] = [[starter.append(analyzer.polarity_scores(sentence)['compound']) for sentence in text] for text in all_review_list['all_review_text']]


### separate into pos and neg reviews

In [18]:
def split_text_pos_sent(x):
    pos_list=[]
    for j,score in enumerate(x['compound_scores']):
        if score > 0:
            pos_list.append(x['sentences'][j])
    return pos_list
    
def split_text_pos_score(x):
    pos_scores=[]
    for j,score in enumerate(x['compound_scores']):
        if score > 0:
            pos_scores.append(x['compound_scores'][j])
    return pos_scores

def split_text_neg_sent(x):
    neg_list=[]
    for j,score in enumerate(x['compound_scores']):
        if score <= 0:
            neg_list.append(x['sentences'][j])
    return neg_list

def split_text_neg_score(x):
    neg_scores=[]
    for j,score in enumerate(x['compound_scores']):
        if score <= 0:
            neg_scores.append(x['compound_scores'][j])
    return neg_scores

In [19]:
all_review_list['pos_sents'] = all_review_list[['compound_scores','sentences']].apply(split_text_pos_sent,axis=1)
all_review_list['pos_scores'] = all_review_list[['compound_scores','sentences']].apply(split_text_pos_score,axis=1)
all_review_list['neg_sents'] = all_review_list[['compound_scores','sentences']].apply(split_text_neg_sent,axis=1)
all_review_list['neg_scores'] = all_review_list[['compound_scores','sentences']].apply(split_text_neg_score,axis=1)

In [20]:
all_review_list

Unnamed: 0,asin,title,all_review_text,sentences,compound_scores,pos_sents,pos_scores,neg_sents,neg_scores
0,B0000Y3NO6,"DERMAdoctor Calm, Cool & Corrected anti-rednes...",I gave it a shot for a while until the small b...,[I gave it a shot for a while until the small ...,"[-0.2716, 0.3716, 0.6369, -0.6597, 0.1621, 0.7...","[The cream feels cool on the skin, which is ni...","[0.3716, 0.6369, 0.1621, 0.7506, 0.8242, 0.510...",[I gave it a shot for a while until the small ...,"[-0.2716, -0.6597, -0.6358, -0.2365, 0.0, 0.0,..."
1,B00012C5RS,"DERMAdoctor Picture Porefect Pore Minimizer, 1...","I haven't splurged on a full-size bottle yet, ...","[I haven't splurged on a full-size bottle yet,...","[0.3612, 0.0, 0.5994, 0.0, 0.0, 0.0, 0.4939, 0...","[I haven't splurged on a full-size bottle yet,...","[0.3612, 0.5994, 0.4939, 0.3384, 0.646, 0.3182...","[I received a sample of this, so the effects I...","[0.0, 0.0, 0.0, 0.0, 0.0, -0.5267, 0.0, -0.153..."
2,B0001EKTTC,"Glytone Rejuvenating Mask, 3 oz.",Great product . Gets the job done. Leaves your...,"[Great product ., Gets the job done., Leaves y...","[0.6249, 0.0, 0.4173, 0.6194, 0.8555, 0.5574, ...","[Great product ., Leaves your skin absolutely ...","[0.6249, 0.4173, 0.6194, 0.8555, 0.5574, 0.722...","[Gets the job done., This is a keeper!!!!!.., ...","[0.0, 0.0, -0.5267, 0.0, -0.296, 0.0, -0.34, -..."
3,B0001EL5Q8,"PCA SKIN Rejuvenating Serum, 1 fl. oz.",I was very happy with the promptness with whic...,[I was very happy with the promptness with whi...,"[0.8479, 0.0, -0.2481, 0.1511, 0.8934, 0.0, 0....",[I was very happy with the promptness with whi...,"[0.8479, 0.1511, 0.8934, 0.5859, 0.5719, 0.790...",[Time will tell as to whether or not a 70 year...,"[0.0, -0.2481, 0.0, -0.4019, 0.0, 0.0, -0.4019..."
4,B0001EL5JA,PCA SKIN Protecting Hydrator Broad Spectrum S...,I bought the Hydrator with SPF after having a ...,[I bought the Hydrator with SPF after having a...,"[0.6249, 0.0, 0.3612, 0.8114, 0.6696, 0.1027, ...",[I bought the Hydrator with SPF after having a...,"[0.6249, 0.3612, 0.8114, 0.6696, 0.1027, 0.440...",[I thought the Hydrator would be a similar tex...,"[0.0, 0.0, -0.2755, 0.0, -0.4003, 0.0, 0.0, -0..."
...,...,...,...,...,...,...,...,...,...
2236,B01HBS87ZS,COSMEDIX Simply Brilliant 24/7 Brightening Tre...,an esthetician recommended this product to me ...,[an esthetician recommended this product to me...,"[0.2023, 0.0, 0.0, 0.0, -0.6369, 0.8172, 0.0, ...",[an esthetician recommended this product to me...,"[0.2023, 0.8172, 0.5859, 0.2023, 0.4659, 0.6369]",[I apply a light layer to my entire face every...,"[0.0, 0.0, 0.0, -0.6369, 0.0, 0.0, -0.0516, 0...."
2237,B01HBS7WW2,"COSMEDIX Phytoharmony, Balancing Moisturizer, ...",I reordered the Cosmedix Emulsion and received...,[I reordered the Cosmedix Emulsion and receive...,"[0.0, 0.2732, 0.0, 0.0, -0.7351, 0.0, 0.5994, ...",[I decided to try this and it doesn't work nea...,"[0.2732, 0.5994, 0.4215, 0.7698]",[I reordered the Cosmedix Emulsion and receive...,"[0.0, 0.0, 0.0, -0.7351, 0.0, 0.0, 0.0, -0.439..."
2238,B01HEESSHG,L'Occitane 20-Piece Summer Treat,Everything in this pack is exceThe perfume see...,[Everything in this pack is exceThe perfume se...,"[0.7845, 0.2263, 0.4404, 0.7351, 0.128, 0.0, 0...",[Everything in this pack is exceThe perfume se...,"[0.7845, 0.2263, 0.4404, 0.7351, 0.128, 0.5673...",[If I had realized I probably wouldn't have bo...,"[0.0, -0.2023]"
2239,B01HBS7XP8,COSMEDIX Benefit Balance Antioxidant Infused T...,Great product! Removes layer of dirt that most...,"[Great product!, Removes layer of dirt that mo...","[0.6588, -0.34, 0.0, 0.0, 0.0, 0.0]",[Great product!],[0.6588],[Removes layer of dirt that most soaps do not ...,"[-0.34, 0.0, 0.0, 0.0, 0.0]"


### match sentences to topic words

topic 3: sensitive skin<br>
topic 4: acne control<br>
topic 5: oily skin<br>
topic 6: anti-aging<br>

In [21]:
topic_words_dict2 = {'topic_3':['sensit','irrit'],
                     'topic_4':['acn','breakout','blem','pimpl'],
                     'topic_5':['moist','greasy','oil'],
                     'topic_6':['line','wrinkl']}

In [24]:
# create new columns for sentences and scores that map to each topic

# for i,top in enumerate(topics):
for key,val in topic_words_dict2.items():
    def topic_map_sent(x):
        sent_list=[]
        for j,sent in enumerate(x['pos_sents']):
            if any(word in sent for word in val):
                sent_list.append(sent)
    #             score_list.append(all_review_list.iloc[0]['pos_scores'][i]) 
        return sent_list

    def topic_map_score(x):
        score_list=[]
        for i,sent in enumerate(x['pos_sents']):
            if any(word in sent for word in val):
    #             sent_list.append(sent)
                score_list.append(x['pos_scores'][i]) 
        return score_list
    
    all_review_list[key+'_sent'] = all_review_list.apply(topic_map_sent,axis=1)
    all_review_list[key+'_score1'] = all_review_list[['pos_sents','pos_scores']].apply(topic_map_score,axis=1)
    all_review_list[key+'_score'] = [np.mean(x) for x in all_review_list[key+'_score1']]

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [25]:
all_review_list.shape

(2241, 21)

In [26]:
all_review_list.head(5)

Unnamed: 0,asin,title,all_review_text,sentences,compound_scores,pos_sents,pos_scores,neg_sents,neg_scores,topic_3_sent,...,topic_3_score,topic_4_sent,topic_4_score1,topic_4_score,topic_5_sent,topic_5_score1,topic_5_score,topic_6_sent,topic_6_score1,topic_6_score
0,B0000Y3NO6,"DERMAdoctor Calm, Cool & Corrected anti-rednes...",I gave it a shot for a while until the small b...,[I gave it a shot for a while until the small ...,"[-0.2716, 0.3716, 0.6369, -0.6597, 0.1621, 0.7...","[The cream feels cool on the skin, which is ni...","[0.3716, 0.6369, 0.1621, 0.7506, 0.8242, 0.510...",[I gave it a shot for a while until the small ...,"[-0.2716, -0.6597, -0.6358, -0.2365, 0.0, 0.0,...",[This extremely soothing cream helps any irrit...,...,0.334,"[It also dimished my acne outbreaks, which I t...","[0.2023, 0.6306, 0.0516, 0.6588, 0.4767, 0.419...",0.436043,[After trying all the drugstore rosacea produc...,"[0.6369, 0.8242, 0.5106, 0.1406, 0.4939, 0.624...",0.577633,"[It also dimished my acne outbreaks, which I t...",[0.2023],0.2023
1,B00012C5RS,"DERMAdoctor Picture Porefect Pore Minimizer, 1...","I haven't splurged on a full-size bottle yet, ...","[I haven't splurged on a full-size bottle yet,...","[0.3612, 0.0, 0.5994, 0.0, 0.0, 0.0, 0.4939, 0...","[I haven't splurged on a full-size bottle yet,...","[0.3612, 0.5994, 0.4939, 0.3384, 0.646, 0.3182...","[I received a sample of this, so the effects I...","[0.0, 0.0, 0.0, 0.0, 0.0, -0.5267, 0.0, -0.153...","[This product really does work, just beware if...",...,0.3182,[My skin is normally pretty oily and prone to ...,"[0.4939, 0.3384, 0.1027, 0.4201, 0.3089, 0.799...",0.359314,[My skin is normally pretty oily and prone to ...,"[0.4939, 0.3182, 0.0276, 0.4201, 0.4404]",0.34004,[I rant yet again: the packaging: Pretty lav...,"[0.2023, 0.8885]",0.5454
2,B0001EKTTC,"Glytone Rejuvenating Mask, 3 oz.",Great product . Gets the job done. Leaves your...,"[Great product ., Gets the job done., Leaves y...","[0.6249, 0.0, 0.4173, 0.6194, 0.8555, 0.5574, ...","[Great product ., Leaves your skin absolutely ...","[0.6249, 0.4173, 0.6194, 0.8555, 0.5574, 0.722...","[Gets the job done., This is a keeper!!!!!.., ...","[0.0, 0.0, -0.5267, 0.0, -0.296, 0.0, -0.34, -...",[],...,,"[Perfect for acne prone skin., I use it once a...","[0.5719, 0.6486, 0.886, 0.2732]",0.594925,"[Very, very good deep cleansing masque, for oi...","[0.6194, 0.4201, 0.886, 0.7644, 0.8176]",0.7015,[],[],
3,B0001EL5Q8,"PCA SKIN Rejuvenating Serum, 1 fl. oz.",I was very happy with the promptness with whic...,[I was very happy with the promptness with whi...,"[0.8479, 0.0, -0.2481, 0.1511, 0.8934, 0.0, 0....",[I was very happy with the promptness with whi...,"[0.8479, 0.1511, 0.8934, 0.5859, 0.5719, 0.790...",[Time will tell as to whether or not a 70 year...,"[0.0, -0.2481, 0.0, -0.4019, 0.0, 0.0, -0.4019...",[Reduces the signs of aging love itFast delive...,...,0.674,[As soon as I stop using the serums my acne co...,[0.7227],0.7227,"[love itLove the way it feels, but not sure if...","[0.9703, 0.3182]",0.64425,[I really like the Pca line and notice the dif...,"[0.4201, 0.4754, 0.7964, 0.7506, 0.2023]",0.52896
4,B0001EL5JA,PCA SKIN Protecting Hydrator Broad Spectrum S...,I bought the Hydrator with SPF after having a ...,[I bought the Hydrator with SPF after having a...,"[0.6249, 0.0, 0.3612, 0.8114, 0.6696, 0.1027, ...",[I bought the Hydrator with SPF after having a...,"[0.6249, 0.3612, 0.8114, 0.6696, 0.1027, 0.440...",[I thought the Hydrator would be a similar tex...,"[0.0, 0.0, -0.2755, 0.0, -0.4003, 0.0, 0.0, -0...",[Highly recommended for sensitive and acne-pro...,...,0.529167,[Best SPF day moisturizer for oily/acne-prone ...,"[0.6696, 0.2716, 0.802]",0.581067,[Best SPF day moisturizer for oily/acne-prone ...,"[0.6696, 0.6249, 0.8457, 0.8561, 0.2682, 0.456...",0.631994,[],[],


In [27]:
# save out topic sentiment matching scores and sentences
with open('pickles/all_review_list_SA.pickle', 'wb') as to_write:
    pickle.dump(all_review_list, to_write)