In [1]:
import pandas as pd                          #read in dataset

import nltk
from nltk.tokenize import word_tokenize      #split words from punct
#from nltk.tokenize import sent_tokenize     #split sentences from punct
#from nltk.tokenize import TweetTokenizer     #break apart content into it's most meaningful aspects
from nltk.probability import FreqDist        # how many of a particular word there are
from nltk.corpus import stopwords            #for removing filler words
from nltk.stem import WordNetLemmatizer      #turn a word into it's base dictionary form
from nltk.sentiment.vader import SentimentIntensityAnalyzer   # give numerical value to text

from string import punctuation               #list of puncuation marks

Assignment: Use the pandas library to read in the dataset. Create a function that will analyze the "Review Text" column and calculate a sentiment value. Make a new column in the dataframe that will contain the sentiment value for each review.

#### Read in dataset

In [2]:
#load the women's clothing review CSV into a dataframe
location = "women_clothing_review.csv"
df_wmn_cloth = pd.read_csv(
    location,
    encoding="latin-1",
)

In [3]:
#verify that read_csv returned a pandas DataFrame
type(df_wmn_cloth)

pandas.core.frame.DataFrame

In [4]:
#list the dataset's column names
df_wmn_cloth.columns

Index(['Unnamed: 0', 'Clothing ID', 'Age', 'Title', 'Review Text', 'Rating',
       'Recommended IND', 'Positive Feedback Count', 'Division Name',
       'Department Name', 'Class Name'],
      dtype='object')

In [5]:
#keep the 'Review Text' column in the variable reviews for inspection and spot checks
reviews = df_wmn_cloth['Review Text']

In [6]:
#look at the first five rows of the 'Review Text' column
reviews[:5]

0    Absolutely wonderful - silky and sexy and comf...
1    Love this dress!  it's sooo pretty.  i happene...
2    I had such high hopes for this dress and reall...
3    I love, love, love this jumpsuit. it's fun, fl...
4    This shirt is very flattering to all due to th...
Name: Review Text, dtype: object

#### Create function

In [7]:
#cache for resources that are expensive to build; filled on first use
_SENTIMENT_RESOURCES = {}


def _getSentimentResources():
    """Return the shared stopword set and VADER analyzer, building them once."""
    if not _SENTIMENT_RESOURCES:
        #a frozenset gives constant-time membership tests instead of scanning a list
        _SENTIMENT_RESOURCES['stopwords'] = frozenset(stopwords.words('english'))
        _SENTIMENT_RESOURCES['analyzer'] = SentimentIntensityAnalyzer()
    return _SENTIMENT_RESOURCES


#create function that will analyze and clean text and calc sentiment value
def reviewSentiment(review):
    """Clean a single review and return its VADER compound sentiment score.

    The review is lowercased, split with ``word_tokenize``, stripped of
    punctuation-only tokens and English stopwords, re-joined with single
    spaces and scored with ``SentimentIntensityAnalyzer.polarity_scores``.

    Parameters
    ----------
    review : object
        The review text. Non-string values are converted with ``str``.
        Missing values (``None`` or a float NaN) are scored as neutral
        instead of being converted to text and analysed.

    Returns
    -------
    float
        The value stored under the ``'compound'`` key of the polarity
        scores, or ``0.0`` for a missing review.
    """
    #missing reviews have no text to analyse (NaN is the only float != itself)
    if review is None or (isinstance(review, float) and review != review):
        return 0.0

    resources = _getSentimentResources()
    eng_stopwords = resources['stopwords']
    sid = resources['analyzer']

    #per-character lookup; testing `token in punctuation` directly would be a
    #substring match against the punctuation string
    punct_chars = set(punctuation)

    #make text lowercase and tokenize the review
    review_wtk = word_tokenize(str(review).lower())

    #build a new list rather than removing from the list being iterated:
    #removing in place skips the token that follows every removed one, so
    #consecutive punctuation marks used to survive the cleaning step
    #NOTE(review): the stopword filter presumably also drops negations such as
    #"not", which may change the polarity of a review - confirm this is intended
    clean_tokens = [
        token
        for token in review_wtk
        if not all(char in punct_chars for char in token)
        and token not in eng_stopwords
    ]

    #put sentences back together with clean tokens
    clean_review = ' '.join(clean_tokens)

    #get sentiment polarity from 'compound' key of the polarity scores dict
    return sid.polarity_scores(clean_review)['compound']

In [8]:
#spot-check the function on two reviews, one score per line
print(reviewSentiment(reviews[0]), reviewSentiment(reviews[103]), sep="\n")

0.8991
-0.3724


#### Create new column for sentiment value

In [9]:
#create new column to hold sentiment value for each review
#(reviewSentiment is applied to every value of the 'Review Text' column)
df_wmn_cloth['review_sentiment'] = df_wmn_cloth['Review Text'].apply(reviewSentiment)

In [10]:
#inspect rows 100-124 (by position) to confirm the new column is populated
df_wmn_cloth.iloc[100:125]

Unnamed: 0.1,Unnamed: 0,Clothing ID,Age,Title,Review Text,Rating,Recommended IND,Positive Feedback Count,Division Name,Department Name,Class Name,review_sentiment
100,100,861,39,Comfy,At first i wasn't sure about it. the neckline ...,4,1,0,General Petite,Tops,Knits,0.1106
101,101,863,40,"Simple, stylish, lovely-runs a bit big",I find that this brand can be a little bit all...,4,1,5,General,Tops,Knits,0.9371
102,102,822,69,Stunning lace top,This top is absolutely stunning. i purchased t...,5,1,2,General,Tops,Blouses,0.9516
103,103,822,23,Not a fan,The fabric felt cheap and i didn't find it to ...,2,0,9,General,Tops,Blouses,-0.3724
104,104,863,51,,"Runs big and looked unflattering. i am petite,...",2,0,0,General,Tops,Knits,0.0
105,105,836,66,Excited ... but ...,I bought this lovely silk/velvet shirt in the ...,5,1,0,General,Tops,Blouses,0.8658
106,106,836,47,Beautiful shirt but runs small!,Was so excited to order this beautiful shirt! ...,4,1,11,General,Tops,Blouses,0.937
107,107,836,66,Beautiful,I ordered ivory in xl because this brand tends...,4,1,0,General,Tops,Blouses,0.9315
108,108,836,23,Perfect fall shirt!,"The shirt is exactly as it is pictured, i have...",5,1,3,General,Tops,Blouses,0.93
109,109,1081,47,,"Perfect dress for hot, humid, sticky weather.",5,1,0,General,Dresses,Dresses,0.5719
