### Basic Sentiment Analysis

An application that derives the sentiment score of a given sentence or paragraph using the VADER sentiment analyzer from NLTK, a Python library.

__Submitted By:__ Auradee Castro

In [3]:
import pandas as pd
import contractions

from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.tokenize import RegexpTokenizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

In [6]:
# Load the sample reviews from a CSV file in the working directory
# and print the resulting frame (columns: Review, Type)
reviews_csv = 'Reviews_for_Analysis.csv'
df_reviews = pd.read_csv(reviews_csv)
print(df_reviews)

                                              Review      Type
0  This is an excellent car with great mileage. b...  Positive
1  I don't think there is anything I really disli...   Neutral
2              I’m not sure if I like the new design   Neutral
3  High quality pants. Very comfortable and great...  Positive
4  Very frustrated right now. Instagram keeps clo...  Negative
5  Your customer service is a nightmare! Totally ...  Negative
6  I still need to further test Zapier to say if ...   Neutral
7  It's so easy to use. It looks less than a week...  Positive
8  The mobile app can be really glitchy and is de...  Negative
9  I love how Zapier takes different apps and tie...  Positive


In [5]:
# Score every review with VADER after a light normalisation pass:
# lower-case -> split into sentences -> expand contractions -> keep word
# tokens only -> lemmatize -> drop stop words -> polarity scores.

# None of these helpers depend on the review being processed, so they are
# built once here rather than once per row.
tokenizer = RegexpTokenizer(r'\w+')
lemmatizer = WordNetLemmatizer()
# 'not' and 'but' are taken out of the stop-word set so that they survive the
# filtering step below and are still present in the text passed to the analyzer.
stop_words = set(stopwords.words("english")) - {'not', 'but'}
sid = SentimentIntensityAnalyzer()

# .items() yields (index label, review text) pairs, so each review is read
# once and the loop does not rely on label-based lookups into the column.
for i, review in df_reviews.Review.items():

    print(f"ORIGINAL: {review}")

    tokenized_sentences = sent_tokenize(review.lower())

    # expand contractions sentence by sentence, then keep only the word tokens
    # (the \w+ pattern drops punctuation)
    removed_contractions = " ".join(
        contractions.fix(sentence) for sentence in tokenized_sentences
    )
    tokenized_words = tokenizer.tokenize(removed_contractions)

    # lemmatize the words
    lemmatized_words = [lemmatizer.lemmatize(word) for word in tokenized_words]

    # remove stop words
    filtered_list = [
        word for word in lemmatized_words if word not in stop_words
    ]
    print(f"FILTERED: {filtered_list}")

    # get polarity of the filtered words, re-joined into a single string
    ss = sid.polarity_scores(" ".join(filtered_list))

    print(f'Compound ------------------------------- {ss["compound"]}')
    print(f'Positive ------------------------------- {ss["pos"]}')
    print(f'Neutral -------------------------------- {ss["neu"]}')
    print(f'Negative ------------------------------- {ss["neg"]}\n')

ORIGINAL: This is an excellent car with great mileage. but it's power could have been better
FILTERED: ['excellent', 'car', 'great', 'mileage', 'but', 'power', 'could', 'better']
Compound ------------------------------- 0.8294
Positive ------------------------------- 0.636
Neutral -------------------------------- 0.364
Negative ------------------------------- 0.0

ORIGINAL: I don't think there is anything I really dislike about the product
FILTERED: ['not', 'think', 'anything', 'really', 'dislike', 'product']
Compound ------------------------------- -0.4391
Positive ------------------------------- 0.0
Neutral -------------------------------- 0.633
Negative ------------------------------- 0.367

ORIGINAL: I’m not sure if I like the new design
FILTERED: ['not', 'sure', 'like', 'new', 'design']
Compound ------------------------------- -0.4717
Positive ------------------------------- 0.0
Neutral -------------------------------- 0.424
Negative ------------------------------- 0.576

ORIGINAL