In [None]:
#Natural Language Processing : Sentiment Analysis in Python

#The process of computationally identifying and categorizing opinions expressed in a piece of text, especially in order to determine whether the writer's attitude towards a particular topic, product, etc. is positive, negative, or neutral.

 

#Text analysis has changed between Python 2 and Python 3 due to changes in the way Python handles encoding of files.  Therefor, this code works with Python version 3.x but may or may not work with Python 2.  Code that works with Python 2 will most likely not work in 3 without major code updates to indicate a different encoding of the text file.

 

#We will use a hard coded piece of text for our example.  Ideally you would use a text file rather than hard coding. I copied comments from the Temple Health System facebook page. 

In [3]:
import nltk.classify.util

from nltk.classify import NaiveBayesClassifier

from nltk.corpus import names

In [4]:
def word_feats(words):

    return dict([(word, True) for word in words])

In [5]:
positive_vocab = [ 'awesome', 'outstanding', 'fantastic', 'terrific', 'good', 'nice', 'great','better', 'best', 'excellent' ':)' ]

negative_vocab = [ 'bad', 'terrible','useless', 'hate', 'discriminated', 'rude', 'careless' ':(' ]

neutral_vocab = [ 'sick','the','stay','was','is','visit','did','know','words','not' ]

In [6]:
positive_features = [(word_feats(pos), 'pos') for pos in positive_vocab]

negative_features = [(word_feats(neg), 'neg') for neg in negative_vocab]

neutral_features = [(word_feats(neu), 'neu') for neu in neutral_vocab]

In [7]:
train_set = negative_features + positive_features + neutral_features

In [8]:
classifier = NaiveBayesClassifier.train(train_set)

In [9]:
# Predict

neg = 0

pos = 0

sentence = "I’m an employee here and on Thursday I suffered from an anaphylactic reaction. I was rushed to the ER and my care was excellent. I was frightened, I couldn’t breath right, hives everywhere, completely panicked. The nursing staff calmed me and walked me through everything that was happening, I got the treatment I needed and feeling much better. I wish I could remember everyone names that treated me. I thank you from the bottom of my heart. The best emergency care ever!"

sentence = sentence.lower()

words = sentence.split(' ')

for word in words:

    classResult = classifier.classify( word_feats(word))

    if classResult == 'neg':

        neg = neg + 1

    if classResult == 'pos':

        pos = pos + 1

In [10]:
print('Positive: ' + str(float(pos)/len(words)))

print('Negative: ' + str(float(neg)/len(words)))

Positive: 0.524390243902439
Negative: 0.3902439024390244


In [11]:
#Now I made up a comment and adjusted some of the words in the code.

 

import nltk.classify.util

from nltk.classify import NaiveBayesClassifier

from nltk.corpus import names

In [12]:
def word_feats(words):

    return dict([(word, True) for word in words])

positive_vocab = [ 'awesome', 'outstanding', 'fantastic', 'terrific', 'good', 'nice', 'great','better', 'best', 'excellent' ':)' ]

negative_vocab = [ 'bad', 'terrible','useless', 'hate', 'hateful' 'discriminated', 'rude', 'careless' ':(' ]

neutral_vocab = [ 'sick','the','stay','was','is','visit','did','know','words','not' ]

positive_features = [(word_feats(pos), 'pos') for pos in positive_vocab]

negative_features = [(word_feats(neg), 'neg') for neg in negative_vocab]

neutral_features = [(word_feats(neu), 'neu') for neu in neutral_vocab]

In [13]:
train_set = negative_features + positive_features + neutral_features

In [14]:
classifier = NaiveBayesClassifier.train(train_set)  

# Predict

neg = 0

pos = 0

sentence = "This was the worst care I ever experience! The nurses were rude and hateful. I felt discriminated against by the rude staff"

In [15]:
sentence = sentence.lower()

words = sentence.split(' ')

for word in words:

    classResult = classifier.classify( word_feats(word))

    if classResult == 'neg':

        neg = neg + 1

    if classResult == 'pos':

        pos = pos + 1

In [16]:
print('Positive: ' + str(float(pos)/len(words)))

print('Negative: ' + str(float(neg)/len(words)))

Positive: 0.36363636363636365
Negative: 0.5454545454545454
