# A simple dictionary-based sentiment analyser

In [1]:
# library for regular expressions
import re

In [2]:
# Sample input: includes emoticons and punctuation with no surrounding spaces
text = "hello I am happy.:) :)Are you?"

In [3]:
def preProcess(text):
    """Tokenise ``text`` into a list of lower-cased tokens.

    Punctuation that directly touches a word character is separated from it
    with a space, the result is split on whitespace, and every token is
    lower-cased. The original and the spaced-out text are printed so the
    intermediate steps can be inspected.

    Args:
        text: the string to tokenise (sentence segmentation is assumed to
            have been done already).

    Returns:
        A list of lower-cased token strings. The list is empty when ``text``
        is empty or contains only whitespace.
    """
    print("original:", text)
    # sentence segmentation - assume already done
    # word tokenisation
    # Only punctuation adjacent to a word character is split off; runs of
    # punctuation stay together, so emoticons such as ":)" remain one token.
    text = re.sub(r"(\w)([.,;:!?'\"”\)])", r"\1 \2", text) # separates punctuation at ends of strings
    text = re.sub(r"([.,;:!?'\"“\(\)])(\w)", r"\1 \2", text) # separates punctuation at beginning of strings
    print("tokenising:", text)
    # re.split yields empty strings when the text starts or ends with
    # whitespace (or is empty); drop them so they are not treated as tokens.
    tokens = [t for t in re.split(r"\s+", text) if t]
    # normalisation - only by lower casing for now
    return [t.lower() for t in tokens]

In [4]:
# Try the tokeniser on the sample text and on a sentence ending in an emoticon
print(preProcess(text))
print(preProcess("I am sad :("))

original: hello I am happy.:) :)Are you?
tokenising: hello I am happy .:) :) Are you ?
['hello', 'i', 'am', 'happy', '.:)', ':)', 'are', 'you', '?']
original: I am sad :(
tokenising: I am sad :(
['i', 'am', 'sad', ':(']


In [22]:
# A very simple sentiment lexicon: token -> polarity score
# (+1.0 for positive words, -1.0 for negative words).
# NOTE(review): 'beutiful' looks like a misspelling of 'beautiful'; the example
# sentences in this notebook use the same spelling, so it is left unchanged.
# NOTE(review): 'dont' only matches the apostrophe-free spelling, because the
# tokeniser splits apostrophes away from the letters around them.
sentimentDict = {
    'happy': 1.0,
    'sad': -1.0,
    'angry': -1.0,
    'deplorable': -1.0,
    'beutiful': 1.0,
    'like': 1.0,
    'dont': -1.0,
    'enjoy': 1.0,
}
print(sentimentDict)

{'happy': 1.0, 'sad': -1.0, 'angry': -1.0, 'deplorable': -1.0, 'beutiful': 1.0, 'like': 1.0, 'dont': -1.0, 'enjoy': 1.0}


In [15]:
def getSentiment(word):
    """Return the polarity score of a single token.

    The score is the value stored for ``word`` in ``sentimentDict``; a word
    with no entry is treated as neutral and scores 0.0.
    """
    return sentimentDict.get(word, 0.0)

In [16]:
# An alternative way of coding the above function
# (the `if False:` guard keeps this definition from ever being executed)
if False:
    def getSentiment(word):
        # a word without an entry in sentimentDict is neutral
        if word not in sentimentDict:
            return 0.0
        # otherwise return the value stored for it
        return sentimentDict[word]

In [17]:
def analyseSentiment(text):
    """Score a piece of text by summing the sentiment of its tokens.

    The text is tokenised with ``preProcess`` and each token is scored with
    ``getSentiment``. The tokens and their individual scores are printed, and
    the sum of the scores is returned.
    """
    tokens = preProcess(text)
    print("words:", tokens)
    # one score per token, in token order
    token_scores = list(map(getSentiment, tokens))
    print("scores", token_scores)
    total = sum(token_scores)
    return total

In [18]:
# Score the sample text; in the recorded run only 'happy' has a non-zero score
s = analyseSentiment(text)
print("sentiment = ", s)

original: hello I am happy.:) :)Are you?
tokenising: hello I am happy .:) :) Are you ?
words: ['hello', 'i', 'am', 'happy', '.:)', ':)', 'are', 'you', '?']
scores [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0]
sentiment =  1.0


In [19]:
# A single negative word: 'sad' scores -1.0 and every other token is neutral
analyseSentiment("i am very sad")

original: i am very sad
tokenising: i am very sad
words: ['i', 'am', 'very', 'sad']
scores [0.0, 0.0, 0.0, -1.0]


-1.0

# Exercise: Try adding new entries to the dictionary 'sentimentDict' above and re-running the code from the top, then try different sentences that contain the new target words.

In [23]:
# 'dont' (-1.0) and 'like' (+1.0) cancel out, so 'sad' leaves the total at -1.0.
# Note that "it's" is split into three tokens (it, ', s) by the tokeniser.
analyseSentiment("i dont like dogs very much and it's sad")

original: i dont like dogs very much and it's sad
tokenising: i dont like dogs very much and it ' s sad
words: ['i', 'dont', 'like', 'dogs', 'very', 'much', 'and', 'it', "'", 's', 'sad']
scores [0.0, -1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -1.0]


-1.0

In [25]:
# 'deplorable' is the only dictionary word in this sentence (-1.0)
analyseSentiment("i find you very deplorable")

original: i find you very deplorable
tokenising: i find you very deplorable
words: ['i', 'find', 'you', 'very', 'deplorable']
scores [0.0, 0.0, 0.0, 0.0, -1.0]


-1.0

In [26]:
# Uses the dictionary's own spelling 'beutiful' so that the lookup matches (+1.0)
analyseSentiment("I think you are very beutiful")

original: I think you are very beutiful
tokenising: I think you are very beutiful
words: ['i', 'think', 'you', 'are', 'very', 'beutiful']
scores [0.0, 0.0, 0.0, 0.0, 0.0, 1.0]


1.0

In [27]:
# Mixed sentiment: 'beutiful' (+1.0) + 'happy' (+1.0) + 'sad' (-1.0) sums to 1.0
analyseSentiment("I think you are very beutiful and that makes me happy and sad at same time")

original: I think you are very beutiful and that makes me happy and sad at same time
tokenising: I think you are very beutiful and that makes me happy and sad at same time
words: ['i', 'think', 'you', 'are', 'very', 'beutiful', 'and', 'that', 'makes', 'me', 'happy', 'and', 'sad', 'at', 'same', 'time']
scores [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, -1.0, 0.0, 0.0, 0.0]


1.0

In [30]:
# 'Enjoy' is lower-cased during pre-processing, so it still matches the 'enjoy' entry
analyseSentiment("I often Enjoy beutiful scenery")

original: I often Enjoy beutiful scenery
tokenising: I often Enjoy beutiful scenery
words: ['i', 'often', 'enjoy', 'beutiful', 'scenery']
scores [0.0, 0.0, 1.0, 1.0, 0.0]


2.0