In [1]:
import nltk
from nltk.corpus import sentiwordnet as swn
from pycorenlp import StanfordCoreNLP
from pywsd.lesk import simple_lesk
from pywsd.lesk import cosine_lesk
from nltk.corpus import wordnet as wn
import requests
import sys, os
import numpy as np
import pandas as pd

Warming up PyWSD (takes ~10 secs)... took 6.164828300476074 secs.


In [2]:
# Client for a Stanford CoreNLP server, pointed at http://localhost:9000.
nlp = StanfordCoreNLP('http://localhost:9000')
# Alias for the client's annotate call; pos_tag() uses it to request POS annotations.
dependency_parser = nlp.annotate

In [3]:
# Sanity check: fetch the SentiWordNet entry named 'good.n.01' and print its positive score.
breakdown = swn.senti_synset('good.n.01')
print(breakdown.pos_score())

0.5


In [4]:
# List every WordNet synset for "love" (the output shows both noun and verb senses).
wn.synsets('love')

[Synset('love.n.01'),
 Synset('love.n.02'),
 Synset('beloved.n.01'),
 Synset('love.n.04'),
 Synset('love.n.05'),
 Synset('sexual_love.n.02'),
 Synset('love.v.01'),
 Synset('love.v.02'),
 Synset('love.v.03'),
 Synset('sleep_together.v.01')]

In [5]:
# Module-level opinion word lists; read_lexicon() rebinds both via `global`.
positive_lexicon = []
negative_lexicon = []

def _read_word_list(filename):
    # Return the words of one lexicon file, skipping ';' comment lines and blank lines.
    path = os.path.join(os.path.abspath('opinion-lexicon-English/'), filename)
    # An explicit single-byte encoding keeps the result independent of the
    # platform default and can never fail to decode a byte.
    with open(path, 'r', encoding="ISO-8859-1") as file:
        stripped_lines = (line.strip() for line in file)
        return [word for word in stripped_lines if word and not word.startswith(';')]


def read_lexicon():
    """Load the positive and negative opinion word lists into the module globals.

    Reads ``positive-words.txt`` and ``negative-words.txt`` from the
    ``opinion-lexicon-English/`` directory (resolved against the current
    working directory) and rebinds ``positive_lexicon`` and
    ``negative_lexicon`` to lists of words.

    Lines starting with ``;`` (the header comments) and empty lines are
    ignored wherever they appear, so no word is lost when the header is not
    followed by a blank separator line.

    Raises:
        OSError: if either lexicon file cannot be opened.
    """
    global positive_lexicon
    global negative_lexicon

    positive_lexicon = _read_word_list('positive-words.txt')
    negative_lexicon = _read_word_list('negative-words.txt')
    
        
# Populate the two lexicon lists from disk, then concatenate them into one
# combined opinion-word list (a plain list, despite the name).
read_lexicon()
op_set = positive_lexicon + negative_lexicon

In [13]:
def pos_tag(sentence):
    """Return ``(word, pos)`` pairs for the tokens of the first sentence in the annotation.

    The text is sent through ``dependency_parser`` with the ``pos`` annotator
    and JSON output; only ``['sentences'][0]`` of the response is used.
    """
    request_properties = {"outputFormat": "json", "annotators": "pos"}
    annotation = dependency_parser(sentence, properties=request_properties)
    first_sentence_tokens = annotation['sentences'][0]['tokens']
    return [(token['word'], token['pos']) for token in first_sentence_tokens]

In [110]:
# Negation cue words. predict_sentiwordnet() inverts the polarity when one of
# these appears among the words it collects around an opinion word.
negation = [
    "afraid",
    "can't",
    "cannot",
    "deny",
    "mean",
    "negate",
    "negation",
    "negative",
    "neither",
    "never",
    "no",
    "non",
    "none",
    "nor",
    "not",
    "nothing",
    "refusal",
    "refuse",
    "reject",
    "rejection"
]

from nltk.corpus import sentiwordnet as swn
import string

def _is_negated(words, opinion, before=4, after=1):
    # True when a negation term lies within `before` tokens ahead of, or
    # `after` tokens behind, any occurrence of `opinion` in `words`.
    for position, word in enumerate(words):
        if word != opinion:
            continue
        # Slices clamp at the sentence boundaries, so there is neither an
        # IndexError near the end nor a wrap-around to the other end.
        window = (words[max(0, position - before):position]
                  + words[position + 1:position + 1 + after])
        if any(neighbour in negation for neighbour in window):
            return True
    return False


def predict_sentiwordnet(opinions, sent):
    """Classify a sentence as 'positive' or 'negative' from its opinion words.

    Each opinion word is looked up in SentiWordNet under the name
    ``<word>.a.1``. It contributes its positive score when that is strictly
    larger than its negative score, and minus its negative score otherwise.
    A word's contribution is inverted when a term from the module-level
    ``negation`` list occurs in the four tokens before it or in the token
    right after it. Only that word's contribution is inverted, not the total
    accumulated from earlier opinion words.

    Args:
        opinions: iterable of opinion words to score.
        sent: the sentence containing them; tokenised by splitting on single
            spaces, so an opinion word only matches a token spelled exactly
            the same.

    Returns:
        'positive' if the summed score is greater than zero, else 'negative'.
        A total of zero, including the case where no opinion word could be
        scored, yields 'negative'.
    """
    words = sent.split(' ')  # same for every opinion word, so split once
    endscore = 0
    for opinion in opinions:
        try:
            score = swn.senti_synset(opinion + '.a.1')
        except Exception:
            # Best effort: a word without such an entry (or a non-string
            # value) contributes nothing. Only the lookup is guarded, so
            # errors in the scoring logic below are not hidden.
            continue

        if score.pos_score() > score.neg_score():
            word_score = score.pos_score()
        else:
            word_score = -score.neg_score()

        if _is_negated(words, opinion):
            word_score = -word_score
        endscore += word_score

    return 'positive' if endscore > 0 else 'negative'

In [111]:
def calculate(category):
    """Compute and print binary classification metrics for one category.

    Reads ``Results/Sentiwordnet/hasil_<category>.csv`` and compares its
    ``label`` and ``predict`` columns, treating 'positive' as the positive
    class. Rows whose label/predict pair is not one of the four
    positive/negative combinations are not counted.

    Args:
        category: category name used to build the CSV file name.

    Returns:
        Tuple ``(tp, tn, fp, fn, precision, recall, f1, accuracy)``. The four
        counts are ints and the four metrics are rounded to two decimals. A
        metric whose denominator is zero is reported as 0.0.
    """
    df = pd.read_csv('Results/Sentiwordnet/hasil_' + category + '.csv')
    p = 'positive'
    n = 'negative'

    label_pos, label_neg = df['label'] == p, df['label'] == n
    predict_pos, predict_neg = df['predict'] == p, df['predict'] == n

    # int() keeps the returned counts plain Python ints rather than numpy scalars.
    tp = int((label_pos & predict_pos).sum())
    tn = int((label_neg & predict_neg).sum())
    fp = int((label_neg & predict_pos).sum())
    fn = int((label_pos & predict_neg).sum())

    def _ratio(numerator, denominator):
        # Division that yields 0.0 instead of raising when the denominator is 0.
        return numerator / denominator if denominator else 0.0

    precision = round(_ratio(tp, tp + fp), 2)
    recall = round(_ratio(tp, tp + fn), 2)
    # F1 comes from the raw counts (2TP / (2TP + FP + FN)) so that rounding of
    # precision and recall does not leak into it.
    f1 = round(_ratio(2 * tp, 2 * tp + fp + fn), 2)
    accuracy = round(_ratio(tp + tn, tp + tn + fp + fn), 2)
    print(tp, '\t', tn, '\t', fp, '\t', fn, '\t', '\t', precision, '\t', recall, '\t', f1, '\t', '\t', accuracy)
    return tp, tn, fp, fn, precision, recall, f1, accuracy

In [112]:
def preprocessing(sentence):
    """Normalise a sentence by converting it to lower case."""
    lowered = sentence.lower()
    return lowered

In [113]:
# Smoke test: a single opinion word in a short sentence containing no negation term.
predict_sentiwordnet(['good'], 'it is good')

'positive'

In [114]:
import pandas as pd
def run(category):
    """Predict the polarity of every review of one category and save the results.

    Reads ``Results/Sentiwordnet/<category>.csv`` (columns ``id``, ``review``,
    ``opinion`` and ``label``), calls ``predict_sentiwordnet`` once per row
    with that row's single opinion word and the lower-cased review, and writes
    the rows plus a ``predict`` column to
    ``Results/Sentiwordnet/hasil_<category>.csv`` and ``.xlsx``.

    Args:
        category: category name used to build the input and output file names.
    """
    df = pd.read_csv('Results/Sentiwordnet/' + category + '.csv')

    predictions = [
        predict_sentiwordnet([opinion], preprocessing(review))
        for opinion, review in zip(df['opinion'], df['review'])
    ]
    # Build the result frame in one step: appending row by row is quadratic,
    # and DataFrame.append no longer exists in pandas 2.x.
    sf = (
        df[['id', 'review', 'opinion', 'label']]
        .assign(predict=predictions)
        .reset_index(drop=True)
    )

    sf.to_csv("Results/Sentiwordnet/hasil_" + category + ".csv")
    sf.to_excel("Results/Sentiwordnet/hasil_" + category + ".xlsx")

In [115]:
# Predict polarity for the FOOD reviews; run() writes hasil_FOOD.csv / .xlsx.
run('FOOD')

In [116]:
# Predict polarity for the AMBIENCE reviews; run() writes hasil_AMBIENCE.csv / .xlsx.
run('AMBIENCE')

In [117]:
# Predict polarity for the SERVICE reviews; run() writes hasil_SERVICE.csv / .xlsx.
run('SERVICE')

In [118]:
# Predict polarity for the PRICES reviews; run() writes hasil_PRICES.csv / .xlsx.
run('PRICES')

In [119]:
# Score each category from the hasil_<category>.csv files written by run().
# Every call prints one tab-separated row: tp, tn, fp, fn, precision, recall, f1, accuracy.
# The tuple returned by the last call is displayed as the cell's value.
calculate('AMBIENCE')
calculate('FOOD')
calculate('SERVICE')
calculate('PRICES')

47 	 12 	 8 	 39 	 	 0.85 	 0.55 	 0.67 	 	 0.56
71 	 25 	 12 	 70 	 	 0.86 	 0.5 	 0.63 	 	 0.54
40 	 30 	 12 	 39 	 	 0.77 	 0.51 	 0.61 	 	 0.58
21 	 20 	 16 	 23 	 	 0.57 	 0.48 	 0.52 	 	 0.51


(21, 20, 16, 23, 0.57, 0.48, 0.52, 0.51)

In [120]:
import numpy as np
# Header row for the table that calculate() prints one line of per category.
print('tp', '\t', 'tn', '\t', 'fp', '\t', 'fn', '\t', '\t', 'prec', '\t', 'rec', '\t', 'f1', '\t', '\t', 'accuracy')
precision = []
recall = []
f1 = []
cat = ['AMBIENCE', 'FOOD', 'SERVICE', 'PRICES']
for category in cat:
    # calculate() returns (tp, tn, fp, fn, precision, recall, f1, accuracy);
    # only the three metrics at positions 4-6 are averaged. Slicing, rather
    # than unpacking into single-letter names, keeps the loop variable from
    # being overwritten inside its own loop.
    category_precision, category_recall, category_f1 = calculate(category)[4:7]
    precision.append(category_precision)
    recall.append(category_recall)
    f1.append(category_f1)
# Final row: unweighted mean of precision, recall and F1 over the categories.
print('', '\t', '', '\t', '', '\t', '', '\t', '\t', round(np.mean(precision),2), '\t', round(np.mean(recall),2), '\t', round(np.mean(f1),2), '\t', '\t')

tp 	 tn 	 fp 	 fn 	 	 prec 	 rec 	 f1 	 	 accuracy
47 	 12 	 8 	 39 	 	 0.85 	 0.55 	 0.67 	 	 0.56
71 	 25 	 12 	 70 	 	 0.86 	 0.5 	 0.63 	 	 0.54
40 	 30 	 12 	 39 	 	 0.77 	 0.51 	 0.61 	 	 0.58
21 	 20 	 16 	 23 	 	 0.57 	 0.48 	 0.52 	 	 0.51
 	  	  	  	 	 0.76 	 0.51 	 0.61 	 	
