In [1]:
from nltk.corpus import sentiwordnet as swn
from nltk import pos_tag
import json
import sys
from tqdm import tqdm

In [2]:
def get_score(word, postag, next_not=0):
    """Look up the SentiWordNet polarity of ``word`` for a coarse POS tag.

    The first sense (``.01``) of the word is queried for the tags "Noun",
    "Verb", "Adjective" and "Adverb".

    Args:
        word: The word to look up.
        postag: Coarse part-of-speech label; any other value is unsupported.
        next_not: Pass 1 to mark the word as negated; a positive polarity is
            then flipped to negative (non-positive values are left as-is).

    Returns:
        None when the tag is unsupported or the sense is fully objective
        (objectivity score of exactly 1.0); -1 when the lookup raises any
        exception; otherwise positive score minus negative score.
    """
    sense_suffixes = (
        ("Noun", ".n.01"),
        ("Verb", ".v.01"),
        ("Adjective", ".a.01"),
        ("Adverb", ".r.01"),
    )

    try:
        suffix = next((sfx for label, sfx in sense_suffixes if postag == label), None)
        if suffix is None:
            return None
        senti = swn.senti_synset(word + suffix)
    except Exception:
        # Any failure (unknown sense, bad input, ...) is reported as -1.
        return -1

    # A purely objective sense carries no sentiment signal.
    if senti.obj_score() == 1.0:
        return None

    polarity = senti.pos_score() - senti.neg_score()
    if next_not == 1 and polarity > 0:
        polarity = polarity * (-1)
    return polarity

In [3]:
def eng_tag(word):
    """Tag a single word with NLTK and map the result to a coarse label.

    Only the exact tags "NN", "VB", "RB" and "JJ" are recognised; every other
    tag yields None.

    Args:
        word: The word to tag on its own (no sentence context is supplied).

    Returns:
        "Noun", "Verb", "Adverb" or "Adjective", or None for any other tag.
    """
    coarse_labels = {
        "NN": "Noun",
        "VB": "Verb",
        "RB": "Adverb",
        "JJ": "Adjective",
    }
    # pos_tag returns a list of (token, tag) pairs; only the tag is needed.
    (_, fine_tag), = pos_tag([word])[:1]
    return coarse_labels.get(fine_tag)

In [4]:
# Read Data
# The file is parsed as one JSON document per line. It is read explicitly as
# UTF-8 so the result does not depend on the platform's default encoding.
with open('./sentenced-reviews.json', encoding='utf-8') as data_file:
    data = [line.strip() for line in data_file]

# Drop blank lines: json.loads('') raises, so a stray empty line would
# otherwise abort the whole load.
data = [line for line in data if line]
json_reviews = [json.loads(line) for line in data]

In [5]:
# Display how many review records were parsed from the input file.
len(json_reviews)

10

In [8]:
# Score every review. A sentence's score is the mean polarity of the words
# that could be scored (0 when none could); a review's score is the sum of
# its sentence scores.
review_scores = []
for review in tqdm(json_reviews):
    sentence_scores = []
    for sentence in review:
        total = 0.0
        words_used = 0
        last_index = len(sentence) - 1
        for i, word_and_tag in enumerate(sentence):
            flag = 0
            try:
                word = word_and_tag["word"]
                tag = word_and_tag["posTag"]

                # Negation heuristic: the word counts as negated when the
                # following token is "not" or the preceding token is "bad".
                if (i != last_index and sentence[i + 1]["word"] == "not") or (
                    i != 0 and sentence[i - 1]["word"] == "bad"
                ):
                    flag = 1

                score = get_score(word, tag, flag)

                if score == -1:
                    # Lookup failed with the supplied tag; retry with the
                    # tag NLTK assigns to the word on its own.
                    tag = eng_tag(word)
                    score = get_score(word, tag, flag)
            except Exception:
                # Best-effort scoring: a malformed token is skipped rather
                # than aborting the whole run.
                continue

            # get_score returns None for words with no usable sentiment and
            # -1 when the lookup itself failed. A -1 that survives the retry
            # is that failure sentinel, not a polarity, so it must not be
            # averaged in as a strongly negative word.
            # NOTE(review): a genuine polarity of exactly -1.0 cannot be told
            # apart from the sentinel here and is skipped as well.
            if score is None or score == -1:
                continue
            total += score
            words_used += 1
        if words_used != 0:
            sentence_scores.append(total / words_used)
        else:
            sentence_scores.append(0)
    review_score = sum(sentence_scores)
    review_scores.append(review_score)

100%|██████████| 10/10 [00:00<00:00, 1578.23it/s]


In [22]:
# Tally reviews by the sign of their aggregate score: positive, negative
# and exactly zero. Each comparison yields a bool, which sums as 0 or 1.
true = sum(score > 0 for score in review_scores)
false = sum(score < 0 for score in review_scores)
zero = sum(score == 0 for score in review_scores)

In [23]:
# Report the positive, negative and zero-score review counts, one per line.
print("\n".join(str(tally) for tally in (true, false, zero)))

106057
101749
12478


In [10]:
# Print the full list of per-review scores for inspection.
print(review_scores)

[-0.4583333333333333, -1.25, 0.20833333333333334, -0.3541666666666667, -0.21875, -0.20833333333333334, -0.625, 0, -0.125, -0.53125]
