In [1]:
from nltk.corpus import wordnet, sentiwordnet, movie_reviews
from functools import reduce
import nltk
from nltk.metrics import ConfusionMatrix
from sklearn.metrics import accuracy_score

In [2]:
data_set = []
for sentences, file in zip(movie_reviews.paras(), movie_reviews.fileids()):
    data_set.append((
        reduce(lambda acc, s: acc + s, sentences),
        file.split('/')[0]
    ))

In [3]:
from nltk.tag.perceptron import PerceptronTagger
from nltk.corpus import treebank
pos_model = PerceptronTagger(load=False)
pos_model.train(treebank.tagged_sents())

In [4]:
postag_mapper = {
    'VERB': 'v',
    'NOUN': 'n',
    'ADJ': 'a',
    'ADV': 'r'
}

def calculate_sentiment(words):
    result = 0
    for word, pos_tag in pos_model.tag(words):
        pos_tag = nltk.tag.mapping.map_tag('en-ptb', 'universal', pos_tag)
        if pos_tag in ['VERB', 'NOUN', 'ADJ', 'ADV']:
            synsets = wordnet.synsets(word, postag_mapper[pos_tag])
            if len(synsets) > 0:
                synset = synsets[0]
                ssn = sentiwordnet.senti_synset(synset.name())
                result += ssn.pos_score()
                result -= ssn.neg_score()
    return result

In [5]:
scores = [calculate_sentiment(review[0]) for review in data_set]

In [6]:
predictions = ['pos' if s > 0 else 'neg' for s in scores]
actuals = [review[1] for review in data_set]

acc = accuracy_score(
    predictions,
    actuals
)

print(
f"""\
Accuracy: {acc}
Confussion Matrix:
{ConfusionMatrix(actuals, predictions).pretty_format()}
"""
)

Accuracy: 0.572
Confussion Matrix:
    |   n   p |
    |   e   o |
    |   g   s |
----+---------+
neg |<217>783 |
pos |  73<927>|
----+---------+
(row = reference; col = test)




## Conclussions

As we can see most cases that should be negative are tagged as positive, this is probably caused because we are always taken the first synset which may not be the correct one.  
Including word sense desambiguation this algorithm will throw much better results