In [13]:
# UNIVERSIDADE FEDERAL DO RIO GRANDE DO NORTE 
# Ano: 2019
# Projeto: Virtual Mood Identifier
# Autor: Alysson Rafael Oliveira de Lima

import nltk
import pandas as pd

from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *

nltk.download('subjectivity')
nltk.download('vader_lexicon')

n_instances = 1000
subj_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]]
obj_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]]

[nltk_data] Downloading package subjectivity to /root/nltk_data...
[nltk_data]   Package subjectivity is already up-to-date!
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [14]:
train_subj_docs = subj_docs[:80]
test_subj_docs = subj_docs[80:100]
train_obj_docs = obj_docs[:80]
test_obj_docs = obj_docs[80:100]
training_docs = train_subj_docs+train_obj_docs
testing_docs = test_subj_docs+test_obj_docs
sentim_analyzer = SentimentAnalyzer()
all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])

unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
len(unigram_feats)

sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)

training_set = sentim_analyzer.apply_features(training_docs)
test_set = sentim_analyzer.apply_features(testing_docs)

trainer = NaiveBayesClassifier.train
classifier = sentim_analyzer.train(trainer, training_set)

for key,value in sorted(sentim_analyzer.evaluate(test_set).items()):
  print('{0}: {1}'.format(key, value))

Training classifier
Evaluating NaiveBayesClassifier results...
Accuracy: 0.8
F-measure [obj]: 0.8
F-measure [subj]: 0.8
Precision [obj]: 0.8
Precision [subj]: 0.8
Recall [obj]: 0.8
Recall [subj]: 0.8


In [0]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer

dataset = pd.read_csv("https://raw.githubusercontent.com/alyssonolima/Projeto-Virtual-Mood-Identifier/master/Dados/tweets_brutos.csv")
sentences = dataset

In [0]:
sid = SentimentIntensityAnalyzer()

compo = []
negative = []
neutro = []
pos = []

#Monta os dados para gerar o csv
for sentence in sentences:    
    ss = sid.polarity_scores(sentence)    
    compo.append(ss["compound"])
    negative.append(ss["neg"])
    neutro.append(ss["neu"])
    pos.append(ss["pos"])
    

tweets_categorizados = pd.DataFrame({'tweets': sentences, 'compound': compo, 'negativos': negative, 'neutro': neutro, 'positivo':pos})
tweets_categorizados.to_csv("tweets_categorizados.csv")