# Sentiment Analysis con Python

In [None]:
# metodo non supervisionato - Vader

In [1]:
# basati su lessici dove le parole sono taggate come positive o negative

In [3]:
from nltk.corpus import movie_reviews
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
import nltk.sentiment.util
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [2]:
frasi = ["It was a great experience",
         "Bad acting, bad music and especially bad taste",
         "This is the best movie created so far",
         "It's a waste of time",
         "Well directed, well produced",
         "I love the photography and the script is amazing",
         "Truly disappointed from this product",
         "All the characters are beautifully executed"
         "Unfortunately I didn't like it at all",
         "Horrid acting and mediocre script, I'm unhappy"]

In [4]:
type(frasi)

list

In [4]:
sid = SentimentIntensityAnalyzer()
for sentence in frasi:
     print(sentence)
     ss = sid.polarity_scores(sentence)
     for k in ss:
         print('{0}: {1}, '.format(k, ss[k]), end='')
     print()

It was a great experience
neg: 0.0, neu: 0.423, pos: 0.577, compound: 0.6249, 
Bad acting, bad music and especially bad taste
neg: 0.677, neu: 0.323, pos: 0.0, compound: -0.8885, 
This is the best movie created so far
neg: 0.0, neu: 0.492, pos: 0.508, compound: 0.7351, 
It's a waste of time
neg: 0.483, neu: 0.517, pos: 0.0, compound: -0.4215, 
Well directed, well produced
neg: 0.0, neu: 0.323, pos: 0.677, compound: 0.4939, 
I love the photography and the script is amazing
neg: 0.0, neu: 0.429, pos: 0.571, compound: 0.8402, 
Truly disappointed from this product
neg: 0.344, neu: 0.333, pos: 0.322, compound: -0.0516, 
All the characters are beautifully executedUnfortunately I didn't like it at all
neg: 0.142, neu: 0.608, pos: 0.25, compound: 0.3798, 
Horrid acting and mediocre script, I'm unhappy
neg: 0.558, neu: 0.442, pos: 0.0, compound: -0.743, 


In [35]:
# Naïve Bayes

In [5]:
train = [("It was a great experience", "positive"),
         ("Bad acting, bad music and especially bad taste", "negative"),
         ("This is the best movie created so far", "positive"),
         ("It's a waste of time", "negative"),
         ("Well directed, well produced, I like it", "positive"),
         ("I love the photography and the script is amazing", "positive"),
         ("Truly disappointed from this product", "negative"),
         ("All the characters are beautifully executed", "positive"),
         ("Unfortunately I didn't like it at all", "negative"),
         ("Horrid acting and mediocre script, I'm unhappy", "negative")]

In [6]:
type(train)

list

In [6]:
dictionary = set(word.lower() for passage in train for word in word_tokenize(passage[0]))

In [7]:
t = [({word: (word in word_tokenize(x[0])) for word in dictionary}, x[1]) for x in train]

In [8]:
classifier = nltk.NaiveBayesClassifier.train(t)

In [9]:
test_data = "This book is simply terrible, I am horrified"
test_data_features = {word.lower(): (word in word_tokenize(test_data.lower())) for word in dictionary}

In [10]:
print(classifier.classify(test_data_features))

negative


In [11]:
test_data2 = "Wonderful movie, is really incredible"
test_data_features2 = {word.lower(): (word in word_tokenize(test_data2.lower())) for word in dictionary}

In [12]:
print(classifier.classify(test_data_features2))

positive


In [None]:
# in un'altra lingua

In [55]:
train2 = [("E' stata una bella esperienza", "positive"),
         ("Male recitato, male interpretato, pessimo", "negative"),
         ("Il miglior film che ho mai visto, incredibile", "positive"),
         ("Una perdita di tempo", "negative"),
         ("Ben diretto, ben recitato, mi è piaciuto, bellissimo", "positive"),
         ("Mi è piaciuta la fotografia e gli attori sono fantastici", "positive"),
         ("Davvero deluso di questo film", "negative"),
         ("Tutti i personaggi sono belli e ben interpretati", "positive"),
         ("Purtroppo faceva schifo", "negative"),
         ("Orribile e mediocre, sono orripilato", "negative")]

In [56]:
dictionary = set(word.lower() for passage in train2 for word in word_tokenize(passage[0]))

In [57]:
t = [({word: (word in word_tokenize(x[0])) for word in dictionary}, x[1]) for x in train2]

In [58]:
classifier = nltk.NaiveBayesClassifier.train(t)

In [59]:
test_data = "Questo libro era terribile, sono orripilato"
test_data_features = {word.lower(): (word in word_tokenize(test_data.lower())) for word in dictionary}

In [60]:
print(classifier.classify(test_data_features))

negative


In [61]:
test_data2 = "Bellissimo, è stato incredibile"
test_data_features2 = {word.lower(): (word in word_tokenize(test_data2.lower())) for word in dictionary}

In [62]:
print(classifier.classify(test_data_features2))

positive


## altro dizionario per l'inglese - Afinn

In [63]:
sentiment_dictionary = {}

for line in open("AFINN-111.txt"):
    word, score = line.split('\t')
    sentiment_dictionary[word] = int(score)

In [64]:
test_data = "This book is simply terrible, I am horrified"

In [65]:
words = test_data.lower().split()

In [66]:
words

['this', 'book', 'is', 'simply', 'terrible,', 'i', 'am', 'horrified']

In [67]:
sum(sentiment_dictionary.get(word, 0) for word in words)

-3

In [None]:
# movie reviews

In [68]:
def create_word_features(words):
    useful_words = [word for word in words if word not in stopwords.words("english")]
    my_dict = dict([(word, True) for word in useful_words])
    return my_dict

In [69]:
pos_reviews = []
for fileid in movie_reviews.fileids('pos'):
    words = movie_reviews.words(fileid)
    pos_reviews.append((create_word_features(words), "positive"))

In [70]:
print(len(pos_reviews))

1000


In [71]:
neg_reviews = []
for fileid in movie_reviews.fileids('neg'):
    words = movie_reviews.words(fileid)
    neg_reviews.append((create_word_features(words), "negative"))

In [72]:
print(len(neg_reviews))

1000


In [73]:
train_set = neg_reviews[:700] + pos_reviews[:700]
test_set =  neg_reviews[700:] + pos_reviews[700:]

In [74]:
classifier = NaiveBayesClassifier.train(train_set)

In [75]:
nltk.classify.util.accuracy(classifier, test_set)

0.7183333333333334

In [None]:
# diciamo invece che vogliamo importare un nostro corpus di recensioni da utilizzare come base per creare un modello

In [None]:
# in questo caso importiamo le nostre cartelle di commenti positivi o negativi in questo modo

In [None]:
import glob
import os

file_list = glob.glob(os.path.join(os.getcwd(), "negative", "*.txt"))

corpus = []

for f in file_list:
    with open(f, 'rb') as f_input:
        corpus.append(f_input.read())