In [19]:
import pickle
import os, sys
sys.path.insert(1, os.path.join(sys.path[0], '..'))

from src import utils

class LIWC:
    """
    Word-polarity lexicon built from a LIWC ``.dic`` dictionary file.

    Every non-blank line of the file is split on whitespace: the first token
    is taken as the word and the remaining tokens as its category codes.
    Words tagged with category '126' are stored with polarity +1; words
    tagged with '127' (and not '126') are stored with polarity -1. Lines
    carrying neither code contribute nothing to the lexicon.

    Attributes:
        data: the raw lines of the dictionary file, as read by ``readlines()``.
        dict: mapping from word (str) to polarity (+1 or -1).
    """

    def __init__(self, filename, remove_asterisk=True):
        """
        Construct LIWC object and initialize the sentiment word dictionary.

        Args:
            filename: path to the LIWC dictionary file (read as latin-1).
            remove_asterisk: when True, a trailing '*' is stripped from each
                word before it is stored.

        Raises:
            OSError: if the file cannot be opened.
        """

        with open(filename, 'r', encoding='latin-1') as liwc_file:
            self.data = liwc_file.readlines()

        self.dict = dict()

        # Iterate across the LIWC data
        for line in self.data:
            # split() with no separator already discards surrounding
            # whitespace, including the trailing '\r\n'
            line_words = line.split()

            # Blank / whitespace-only lines have no word token; skipping them
            # avoids an IndexError on line_words[0]
            if not line_words:
                continue

            word = line_words[0]
            categories = line_words[1:]

            # Remove asterisk notation from word if required
            if remove_asterisk and word.endswith('*'):
                word = word[:-1]

            # Add word to its corresponding emotion set
            if '126' in categories:
                # Store word as a positive emotion
                self.dict[word] = +1

            elif '127' in categories:
                # Store word as a negative emotion
                self.dict[word] = -1

    def get_sentiment(self, word):
        """
        Search a given word on the LIWC dictionary and return the polarity
        associated to it (-1/+1), otherwise return None.

        The lookup is tried on the word exactly as given, then on the word
        without its last character (only if it has more than 2 characters),
        then on the word without its last two characters (only if it has more
        than 3 characters). The first match wins. No case folding or
        punctuation stripping is performed.

        Args:
            word: the token to look up.

        Returns:
            +1 or -1 if any derivation is found in the dictionary, else None.
        """

        # List of word derivations to search for on dictionary
        word_derivations = [word]

        # Add to the list derivations of 'word' removing its last letters
        if len(word) > 2:
            word_derivations.append(word[:-1])
        if len(word) > 3:
            word_derivations.append(word[:-2])

        # Query the word derivations, most specific first
        for term in word_derivations:
            polarity = self.dict.get(term)

            # Polarity found (-1 or +1)
            if polarity is not None:
                return polarity

        # No polarity value was found on the dictionary
        return None

In [20]:
# Obtain the LIWC lexicon through the project's pickle helper.
# NOTE(review): presumably this reuses the pickle at `filename` when it exists
# and otherwise builds LIWC(*class_args) -- confirm in src/utils. A stale
# pickle would not reflect later changes to the dictionary file.
liwc = utils.load_pickle_object(
    filename='../data/interim/liwc-object.pickle',
    class_name=LIWC,
    class_args=['../data/liwc/LIWC2007_Portugues_win.dic'],
)

In [21]:
# Smoke-test the lexicon on a sample review, one whitespace-separated token at
# a time (punctuation stays attached to the tokens and case is preserved).
text = (
    'hoje eu comprei um celular muito bom, gostei bastante dos aplicativos. '
    'Porém a tela veio riscada.'
).split()

for word in text:
    print(f'word: {word:{15}} = {liwc.get_sentiment(word)}')

word: hoje            = None
word: eu              = None
word: comprei         = None
word: um              = None
word: celular         = None
word: muito           = 1
word: bom,            = 1
word: gostei          = 1
word: bastante        = 1
word: dos             = None
word: aplicativos.    = None
word: Porém           = None
word: a               = None
word: tela            = None
word: veio            = None
word: riscada.        = None


In [4]:
# List every dictionary key shorter than 5 characters, grouped by length, to
# see how short the stored entries get.
for length in range(5):
    print(f'Words with {length} letters:')
    same_length_keys = (key for key in liwc.dict if len(key) == length)
    for key in same_length_keys:
        print(key)

Words with 0 letters:
Words with 1 letters:
Words with 2 letters:
ai
dó
fé
ha
ih
má
ok
ri
sã
só
ui
vã
Words with 3 letters:
ais
alô
ama
ame
amo
bem
boa
bom
cdf
cri
cru
crê
doa
dor
doí
dói
dôo
fim
fút
grr
háb
ira
lol
mal
mau
más
oba
opa
ous
paz
pia
pio
puf
ria
rio
rir
ris
riu
sim
sol
sós
vil
vis
vão
vãs
Words with 4 letters:
abal
abra
abre
abri
abro
abus
afli
agit
amad
amai
amam
amar
amas
amei
amem
ames
amig
amor
amou
anua
anui
anuo
anuí
apoi
arma
asco
auge
aviv
bela
belo
bens
boas
boba
bobo
bons
brio
cara
caro
caço
cium
coxa
coxo
crer
creu
cria
crie
crio
crua
crus
crês
culp
dano
dign
doam
doas
doaç
doei
doem
doer
doeu
doía
doíd
dura
duro
dóis
dúbi
elog
enjo
erra
erre
erro
evit
fala
fale
fali
falo
falt
fama
feda
fede
fedi
fedo
feia
feio
fere
feri
fiel
fina
fino
fira
firo
fixa
fixo
foda
fode
fodi
fodo
frág
fuga
gaga
gago
gagu
geek
gera
gere
gero
glam
gozo
haha
hehe
hihi
hoho
honr
horr
idea
iles
inib
ináb
inút
irad
irra
joga
jogo
julg
leal
lesa
lese
leso
leve
lmao
luta
lute
luto
mata
mate