In [1]:
# Standard libraries
import string
import os, sys
sys.path.insert(1, os.path.join(sys.path[0], '..'))

# Third-party libraries
import nltk
import rdflib
from rdflib.namespace import RDF, OWL

# Local files
from ontology import Ontology
from liwc import LIWC

# Download data for the tokenization process
# nltk.download('punkt')

# Context words for polarity change (Portuguese).
# Document.tag_words tags every token found in one of these lists with the
# list's name; Document._get_sentiment_polarity then uses those tags to adjust
# the polarity of a nearby sentiment word.

# Negation words: alone they invert the polarity (x -1); combined with an
# amplifier they divide it by 3, combined with a downtoner they multiply it by 3.
negation = ['jamais', 'nada', 'nem', 'nenhum', 'ninguém', 'nunca', 'não',
            'tampouco']
# Amplifiers: multiply the polarity by 3 (divide by 3 when a negation is also present).
amplifier = ['mais', 'muito', 'demais', 'completamente', 'absolutamente',
             'totalmente', 'definitivamente', 'extremamente', 'frequentemente',
             'bastante']
# Downtoners: divide the polarity by 3 (multiply by 3 when a negation is also present).
downtoner = ['pouco', 'quase', 'menos', 'apenas']

In [2]:
class Document:
    """
    Opinion text plus everything the aspect-based sentiment analysis derives
    from it.

    Typical usage: ``Document(text)``, then ``tag_words(liwc, ontology)``,
    then ``compute_sentence()``; the ``print_*`` methods display the results.

    Attributes:
        text: the original, unmodified text.
        words: tokens of the lower-cased text (``nltk.word_tokenize``).
        word_tag: one tag per token, filled by ``tag_words``: 'negation',
            'amplifier', 'downtoner', 'aspect', the value returned by
            ``liwc.get_sentiment`` for sentiment words, or '' otherwise.
        aspect_pos: token position -> aspect returned by ``ontology.search``.
        aspect_polarity: aspect -> sum of the context-adjusted polarities of
            the sentiment words found in the sentences mentioning the aspect.
        aspect_context: aspect token position -> list whose first item is the
            ``(start, end)`` sentence range and whose remaining items are the
            positions of the sentiment words found in that range.
    """

    # Tokens treated as sentence boundaries. Built once instead of on every
    # loop iteration; membership is tested against whole tokens.
    _PUNCTUATION = frozenset(string.punctuation)

    def __init__(self, text):
        """
        Tokenize 'text' and initialise the (still empty) analysis containers.
        """

        self.text = text
        self.words = nltk.word_tokenize(text.lower())
        self.word_tag = []

        # Dictionary of aspects occurrences
        self.aspect_pos = dict()

        # Dictionary of aspects polarity
        self.aspect_polarity = dict()

        # Dictionary of informations about the aspect context
        self.aspect_context = dict()

    def print_data(self):
        """
        Show on screen each document word and its corresponding tag.
        Used for debugging the tagging method.

        Requires ``tag_words`` to have been called first, otherwise there is
        no tag to print for the words (IndexError).
        """

        print('[ #] [Word]          [Tag]')
        for i, word in enumerate(self.words):
            print(f'[{i:2}] {word:15} {self.word_tag[i]}')

    def tag_words(self, liwc, ontology):
        """
        Identify aspects, sentiment and context changing words for a given document.

        Parameters:
            liwc: object whose ``get_sentiment(word)`` returns the word's
                polarity, or None when the word carries no sentiment.
            ontology: object whose ``search(word)`` returns the matching
                aspect, or None when the word is not an aspect.

        Fills ``self.word_tag`` (one entry per token) and ``self.aspect_pos``.
        Both are rebuilt from scratch, so the method can be called again
        without misaligning tags and tokens.
        """

        self.word_tag = []
        self.aspect_pos = dict()

        for pos, word in enumerate(self.words):

            # Context changing words take precedence over aspects/sentiments
            if word in negation:
                tag = 'negation'
            elif word in amplifier:
                tag = 'amplifier'
            elif word in downtoner:
                tag = 'downtoner'
            else:
                # Search on the ontology for a matching aspect
                aspect = ontology.search(word)

                if aspect is not None:
                    self.aspect_pos[pos] = aspect    # Mark the aspect position
                    tag = 'aspect'
                else:
                    # Search on LIWC for a polarity connotation; words without
                    # one get an empty tag
                    polarity = liwc.get_sentiment(word)
                    tag = '' if polarity is None else polarity

            self.word_tag.append(tag)

    def compute_sentence(self):
        """
        Attribute polarity to aspects based on the surrounding sentiment words.

        For every aspect occurrence, find the limits of the sentence it is in
        (delimited by punctuation tokens), collect the sentiment words (tag 1
        or -1) inside it, and add their context-adjusted polarity to the
        aspect's total. An aspect with no sentiment word in any of its
        sentences ends up with polarity 0.

        Fills ``self.aspect_polarity`` and ``self.aspect_context``. Both are
        rebuilt from scratch, so calling the method twice does not double
        count.
        """

        self.aspect_polarity = dict()
        self.aspect_context = dict()

        last = len(self.words) - 1

        for pos, aspect in self.aspect_pos.items():
            sentiment_pos = []

            # Walk left to the sentence start, collecting sentiment words
            start = pos
            while start > 0 and self.words[start - 1] not in self._PUNCTUATION:
                start -= 1
                if self.word_tag[start] in (-1, 1):
                    sentiment_pos.append(start)

            # Walk right to the sentence end, collecting sentiment words
            end = pos
            while end < last and self.words[end + 1] not in self._PUNCTUATION:
                end += 1
                if self.word_tag[end] in (-1, 1):
                    sentiment_pos.append(end)

            # Store the sentence range followed by the sentiment positions
            self.aspect_context[pos] = [(start, end)] + sentiment_pos

            # Make sure the aspect is listed even without sentiment words, but
            # never erase what a previous mention of the same aspect
            # accumulated.
            self.aspect_polarity.setdefault(aspect, 0)

            # Sum the context polarity of each sentiment word around the aspect
            for s_pos in sentiment_pos:
                self.aspect_polarity[aspect] += self._get_context_polarity(s_pos)

    def _get_context_polarity(self, pos, word_range = 4):
        """
        Compute the context information around a sentiment word, given its
        position 'pos' and a range 'word_range' to look up.

        Up to 'word_range' tokens are inspected on each side of the word. Each
        side is scanned independently and stops at the document limit or at
        the first punctuation token, so a sentiment word close to the start
        or the end of the document still gets the context of its other side.

        Return the sentiment word polarity adjusted by the context words found.
        """

        found = set()

        for step in (-1, 1):                    # left side, then right side
            for i in range(1, word_range + 1):
                idx = pos + step * i

                # Stop at the document limits
                if idx < 0 or idx >= len(self.words):
                    break

                # Stop the search on this side if a punctuation mark is found
                if self.words[idx] in self._PUNCTUATION:
                    break

                # Otherwise include the word in the context
                tag = self.word_tag[idx]
                if tag in ('amplifier', 'downtoner', 'negation'):
                    found.add(tag)

        # Get the sentiment word polarity based on context
        return self._get_sentiment_polarity(pos,
                                            'amplifier' in found,
                                            'downtoner' in found,
                                            'negation' in found)

    def _get_sentiment_polarity(self, pos, f_amplifier, f_downtoner, f_negation):
        """
        Return the sentiment word polarity given the word position 'pos' and
        information about the context.

        Rules (an amplifier takes precedence over a downtoner):
            amplifier             -> polarity * 3
            amplifier + negation  -> polarity / 3
            downtoner             -> polarity / 3
            downtoner + negation  -> polarity * 3
            negation only         -> -polarity
            no context word       -> polarity unchanged

        The divisions are true divisions, so the result may be a float.
        """

        # Set a priori polarity
        polarity = self.word_tag[pos]

        # Algorithm to calculate the overall sentiment
        if f_amplifier:
            if f_negation:
                polarity = polarity / 3
            else:
                polarity = polarity * 3
        elif f_downtoner:
            if f_negation:
                polarity = polarity * 3
            else:
                polarity = polarity / 3
        elif f_negation:
            polarity = -1 * polarity

        return polarity

    def print_aspect_data(self):
        """
        Show polarities associated to each aspect in review, based on context
        """

        print('[Aspect] \t[Overall polarity]')
        for key, item in self.aspect_polarity.items():
            print(f'\'{key}\' \t{item}')

    def print_aspect_context(self):
        """
        Show information about each known aspect: its sentence range and the
        sentiment words found in it, with their positions.
        """

        for pos, info in self.aspect_context.items():
            print(f'\nFound ({self.aspect_pos.get(pos)}) as ({self.words[pos]}). Context {info[0]}')
            if len(info) == 1:
                print('   No sentiment word found.')
            else:
                # NOTE: the rows below are sentiment words although the header
                # says [Aspect]; the text is kept as is so printed output
                # does not change.
                print('   [Aspect]\t[Position]')
                for s_pos in info[1:]:
                    print(f'   {self.words[s_pos]}  \t{s_pos}')

In [3]:
# Sample reviews in Portuguese. Only `text` is analysed in this cell; `text2`
# is defined here but not used.
text = 'Ótimo celular, desempenho e design espetaculares, superou minhas expectativas. Outra coisa que se destaca bastante é a bateria, com uma grande duração. Os fones que acompanham o celular são provavelmente os melhores que eu já utilizei, com uma qualidade sonora e um isolamento fenomenais. A samsung realmente inovou neste celular'
text2 = 'Adorei o celular, design muito bonito e moderno. Apesar disso, a bateria não dura muito.'

# Resources used for tagging: the LIWC dictionary (sentiment lookup) and the
# ontology of smartphone aspects (aspect lookup)
liwc = LIWC(filename='../liwc_dictionaries/LIWC2007_Portugues_win.dic')
ontology = Ontology('../ontologies/smartphone_aspects.owl')

# Build the document (tokenizes the text) and show the raw input and tokens
review = Document(text)
print('>> Input:\n', review.text, '\n')
print('>> Word tokenization:\n', review.words)

# Step 1: tag every token; step 2: derive the polarity of each aspect found
review.tag_words(liwc, ontology)
review.compute_sentence()

>> Input:
 Ótimo celular, desempenho e design espetaculares, superou minhas expectativas. Outra coisa que se destaca bastante é a bateria, com uma grande duração. Os fones que acompanham o celular são provavelmente os melhores que eu já utilizei, com uma qualidade sonora e um isolamento fenomenais. A samsung realmente inovou neste celular 

>> Word tokenization:
 ['ótimo', 'celular', ',', 'desempenho', 'e', 'design', 'espetaculares', ',', 'superou', 'minhas', 'expectativas', '.', 'outra', 'coisa', 'que', 'se', 'destaca', 'bastante', 'é', 'a', 'bateria', ',', 'com', 'uma', 'grande', 'duração', '.', 'os', 'fones', 'que', 'acompanham', 'o', 'celular', 'são', 'provavelmente', 'os', 'melhores', 'que', 'eu', 'já', 'utilizei', ',', 'com', 'uma', 'qualidade', 'sonora', 'e', 'um', 'isolamento', 'fenomenais', '.', 'a', 'samsung', 'realmente', 'inovou', 'neste', 'celular']


In [4]:
# Show every token of the review next to the tag assigned by tag_words
# (a context-word class, 'aspect', a sentiment polarity, or empty when untagged)
review.print_data()

[ #] [Word]          [Tag]
[ 0] ótimo           1
[ 1] celular         aspect
[ 2] ,               
[ 3] desempenho      1
[ 4] e               
[ 5] design          aspect
[ 6] espetaculares   
[ 7] ,               
[ 8] superou         1
[ 9] minhas          
[10] expectativas    1
[11] .               
[12] outra           
[13] coisa           
[14] que             
[15] se              
[16] destaca         
[17] bastante        amplifier
[18] é               
[19] a               
[20] bateria         aspect
[21] ,               
[22] com             
[23] uma             
[24] grande          1
[25] duração         
[26] .               
[27] os              
[28] fones           
[29] que             
[30] acompanham      
[31] o               
[32] celular         aspect
[33] são             1
[34] provavelmente   
[35] os              
[36] melhores        1
[37] que             
[38] eu              
[39] já              
[40] utilizei        
[41] ,               
[42] com 

In [5]:
# Token position -> aspect returned by the ontology, for every aspect found in the review
print(review.aspect_pos)

{1: 'smartphone', 5: 'design', 20: 'bateria', 32: 'smartphone', 56: 'smartphone'}


In [6]:
# For each aspect occurrence: the (start, end) token range of its sentence and
# the sentiment words found inside that range, with their positions
review.print_aspect_context()     


Found (smartphone) as (celular). Context (0, 1)
   [Aspect]	[Position]
   ótimo  	0

Found (design) as (design). Context (3, 6)
   [Aspect]	[Position]
   desempenho  	3

Found (bateria) as (bateria). Context (12, 20)
   No sentiment word found.

Found (smartphone) as (celular). Context (27, 40)
   [Aspect]	[Position]
   são  	33
   melhores  	36

Found (smartphone) as (celular). Context (51, 56)
   [Aspect]	[Position]
   realmente  	53


In [7]:
# Overall polarity per aspect: the sum of the context-adjusted polarities of
# the sentiment words found in the sentences that mention the aspect
review.print_aspect_data()

[Aspect] 	[Overall polarity]
'smartphone' 	4
'design' 	1
'bateria' 	0
