In [5]:
import pandas as pd
import helpers
from csv import writer 

In [2]:
def append_list_as_row(file_name, list_of_elem):
    '''
    Append one row to a CSV file, creating the file if it does not exist.

    Parameters:

         file_name(str): path of the CSV file to append to
         list_of_elem(list): the cell values of the row to write

    Returns:

         None
    '''
    # The encoding is pinned to UTF-8 so that non-ASCII text (e.g. Arabic words)
    # is written the same way on every platform instead of depending on the
    # locale's default encoding. newline='' lets the csv module control line endings.
    with open(file_name, 'a', newline='', encoding='utf-8') as write_obj:
        writer(write_obj).writerow(list_of_elem)

In [6]:
# Load the review dataset from a CSV file addressed relative to the notebook's working directory.
data = pd.read_csv('data/arabic/data_1.csv')
# Preview the first rows (the rendered table shows rating, 'Arabic review' and 'English review' columns).
data.head()

Unnamed: 0.1,Unnamed: 0,rating,Arabic review,no,English review
0,0,5,كتاب رائع. اعتقد ان الروايه كلها تلخصت بجمله و...,0,A wonderful book. I think that the whole novel...
1,1,4,رواية تلامس الروح بعمقها، فخورة اني اخيرا لقيت...,1,"Novel touches the soul Bamgaha, proud that I f..."
2,2,5,رواية محكمة بكل اختصار. وكان الجزء المفضل بالن...,2,A novel court every shortcut. It was my favori...
3,3,3,هذا الكتاب يحزن مرا، ظلم واضطهاد عيسى بلا ذنب ...,3,"This book is sad bitter, injustice and persecu..."
4,4,4,رواية واقعية ورائعة تمثل أحداث تكررت كثيرا في ...,4,Novel and represent a realistic and fantastic ...


In [7]:
# Build a list from the first ten English reviews; each entry is the review text concatenated with itself.
# NOTE(review): doubling every review looks unintentional - confirm whether the reviews were meant to be joined instead.
doc = [review + review for review in data['English review'][:10]]

In [9]:
# Names of the CSV files that feature_opinion_pairs appends its findings to.
# Rows are appended, never truncated, so results accumulate across runs.
# Discovered opinion words.
OPINIONS_FILE = 'opinions.csv'
# Discovered feature words.
FEATURES_FILE = 'features.csv'
# Discovered (feature, opinion) pairs.
FEATURES_OPINIONS_PAIRS = 'f_o_pairs.csv'

In [None]:
def feature_opinion_pairs(text,opinions_seed,lang):

    '''

    Generates feature-opinion pairs from a given text using double propagation.

    Starting from the seed opinion words, the dependency rules R1-R4 are applied
    repeatedly over every sentence until a full pass discovers nothing new.
    Every newly discovered opinion, feature and pair is also appended to
    OPINIONS_FILE, FEATURES_FILE and FEATURES_OPINIONS_PAIRS respectively
    (through append_list_as_row), exactly once per item and per call.

    Parameters:

         text(str): a string that contains all the sentences combined (concatenated)
         opinions_seed(list(str)): a list of initial opinion words (matched case-insensitively;
                                   None or an empty list means "no seeds")
         lang(str): use 'en' for english and 'ar' for arabic

    Returns:

         (opinions(set(str)),features(set(str)),feature_opinion_pairs(set((str,str)))): a tuple of 3 objects
         that contains the set of opinions, the set of features and the set of (feature, opinion) pairs.
         All words are lowercased. Seed words appear in the returned opinions only when a rule
         re-derives them from the text.

    '''

    # Initiate Stanza library objects
    import stanza
    stanza.download(lang)       # This downloads the models for the requested language
    nlp = stanza.Pipeline(lang) # This sets up a default neural pipeline in the specified language
    doc = nlp(text)             # This initiates the document object that contains the word dependencies

    # initiate the sets
    seed_words = {word.lower() for word in (opinions_seed or ())}
    opinions = set()
    features = set()
    pairs = set()

    # constants
    mr = ('mod','nsubj','obj','pnmod','amod') # relevant relationships between words of a sentence
    jj = ('JJ','JJR','JJS') # adjective POS
    nn = ('NN','NNS') # noun POS
    # NOTE(review): these are Penn-Treebank style tags; confirm that the xpos tagset of the
    # model used for `lang` (notably 'ar') actually produces them.

    def is_opinion(word):
        # A word counts as a known opinion if it was given as a seed OR discovered so far.
        return word in seed_words or word in opinions

    def add_opinion(word):
        # Record a new opinion word; returns True only when it was not known before.
        if word in opinions:
            return False
        opinions.add(word)
        append_list_as_row(OPINIONS_FILE,[word])
        return True

    def add_feature(word):
        # Record a new feature word; returns True only when it was not known before.
        if word in features:
            return False
        features.add(word)
        append_list_as_row(FEATURES_FILE,[word])
        return True

    def add_pair(feature, opinion):
        # Record a new (feature, opinion) pair; returns True only when it was not known before.
        pair = (feature, opinion)
        if pair in pairs:
            return False
        pairs.add(pair)
        append_list_as_row(FEATURES_OPINIONS_PAIRS,[pair])
        return True

    # Flatten every (head, relation, dependent) triple once, so the fixed-point loop
    # below does not redo the lowercasing on every pass.
    parsed = [
        [
            (head.text, head.text.lower(), head.xpos, rel, word.text.lower(), word.xpos)
            for head, rel, word in sent.dependencies
        ]
        for sent in doc.sentences
    ]

    # Double propagation algorithm from (Qiu, G., Liu, B., Bu, J., & Chen, C. (2009, July). Expanding domain sentiment lexicon through double propagation. In IJCAI (Vol. 9, pp. 1199-1204). Chicago)
    # The sets only ever grow and are bounded by the vocabulary of the text, so the loop terminates.
    has_new = True
    while has_new:

        has_new = False

        for edges in parsed:
            for i,(head_i, parent, parent_pos, rel_i, child, child_pos) in enumerate(edges):

                ### A Single Dependency ###

                if rel_i == 'conj':
                    # R1-1: an adjective conjoined with a known opinion is an opinion (either direction)
                    if is_opinion(child) and parent_pos in jj:
                        has_new |= add_opinion(parent)
                    if is_opinion(parent) and child_pos in jj:
                        has_new |= add_opinion(child)

                    # R4-1: a noun conjoined with a known feature is a feature (either direction)
                    if parent in features and child_pos in nn:
                        has_new |= add_feature(child)
                    if child in features and parent_pos in nn:
                        has_new |= add_feature(parent)

                if rel_i in mr:
                    # R2-1: an adjective directly related to a known feature is an opinion on it
                    if parent in features and child_pos in jj:
                        has_new |= add_opinion(child)
                        has_new |= add_pair(parent, child)
                    elif child in features and parent_pos in jj:
                        has_new |= add_opinion(parent)
                        has_new |= add_pair(child, parent)

                    # R3-1: a noun modified by a known opinion is a feature
                    if is_opinion(child) and parent_pos in nn:
                        has_new |= add_feature(parent)
                        has_new |= add_pair(parent, child)

                ### 2 Double Dependency ###

                for j,(head_j, _, _, rel_j, other, other_pos) in enumerate(edges):

                    # The two dependents must be distinct and share the same head
                    # (heads are matched by their surface text).
                    if j == i or head_i != head_j:
                        continue

                    same_rel = rel_i == rel_j
                    any_mr = rel_i in mr or rel_j in mr

                    # R1-2: a sibling adjective attached like a known opinion is an opinion
                    if same_rel and is_opinion(child) and other_pos in jj:
                        has_new |= add_opinion(other)

                    # R2-2: an adjective sharing a head with a known feature is an opinion on it
                    if any_mr and other in features and child_pos in jj:
                        has_new |= add_opinion(child)
                        has_new |= add_pair(other, child)

                    # R3-2: a noun sharing a head with a known opinion is a feature
                    if any_mr and is_opinion(child) and other_pos in nn:
                        has_new |= add_feature(other)
                        has_new |= add_pair(other, child)

                    # R4-2: a sibling noun attached like a known feature is a feature
                    if same_rel and other in features and child_pos in nn:
                        has_new |= add_feature(child)

    #return results
    return (opinions,features,pairs)