In [1]:
import re
from nltk import Tree
from pycorenlp import StanfordCoreNLP
import json
import nltk
from nltk.sem.logic import *
import requests
from nltk.corpus import sentiwordnet as swn
nltk.download('sentiwordnet')
import os
from IPython.display import clear_output
import numpy as np

read_expr = nltk.sem.Expression.fromstring
nlp = StanfordCoreNLP('http://localhost:9000')
dependency_parser = nlp.annotate

[nltk_data] Downloading package sentiwordnet to C:\Users\Farza
[nltk_data]     Nurifan\AppData\Roaming\nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!


Babelfy WSD method

In [2]:
def babelfy(sentence):
    response = []
    token_word = nltk.word_tokenize(sentence)
    url = 'https://babelfy.io/v1/disambiguate?text='+sentence+'&annRes=WN&lang=en&key=57d8ee6f-01f0-46f4-ac1c-0468fb0aae3a'
    r = requests.get(url)
    
    res = r.json()
    
    for x in res:
        tokenFragment = x['tokenFragment']
        startTknFragment = tokenFragment['start']
        endTknFragment = tokenFragment['end']
        babelSynsetID = x['babelSynsetID'];
        response.append((token_word[startTknFragment], babel_info(babelSynsetID)))
    
    return response

def babel_info(synset_id):
    url = 'https://babelnet.io/v5/getSynset?id='+synset_id+'&key=57d8ee6f-01f0-46f4-ac1c-0468fb0aae3a'
    r = requests.get(url)
    res = r.json()

    return '.'.join(res['mainSense'].split('#'))
    

def get_score(sentence):
    resp = babelfy(sentence)
    ss = []
    for (w, sy) in resp:
        swn_senti = swn.senti_synset(sy)
        ss.append((w, sy, swn_senti.pos_score(), swn_senti.neg_score() ))
        
    return ss

lexicon method

In [3]:
positive_lexicon = []
negative_lexicon = []

def read_lexicon():
    global positive_lexicon;
    global negative_lexicon;
    
    with open(os.path.join(os.path.abspath('opinion-lexicon-English/') , 'positive-words.txt'), 'r') as file:
        line = file.readline();
        while ";" in line:
            line = file.readline();
         
        positive_lexicon = file.readlines()
    
    with open(os.path.join(os.path.abspath('opinion-lexicon-English/') , 'negative-words.txt'), 'r', encoding = "ISO-8859-1") as file:
        line = file.readline();
        while ";" in line:
            line = file.readline();
        
        negative_lexicon = file.readlines()
        
    positive_lexicon = list(map(lambda word: word.rstrip("\n\r"), positive_lexicon))
    negative_lexicon = list(map(lambda word: word.rstrip("\n\r"), negative_lexicon))
    
        
read_lexicon()

In [4]:
intensifier_adverb = ['absolutely', 'completely', 'extremely', 'highly', 'rather', 'really', 'very', 'so', 'too', 'totally', 'utterly', 'at all']
negate_adverb = ['no', 'not', 'never', 'none', 'nobody']

In [5]:
def parser(expression):
    old = expression.replace(' ', '_').replace('>_(', '> (').replace(')_(', ') (').replace(')_)', ') )').replace(')_)', ') )').replace(' (', '(')
    new = ''
    flag = False
    for x in range(0, len(old) - 1):        
        if old[x] == '<':
            flag = True
        if old[x] == '>':
            flag = False
            
        if flag == True:
            if old[x] == '(':
                new += '{'
            elif old[x] == ')':
                new += '}'
            else:
                new += old[x]
        else:
            new += old[x]
    new += old[len(old)-1]
    return new


def pos_tag(sentence):
    result = dependency_parser(sentence, properties={"outputFormat": "json", "annotators": "pos"})['sentences']

    res = []
    for iterate in result:
        for pos in iterate['tokens']:
            res.append(pos['pos'])
    return res


def insert_pos_tag(exp, pos, nltk_pos):
    count = 0
    res = exp[0]
    for x in range(1, len(exp)):
        if exp[x-1] == 'O' and exp[x] == 'S' and exp[x+1]==' ' and exp[x+2] == 'P':
            res += 'S '
            res += pos[count] + ' ' + nltk_pos[count][1]
            count += 1
        else:
            res += exp[x]  
    return res


def direction(exp):
    cont = False
    for x in exp:
        if x == '{':
            cont = True
        elif x == '}':
            cont = False
            continue
        if cont == True:
            continue
        if x == '/':
            return '/'
        elif x == '\\':
            return '\\'
    return False


def is_type_raising(tree):
    tree_string = str(tree)
    
    # check type raising
    exp = tree_string.split('_')[1]
    pattern_1 = r'(.*?)\\(.*?){(.*?)/(.*?)}'
    pattern_2 = r'(.*?)/(.*?){(.*?)\\(.*?)}'
    
    match = False
    if re.search(pattern_1, exp):
        match = True
    elif re.search(pattern_2, exp):
        match = True
        
    sub = []
    for subtree in tree:
        sub.append(subtree)
    if len(sub) == 1 and match:
        return True
    else:
        return False

    
def map_wnpos_to_pennpos(pos):
    if(pos == 'n'):
        return 'NN'
    elif(pos == 'a'):
        return 'JJ'
    elif(pos == 'v'):
        return 'VB'
    elif(pos == 'r'):
        return 'RB'
    
    
def find_word_in_swn(swn_score, word):
    for (wordd, synset, pos_score, neg_score) in swn_score:
        if(wordd == word):
            return (wordd, synset, pos_score, neg_score)
    
    
def polarity_with_score(pos, neg, word=None, method='swn'):
    if(method == 'swn'):
        if(pos > neg):
            return ('P', round(pos * 10))
        elif(pos < neg):
            return ('Ne', round(neg*10))
        else:
            return ('N', 0)
    elif(method == 'lexicon'):
        if word in positive_lexicon:
            return ('P', 1)
        elif(word in negative_lexicon):
            return ('Ne', 1)
        else:
            return ('N', 0)
    
    
def adverb_type(word):
    if (word in intensifier_adverb):
        return 'I'
    elif(word in negate_adverb):
        return 'Ne'
    else:
        return 'N'
    
    
def pos_majority_voting(corenlp, nltk, babelfy):
    pos = {}
    
    if(corenlp in pos):
        pos[corenlp] += 1
    else:
        pos[corenlp] = 0  
    
    if(nltk in pos):
        pos[nltk] += 1
    else:
        pos[nltk] = 0
        
    if(babelfy in pos):
        pos[babelfy] += 1
    else:
        pos[babelfy] = 0
        
    #find biggeest counter in pos 
    return sorted(pos.items(), key=lambda x: x[1], reverse=True)[0][0];


def chunk(tree):
    # ini cuma masukin ke array 
    chunk = []
    chunk3 = []
    
    for subtree in tree:
        if type(subtree) == nltk.tree.Tree:
            
            # chunk temp array
            subtree_str_array = str(subtree).split('_')
            if subtree_str_array[0][2] == 'L':
                if subtree_str_array[3] == 'NN' or subtree_str_array[1] == 'N/N':
                    chunk.append(subtree_str_array[6])
                    if len(chunk3) == 0:
                        chunk3.append(subtree_str_array[6])
                    
            if len(chunk3) == 1:
                for sub in subtree:
                    # chunk temp array
                    subtree_str_array3 = str(sub).split('_')
                    if subtree_str_array3[0][2] == 'L':
                        if subtree_str_array3[3] == 'NN':
                            chunk3.append(subtree_str_array3[6])
    
    # chunk noun phrase
    if len(chunk) == 2:
        chunk_str = '+'.join(chunk)
        return True, read_expr(r'(' + chunk_str + '_NN_N_0)')
    
    if len(chunk3) == 3:
        chunk_str = '+'.join(chunk3)
        return True, read_expr(r'(' + chunk_str + '_NN_N_0)')
    
    return False, None


def one_child(tree, swn_score):    
    sub = []
    for subtree in tree:
        if type(subtree) == nltk.tree.Tree:
            sub.append(subtree)
    if len(sub) == 1:
        return True, lambda_calculus(sub[0], swn_score), sub
    
    return False, None, sub


def type_raising(first, second, swn_score):    
    if is_type_raising(first):
        if direction(str(first).split('_')[1]) == '/':
            x = read_expr(r'\F X.F(X, ' + str(lambda_calculus(first, swn_score)) + ')')
            y = lambda_calculus(second, swn_score)
            return True, ApplicationExpression(x, y).simplify()
    
    if is_type_raising(second):
        if direction(str(second).split('_')[1]) == '/':
            x = lambda_calculus(first, swn_score)
            y = read_expr(r'\F X.F(X, ' + str(lambda_calculus(second, swn_score)) + ')')
            return True, ApplicationExpression(x, y).simplify()
        
    return False, None
    

def rule_var(ccg, corenlp_pos, nltk_pos, word, swn_score):
    r_word = ['PRP', 'FW', 'NN', 'LS', 'NNS']
    word_swn_score = find_word_in_swn(swn_score, word) if swn_score else None
    pos_score = 0
    neg_score = 0
    babelfy_pos = None;

    if(word_swn_score):
        word, synset, pos_score, neg_score = word_swn_score
        splitted_syns = synset.split('.');
        babelfy_pos = map_wnpos_to_pennpos(splitted_syns[1]);
        
    polarity, score = polarity_with_score(pos_score, neg_score, word, method="lexicon")
    pos = pos_majority_voting(corenlp_pos, nltk_pos, babelfy_pos)
    
    return rule_leaf(ccg, r_word, pos, word, polarity, score)


def rule_leaf(ccg, r_word, pos, word, polarity, score):
    if pos == 'CC':
        return read_expr(r'CC')
    elif pos in r_word:
        return read_expr(word + '_' + pos + '_' + polarity + '_' + str(score))
    elif 'JJ' in pos:
        return read_expr(word + '_' + 'JJ' + '_' + polarity + '_' + str(score))
    elif 'VB' in pos:
        if '{S[dcl]\\NP}/{S[adj]\\NP}' in ccg:
            return read_expr(r'\X.X')
        else:
            return read_expr(r'\X.' + word + '_' + 'VB' + '_' + polarity + '_' + str(score) +'(X)')
    elif 'RB' in pos:
        #Adverb has three types. I: Intensifier, Ne: Negation, N: no affection toward sentiment
        return read_expr(word + '_' + 'RB' + '_' + adverb_type(word) + '_' + polarity + '_' + str(score))
    #elif word == 'of':
   #     return read_expr('of')
    else:
        return read_expr(r'\X.X')

    
def lambda_calculus(tree, swn_score=None):
    tree_string = str(tree)
    
    # leaf
    if tree_string[2] == 'L':
        splitted = tree_string.split('_')
        ccg = splitted[1]
        corenlp_pos = splitted[3]
        nltk_pos = splitted[4]
        word = splitted[6]
        
        return rule_var(ccg, corenlp_pos, nltk_pos, word, swn_score)
    
    # chunk noun phrase
    is_true, res = chunk(tree)
    if is_true:
        return res
            
    # anak 1
    is_true, res, sub = one_child(tree, swn_score)
    if is_true:
        return res
                        
    # urutan operasi lambda calculusnya    
    first = sub[0]
    second = sub[1]
    
    # type raising
    is_true, res = type_raising(first, second, swn_score)
    if is_true:
        return res    
    
    # urutan
    length_1 = len(str(sub[0]).split('_')[1].replace('\\', '/').split('/'))
    length_2 = len(str(sub[1]).split('_')[1].replace('\\', '/').split('/'))
    if length_2 > length_1:
        first = sub[1]
        second = sub[0]
    
    # rekursi
    return deduction(lambda_calculus(first, swn_score), lambda_calculus(second, swn_score))

In [6]:
def bool_var(str_a, str_b):
    is_adj_in_a = re.search(r'JJ.*', str_a)
    is_adj_in_b = re.search(r'JJ.*', str_b)
    
    is_adverb_in_a = re.search(r'RB.*', str_a)
    is_adverb_in_b = re.search(r'RB.*', str_b)    
    
    is_noun_in_b = re.search(r'\w*?\+?\w*?\+?\w*_NN.*?_\w*', str_b)
    is_verb_in_b = re.search(r'VB.*?_\w*_\d*', str_b)
    
    return is_adj_in_a, is_adj_in_b, is_adverb_in_a, is_adverb_in_b, is_noun_in_b, is_verb_in_b


def rule_and(str_a, str_b, is_noun_in_b, is_adj_in_a):
    a, b = None, None
    if (str_b == 'CC'):
        a = read_expr(str_b)
        b = read_expr(str_a)
        return True, a, b
        
    if (re.search(r'CC\(', str_a)) and (re.search(r',', str_a)) and is_noun_in_b and is_adj_in_a:
        a = read_expr(r'\X.X')
        b = read_expr(replacer(str_a, str_b))
        return True, a, b
    
#     if is_adj_in_a and is_noun_in_b :
#         a = read_expr(r'\x.x')
#         b = read_expr(replacer(str_a, str_b))
#         return True, a, b
        
    
    return False, None, None


def rule_seq(str_a, str_b):
    a, b = None, None
    r_word = ['PRP', 'FW', 'NN', 'LS', 'JJ']
    if '(' not in str_a and '(' not in str_b:
        x_both = False
        x_any = False
        y_any = False
        
        for r in r_word:
            if r in str_a and r in str_b:
                x_both = True
                
            if r in str_a:
                x_any = True
            if r in str_b:
                y_any = True

        if x_both:
            a = read_expr(r'\X.X')
            b = read_expr('CC(' + str_a + ',' + str_b + ')')
            return True, a, b
        
        if x_any and y_any:
            a = read_expr(r'\x.x')
            b = read_expr('seq(' + str_a + ',' + str_b + ')')
            return True, a, b
        
        
    
    return False, None, None



def ini_fungsi_mas_ari_tolong_dipecah_lagi(str_a, str_b, is_adj_in_a, is_adj_in_b, is_adverb_in_a, is_adverb_in_b, is_noun_in_b, is_verb_in_b):
    a, b = None, None
    
    #lupa kondisi gmn
    if is_adverb_in_a and (is_noun_in_b and (not is_verb_in_b)):
        a = read_expr(r'\X.X')
        b = read_expr(str_b)
        return True, a, b

    elif(is_adj_in_a and is_noun_in_b):
        pattern = '\w*_JJ.*?_\w*_\d'
        adjective_score = re.findall(pattern, str_a)[0].split('_')[3]
        sentiment_polarity = re.findall(pattern, str_a)[0].split('_')[2]

        #get all nouns
        all_nouns = re.findall(r'\w*?\+?\w*?\+?\w*_NN.*?_\w*', str_b);
        #change sentiment and polarity
        def mapFunction(data):
            idx, x = data
            if(idx == 2):
                return sentiment_polarity
            elif(idx == 3):
                return adjective_score
            else:
                return x
        for index, noun_str_b in enumerate(all_nouns):
            #noun_str_b = str_b[ is_noun_in_b.start() : is_noun_in_b.end() ] 
            noun_update_str_b = '_'.join( list(map(mapFunction, enumerate(noun_str_b.split('_')))) )
            #change str_b for noun filtered with x
            start_index = str_b.index(noun_str_b)
            end_index = start_index + len(noun_str_b);
            list_str_b = list(str_b)
            list_str_b[start_index : end_index] = 'X'+str(index)
            str_b = "".join(list_str_b)
            str_b = str(ApplicationExpression(read_expr(r"\X"+str(index)+"."+str_b), read_expr(noun_update_str_b)).simplify())
       
        
        a = read_expr(r'\X.X')
        b = read_expr(str_b)
        
        return True, a, b
    
    #change identity function for adverb.
    elif( is_adverb_in_a and is_adj_in_b or is_adverb_in_a and is_verb_in_b ):
        #adverb modify adjective
        #example very_RB_I excellent_JJ_P_10 = excellent_JJ_P_20
        #I Intensifier must *2
        #N Negate must *-1
        adverb_type = re.findall('RB.*?_\w.*?', str_a)[0].split('_')[1]

        if(adverb_type == 'N'):
            a = read_expr(r'\X.X')
            b = read_expr(str_b)
        elif(adverb_type == 'Ne'):
            #for negation adverb just change polairty
            def mapFunction(data):
                idx, x = data
                if(idx == 2):
                    if(x == 'P'):
                        return 'Ne'
                    elif(x == 'Ne'):
                        return 'P'
                    else:
                        return x
                else:
                    return x

            str_b = '_'.join( list(map(mapFunction, enumerate(str_b.split('_')))) )
            a = read_expr(r'\X.X')
            b = read_expr(str_b)
            
        elif(adverb_type == 'I'):
            #for intensifier adverb. scale adjective value 
            
            # ini bener?
            jj_s = re.findall('JJ.*_\w*_\d*', str_b)
            vb_s = re.findall('VB.*_\w*_\d*', str_b)
            score = '0'
            
            if jj_s:
                score = jj_s[0].split('_')[2]
            else:
                score = vb_s[0].split('_')[2]
                
            def mapFunction(data):
                if(data == score):
                    return str(int(score) * 2);
                else:
                    return data;
            list_str_b = list(str_b)
            str_b = ''.join( list(map(mapFunction, list_str_b)))

            a = read_expr(r'\X.X')
            b = read_expr(str_b)
            
        return True, a, b
        
    elif( is_adverb_in_a and is_adverb_in_b):
        #adverb modify other adverb
        #kondisi yang jarang bertemu
        adverb_type_in_a = re.findall('RB.*_\w*', str_a)[0].split('_')[1]
        adverb_type_in_b = re.findall('RB.*_\w*', str_b)[0].split('_')[1]
        
        if(adverb_type_in_a == 'Ne' and adverb_type_in_b == 'I'):
            #ganti type b menjadi NE
            def mapFunction(data):
                idx, x = data
                if(idx == 2):
                    return 'Ne'
                else:
                    return x

            str_b = '_'.join( list(map(mapFunction, enumerate(str_b.split('_')))) )
            a = read_expr(r'\X.X')
            b = read_expr(str_b)

        elif(adverb_type_in_a == 'I' and adverb_type_in_b == 'Ne'):
            #tidak meruba
            a = read_expr(r'\X.X')
            b = read_expr(str_b)
        else:
            a = read_expr(r'\X.X')
            b = read_expr(str_b)
            
        return True, a, b

    return False, None, None


def deduction(a, b):
    str_a = str(a)
    str_b = str(b)
    print('old a ' + str_a)
    print('old b ' + str_b)
    
    if str_a != '\X.X' and str_b == '\X.X':
        a = read_expr(str_b)
        b = read_expr(str_a)
    
    #change identity function for adjective    
    is_adj_in_a, is_adj_in_b, is_adverb_in_a, is_adverb_in_b, is_noun_in_b, is_verb_in_b = bool_var(str_a, str_b)

    # fungsi mas ari
    is_true, a_temp, b_temp = ini_fungsi_mas_ari_tolong_dipecah_lagi(str_a, str_b, is_adj_in_a, is_adj_in_b, is_adverb_in_a, is_adverb_in_b, is_noun_in_b, is_verb_in_b)
    if is_true:
        a = a_temp
        b = b_temp
    print('old a1 ' + str(a))
    print('old b1 ' + str(b))
    
    
    # and
    is_true, a_temp, b_temp = rule_and(str_a, str_b, is_noun_in_b, is_adj_in_a)
    if is_true:
        a = a_temp
        b = b_temp
    
    print('old a2 ' + str(a))
    print('old b2 ' + str(b))
    # sequence
    is_true, a_temp, b_temp = rule_seq(str_a, str_b)
    if is_true:
        a = a_temp
        b = b_temp  
    
    print('old a3 ' + str(a))
    print('old b3 ' + str(b))
    
    str_a = str(a)
    str_b = str(b)
    print('new a ' + str_a)
    print('new b ' + str_b)
    print('hasil ' + str(ApplicationExpression(a, b).simplify()))
    print()
    return ApplicationExpression(a, b).simplify()


In [7]:
def replacer(a, b):
    pattern = '\w*?\+?\w*?\+?\w*_NN.*?_'
    mereplace = re.findall(pattern, b)
    res = ''
    for mreplace in mereplace:
        pattern = '\w*_JJ.*?_'
        hasil = re.findall(pattern, a)
        if hasil:
            res = a.replace(hasil[0], mreplace)
        if len(hasil) > 1:
            for index in range(1, len(hasil)):
                res = res.replace(hasil[index], mreplace)
    return res

In [8]:
def preprocessing(sentence):
    res = re.sub(' +', ' ', re.sub(r'[^\w\s]','',sentence.replace("'m", "am").replace("n't", "not").replace("'s", '')))
    return res

In [9]:
def glue_process(sent):
    sents = nltk.sent_tokenize(sent);
    resp = [];
    for sent in sents:
        preprocessed = preprocessing(sent)
    
        url = "http://localhost:5000/ccgParsing"
        data = {"sent": preprocessed}
        r = requests.post(url, data=data)

        res = r.json()

        from_res = res['tree']

        text = nltk.word_tokenize(preprocessed)
        nltk_pos = nltk.pos_tag(text)

        pos_tagged = insert_pos_tag(from_res, pos_tag(data['sent']), nltk_pos)
        hasil = parser(pos_tagged)

        tree = Tree.fromstring(hasil)
        #swn_score = get_score(preprocessed)

        print(tree)
        resp.append(lambda_calculus(tree))
    
    if(len(resp) > 1):
        a = read_expr(r'\X.NSS(X)')
        for r in resp:
            a = ApplicationExpression(a, r).simplify()
        return a
    else:
        return resp[0]
        

In [10]:
import pandas as pd

def semua(collection, targets, filename, err_filename):
    index = -1
    df = pd.DataFrame(columns=['index','sentence', 'lambda', 'raw_aspect', 'sentiment', 'aspect',  'target'])
    df_err = pd.DataFrame(columns=['index', 'sentence'])
    for data in collection:
        index += 1
        print(index)
        target = targets[index]
        if type(target) is not str:
            target = ''
        try:
            hasil = glue_process(data)
        
            #pattern = '(\w*?\+?\w*_NN_[P|Ne]_\d)'
            pattern1 = '\w*?\+?\w*?\+?\w*_NN_P_\d'
            pattern2 = '\w*?\+?\w*?\+?\w*_NN_Ne_\d'
            
            aspek1 = re.findall(pattern1, str(hasil))
            aspek2 = re.findall(pattern2, str(hasil))
            aspek = aspek1 + aspek2
            aspect = []
            sentiment = []
            temp = ''
            for asp in aspek:
                temp = asp.split('_')
                aspect.append(temp[0].replace('+', ' '))
                sentiment.append(1 if temp[2] == 'P' else 0)

            #print('------------------------')
            #print(aspect, sentiment)
            #print('------------------------')
            df = df.append({'index': index, 'sentence': data, 'lambda': hasil,'raw_aspect': aspek, 'sentiment': sentiment, 'aspect': aspect, 'target': target}, ignore_index=True)
        except:
            df_err = df_err.append({'index': index, 'sentence': data}, ignore_index=True)
        clear_output()
    df.to_csv(filename)
    df_err.to_csv(err_filename)
    return df

In [11]:
# input file
df = pd.read_csv("dataset.csv")
# preprocess
sentences = df['review']
targets = df["aspect"]

semua(sentences[0:596], targets, 'hasil_ccg.csv', 'hasil_ccg_error.csv')

Unnamed: 0,index,sentence,lambda,raw_aspect,sentiment,aspect,target
0,0,i recently purchased the canon powershot g3 an...,"CC(satisfied_VB_P_1(purchase_NN_N_0),purchased...",[],[],[],canon powershot g3[+3]
1,1,"the camera is very easy to use , in fact on a ...",was_VB_N_0(asked_VB_N_0(take_VB_N_0(vacationin...,"[elderly+group_NN_P_1, picture_NN_P_1, past+we...","[1, 1, 1, 1, 1]","[elderly group, picture, past week, recent tri...",use[+2]
2,2,"after i took their picture with their camera ,...","camera_NN_N_0(took_VB_N_0(picture_NN_N_0),i_FW...",[],[],[],
3,3,"i just told them , press halfway , wait for th...","told_VB_N_0(them_PRP_N_0,wait_VB_N_0(box_NN_N_...",[],[],[],
4,4,they fired away and the picture turned out qui...,"NSS(CC(turned_VB_N_0(picture_NN_N_0),fired_VB_...",[],[],[],picture[+2]
5,5,a few of my work constituants owned the g2 and...,CC(recommended_VB_P_1(CC(picture+quality_NN_N_...,[work_NN_P_1],[1],[work],picture quality[+1]
6,6,i 'm easily enlarging pictures to 8 1/2 x 11 w...,am_VB_N_0(CC(super+fine_NN_N_0(using_VB_N_0(CC...,[setting_NN_P_1],[1],[setting],picture quality[+1]
7,7,"ensure you get a larger flash , 128 or 256 , s...",CC(ll_VB_N_0(want_VB_N_0(larger+flash+card_NN_...,[pinch_NN_Ne_1],[0],[pinch],
8,8,"bottom line , well made camera , easy to use ,...",camera_NN_N_0(use_VB_N_0(include_VB_N_0(use_VB...,"[lense_NN_P_1, flash_NN_P_1, ability_NN_P_1]","[1, 1, 1]","[lense, flash, ability]","camera[+2], use[+2], feature[+1]"
9,9,i 'd highly recommend this camera for anyone w...,d_FW_N_0(is_VB_N_0(looking_VB_N_0(CC(flexibili...,"[ease_NN_P_1, quality_NN_P_1]","[1, 1]","[ease, quality]","picture quality[+3], use[+1], option[+1]"


In [18]:
import math
import pandas as pd

hasil = pd.read_csv("hasil_ccg.csv")
target = hasil['target']
aspek = hasil['aspect']
raw_aspek = []

true = 0
tp = 0
tn = 0
fp = 0
fn = 0

false = 0
count = 0
for x in range(0,596):
    array_aspek = aspek[x].replace("'",'').replace('[', '').replace(']', '').split(', ')
    if array_aspek[0] == '':
        array_aspek = []
    else:
        for a in array_aspek:
            raw_aspek.append(a)
    
    tipe_aspek = False if len(array_aspek) == 0 else True
    tipe_target = type(target[x]) is str
    count+=1
    array_target = []
    if tipe_target:
        array_target = target[x].split(', ')
    
    pattern = '\w.*(?=\[)'
    flag = False
    if tipe_aspek == tipe_target and tipe_aspek == True:
        for t in array_target:
            k = re.findall(pattern, t)
            if len(k) > 0:
                if k[0] != '':
                    for x in array_aspek:
                        if k[0] in x:
                            tp += 1
                            flag = True
                            break
                        
                    
    if flag == True:
        continue
    elif tipe_aspek == tipe_target and tipe_aspek == False:
        tn += 1
    elif tipe_aspek == False:
        fn += 1
    else:
        fp += 1
        
print(tp, tn, fp, fn, tp+tn+fp+fn, tp/(tp+fp), tp/(tp+fn), (tp+tn)/(tp+tn+fp+fn), count)

35 281 138 143 597 0.2023121387283237 0.19662921348314608 0.5293132328308208 596


In [21]:
targets = hasil['target']
target = []


for t in targets:
    if t is not np.nan:
        for s in t.split(', '):
            for x in s.split(','):
                jj = re.sub(r'\[[+|-]\d\]', '',x)
                jjs = re.sub(r'\[\w\]', '',jj)
                if(jjs):
                    target.append(jjs);

In [22]:
len(list(dict.fromkeys(target)))

106

In [23]:
37/106

0.3490566037735849

In [25]:
def calculate_precision_recall(aspect, target):
    tp = 0
    """      
    
    for a in aspect: 
        if a in target:
             tp += 1
   
    """
    for t in target: 
        for a in aspect:
            zz = t.split(' ')
            vv =0
            for z in zz:
                if z in a:
                    vv+=1
            if len(zz) == vv:
                tp+=1
                break
    
    P = (tp * 1.0) / (len(aspect) * 1.0)
    R = (tp * 1.0) / (len(target) * 1.0)
#     print(P, len(aspect))
#     print(R, len(target))
    
    f1 = 2.0 * P * R / (P+R)
    
    return P, R, f1

In [26]:
calculate_precision_recall(list(dict.fromkeys(raw_aspek)), list(dict.fromkeys(target)))

(0.30158730158730157, 0.5377358490566038, 0.38644067796610165)

In [157]:
glue_process("whether you are a novice or an expert, its ease of use and functionality goes together . ")

(<T_S[dcl]_0_2>
  (<T_S/S_0_2>
    (<L_{S/S}/S[dcl]_POS_IN_IN_POS_whether_{S/S}/S[dcl]> )
    (<T_S[dcl]_0_2>
      (<L_NP_POS_PRP_PRP_POS_you_NP> )
      (<T_S[dcl]\NP_0_2>
        (<L_{S[dcl]\NP}/NP_POS_VBP_VBP_POS_are_{S[dcl]\NP}/NP> )
        (<T_NP_0_2>
          (<T_NP_0_2>
            (<L_NP[nb]/N_POS_DT_DT_POS_a_NP[nb]/N> )
            (<L_N_POS_NN_NN_POS_novice_N> ))
          (<T_NP\NP_0_2>
            (<L_conj_POS_CC_CC_POS_or_conj> )
            (<T_NP_0_2>
              (<L_NP[nb]/N_POS_DT_DT_POS_an_NP[nb]/N> )
              (<L_N_POS_NN_NN_POS_expert_N> )))))))
  (<T_S[dcl]_0_2>
    (<T_NP_0_2>
      (<T_NP_0_2>
        (<L_NP[nb]/N_POS_PRP$_PRP$_POS_its_NP[nb]/N> )
        (<L_N_POS_NN_NN_POS_ease_N> ))
      (<T_NP\NP_0_2>
        (<L_{NP\NP}/NP_POS_IN_IN_POS_of_{NP\NP}/NP> )
        (<T_NP_0_1>
          (<T_N_0_2>
            (<L_N_POS_NN_NN_POS_use_N> )
            (<T_N\N_0_2>
              (<L_conj_POS_CC_CC_POS_and_conj> )
              (<L_N_POS_NN_NN_POS_functio

<ApplicationExpression are_VB_N_0(CC(expert_NN_N_0,novice_NN_N_0),you_PRP_N_0,CC(functionality_NN_N_0,use_NN_N_0,ease_NN_P_1))>

In [156]:
glue_process("The bowl of squid eyeball stew is hot and delicious")

(<T_S[dcl]_0_2>
  (<T_NP_0_2>
    (<T_NP_0_2>
      (<L_NP[nb]/N_POS_DT_DT_POS_The_NP[nb]/N> )
      (<L_N_POS_NN_NN_POS_bowl_N> ))
    (<T_NP\NP_0_2>
      (<L_{NP\NP}/NP_POS_IN_IN_POS_of_{NP\NP}/NP> )
      (<T_NP_0_1>
        (<T_N_0_2>
          (<L_N/N_POS_NN_NN_POS_squid_N/N> )
          (<T_N_0_2>
            (<L_N/N_POS_NN_NN_POS_eyeball_N/N> )
            (<L_N_POS_NN_NN_POS_stew_N> ))))))
  (<T_S[dcl]\NP_0_2>
    (<L_{S[dcl]\NP}/{S[adj]\NP}_POS_VBZ_VBZ_POS_is_{S[dcl]\NP}/{S[adj]\NP}>)
    (<T_S[adj]\NP_0_2>
      (<L_S[adj]\NP_POS_JJ_JJ_POS_hot_S[adj]\NP> )
      (<T_{S[adj]\NP}\{S[adj]\NP}_0_2>
        (<L_conj_POS_CC_CC_POS_and_conj> )
        (<L_S[adj]\NP_POS_JJ_JJ_POS_delicious_S[adj]\NP> )))))
old a delicious_JJ_P_1
old b CC
old a1 delicious_JJ_P_1
old b1 CC
old a2 CC
old b2 delicious_JJ_P_1
old a3 CC
old b3 delicious_JJ_P_1
new a CC
new b delicious_JJ_P_1
hasil CC(delicious_JJ_P_1)

old a CC(delicious_JJ_P_1)
old b hot_JJ_P_1
old a1 CC(delicious_JJ_P_1)
old b1 hot_JJ_P

<ApplicationExpression CC(bowl_NN_P_1,bowl_NN_P_1)>

In [155]:
glue_process("i bought my canon g3 and i have to say i am very satisfied")

(<T_S[dcl]_0_2>
  (<T_S[dcl]_0_2>
    (<L_NP_POS_LS_NN_POS_i_NP> )
    (<T_S[dcl]\NP_0_2>
      (<L_{S[dcl]\NP}/NP_POS_VBD_VBD_POS_bought_{S[dcl]\NP}/NP> )
      (<T_NP_0_2>
        (<L_NP[nb]/N_POS_PRP$_PRP$_POS_my_NP[nb]/N> )
        (<T_N_0_2>
          (<L_N/N_POS_NN_NN_POS_canon_N/N> )
          (<L_N_POS_NN_NN_POS_g3_N> )))))
  (<T_S[dcl]\S[dcl]_0_2>
    (<L_conj_POS_CC_CC_POS_and_conj> )
    (<T_S[dcl]_0_2>
      (<L_NP_POS_FW_NN_POS_i_NP> )
      (<T_S[dcl]\NP_0_2>
        (<L_{S[dcl]\NP}/{S[to]\NP}_POS_VBP_VBP_POS_have_{S[dcl]\NP}/{S[to]\NP}>)
        (<T_S[to]\NP_0_2>
          (<L_{S[to]\NP}/{S[b]\NP}_POS_TO_TO_POS_to_{S[to]\NP}/{S[b]\NP}>)
          (<T_S[b]\NP_0_2>
            (<L_{S[b]\NP}/S[dcl]_POS_VB_VB_POS_say_{S[b]\NP}/S[dcl]>)
            (<T_S[dcl]_0_2>
              (<L_NP_POS_FW_JJ_POS_i_NP> )
              (<T_S[dcl]\NP_0_2>
                (<L_{S[dcl]\NP}/{S[adj]\NP}_POS_VBP_VBP_POS_am_{S[dcl]\NP}/{S[adj]\NP}>)
                (<T_S[adj]\NP_0_2>
               

<ApplicationExpression CC(have_VB_N_0(say_VB_N_0(seq(canon+g3_NN_P_2,i_FW_N_0)),i_FW_N_0))>

In [154]:
glue_process("i recently purchased the canon powershot g3 and am extremely satisfied with the purchase .")

(<T_S[dcl]_0_2>
  (<L_NP_POS_LS_NN_POS_i_NP> )
  (<T_S[dcl]\NP_0_2>
    (<T_S[dcl]\NP_0_2>
      (<L_{S\NP}/{S\NP}_POS_RB_RB_POS_recently_{S\NP}/{S\NP}> )
      (<T_S[dcl]\NP_0_2>
        (<L_{S[dcl]\NP}/NP_POS_VBD_VBD_POS_purchased_{S[dcl]\NP}/NP>)
        (<T_NP_0_2>
          (<L_NP[nb]/N_POS_DT_DT_POS_the_NP[nb]/N> )
          (<T_N_0_2>
            (<L_N/N_POS_NN_NN_POS_canon_N/N> )
            (<T_N_0_2>
              (<L_N/N_POS_NN_JJ_POS_powershot_N/N> )
              (<L_N_POS_NN_NN_POS_g3_N> ))))))
    (<T_{S[dcl]\NP}\{S[dcl]\NP}_0_2>
      (<L_conj_POS_CC_CC_POS_and_conj> )
      (<T_S[dcl]\NP_0_2>
        (<L_{S[dcl]\NP}/{S[adj]\NP}_POS_VBP_VBP_POS_am_{S[dcl]\NP}/{S[adj]\NP}>)
        (<T_S[adj]\NP_0_2>
          (<L_{S[adj]\NP}/{S[adj]\NP}_POS_RB_RB_POS_extremely_{S[adj]\NP}/{S[adj]\NP}>)
          (<T_S[adj]\NP_0_2>
            (<L_{S[adj]\NP}/PP_POS_VBN_JJ_POS_satisfied_{S[adj]\NP}/PP>)
            (<T_PP_0_2>
              (<L_PP/NP_POS_IN_IN_POS_with_PP/NP> )
         

<ApplicationExpression CC(satisfied_VB_P_1(purchase_NN_N_0),purchased_VB_N_0(canon+powershot+g3_NN_N_0),i_LS_N_0)>

In [164]:
glue_process("they fired away and the picture turned out quite nicely . ( as all of my pictures have thusfar ) .")

(<T_S[dcl]_0_2>
  (<T_S[dcl]_0_2>
    (<L_NP_POS_PRP_PRP_POS_they_NP> )
    (<T_S[dcl]\NP_0_2>
      (<L_S[dcl]\NP_POS_VBD_VBD_POS_fired_S[dcl]\NP> )
      (<L_{S\NP}\{S\NP}_POS_RB_RB_POS_away_{S\NP}\{S\NP}> )))
  (<T_S[dcl]\S[dcl]_0_2>
    (<L_conj_POS_CC_CC_POS_and_conj> )
    (<T_S[dcl]_0_2>
      (<T_NP_0_2>
        (<L_NP[nb]/N_POS_DT_DT_POS_the_NP[nb]/N> )
        (<L_N_POS_NN_NN_POS_picture_N> ))
      (<T_S[dcl]\NP_0_2>
        (<T_S[dcl]\NP_0_2>
          (<L_S[dcl]\NP_POS_VBD_VBD_POS_turned_S[dcl]\NP> )
          (<L_{S\NP}\{S\NP}_POS_RP_RP_POS_out_{S\NP}\{S\NP}> ))
        (<T_{S\NP}\{S\NP}_0_2>
          (<L_{{S\NP}\{S\NP}}/{{S\NP}\{S\NP}}_POS_RB_RB_POS_quite_{{S\NP}\{S\NP}}/{{S\NP}\{S\NP}}>)
          (<L_{S\NP}\{S\NP}_POS_RB_RB_POS_nicely_{S\NP}\{S\NP}> ))))))
old a quite_RB_N_N_0
old b nicely_RB_N_P_1
old a1 \X.X
old b1 nicely_RB_N_P_1
old a2 \X.X
old b2 nicely_RB_N_P_1
old a3 \X.X
old b3 nicely_RB_N_P_1
new a \X.X
new b nicely_RB_N_P_1
hasil nicely_RB_N_P_1

old a \X.X


<ApplicationExpression NSS(CC(turned_VB_N_0(picture_NN_N_0),fired_VB_N_0(they_PRP_N_0)),have_VB_N_0(pictures_NNS_N_0))>

In [165]:
glue_process("in a word , 'awesome' is how i would describe this camera . ")

(<T_S[dcl]_0_2>
  (<T_S/S_0_2>
    (<L_{S/S}/NP_POS_IN_IN_POS_in_{S/S}/NP> )
    (<T_NP_0_2>
      (<L_NP[nb]/N_POS_DT_DT_POS_a_NP[nb]/N> )
      (<L_N_POS_NN_NN_POS_word_N> )))
  (<T_S[dcl]_0_2>
    (<T_NP_0_1> (<L_N_POS_JJ_NN_POS_awesome_N> ))
    (<T_S[dcl]\NP_0_2>
      (<L_{S[dcl]\NP}/S[qem]_POS_VBZ_VBZ_POS_is_{S[dcl]\NP}/S[qem]> )
      (<T_S[qem]_0_2>
        (<L_S[qem]/S[dcl]_POS_WRB_WRB_POS_how_S[qem]/S[dcl]> )
        (<T_S[dcl]_0_2>
          (<L_NP_POS_FW_JJ_POS_i_NP> )
          (<T_S[dcl]\NP_0_2>
            (<L_{S[dcl]\NP}/{S[b]\NP}_POS_MD_MD_POS_would_{S[dcl]\NP}/{S[b]\NP}>)
            (<T_S[b]\NP_0_2>
              (<L_{S[b]\NP}/NP_POS_VB_VB_POS_describe_{S[b]\NP}/NP> )
              (<T_NP_0_2>
                (<L_NP[nb]/N_POS_DT_DT_POS_this_NP[nb]/N> )
                (<L_N_POS_NN_NN_POS_camera_N> )))))))))
old a \X.X
old b word_NN_N_0
old a1 \X.X
old b1 word_NN_N_0
old a2 \X.X
old b2 word_NN_N_0
old a3 \X.X
old b3 word_NN_N_0
new a \X.X
new b word_NN_N_0
hasil word

<ApplicationExpression word_NN_N_0(is_VB_N_0(describe_VB_N_0(camera_NN_N_0,i_FW_N_0),awesome_JJ_P_1))>