# Explore Data
**Author:** Jane Hung  
**Date:** 1 Mar 2020  
**Citations:**  
@inproceedings{xu_bert2019,
    title = "BERT Post-Training for Review Reading Comprehension and Aspect-based Sentiment Analysis",
    author = "Xu, Hu and Liu, Bing and Shu, Lei and Yu, Philip S.",
    booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics",
    year = "2019",
}  
https://drive.google.com/file/d/1NGH5bqzEx6aDlYJ7O3hepZF4i_p4iMR8/view

## Initialize environment

In [70]:
import pandas as pd
import numpy as np
import os
import sys
import json
import pprint
import tensorflow as tf
from time import time
import io
import re

import nltk

import pickle
from csv import reader

import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib.ticker import PercentFormatter

from tensorflow.keras import layers
from tensorflow.keras.backend import sparse_categorical_crossentropy
from tensorflow.keras.layers import Dense, Flatten

from datetime import datetime

from transformers import BertTokenizer, TFBertModel

from sklearn.metrics import log_loss, confusion_matrix, classification_report, accuracy_score

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Helper functions

In [71]:
def read_json(filename):
    f = open(filename,'r')
    data = json.loads(f.read())
    print('\n',filename)
    pprint.pprint(dict(list(data.items())[:1]))
    return(data)

## Import data

### Training Data

In [72]:
ae_laptop_train = read_json('../data/hu-data/ae/laptop/train.json')
ae_rest_train = read_json('../data/hu-data/ae/rest/train.json')


asc_laptop_train = read_json('../data/hu-data/asc/laptop/train.json')
asc_rest_train = read_json('../data/hu-data/asc/rest/train.json')


 ../data/hu-data/ae/laptop/train.json
{'0': {'label': ['B',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'B',
                 'I',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O'],
       'sentence': ['Keyboard',
                    'is',
                    'great',
                    'but',
                    'primary',
                    'and',
                    'secondary',
                    'control',
                    'buttons',
                    'could',
                    'be',
                    'more',
                    'durable',
                    '.']}}

 ../data/hu-data/ae/rest/train.json
{'0': {'label': ['O', 'O', 'O', 'B'],
       'sentence': ['I', 'LOVE', 'their', 'Thai']}}

 ../data/hu-data/asc/laptop/train.json
{'327_0': {'id': '327_0',
           'polarity': 'positive',
           'sent

### Dev data

In [73]:
ae_laptop_dev  = read_json('../data/hu-data/ae/laptop/dev.json')
ae_rest_dev = read_json('../data/hu-data/ae/rest/dev.json')


asc_laptop_dev = read_json('../data/hu-data/asc/laptop/dev.json')
asc_rest_dev = read_json('../data/hu-data/asc/rest/dev.json')


 ../data/hu-data/ae/laptop/dev.json
{'0': {'label': ['O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O'],
       'sentence': ['I',
                    'have',
                    'had',
                    'this',
                    'laptop',
                    'for',
                    'a',
                    'few',
                    'months',
                    'now',
                    'and',
                    'i',
                    'would',
                    'say',
                    'im',
                    'pretty',
                    'satisfied',
                    '.']}}

 ../data/hu-data/ae/rest/dev.json
{'0': {'label': ['O',
           

Q: How do we get from the ASC data back to the AE data?

In [74]:
asc_laptop_train['327_0']
ae_laptop_train['400']

{'polarity': 'positive',
 'term': 'use',
 'id': '327_0',
 'sentence': 'Also it is very good for college students who just need a reliable, easy to use computer.'}

{'label': ['O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O',
  'O'],
 'sentence': ['If',
  'you',
  'could',
  'stretch',
  'by',
  'a',
  'few',
  '100',
  'dollars',
  'I',
  'highly',
  'recommend',
  'you',
  'should',
  'replace',
  'your',
  'Windows',
  'laptop',
  'with',
  'this',
  'one',
  '.']}

## Play with BERT

In [75]:
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

In [76]:
tokenizer.tokenize(asc_laptop_train['327_0']['sentence'])

['Also',
 'it',
 'is',
 'very',
 'good',
 'for',
 'college',
 'students',
 'who',
 'just',
 'need',
 'a',
 'reliable',
 ',',
 'easy',
 'to',
 'use',
 'computer',
 '.']

## Play with AE baseline - NN+

In [77]:
# tag with universal POS. Especially for nouns
nltk.pos_tag(ae_laptop_train['0']['sentence'],tagset='universal')

[('Keyboard', 'NOUN'),
 ('is', 'VERB'),
 ('great', 'ADJ'),
 ('but', 'CONJ'),
 ('primary', 'ADJ'),
 ('and', 'CONJ'),
 ('secondary', 'ADJ'),
 ('control', 'NOUN'),
 ('buttons', 'NOUN'),
 ('could', 'VERB'),
 ('be', 'VERB'),
 ('more', 'ADV'),
 ('durable', 'ADJ'),
 ('.', '.')]

In [78]:
ae_laptop_dev_df = pd.DataFrame.from_dict(ae_laptop_dev,orient='index')
ae_laptop_dev_df

Unnamed: 0,label,sentence
0,"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[I, have, had, this, laptop, for, a, few, mont..."
1,"[O, O, O, O, B, I, O, O, O, O, O, O, B, O, O, ...","[Additional, caveat, :, the, base, installatio..."
2,"[O, O, O, O, B, O, O, O, O, B, O, O, O, O, O, ...","[it, is, of, high, quality, ,, has, a, killer,..."
3,"[O, B, O, O, O, O, O, O, O, O, O, O, O, O]","[The, screen, gets, smeary, and, dusty, very, ..."
4,"[O, O, O, O, O, O, O, O, O, O, O]","[I, previously, owned, an, HP, desktop, and, a..."
...,...,...
145,"[O, O, O, O, O]","[The, benefits, were, immediate, !]"
146,"[O, O, O, O, O, O, O, O, O, O, O, O, O, O]","[All-, in-, all, ,, I, would, definitely, reco..."
147,"[O, O, O, O, O]","[just, chill, and, enjoy, .]"
148,"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[My, son, and, his, family, have, a, hard, tim..."


In [79]:
def pos_ae(tokenized_sentence):
    """
    Tag sentences using POS tagger and identify consecutive nouns as entities
    """
    pos_sent = tokenized_sentence.apply(lambda sent:nltk.pos_tag(sent,tagset='universal'))
    
    
    # tag with IOB terminology
    ae_tag = lambda sent:['O' if token[1] != 'NOUN' 
                          else 'B' if ((token[1]=='NOUN') & (sent[ind-1][1]!='NOUN')) 
                          else 'I' for ind,token in enumerate(sent)]

    return(pos_sent.apply(ae_tag))

# since the POS tagger is based on the words themselves and not context.
ae_laptop_dev_df['predictions'] = pos_ae(ae_laptop_dev_df['sentence'])
ae_laptop_dev_df.head()

def convert_int(tagged_tokens):
    """
    Convert B,I,O tags to integers
    """
    return(tagged_tokens.apply(lambda sent: [0 if token=='O' else 1 if token=='B' else 2 for token in sent]))

convert_int(ae_laptop_dev_df['predictions'])


Unnamed: 0,label,sentence,predictions
0,"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[I, have, had, this, laptop, for, a, few, mont...","[O, O, O, O, B, O, O, O, B, O, O, O, O, O, O, ..."
1,"[O, O, O, O, B, I, O, O, O, O, O, O, B, O, O, ...","[Additional, caveat, :, the, base, installatio...","[O, B, O, O, B, I, O, O, O, B, O, O, B, O, O, ..."
2,"[O, O, O, O, B, O, O, O, O, B, O, O, O, O, O, ...","[it, is, of, high, quality, ,, has, a, killer,...","[O, O, O, O, B, O, O, O, B, I, O, O, O, O, O, ..."
3,"[O, B, O, O, O, O, O, O, O, O, O, O, O, O]","[The, screen, gets, smeary, and, dusty, very, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O]"
4,"[O, O, O, O, O, O, O, O, O, O, O]","[I, previously, owned, an, HP, desktop, and, a...","[O, O, O, O, B, I, O, O, B, I, O]"


0      [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...
1      [0, 1, 0, 0, 1, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, ...
2      [0, 0, 0, 0, 1, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, ...
3             [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
4                      [0, 0, 0, 0, 1, 2, 0, 0, 1, 2, 0]
                             ...                        
145                                      [0, 1, 0, 0, 0]
146           [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0]
147                                      [0, 1, 0, 1, 0]
148    [0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, ...
149    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
Name: predictions, Length: 150, dtype: object

## Explore AE Regex Parser - business rules

In [97]:
# try a more sophisticated method for chunking
def regex_parser(tokenized_sentence):
    """
    Use a Regex Parser to provide some context around noun phrases
    """
    pos_sent = nltk.pos_tag(tokenized_sentence)
#     print(pos_sent)
#     grammar = r"""
#       NP: {<DT|PP\$>?<JJ>*<NN>}   # chunk determiner/possessive, adjectives and noun
#           {<NNP>+}                # chunk sequences of proper nouns
#     """
    
    # Update Grammar Regex to include prepositional phrases ala Semeval annotation guidelines
    grammar = r"""
    NP: {<NN><IN><DT><NN|NNP>+}
        {<NNP>+}
        {<NN>+}
    """
    
    cp = nltk.RegexpParser(grammar)

    tree = cp.parse(pos_sent)
    
    iob = [el[2][0] for el in nltk.chunk.util.tree2conlltags(tree)]
    return(iob)

# print example
ae_laptop_train['15']['sentence']
regex_parser(ae_laptop_train['15']['sentence'])

print(['cover','for','the','DVD','drive'])
regex_parser(['cover','for','the','DVD','drive'])

# since the POS tagger is based on the words themselves and not context.
ae_laptop_dev_df['predictions_1'] = ae_laptop_dev_df['sentence'].apply(lambda x: regex_parser(x))
ae_laptop_dev_df.head()


['Toshiba',
 'is',
 'aware',
 'of',
 'the',
 'issue',
 'but',
 'unless',
 'the',
 'extended',
 'warrenty',
 'is',
 'bought',
 'Toshiba',
 'will',
 'do',
 'nothing',
 'about',
 'it',
 '.']

['B',
 'O',
 'O',
 'O',
 'O',
 'B',
 'O',
 'O',
 'O',
 'O',
 'B',
 'O',
 'O',
 'B',
 'O',
 'O',
 'B',
 'O',
 'O',
 'O']

['cover', 'for', 'the', 'DVD', 'drive']


['B', 'I', 'I', 'I', 'I']

Unnamed: 0,label,sentence,predictions,predictions_1,iob_gold_tree,accuracy,accuracy_1
0,"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[I, have, had, this, laptop, for, a, few, mont...","[O, O, O, O, B, O, O, O, B, O, O, O, O, O, O, ...","[O, O, O, O, B, O, O, O, O, O, O, O, O, O, O, ...","[(I, PRP, O), (have, VBP, O), (had, VBN, O), (...",0.888889,1.0
1,"[O, O, O, O, B, I, O, O, O, O, O, O, B, O, O, ...","[Additional, caveat, :, the, base, installatio...","[O, B, O, O, B, I, O, O, O, B, O, O, B, O, O, ...","[O, B, O, O, B, I, O, O, O, B, O, O, B, O, O, ...","[(Additional, JJ, O), (caveat, NN, O), (:, :, ...",0.84,0.84
2,"[O, O, O, O, B, O, O, O, O, B, O, O, O, O, O, ...","[it, is, of, high, quality, ,, has, a, killer,...","[O, O, O, O, B, O, O, O, B, I, O, O, O, O, O, ...","[O, O, O, O, B, O, O, O, B, B, O, O, O, O, O, ...","[(it, PRP, O), (is, VBZ, O), (of, IN, O), (hig...",0.894737,0.921053
3,"[O, B, O, O, O, O, O, O, O, O, O, O, O, O]","[The, screen, gets, smeary, and, dusty, very, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O]","[O, O, O, O, O, O, O, O, O, O, O, O, O, O]","[(The, DT, O), (screen, JJ, B-NP), (gets, VBZ,...",0.928571,0.928571
4,"[O, O, O, O, O, O, O, O, O, O, O]","[I, previously, owned, an, HP, desktop, and, a...","[O, O, O, O, B, I, O, O, B, I, O]","[O, O, O, O, B, B, O, O, B, B, O]","[(I, PRP, O), (previously, RB, O), (owned, VBD...",0.636364,0.818182


## Explore AE evaluation - CE

Haven't seen many papers using CE

In [98]:
# only using 0,1 because there aren't many very large token phrases
log_loss(convert_int(pd.DataFrame(ae_laptop_dev_df.iloc[0]['label'])),convert_int(pd.DataFrame(ae_laptop_dev_df.iloc[0]['predictions'])),labels=[0,1])

3.837730665815654

## Explore AE evaluation - SemEval14

http://www.davidsbatista.net/blog/2018/05/09/Named_Entity_Evaluation/  
- partial boundary match over the surface string

In [99]:
# TODO need to explore how we want to move forward with all sentences rather than just 1.
# Should try to implement the SemEval14 evaluation criteria bc this is best practice

In [None]:
# TODO amend this tree structure for all predictions as well
print('\nGold Standard:')
# tag every sentence with the pos
gold_tree = ae_laptop_dev_df['sentence'].apply(lambda x: nltk.pos_tag(x))
print(gold_tree)
iob_gold_tree = [nltk.Tree('S',
                           [(el[0], el[1], ae_laptop_dev_df.iloc[tree_ind]['label'][ind])
                            if ae_laptop_dev_df.iloc[tree_ind]['label'][ind]=='O'
                            else (el[0], el[1], ae_laptop_dev_df.iloc[tree_ind]['label'][ind] + '-NP')
                            for ind,el in enumerate(tree)])
                for tree_ind, tree in enumerate(gold_tree)]
ae_laptop_dev_df['iob_gold_tree'] = iob_gold_tree
ae_laptop_dev_df.head()

In [100]:
def get_entities(sentence_lst, predictions_lst):
    """
    Reformat the IOB structure to get the actual entities from the sentence
    """
    
    # for every sentence, iterate through
    all_entities = []
    for sample in range(len(predictions_lst)):
    
        # get indices where entities are identified
        predictions = np.array(predictions_lst[sample])
        ind = (predictions == 'B') | (predictions == 'I')
        
        # create list of numerical indices and boolean indices. ex. [(4, True), (10, True), (11, True), (15, True)]
        ind_tuple = [num_ind for num_ind in list(enumerate(ind)) if num_ind[1]==True]
        
        # get the sentence of interest. identify what these entities are
        sentence = np.array(sentence_lst[sample])

        # group the phrases together
        entities = []
        for subset,num_ind_tuple in zip(sentence[ind], ind_tuple): # [('price', (4, True)), ('netbook', (10, True)), ('*', (11, True)), ('machine', (15, True))]
            # put the B in entities
            if predictions[num_ind_tuple[0]][0] == 'B':
                entities.append([subset])
            # if the tag is I, add to the last item of the list
            elif predictions[num_ind_tuple[0]][0] == 'I':
                last_entry = entities.pop()
                last_entry.append(subset)
                entities.append(last_entry)
            # there should not be any 'O' indices here
            else:
                print('Error')
        all_entities.append(entities)
    return(all_entities)

prediction_entities = get_entities(ae_laptop_dev_df.sentence,ae_laptop_dev_df.predictions)
gold_entities = get_entities(ae_laptop_dev_df.sentence,ae_laptop_dev_df.label)
prediction_entities[:5]
gold_entities[:5]

[[['laptop'], ['months']],
 [['caveat'],
  ['base', 'installation'],
  ['Toshiba'],
  ['software'],
  ['user'],
  ['liking']],
 [['quality'], ['killer', 'GUI'], ['lots'], ['applications']],
 [],
 [['HP', 'desktop'], ['Dell', 'laptop']]]

[[],
 [['base', 'installation'], ['software']],
 [['quality'], ['GUI'], ['applications'], ['use']],
 [['screen']],
 []]

In [101]:
def get_ae_eval_features(gold_entities,prediction_entities,verbose=False):
    # TODO need to generalize and do for all samples
    # TODO may later need to update these calculations to encompass sentence location.
    y_true_df = pd.DataFrame([[ind,sub_el] for ind,el in enumerate(gold_entities) for sub_el in el], columns=['sample_index','entity'])
    y_pred_df = pd.DataFrame([[ind,sub_el] for ind,el in enumerate(prediction_entities) for sub_el in el], columns=['sample_index','entity'])
    print('True')
    display(y_true_df.head())
    print('Pred')
    display(y_pred_df.head())

    cor = 0
    inc = 0
    par = 0
    mis = 0
    spu = 0

    for el in range(len(gold_entities)):
        if verbose:
            print('\n',el)
        true_subset = y_true_df[y_true_df.sample_index == el]
        pred_subset = y_pred_df[y_pred_df.sample_index == el]
        true_entities = set(true_subset.entity.apply(lambda x: '_'.join(x)))
        pred_entities = set(pred_subset.entity.apply(lambda x: '_'.join(x)))
        if verbose:
            print('True')
            print(true_entities)
            print('Pred')
            print(pred_entities)

        # get correct
        cor_entities = true_entities & pred_entities
        if verbose:
            print(f'Correct entities: {cor_entities}')
        cor += len(cor_entities)
        true_entities = true_entities - cor_entities
        pred_entities = pred_entities - cor_entities

        # get partial and missed
        for true in true_entities:
            # Take into account if the prediction contains a portion of the correct and if correct contains a portion of the prediction
            par_entities = set([pred for pred in pred_entities if (true in pred) | (pred in true)])
            if len(par_entities) != 0:
                if verbose:
                    print(f'Partial entities: {set([true])}')
                par += len(par_entities)
                true_entities = true_entities - set([true])
                pred_entities = pred_entities - par_entities
            else:
                if verbose:
                    print(f'Missed entities: {set([true])}')
                mis += 1
                true_entities = true_entities - set([true])

        if len(true_entities) == 0:
            if verbose:
                print(f'Spurious entities: {pred_entities}')
            spu += len(pred_entities)
        else:
            print('Error')

    print(f'\nCorrect: {cor}')
    print(f'Partial: {par}')
    print(f'Missed: {mis}')
    print(f'Spurious: {spu}')
    return(cor,par,mis,spu,inc)
cor,par,mis,spu,inc = get_ae_eval_features(gold_entities,prediction_entities,verbose=False)

True


Unnamed: 0,sample_index,entity
0,1,"[base, installation]"
1,1,[software]
2,2,[quality]
3,2,[GUI]
4,2,[applications]


Pred


Unnamed: 0,sample_index,entity
0,0,[laptop]
1,0,[months]
2,1,[caveat]
3,1,"[base, installation]"
4,1,[Toshiba]



Correct: 107
Partial: 25
Missed: 15
Spurious: 310


In [115]:
def get_ae_eval(sentence_lst, y_true, y_pred,verbose=False):
    """
    Get entity recognition evaluations accoridng to the partial match SemEval strategy
    """
    prediction_entities = get_entities(sentence_lst,y_pred)
    gold_entities = get_entities(sentence_lst,y_true)
    
    cor,par,mis,spu,inc = get_ae_eval_features(gold_entities,prediction_entities,verbose=verbose)
    
    pos_eval = cor + inc + par + mis
    act_eval = cor + inc + par + spu

    precision = (cor + .5 * par) / act_eval
    recall = (cor + .5 * par) / pos_eval
    f1 = ( 2* precision * recall) / (precision + recall)
    
    print(f'\nPrecision: \t{precision}')
    print(f'Recall: \t{recall}')
    print(f'F1-Score: \t{f1}')
    return(precision, recall, f1)

get_ae_eval(ae_laptop_dev_df.sentence,ae_laptop_dev_df.label,ae_laptop_dev_df.predictions)

True


Unnamed: 0,sample_index,entity
0,1,"[base, installation]"
1,1,[software]
2,2,[quality]
3,2,[GUI]
4,2,[applications]


Pred


Unnamed: 0,sample_index,entity
0,0,[laptop]
1,0,[months]
2,1,[caveat]
3,1,"[base, installation]"
4,1,[Toshiba]



Correct: 107
Partial: 25
Missed: 15
Spurious: 310

Precision: 	0.2703619909502262
Recall: 	0.8129251700680272
F1-Score: 	0.40577249575551777


(0.2703619909502262, 0.8129251700680272, 0.40577249575551777)

In [116]:
get_ae_eval(ae_laptop_dev_df.sentence,ae_laptop_dev_df.label,ae_laptop_dev_df.predictions_1,verbose=True)

True


Unnamed: 0,sample_index,entity
0,1,"[base, installation]"
1,1,[software]
2,2,[quality]
3,2,[GUI]
4,2,[applications]


Pred


Unnamed: 0,sample_index,entity
0,0,[laptop]
1,1,[caveat]
2,1,"[base, installation]"
3,1,[Toshiba]
4,1,[software]



 0
True
set()
Pred
{'laptop'}
Correct entities: set()
Spurious entities: {'laptop'}

 1
True
{'base_installation', 'software'}
Pred
{'base_installation', 'caveat', 'Toshiba', 'software', 'liking', 'user'}
Correct entities: {'base_installation', 'software'}
Spurious entities: {'caveat', 'Toshiba', 'liking', 'user'}

 2
True
{'applications', 'quality', 'use', 'GUI'}
Pred
{'quality', 'killer', 'GUI'}
Correct entities: {'quality', 'GUI'}
Missed entities: {'applications'}
Missed entities: {'use'}
Spurious entities: {'killer'}

 3
True
{'screen'}
Pred
set()
Correct entities: set()
Missed entities: {'screen'}
Spurious entities: set()

 4
True
set()
Pred
{'HP', 'laptop', 'Dell', 'desktop'}
Correct entities: set()
Spurious entities: {'HP', 'laptop', 'Dell', 'desktop'}

 5
True
{'ease', 'power'}
Pred
{'ease_of_the_Mac', 'power'}
Correct entities: {'power'}
Partial entities: {'ease'}
Spurious entities: set()

 6
True
{'internet_speed'}
Pred
{'internet_speed'}
Correct entities: {'internet_speed'}

(0.25609756097560976, 0.6342281879194631, 0.3648648648648649)

## Explore AE evaluation - ChunkScore
https://stackoverflow.com/questions/17325554/difference-between-iob-accuracy-and-precision  
Somehow, this is not working.

In [104]:
# TODO get ChunkScore to work
tokenized_sentence = ae_laptop_train['15']['sentence']
pos_sent = nltk.pos_tag(tokenized_sentence)

##
grammar = r"""
  NP: {<DT|PP\$>?<JJ>*<NN>}   # chunk determiner/possessive, adjectives and noun
      {<NNP>+}                # chunk sequences of proper nouns
"""
cp = nltk.RegexpParser(grammar)

tree = cp.parse(pos_sent)

iob = [el[2][0] for el in nltk.chunk.util.tree2conlltags(tree)]

print('Prediction:')
print(tree)
nltk.chunk.util.tree2conlltags(tree)
regex_parser(ae_laptop_train['15']['sentence'])
##

gold_tree = pos_sent
print('\nGold Standard:')
# create the tree with IOB input
iob_gold_tree = nltk.Tree('S',[(el[0], el[1], ae_laptop_train['15']['label'][ind]) if ae_laptop_train['15']['label'][ind]=='O' 
                               else (el[0], el[1], ae_laptop_train['15']['label'][ind] + '-NP')for ind,el in enumerate(gold_tree)])
print(nltk.chunk.util.conlltags2tree(iob_gold_tree))
# print(nltk.chunk.util.conlltags2tree([(el[0], el[1], ae_laptop_train['15']['label'][ind])for ind,el in enumerate(gold_tree)]))
# print(nltk.chunk.util.conlltags2tree())
print(cp.evaluate([iob_gold_tree]))

# nltk.chunk.util.tagstr2tree(' '.join(tokenized_sentence), chunk_label='NP', root_label='S', sep='/')

Prediction:
(S
  (NP Toshiba/NNP)
  is/VBZ
  aware/JJ
  of/IN
  (NP the/DT issue/NN)
  but/CC
  unless/IN
  (NP the/DT extended/JJ warrenty/NN)
  is/VBZ
  bought/VBN
  (NP Toshiba/NNP)
  will/MD
  do/VB
  (NP nothing/NN)
  about/IN
  it/PRP
  ./.)


[('Toshiba', 'NNP', 'B-NP'),
 ('is', 'VBZ', 'O'),
 ('aware', 'JJ', 'O'),
 ('of', 'IN', 'O'),
 ('the', 'DT', 'B-NP'),
 ('issue', 'NN', 'I-NP'),
 ('but', 'CC', 'O'),
 ('unless', 'IN', 'O'),
 ('the', 'DT', 'B-NP'),
 ('extended', 'JJ', 'I-NP'),
 ('warrenty', 'NN', 'I-NP'),
 ('is', 'VBZ', 'O'),
 ('bought', 'VBN', 'O'),
 ('Toshiba', 'NNP', 'B-NP'),
 ('will', 'MD', 'O'),
 ('do', 'VB', 'O'),
 ('nothing', 'NN', 'B-NP'),
 ('about', 'IN', 'O'),
 ('it', 'PRP', 'O'),
 ('.', '.', 'O')]

['B',
 'O',
 'O',
 'O',
 'O',
 'B',
 'O',
 'O',
 'O',
 'O',
 'B',
 'O',
 'O',
 'B',
 'O',
 'O',
 'B',
 'O',
 'O',
 'O']


Gold Standard:
(S
  Toshiba/NNP
  is/VBZ
  aware/JJ
  of/IN
  the/DT
  issue/NN
  but/CC
  unless/IN
  the/DT
  (NP extended/JJ warrenty/NN)
  is/VBZ
  bought/VBN
  Toshiba/NNP
  will/MD
  do/VB
  nothing/NN
  about/IN
  it/PRP
  ./.)
ChunkParse score:
    IOB Accuracy:  60.0%%
    Precision:      0.0%%
    Recall:         0.0%%
    F-Measure:      0.0%%


In [105]:
# TODO get ChunkScore to work on all dev
print('\nGold Standard:')
# tag every sentence with the pos
gold_tree = ae_laptop_dev_df['sentence'].apply(lambda x: nltk.pos_tag(x))
print(gold_tree)
iob_gold_tree = [nltk.Tree('S',
                           [(el[0], el[1], ae_laptop_dev_df.iloc[tree_ind]['label'][ind])
                            if ae_laptop_dev_df.iloc[tree_ind]['label'][ind]=='O'
                            else (el[0], el[1], ae_laptop_dev_df.iloc[tree_ind]['label'][ind] + '-NP')
                            for ind,el in enumerate(tree)])
                for tree_ind, tree in enumerate(gold_tree)]
ae_laptop_dev_df['iob_gold_tree'] = iob_gold_tree
ae_laptop_dev_df.head()


Gold Standard:
0      [(I, PRP), (have, VBP), (had, VBN), (this, DT)...
1      [(Additional, JJ), (caveat, NN), (:, :), (the,...
2      [(it, PRP), (is, VBZ), (of, IN), (high, JJ), (...
3      [(The, DT), (screen, JJ), (gets, VBZ), (smeary...
4      [(I, PRP), (previously, RB), (owned, VBD), (an...
                             ...                        
145    [(The, DT), (benefits, NNS), (were, VBD), (imm...
146    [(All-, JJ), (in-, JJ), (all, DT), (,, ,), (I,...
147    [(just, RB), (chill, NN), (and, CC), (enjoy, N...
148    [(My, PRP$), (son, NN), (and, CC), (his, PRP$)...
149    [(This, DT), (is, VBZ), (what, WP), (they, PRP...
Name: sentence, Length: 150, dtype: object


Unnamed: 0,label,sentence,predictions,predictions_1,iob_gold_tree,accuracy,accuracy_1
0,"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[I, have, had, this, laptop, for, a, few, mont...","[O, O, O, O, B, O, O, O, B, O, O, O, O, O, O, ...","[O, O, O, O, B, O, O, O, O, O, O, O, O, O, O, ...","[(I, PRP, O), (have, VBP, O), (had, VBN, O), (...",0.888889,1.0
1,"[O, O, O, O, B, I, O, O, O, O, O, O, B, O, O, ...","[Additional, caveat, :, the, base, installatio...","[O, B, O, O, B, I, O, O, O, B, O, O, B, O, O, ...","[O, B, O, O, B, I, O, O, O, B, O, O, B, O, O, ...","[(Additional, JJ, O), (caveat, NN, O), (:, :, ...",0.84,0.84
2,"[O, O, O, O, B, O, O, O, O, B, O, O, O, O, O, ...","[it, is, of, high, quality, ,, has, a, killer,...","[O, O, O, O, B, O, O, O, B, I, O, O, O, O, O, ...","[O, O, O, O, B, O, O, O, B, B, O, O, O, O, O, ...","[(it, PRP, O), (is, VBZ, O), (of, IN, O), (hig...",0.894737,0.921053
3,"[O, B, O, O, O, O, O, O, O, O, O, O, O, O]","[The, screen, gets, smeary, and, dusty, very, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O]","[O, O, O, O, O, O, O, O, O, O, O, O, O, O]","[(The, DT, O), (screen, JJ, B-NP), (gets, VBZ,...",0.928571,0.928571
4,"[O, O, O, O, O, O, O, O, O, O, O]","[I, previously, owned, an, HP, desktop, and, a...","[O, O, O, O, B, I, O, O, B, I, O]","[O, O, O, O, B, B, O, O, B, B, O]","[(I, PRP, O), (previously, RB, O), (owned, VBD...",0.636364,0.818182


## Explore AE evaluation - Token Accuracy

In [106]:
def get_accuracy(true,predictions):
    accuracy = []
    for true_el, predict_el in zip(true,predictions):
        accuracy.append((np.array(predict_el) == np.array(true_el)).sum() / (len(true_el)))
    return(accuracy)

ae_laptop_dev_df['accuracy'] = get_accuracy(ae_laptop_dev_df.label,ae_laptop_dev_df.predictions)
ae_laptop_dev_df['accuracy_1'] = get_accuracy(ae_laptop_dev_df.label,ae_laptop_dev_df.predictions_1)
ae_laptop_dev_df.head()
ae_laptop_dev_df[['accuracy','accuracy_1']].describe()

Unnamed: 0,label,sentence,predictions,predictions_1,iob_gold_tree,accuracy,accuracy_1
0,"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[I, have, had, this, laptop, for, a, few, mont...","[O, O, O, O, B, O, O, O, B, O, O, O, O, O, O, ...","[O, O, O, O, B, O, O, O, O, O, O, O, O, O, O, ...","[(I, PRP, O), (have, VBP, O), (had, VBN, O), (...",0.888889,0.944444
1,"[O, O, O, O, B, I, O, O, O, O, O, O, B, O, O, ...","[Additional, caveat, :, the, base, installatio...","[O, B, O, O, B, I, O, O, O, B, O, O, B, O, O, ...","[O, B, O, O, B, I, O, O, O, B, O, O, B, O, O, ...","[(Additional, JJ, O), (caveat, NN, O), (:, :, ...",0.84,0.84
2,"[O, O, O, O, B, O, O, O, O, B, O, O, O, O, O, ...","[it, is, of, high, quality, ,, has, a, killer,...","[O, O, O, O, B, O, O, O, B, I, O, O, O, O, O, ...","[O, O, O, O, B, O, O, O, B, B, O, O, O, O, O, ...","[(it, PRP, O), (is, VBZ, O), (of, IN, O), (hig...",0.894737,0.921053
3,"[O, B, O, O, O, O, O, O, O, O, O, O, O, O]","[The, screen, gets, smeary, and, dusty, very, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O]","[O, O, O, O, O, O, O, O, O, O, O, O, O, O]","[(The, DT, O), (screen, JJ, B-NP), (gets, VBZ,...",0.928571,0.928571
4,"[O, O, O, O, O, O, O, O, O, O, O]","[I, previously, owned, an, HP, desktop, and, a...","[O, O, O, O, B, I, O, O, B, I, O]","[O, O, O, O, B, B, O, O, B, B, O]","[(I, PRP, O), (previously, RB, O), (owned, VBD...",0.636364,0.636364


Unnamed: 0,accuracy,accuracy_1
count,150.0,150.0
mean,0.845546,0.843995
std,0.094523,0.106273
min,0.571429,0.555556
25%,0.777778,0.786654
50%,0.846154,0.851648
75%,0.909091,0.919956
max,1.0,1.0


## Export samples that are well / poorly extracted

In [107]:
def get_bad_examples(n = 1):
    ind = np.argpartition(ae_laptop_dev_df.accuracy + ae_laptop_dev_df.accuracy_1,n)[:n]
    print(ind)
    sort_ind = ind[np.argsort((ae_laptop_dev_df.accuracy + ae_laptop_dev_df.accuracy_1).iloc[ind])]
    bad_example = ae_laptop_dev_df.iloc[sort_ind]
    
    display(bad_example)
    print(*[' '.join(sent) for sent in bad_example.sentence],sep='\n')
    
get_bad_examples(3)

def get_good_examples(n = 1):
    ind = np.argpartition(ae_laptop_dev_df.accuracy + ae_laptop_dev_df.accuracy_1,-n)[-n:]
    print(ind)
    sort_ind = ind[np.argsort((ae_laptop_dev_df.accuracy + ae_laptop_dev_df.accuracy_1).iloc[ind])]
    good_example = ae_laptop_dev_df.iloc[sort_ind]
    
    display(good_example)
    print(*[' '.join(sent) for sent in good_example.sentence],sep='\n')
# good_example = ae_laptop_dev_df.iloc[np.argmax(ae_laptop_dev_df.accuracy + ae_laptop_dev_df.accuracy_1)]
get_good_examples(3)

0    107
1    147
2      4
dtype: int64


Unnamed: 0,label,sentence,predictions,predictions_1,iob_gold_tree,accuracy,accuracy_1
107,"[O, O, O, O, O, O, O]","[The, Mac, Book, Pro, performs, flawlessly, .]","[O, B, I, I, O, O, O]","[O, B, I, I, O, O, O]","[(The, DT, O), (Mac, NNP, O), (Book, NNP, O), ...",0.571429,0.571429
147,"[O, O, O, O, O]","[just, chill, and, enjoy, .]","[O, B, O, B, O]","[O, B, O, B, O]","[(just, RB, O), (chill, NN, O), (and, CC, O), ...",0.6,0.6
4,"[O, O, O, O, O, O, O, O, O, O, O]","[I, previously, owned, an, HP, desktop, and, a...","[O, O, O, O, B, I, O, O, B, I, O]","[O, O, O, O, B, B, O, O, B, B, O]","[(I, PRP, O), (previously, RB, O), (owned, VBD...",0.636364,0.636364


The Mac Book Pro performs flawlessly .
just chill and enjoy .
I previously owned an HP desktop and a Dell laptop .
147     45
148     70
149    113
dtype: int64


Unnamed: 0,label,sentence,predictions,predictions_1,iob_gold_tree,accuracy,accuracy_1
45,"[O, O, B, O, B, O]","[Like, the, price, and, operation, .]","[O, O, B, O, B, O]","[O, O, B, O, B, O]","[(Like, IN, O), (the, DT, O), (price, NN, B-NP...",1.0,1.0
70,"[O, B, O, O, O, B, O, O, O]","[The, design, is, awesome, ,, quality, is, unp...","[O, B, O, O, O, B, O, O, O]","[O, B, O, O, O, B, O, O, O]","[(The, DT, O), (design, NN, B-NP), (is, VBZ, O...",1.0,1.0
113,"[O, B, I, O, O, O, O, O, O, O, O, O, O, O, O]","[The, battery, life, is, amazingly, long, at, ...","[O, B, I, O, O, O, O, O, O, O, O, O, O, O, O]","[O, B, I, O, O, O, O, O, O, O, O, O, O, O, O]","[(The, DT, O), (battery, NN, B-NP), (life, NN,...",1.0,1.0


Like the price and operation .
The design is awesome , quality is unprecedented .
The battery life is amazingly long at 7hrs and 5hrs if you use it .


## Play with ASC baseline

In [108]:
asc_laptop_dev_df = pd.DataFrame.from_dict(asc_laptop_dev,orient='index')
asc_laptop_dev_df.head()

Unnamed: 0,polarity,term,id,sentence
1113_0,negative,safe mode,1113_0,Not even safe mode boots.
2595_0,positive,Keyboard,2595_0,Keyboard was also very nice and had a solid feel.
1039_0,negative,Keyboard,1039_0,Keyboard is plastic and spongey feeling.
315_0,positive,quality,315_0,I would recommend this laptop to anyone lookin...
1284_0,negative,screen,1284_0,"Thus, when you carry it at a slanted angle, th..."


In [109]:
def vader_asc(sentence_lst):
    """
    For every sentence in the list, tag it as a positive/negative sentiment based on the sum of the words.
    """
    analyzer = SentimentIntensityAnalyzer()
    pos_neg_tag_lst = []
    for ind,sentence in enumerate(sentence_lst):
        vs = analyzer.polarity_scores(sentence)
        pos_neg_tag = 'negative' if vs['compound'] <= -0.05 else 'positive' if vs['compound'] >= 0.05 else 'neutral' 
        # print first 10 examples
        if ind <10: print("{:-<65} {} ({})".format(sentence, str(vs['compound']),pos_neg_tag))
        pos_neg_tag_lst.append(pos_neg_tag)
    return(pos_neg_tag_lst)

asc_laptop_dev_df['predictions'] = vader_asc(asc_laptop_dev_df.sentence)

Not even safe mode boots.---------------------------------------- -0.3412 (negative)
Keyboard was also very nice and had a solid feel.---------------- 0.5709 (positive)
Keyboard is plastic and spongey feeling.------------------------- 0.128 (positive)
I would recommend this laptop to anyone looking to get a new laptop who is willing to spend a little more money to get great quality! 0.784 (positive)
Thus, when you carry it at a slanted angle, the screen will "topple" or "slide" down, if you understand what I mean. 0.0 (neutral)
When I called Sony the Customer Service was Great.--------------- 0.6249 (positive)
I also did not like the loud noises it made or how the bottom of the computer would get really hot. -0.2755 (negative)
I also did not like the loud noises it made or how the bottom of the computer would get really hot. -0.2755 (negative)
Also, one of the users mentioned how the edges on the macbook is sharp, if you have money to spend on one of the incase shells, it doesn't seem 

## Explore ASC evaluation - accuracy

In [110]:
asc_laptop_dev_df.head()
(asc_laptop_dev_df.polarity == asc_laptop_dev_df.predictions).value_counts(normalize=True)
accuracy_score(asc_laptop_dev_df.polarity,asc_laptop_dev_df.predictions)

Unnamed: 0,polarity,term,id,sentence,predictions
1113_0,negative,safe mode,1113_0,Not even safe mode boots.,negative
2595_0,positive,Keyboard,2595_0,Keyboard was also very nice and had a solid feel.,positive
1039_0,negative,Keyboard,1039_0,Keyboard is plastic and spongey feeling.,positive
315_0,positive,quality,315_0,I would recommend this laptop to anyone lookin...,positive
1284_0,negative,screen,1284_0,"Thus, when you carry it at a slanted angle, th...",neutral


True     0.613333
False    0.386667
dtype: float64

0.6133333333333333

## Explore ASC evaluation - Confusion Matrix

In [111]:
print(confusion_matrix(asc_laptop_dev_df.polarity,asc_laptop_dev_df.predictions,labels=['negative','neutral','positive']))

[[40 14 12]
 [ 5 12 10]
 [ 7 10 40]]


## Explore ASC evaluation - Classification Report

In [112]:
print(classification_report(asc_laptop_dev_df.polarity,asc_laptop_dev_df.predictions,labels=['negative','neutral','positive']))

              precision    recall  f1-score   support

    negative       0.77      0.61      0.68        66
     neutral       0.33      0.44      0.38        27
    positive       0.65      0.70      0.67        57

    accuracy                           0.61       150
   macro avg       0.58      0.58      0.58       150
weighted avg       0.64      0.61      0.62       150



## Combine ASC evaluation

In [113]:
def get_asc_eval(y_true, y_pred):
    print(np.round(accuracy_score(y_true,y_pred),2))
    print(confusion_matrix(y_true,y_pred,labels=['negative','neutral','positive']))
    print(classification_report(y_true,asc_laptop_dev_df.predictions,labels=['negative','neutral','positive']))