# Explore Data
**Author:** Jane Hung  
**Date:** 1 Mar 2020  
**Citations:**  
@inproceedings{xu_bert2019,
    title = "BERT Post-Training for Review Reading Comprehension and Aspect-based Sentiment Analysis",
    author = "Xu, Hu and Liu, Bing and Shu, Lei and Yu, Philip S.",
    booktitle = "Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics",
    year = "2019",
}  
https://drive.google.com/file/d/1NGH5bqzEx6aDlYJ7O3hepZF4i_p4iMR8/view

## Initialize environment

In [44]:
import pandas as pd
import numpy as np
import os
import sys
import json
import pprint
import tensorflow as tf
from time import time
import io
import re

import nltk

import pickle
from csv import reader

import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib.ticker import PercentFormatter

from tensorflow.keras import layers
from tensorflow.keras.backend import sparse_categorical_crossentropy
from tensorflow.keras.layers import Dense, Flatten

from datetime import datetime

from transformers import BertTokenizer, TFBertModel

from sklearn.metrics import log_loss, confusion_matrix, classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## Helper functions

In [45]:
def read_json(filename):
    f = open(filename,'r')
    data = json.loads(f.read())
    print('\n',filename)
    pprint.pprint(dict(list(data.items())[:1]))
    return(data)

## Import data

### Training data

In [46]:
ae_laptop_train = read_json('../data/hu-data/ae/laptop/train.json')
ae_rest_train = read_json('../data/hu-data/ae/rest/train.json')


asc_laptop_train = read_json('../data/hu-data/asc/laptop/train.json')
asc_rest_train = read_json('../data/hu-data/asc/rest/train.json')


 ../data/hu-data/ae/laptop/train.json
{'0': {'label': ['B',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'B',
                 'I',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O'],
       'sentence': ['Keyboard',
                    'is',
                    'great',
                    'but',
                    'primary',
                    'and',
                    'secondary',
                    'control',
                    'buttons',
                    'could',
                    'be',
                    'more',
                    'durable',
                    '.']}}

 ../data/hu-data/ae/rest/train.json
{'0': {'label': ['O', 'O', 'O', 'B'],
       'sentence': ['I', 'LOVE', 'their', 'Thai']}}

 ../data/hu-data/asc/laptop/train.json
{'327_0': {'id': '327_0',
           'polarity': 'positive',
           'sent

### Dev data

In [47]:
ae_laptop_dev  = read_json('../data/hu-data/ae/laptop/dev.json')
ae_rest_dev = read_json('../data/hu-data/ae/rest/dev.json')


asc_laptop_dev = read_json('../data/hu-data/asc/laptop/dev.json')
asc_rest_dev = read_json('../data/hu-data/asc/rest/dev.json')


 ../data/hu-data/ae/laptop/dev.json
{'0': {'label': ['O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O'],
       'sentence': ['I',
                    'have',
                    'had',
                    'this',
                    'laptop',
                    'for',
                    'a',
                    'few',
                    'months',
                    'now',
                    'and',
                    'i',
                    'would',
                    'say',
                    'im',
                    'pretty',
                    'satisfied',
                    '.']}}

 ../data/hu-data/ae/rest/dev.json
{'0': {'label': ['O',
           

### Test data

In [48]:
ae_laptop_test  = read_json('../data/hu-data/ae/laptop/test.json')
ae_rest_test = read_json('../data/hu-data/ae/rest/test.json')


asc_laptop_test = read_json('../data/hu-data/asc/laptop/test.json')
asc_rest_test = read_json('../data/hu-data/asc/rest/test.json')


 ../data/hu-data/ae/laptop/test.json
{'0': {'label': ['B',
                 'I',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O',
                 'O'],
       'sentence': ['Boot',
                    'time',
                    'is',
                    'super',
                    'fast',
                    ',',
                    'around',
                    'anywhere',
                    'from',
                    '35',
                    'seconds',
                    'to',
                    '1',
                    'minute',
                    '.']}}

 ../data/hu-data/ae/rest/test.json
{'0': {'label': ['O', 'O'], 'sentence': ['Yum', '!']}}

 ../data/hu-data/asc/laptop/test.json
{'718:1_0': {'id': '718:1_0',
             'polarity': 'positive',
             's

### Convert to df

In [49]:
ae_laptop_train_df = pd.DataFrame.from_dict(ae_laptop_train,orient='index')
ae_laptop_train_df.head()

ae_laptop_dev_df = pd.DataFrame.from_dict(ae_laptop_dev,orient='index')
ae_laptop_dev_df.head()

ae_laptop_test_df = pd.DataFrame.from_dict(ae_laptop_test,orient='index')
ae_laptop_test_df

Unnamed: 0,label,sentence
0,"[B, O, O, O, O, O, O, B, I, O, O, O, O, O]","[Keyboard, is, great, but, primary, and, secon..."
1,"[O, O, O, O, O, O, O, O, O, O, O, O, O, O]","[I, bought, this, laptop, about, a, month, ago..."
2,"[O, O, O, O, O, O, O, O, O, O, O, O]","[I, am, however, pleased, that, it, is, still,..."
3,"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]","[I, went, to, my, local, Best, Buy, looking, f..."
4,"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[The, Apple, MC371LL/, A, 2.4Ghz, 15.4-, inch,..."


Unnamed: 0,label,sentence
0,"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[I, have, had, this, laptop, for, a, few, mont..."
1,"[O, O, O, O, B, I, O, O, O, O, O, O, B, O, O, ...","[Additional, caveat, :, the, base, installatio..."
2,"[O, O, O, O, B, O, O, O, O, B, O, O, O, O, O, ...","[it, is, of, high, quality, ,, has, a, killer,..."
3,"[O, B, O, O, O, O, O, O, O, O, O, O, O, O]","[The, screen, gets, smeary, and, dusty, very, ..."
4,"[O, O, O, O, O, O, O, O, O, O, O]","[I, previously, owned, an, HP, desktop, and, a..."


Unnamed: 0,label,sentence
0,"[B, I, O, O, O, O, O, O, O, O, O, O, O, O, O]","[Boot, time, is, super, fast, ,, around, anywh..."
1,"[B, I, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tech, support, would, not, fix, the, problem,..."
2,"[O, O, O, O, O, O, O]","[but, in, resume, this, computer, rocks, !]"
3,"[B, I, O, O, O]","[Set, up, was, easy, .]"
4,"[O, O, O, O, O, B, I, O, B, I, O]","[Did, not, enjoy, the, new, Windows, 8, and, t..."
...,...,...
795,"[O, B, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[This, hardware, seems, to, be, better, than, ..."
796,"[O, O, O, O, O, O, O]","[I, 'm, done, with, WinDoze, computers, .]"
797,"[O, O, O, O, O, O, O, O, O, O, O, O, O, O, B, ...","[I, 've, had, it, for, about, 2, months, now, ..."
798,"[O, O, O, O, O, O, O, B, I, O]","[the, latest, version, does, not, have, a, dis..."


In [50]:
asc_laptop_train_df = pd.DataFrame.from_dict(asc_laptop_train,orient='index')
asc_laptop_train_df.head()
asc_laptop_dev_df = pd.DataFrame.from_dict(asc_laptop_dev,orient='index')
asc_laptop_dev_df.head()
asc_laptop_test_df = pd.DataFrame.from_dict(asc_laptop_test,orient='index')
asc_laptop_test_df.head()

Unnamed: 0,polarity,term,id,sentence
327_0,positive,use,327_0,Also it is very good for college students who ...
3077_0,positive,noise,3077_0,For those that care about noise this thing doe...
1592_1,positive,force,1592_1,Enjoy that Toshib force and durability unparal...
329_0,negative,expense,329_0,I know that everyone thinks Macs are overprice...
1184_0,negative,word processor,1184_0,) And printing from either word processor is a...


Unnamed: 0,polarity,term,id,sentence
1113_0,negative,safe mode,1113_0,Not even safe mode boots.
2595_0,positive,Keyboard,2595_0,Keyboard was also very nice and had a solid feel.
1039_0,negative,Keyboard,1039_0,Keyboard is plastic and spongey feeling.
315_0,positive,quality,315_0,I would recommend this laptop to anyone lookin...
1284_0,negative,screen,1284_0,"Thus, when you carry it at a slanted angle, th..."


Unnamed: 0,polarity,term,id,sentence
718:1_0,positive,retina display display,718:1_0,the retina display display make pictures i too...
217:1_1,neutral,CD/DVD drive,217:1_1,Needs a CD/DVD drive and a bigger power switch.
217:1_0,negative,power switch,217:1_0,Needs a CD/DVD drive and a bigger power switch.
1044:1_0,negative,battery,1044:1_0,The battery is not as shown in the product pho...
1040:1_0,negative,keyboard,1040:1_0,"It feels cheap, the keyboard is not very sensi..."


## AE baseline - NN+

In [45]:
def pos_ae(tokenized_sentence):
    """
    Tag sentences using POS tagger and identify consecutive nouns as entities
    """
    pos_sent = tokenized_sentence.apply(lambda sent:nltk.pos_tag(sent,tagset='universal'))
    
    
    # tag with IOB terminology
    ae_tag = lambda sent:['O' if token[1] != 'NOUN' 
                          else 'B' if ((token[1]=='NOUN') & ((sent[ind-1][1]!='NOUN') | (ind==0))) 
                          else 'I' for ind,token in enumerate(sent)]

    return(pos_sent.apply(ae_tag))

# since the POS tagger is based on the words themselves and not context.
ae_laptop_test_df['predictions'] = pos_ae(ae_laptop_test_df['sentence'])
ae_laptop_test_df.head()

def convert_int(tagged_tokens):
    """
    Convert B,I,O tags to integers
    """
    return(tagged_tokens.apply(lambda sent: [0 if token=='O' else 1 if token=='B' else 2 for token in sent]))

convert_int(ae_laptop_test_df['predictions'])


Unnamed: 0,label,sentence,predictions
0,"[B, I, O, O, O, O, O, O, O, O, O, O, O, O, O]","[Boot, time, is, super, fast, ,, around, anywh...","[B, I, O, O, O, O, O, O, O, O, B, O, O, B, O]"
1,"[B, I, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tech, support, would, not, fix, the, problem,...","[O, B, O, O, O, O, B, O, O, O, O, B, O, O, O, ..."
2,"[O, O, O, O, O, O, O]","[but, in, resume, this, computer, rocks, !]","[O, O, B, O, B, O, O]"
3,"[B, I, O, O, O]","[Set, up, was, easy, .]","[B, O, O, O, O]"
4,"[O, O, O, O, O, B, I, O, B, I, O]","[Did, not, enjoy, the, new, Windows, 8, and, t...","[B, O, O, O, O, B, O, O, O, B, O]"


0          [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0]
1      [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, ...
2                                  [0, 0, 1, 0, 1, 0, 0]
3                                        [1, 0, 0, 0, 0]
4                      [1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0]
                             ...                        
795    [0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...
796                                [0, 0, 0, 0, 1, 2, 0]
797    [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, ...
798                       [0, 0, 1, 0, 0, 0, 0, 0, 1, 0]
799     [1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
Name: predictions, Length: 800, dtype: object

## AE Regex Parser - business rules

In [46]:
# try a more sophisticated method for chunking
def regex_parser(tokenized_sentence,verbose=False):
    """
    Use a Regex Parser to provide some context around noun phrases
    """
    pos_sent = nltk.pos_tag(tokenized_sentence)
#     print(pos_sent)
#     grammar = r"""
#       NP: {<DT|PP\$>?<JJ>*<NN>}   # chunk determiner/possessive, adjectives and noun
#           {<NNP>+}                # chunk sequences of proper nouns
#     """
    
    # Update Grammar Regex to include prepositional phrases ala Semeval annotation guidelines
    grammar = r"""
    NP: {<NN><IN><DT><NN|NNP>+}
        {<NNP><NN>}
        {<NNP>+}
        {<NN>+}
        
    """
    
    cp = nltk.RegexpParser(grammar)

    tree = cp.parse(pos_sent)
    
    if verbose: print(tree)
    
    iob = [el[2][0] for el in nltk.chunk.util.tree2conlltags(tree)]
    return(iob)

# since the POS tagger is based on the words themselves and not context.
ae_laptop_test_df['predictions_1'] = ae_laptop_test_df['sentence'].apply(lambda x: regex_parser(x))
ae_laptop_test_df.head()


Unnamed: 0,label,sentence,predictions,predictions_1
0,"[B, I, O, O, O, O, O, O, O, O, O, O, O, O, O]","[Boot, time, is, super, fast, ,, around, anywh...","[B, I, O, O, O, O, O, O, O, O, B, O, O, B, O]","[B, I, O, O, O, O, O, O, O, O, O, O, O, B, O]"
1,"[B, I, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tech, support, would, not, fix, the, problem,...","[O, B, O, O, O, O, B, O, O, O, O, B, O, O, O, ...","[O, B, O, O, O, O, B, O, O, O, O, B, O, O, O, ..."
2,"[O, O, O, O, O, O, O]","[but, in, resume, this, computer, rocks, !]","[O, O, B, O, B, O, O]","[O, O, B, O, B, O, O]"
3,"[B, I, O, O, O]","[Set, up, was, easy, .]","[B, O, O, O, O]","[B, O, O, O, O]"
4,"[O, O, O, O, O, B, I, O, B, I, O]","[Did, not, enjoy, the, new, Windows, 8, and, t...","[B, O, O, O, O, B, O, O, O, B, O]","[B, O, O, O, O, B, O, O, O, O, O]"


## AE evaluation - SemEval14

http://www.davidsbatista.net/blog/2018/05/09/Named_Entity_Evaluation/  
- partial boundary match over the surface string

In [48]:
# TODO need to explore how we want to move forward with all sentences rather than just 1.
# Should try to implement the SemEval14 evaluation criteria bc this is best practice

In [49]:
# TODO amend this tree structure for all predictions as well
print('\nGold Standard:')
# tag every sentence with the pos
gold_tree = ae_laptop_test_df['sentence'].apply(lambda x: nltk.pos_tag(x))
print(gold_tree)
iob_gold_tree = [nltk.Tree('S',
                           [(el[0], el[1], ae_laptop_test_df.iloc[tree_ind]['label'][ind])
                            if ae_laptop_test_df.iloc[tree_ind]['label'][ind]=='O'
                            else (el[0], el[1], ae_laptop_test_df.iloc[tree_ind]['label'][ind] + '-NP')
                            for ind,el in enumerate(tree)])
                for tree_ind, tree in enumerate(gold_tree)]
ae_laptop_test_df['iob_gold_tree'] = iob_gold_tree
ae_laptop_test_df.head()


Gold Standard:
0      [(Boot, NNP), (time, NN), (is, VBZ), (super, J...
1      [(tech, JJ), (support, NN), (would, MD), (not,...
2      [(but, CC), (in, IN), (resume, NN), (this, DT)...
3      [(Set, NNP), (up, RP), (was, VBD), (easy, JJ),...
4      [(Did, NNP), (not, RB), (enjoy, VB), (the, DT)...
                             ...                        
795    [(This, DT), (hardware, NN), (seems, VBZ), (to...
796    [(I, PRP), ('m, VBP), (done, VBN), (with, IN),...
797    [(I, PRP), ('ve, VBP), (had, VBD), (it, PRP), ...
798    [(the, DT), (latest, JJS), (version, NN), (doe...
799    [(Screen, NNP), (-, :), (although, IN), (some,...
Name: sentence, Length: 800, dtype: object


Unnamed: 0,label,sentence,predictions,predictions_1,iob_gold_tree
0,"[B, I, O, O, O, O, O, O, O, O, O, O, O, O, O]","[Boot, time, is, super, fast, ,, around, anywh...","[B, I, O, O, O, O, O, O, O, O, B, O, O, B, O]","[B, I, O, O, O, O, O, O, O, O, O, O, O, B, O]","[(Boot, NNP, B-NP), (time, NN, I-NP), (is, VBZ..."
1,"[B, I, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tech, support, would, not, fix, the, problem,...","[O, B, O, O, O, O, B, O, O, O, O, B, O, O, O, ...","[O, B, O, O, O, O, B, O, O, O, O, B, O, O, O, ...","[(tech, JJ, B-NP), (support, NN, I-NP), (would..."
2,"[O, O, O, O, O, O, O]","[but, in, resume, this, computer, rocks, !]","[O, O, B, O, B, O, O]","[O, O, B, O, B, O, O]","[(but, CC, O), (in, IN, O), (resume, NN, O), (..."
3,"[B, I, O, O, O]","[Set, up, was, easy, .]","[B, O, O, O, O]","[B, O, O, O, O]","[(Set, NNP, B-NP), (up, RP, I-NP), (was, VBD, ..."
4,"[O, O, O, O, O, B, I, O, B, I, O]","[Did, not, enjoy, the, new, Windows, 8, and, t...","[B, O, O, O, O, B, O, O, O, B, O]","[B, O, O, O, O, B, O, O, O, O, O]","[(Did, NNP, O), (not, RB, O), (enjoy, VB, O), ..."


In [50]:
def get_entities(sentence_lst, predictions_lst):
    """
    Reformat the IOB structure to get the actual entities from the sentence
    """
    
    # for every sentence, iterate through
    all_entities = []
    for sample in range(len(predictions_lst)):
    
        # get indices where entities are identified
        predictions = np.array(predictions_lst[sample])
        ind = (predictions == 'B') | (predictions == 'I')
        
        # create list of numerical indices and boolean indices. ex. [(4, True), (10, True), (11, True), (15, True)]
        ind_tuple = [num_ind for num_ind in list(enumerate(ind)) if num_ind[1]==True]
        
        # get the sentence of interest. identify what these entities are
        sentence = np.array(sentence_lst[sample])

        # group the phrases together and add entities from each sentence
        entities = []
        for subset,num_ind_tuple in zip(sentence[ind], ind_tuple): # [('price', (4, True)), ('netbook', (10, True)), ('*', (11, True)), ('machine', (15, True))]
            # put the B in entities
            if predictions[num_ind_tuple[0]][0] == 'B':
                entities.append([subset])
            # if the tag is I, add to the last item of the list
            elif predictions[num_ind_tuple[0]][0] == 'I':
                last_entry = entities.pop()
                last_entry.append(subset)
                entities.append(last_entry)
            # there should not be any 'O' indices here
            else:
                print('Error')
        all_entities.append(entities)
    return(all_entities)

In [51]:
def get_ae_eval_features(gold_entities,prediction_entities,verbose=False):
    # TODO need to generalize and do for all samples
    # TODO may later need to update these calculations to encompass sentence location.
    y_true_df = pd.DataFrame([[ind,sub_el] for ind,el in enumerate(gold_entities) for sub_el in el], columns=['sample_index','entity'])
    y_pred_df = pd.DataFrame([[ind,sub_el] for ind,el in enumerate(prediction_entities) for sub_el in el], columns=['sample_index','entity'])
    print('True')
    display(y_true_df.head())
    print('Pred')
    display(y_pred_df.head())

    cor = 0
    inc = 0
    par = 0
    mis = 0
    spu = 0

    for el in range(len(gold_entities)):
        if verbose:
            print('\n',el)
        true_subset = y_true_df[y_true_df.sample_index == el]
        pred_subset = y_pred_df[y_pred_df.sample_index == el]
        true_entities = set(true_subset.entity.apply(lambda x: '_'.join(x)))
        pred_entities = set(pred_subset.entity.apply(lambda x: '_'.join(x)))
        if verbose:
            print('True')
            print(true_entities)
            print('Pred')
            print(pred_entities)

        # get correct
        cor_entities = true_entities & pred_entities
        if verbose:
            print(f'Correct entities: {cor_entities}')
        cor += len(cor_entities)
        true_entities = true_entities - cor_entities
        pred_entities = pred_entities - cor_entities

        # get partial and missed
        for true in true_entities:
            # Take into account if the prediction contains a portion of the correct and if correct contains a portion of the prediction
            par_entities = set([pred for pred in pred_entities if (true in pred) | (pred in true)])
            if len(par_entities) != 0:
                if verbose:
                    print(f'Partial entities: {set([true])}')
                par += len(par_entities)
                true_entities = true_entities - set([true])
                pred_entities = pred_entities - par_entities
            else:
                if verbose:
                    print(f'Missed entities: {set([true])}')
                mis += 1
                true_entities = true_entities - set([true])

        if len(true_entities) == 0:
            if verbose:
                print(f'Spurious entities: {pred_entities}')
            spu += len(pred_entities)
        else:
            print('Error')

    print(f'\nCorrect: {cor}')
    print(f'Partial: {par}')
    print(f'Missed: {mis}')
    print(f'Spurious: {spu}')
    return(cor,par,mis,spu,inc)

In [52]:
def get_ae_eval(sentence_lst, y_true, y_pred,verbose=False):
    """
    Get entity recognition evaluations accoridng to the partial match SemEval strategy
    """
    prediction_entities = get_entities(sentence_lst,y_pred)
    gold_entities = get_entities(sentence_lst,y_true)
    
    cor,par,mis,spu,inc = get_ae_eval_features(gold_entities,prediction_entities,verbose=verbose)
    
    pos_eval = cor + inc + par + mis
    act_eval = cor + inc + par + spu

    precision = (cor + .5 * par) / act_eval
    recall = (cor + .5 * par) / pos_eval
    f1 = ( 2* precision * recall) / (precision + recall)
    
    print(f'\nPrecision: \t{precision}')
    print(f'Recall: \t{recall}')
    print(f'F1-Score: \t{f1}')
    return(precision, recall, f1)

get_ae_eval(ae_laptop_test_df.sentence,ae_laptop_test_df.label,ae_laptop_test_df.predictions)

True


Unnamed: 0,sample_index,entity
0,0,"[Boot, time]"
1,1,"[tech, support]"
2,3,"[Set, up]"
3,4,"[Windows, 8]"
4,4,"[touchscreen, functions]"


Pred


Unnamed: 0,sample_index,entity
0,0,"[Boot, time]"
1,0,[seconds]
2,0,[minute]
3,1,[support]
4,1,[problem]



Correct: 422
Partial: 192
Missed: 75
Spurious: 1336

Precision: 	0.26564102564102565
Recall: 	0.7518142235123367
F1-Score: 	0.3925729442970822


(0.26564102564102565, 0.7518142235123367, 0.3925729442970822)

In [53]:
get_ae_eval(ae_laptop_test_df.sentence,ae_laptop_test_df.label,ae_laptop_test_df.predictions_1)

True


Unnamed: 0,sample_index,entity
0,0,"[Boot, time]"
1,1,"[tech, support]"
2,3,"[Set, up]"
3,4,"[Windows, 8]"
4,4,"[touchscreen, functions]"


Pred


Unnamed: 0,sample_index,entity
0,0,"[Boot, time]"
1,0,[minute]
2,1,[support]
3,1,[problem]
4,1,[plan]



Correct: 327
Partial: 223
Missed: 140
Spurious: 1094

Precision: 	0.26672749391727496
Recall: 	0.6355072463768116
F1-Score: 	0.37574978577549273


(0.26672749391727496, 0.6355072463768116, 0.37574978577549273)

## AE evaluation - Token Accuracy

In [56]:
def get_accuracy(true,predictions):
    accuracy = []
    for true_el, predict_el in zip(true,predictions):
        accuracy.append((np.array(predict_el) == np.array(true_el)).sum() / (len(true_el)))
    return(accuracy)

ae_laptop_test_df['accuracy'] = get_accuracy(ae_laptop_test_df.label,ae_laptop_test_df.predictions)
ae_laptop_test_df['accuracy_1'] = get_accuracy(ae_laptop_test_df.label,ae_laptop_test_df.predictions_1)
ae_laptop_test_df.head()
ae_laptop_test_df[['accuracy','accuracy_1']].describe()

Unnamed: 0,label,sentence,predictions,predictions_1,iob_gold_tree,accuracy,accuracy_1
0,"[B, I, O, O, O, O, O, O, O, O, O, O, O, O, O]","[Boot, time, is, super, fast, ,, around, anywh...","[B, I, O, O, O, O, O, O, O, O, B, O, O, B, O]","[B, I, O, O, O, O, O, O, O, O, O, O, O, B, O]","[(Boot, NNP, B-NP), (time, NN, I-NP), (is, VBZ...",0.866667,0.933333
1,"[B, I, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[tech, support, would, not, fix, the, problem,...","[O, B, O, O, O, O, B, O, O, O, O, B, O, O, O, ...","[O, B, O, O, O, O, B, O, O, O, O, B, O, O, O, ...","[(tech, JJ, B-NP), (support, NN, I-NP), (would...",0.764706,0.764706
2,"[O, O, O, O, O, O, O]","[but, in, resume, this, computer, rocks, !]","[O, O, B, O, B, O, O]","[O, O, B, O, B, O, O]","[(but, CC, O), (in, IN, O), (resume, NN, O), (...",0.714286,0.714286
3,"[B, I, O, O, O]","[Set, up, was, easy, .]","[B, O, O, O, O]","[B, O, O, O, O]","[(Set, NNP, B-NP), (up, RP, I-NP), (was, VBD, ...",0.8,0.8
4,"[O, O, O, O, O, B, I, O, B, I, O]","[Did, not, enjoy, the, new, Windows, 8, and, t...","[B, O, O, O, O, B, O, O, O, B, O]","[B, O, O, O, O, B, O, O, O, O, O]","[(Did, NNP, O), (not, RB, O), (enjoy, VB, O), ...",0.636364,0.636364


Unnamed: 0,accuracy,accuracy_1
count,800.0,800.0
mean,0.827023,0.831191
std,0.117612,0.127836
min,0.2,0.2
25%,0.756466,0.76
50%,0.833333,0.846154
75%,0.909091,0.916667
max,1.0,1.0


## Export samples that are well / poorly extracted

In [57]:
def get_bad_examples(n = 1):
    ind = np.argpartition(ae_laptop_test_df.accuracy + ae_laptop_test_df.accuracy_1,n)[:n]
    print(ind)
    sort_ind = ind[np.argsort((ae_laptop_test_df.accuracy + ae_laptop_test_df.accuracy_1).iloc[ind])]
    bad_example = ae_laptop_test_df.iloc[sort_ind]
    
    display(bad_example)
    print(*[' '.join(sent) for sent in bad_example.sentence],sep='\n')
    
get_bad_examples(3)

def get_good_examples(n = 1):
    ind = np.argpartition(ae_laptop_test_df.accuracy + ae_laptop_test_df.accuracy_1,-n)[-n:]
    print(ind)
    sort_ind = ind[np.argsort((ae_laptop_test_df.accuracy + ae_laptop_test_df.accuracy_1).iloc[ind])]
    good_example = ae_laptop_test_df.iloc[sort_ind]
    
    display(good_example)
    print(*[' '.join(sent) for sent in good_example.sentence],sep='\n')
# good_example = ae_laptop_test_df.iloc[np.argmax(ae_laptop_test_df.accuracy + ae_laptop_test_df.accuracy_1)]
get_good_examples(3)

0    165
1    532
2    632
dtype: int64


Unnamed: 0,label,sentence,predictions,predictions_1,iob_gold_tree,accuracy,accuracy_1
165,"[O, O, O, O, O]","[HUGE, Apple, MAC, Fan, !]","[B, I, I, I, O]","[B, I, I, I, O]","[(HUGE, NNP, O), (Apple, NNP, O), (MAC, NNP, O...",0.2,0.2
532,"[O, O, O, O, O, O, O, O, O, O, O, O, O]","[i, FINALLY, DID, IT, AND, THIS, MACHINE, IS, ...","[B, I, I, I, I, I, I, O, O, B, I, I, O]","[B, O, B, I, I, I, I, O, O, B, I, I, O]","[(i, NN, O), (FINALLY, NNPS, O), (DID, NNP, O)...",0.230769,0.307692
632,"[O, B, O]","[Nice, packing, .]","[B, I, O]","[B, I, O]","[(Nice, NNP, O), (packing, NN, B-NP), (., ., O)]",0.333333,0.333333


HUGE Apple MAC Fan !
i FINALLY DID IT AND THIS MACHINE IS THE WAY TO GO !
Nice packing .
797    346
798    354
799    321
dtype: int64


Unnamed: 0,label,sentence,predictions,predictions_1,iob_gold_tree,accuracy,accuracy_1
346,"[O, O, B, O, B, O, B, O, B, I, O]","[!, Excelent, performance, ,, usability, ,, pr...","[O, O, B, O, B, O, B, O, B, I, O]","[O, O, B, O, B, O, B, O, B, I, O]","[(!, ., O), (Excelent, JJ, O), (performance, N...",1.0,1.0
354,"[O, O, O, B, I, O]","[I, love, the, form, factor, .]","[O, O, O, B, I, O]","[O, O, O, B, I, O]","[(I, PRP, O), (love, VBP, O), (the, DT, O), (f...",1.0,1.0
321,"[O, B, O, O, O, O, O, O, O, O, O, O, O]","[The, memory, was, gone, and, it, was, not, ab...","[O, B, O, O, O, O, O, O, O, O, O, O, O]","[O, B, O, O, O, O, O, O, O, O, O, O, O]","[(The, DT, O), (memory, NN, B-NP), (was, VBD, ...",1.0,1.0


! Excelent performance , usability , presentation and time response .
I love the form factor .
The memory was gone and it was not able to be used .


## ASC baseline - VADER

In [58]:
asc_laptop_test_df = pd.DataFrame.from_dict(asc_laptop_test,orient='index')
asc_laptop_test_df.head()

Unnamed: 0,polarity,term,id,sentence
718:1_0,positive,retina display display,718:1_0,the retina display display make pictures i too...
217:1_1,neutral,CD/DVD drive,217:1_1,Needs a CD/DVD drive and a bigger power switch.
217:1_0,negative,power switch,217:1_0,Needs a CD/DVD drive and a bigger power switch.
1044:1_0,negative,battery,1044:1_0,The battery is not as shown in the product pho...
1040:1_0,negative,keyboard,1040:1_0,"It feels cheap, the keyboard is not very sensi..."


In [59]:
def vader_asc(sentence_lst):
    """
    For every sentence in the list, tag it as a positive/negative sentiment based on the sum of the words.
    """
    analyzer = SentimentIntensityAnalyzer()
    pos_neg_tag_lst = []
    for ind,sentence in enumerate(sentence_lst):
        vs = analyzer.polarity_scores(sentence)
        pos_neg_tag = 'negative' if vs['compound'] <= -0.05 else 'positive' if vs['compound'] >= 0.05 else 'neutral' 
        # print first 10 examples
        if ind <10: print("{:-<65} {} ({})".format(sentence, str(vs['compound']),pos_neg_tag))
        pos_neg_tag_lst.append(pos_neg_tag)
    return(pos_neg_tag_lst)

asc_laptop_test_df['predictions'] = vader_asc(asc_laptop_test_df.sentence)

the retina display display make pictures i took years ago jaw dropping. 0.0 (neutral)
Needs a CD/DVD drive and a bigger power switch.------------------ 0.0 (neutral)
Needs a CD/DVD drive and a bigger power switch.------------------ 0.0 (neutral)
The battery is not as shown in the product photos.--------------- 0.0 (neutral)
It feels cheap, the keyboard is not very sensitive.-------------- 0.0 (neutral)
Shipping was quick and product described was the product sent and so much more... 0.0 (neutral)
I've had it for about 2 months now and found no issues with software or updates. -0.296 (negative)
The only thing I miss is that my old Alienware laptop had backlit keys. -0.1531 (negative)
Unfortunately, it runs XP and Microsoft is dropping support next April. 0.5622 (positive)
Unfortunately, it runs XP and Microsoft is dropping support next April. 0.5622 (positive)


## ASC - BERT

In [51]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [52]:
batch_sentences = [val['sentence'] for key, val in asc_laptop_train.items()]*5
batch_sentences.extend([val['sentence'] for key, val in asc_laptop_dev.items()])
batch_sentences.extend([val['sentence'] for key, val in asc_laptop_test.items()])
train_size = len(asc_laptop_train)*5
dev_size = len(asc_laptop_dev)
test_size = len(asc_laptop_test)
train_size
dev_size
test_size

10815

150

638

In [53]:
batch_label = [val['polarity'] for key, val in asc_laptop_train.items()]*5
batch_label.extend([val['polarity'] for key, val in asc_laptop_dev.items()])
batch_label.extend([val['polarity'] for key, val in asc_laptop_test.items()])

In [54]:
bert_inputs = tokenizer(batch_label, batch_sentences, padding=True, truncation=True, max_length=50, return_tensors="tf")

In [55]:
label_encoder = LabelEncoder().fit(batch_label)
ascLabels = label_encoder.transform(batch_label)
label_encoder.classes_
batch_label[:5]
ascLabels[:5]

array(['negative', 'neutral', 'positive'], dtype='<U8')

['positive', 'positive', 'positive', 'negative', 'negative']

array([2, 2, 2, 0, 0])

In [56]:
bert_inputs

{'input_ids': <tf.Tensor: shape=(11603, 50), dtype=int32, numpy=
array([[ 101, 3893,  102, ...,    0,    0,    0],
       [ 101, 3893,  102, ...,    0,    0,    0],
       [ 101, 3893,  102, ...,    0,    0,    0],
       ...,
       [ 101, 4997,  102, ...,    0,    0,    0],
       [ 101, 3893,  102, ...,    0,    0,    0],
       [ 101, 4997,  102, ...,    0,    0,    0]], dtype=int32)>, 'token_type_ids': <tf.Tensor: shape=(11603, 50), dtype=int32, numpy=
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(11603, 50), dtype=int32, numpy=
array([[1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0]], dtype=int32)>}

In [57]:
trainSentence_ids = bert_inputs['input_ids'][:train_size]
trainMasks = bert_inputs['attention_mask'][:train_size]
trainSequence_ids = bert_inputs['token_type_ids'][:train_size]

devSentence_ids = bert_inputs['input_ids'][train_size:train_size+dev_size]
devMasks = bert_inputs['attention_mask'][train_size:train_size+dev_size]
devSequence_ids = bert_inputs['token_type_ids'][train_size:train_size+dev_size]

testSentence_ids = bert_inputs['input_ids'][train_size+dev_size:]
testMasks = bert_inputs['attention_mask'][train_size+dev_size:]
testSequence_ids = bert_inputs['token_type_ids'][train_size+dev_size:]

ascLabels_train = ascLabels[:train_size]
ascLabels_dev = ascLabels[train_size:train_size+dev_size]
ascLabels_test = ascLabels[train_size+dev_size:]

X_train = np.array([trainSentence_ids,trainMasks,trainSequence_ids])
X_dev = np.array([devSentence_ids,devMasks,devSequence_ids])
X_test = np.array([testSentence_ids,testMasks,testSequence_ids])

ascLabels_train = np.array(ascLabels_train)
ascLabels_dev = np.array(ascLabels_dev)
ascLabels_test = np.array(ascLabels_test)

In [58]:
# Use a parameter pair k_start, k_end to look at slices. This helps with quick tests.

k_start = 0
k_end = 1000 #-1

if k_end == -1:
    k_end_train = X_train[0].shape[0]
    k_end_dev = X_dev[0].shape[0]
    k_end_test = X_test[0].shape[0]
else:
    k_end_train = k_end_test = k_end_dev = k_end
    


bert_inputs_train_k = [X_train[0][k_start:k_end_train], X_train[1][k_start:k_end_train], 
                       X_train[2][k_start:k_end_train]]
bert_inputs_dev_k = [X_dev[0][k_start:k_end_dev], X_dev[1][k_start:k_end_dev], 
                      X_dev[2][k_start:k_end_dev]]
bert_inputs_test_k = [X_test[0][k_start:k_end_test], X_test[1][k_start:k_end_test], 
                      X_test[2][k_start:k_end_test]]


labels_train_k = ascLabels_train[k_start:k_end_train]
labels_dev_k = ascLabels_dev[k_start:k_end_dev]
labels_test_k = ascLabels_test[k_start:k_end_test]

In [59]:
train_all = [bert_inputs_train_k, labels_train_k]
dev_all = [bert_inputs_dev_k, labels_dev_k]
test_all = [bert_inputs_test_k, labels_test_k]

In [1]:
from transformers import TFBertForSequenceClassification
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=optimizer, loss=loss)
model.fit(bert_inputs_train_k,{"asc": labels_train_k },validation_data=(bert_inputs_dev_k,{"asc": labels_dev_k}), epochs=2, batch_size=16)

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

Some layers of TFBertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


NameError: name 'tf' is not defined

In [None]:
results = model.predict([X_test[0],X_test[1],X_test[2]], batch_size=16)

## ASC evaluation

In [61]:
def get_asc_eval(y_true, y_pred):
    print(confusion_matrix(y_true,y_pred,labels=['negative','neutral','positive']))
    print(classification_report(y_true,y_pred,labels=['negative','neutral','positive']))
get_asc_eval(asc_laptop_test_df.polarity,asc_laptop_test_df.predictions)

[[ 61  45  22]
 [ 32  81  56]
 [ 18  66 257]]
              precision    recall  f1-score   support

    negative       0.55      0.48      0.51       128
     neutral       0.42      0.48      0.45       169
    positive       0.77      0.75      0.76       341

    accuracy                           0.63       638
   macro avg       0.58      0.57      0.57       638
weighted avg       0.63      0.63      0.63       638

