# Experiment using Transformers

## Loading Data

In [27]:
import json
import os
import nltk
import pandas as pd
import logging
from nltk.tokenize import WhitespaceTokenizer
from simpletransformers.classification import ClassificationModel, ClassificationArgs
from simpletransformers.ner import NERModel, NERArgs
from sklearn import metrics

In [28]:
current_directory = os.getcwd()
train_directory_du = current_directory + "/../../data/darmstadt_unis/train.json"
test_directory_du = current_directory + "/../../data/darmstadt_unis/dev.json"

In [29]:
train_directory_mpqa = current_directory + "/../../data/mpqa/train.json"
test_directory_mpqa = current_directory + "/../../data/mpqa/dev.json"

In [30]:
train_directory_open = current_directory + "/../../data/opener_en/train.json"
test_directory_open = current_directory + "/../../data/opener_en/dev.json"

In [31]:
train_json_du, test_json_du = {}, {}
with open(train_directory_du) as f:
    train_json_du = json.load(f)
with open(test_directory_du) as f:
    test_json_du = json.load(f)

In [32]:
train_json_mpqa, test_json_mpqa = {}, {}
with open(train_directory_mpqa) as f:
    train_json_mpqa = json.load(f)
with open(test_directory_mpqa) as f:
    test_json_mpqa = json.load(f)

In [33]:
train_json_open, test_json_open = {}, {}
with open(train_directory_open) as f:
    train_json_open = json.load(f)
with open(test_directory_open) as f:
    test_json_open = json.load(f)

## Data Cleaning

In [34]:
def check_overlap_tags(opinions, text):
    text_length = len(text)
    for opinion in opinions:
        li = []
        if opinion.get('Source') != None:
            for source in opinion.get('Source')[1]:
                li.append(tuple([int(num) for num in source.split(':')]))
        if opinion.get('Target') != None:
            for target in opinion.get('Target')[1]:
                li.append(tuple([int(num) for num in target.split(':')]))
        if opinion.get('Polar_expression') != None:
            for expression in opinion.get('Polar_expression')[1]:
                li.append(tuple([int(num) for num in expression.split(':')]))
        ranges = [range(r[0]+1, r[1]-1) for r in li]
        # check if any two ranges of tags overlap
        overlap = any(any(e-1 in r for r in ranges) for l in li for e in l)
        if overlap == True:
            return True
        # check if the range exceeds the max and min bounds of the text size
        not_included = any(l[1] > text_length or l[0] < 0 for l in li)
        if not_included == True:
            return True
    return False
                

In [35]:
def remove_overlap_rows(json_data):
    orig_data_size = len(json_data)
    json_data = [row for row in json_data if not check_overlap_tags(row.get('opinions', []), row.get('text', ''))]
    new_data_size = len(json_data)
    removed_rows = orig_data_size - new_data_size
    print('Processing removed {} overlapping rows'.format(removed_rows))
    return json_data

In [36]:
# remove all data where the NER tag overlaps
train_json_du, test_json_du = remove_overlap_rows(train_json_du), remove_overlap_rows(test_json_du)
train_json_mpqa, test_json_mpqa = remove_overlap_rows(train_json_mpqa), remove_overlap_rows(test_json_mpqa)
train_json_open, test_json_open = remove_overlap_rows(train_json_open), remove_overlap_rows(test_json_open)

Processing removed 11 overlapping rows
Processing removed 4 overlapping rows
Processing removed 51 overlapping rows
Processing removed 18 overlapping rows
Processing removed 81 overlapping rows
Processing removed 17 overlapping rows


## Transform Data for NER Task

In [37]:
def swap_tag(cur_tag, next_tag):
    if cur_tag != next_tag:
        cur_tag = next_tag
    return cur_tag
    

In [38]:
def get_span(entity_tags, expected_span):
    span_list_add = []
    span_list_remove = []
    # if the span exists, simply add it
    if entity_tags.get(expected_span) != None:
        span_list_add.append(expected_span)
    # else if not exact match
    else:
        # get the span with the same beginning or the end
        entity_tags_list = list(entity_tags)
        start_span_list = [span for span in entity_tags_list if span[0] == expected_span[0]]
        end_span_list = [span for span in entity_tags_list if span[1] == expected_span[1]]
        # if it's an embedded word
        if len(end_span_list) == 0 and len(start_span_list) == 0:
            span_list_add.append(expected_span)
            start_list = (tag_span[0] for tag_span in entity_tags_list)
            outter_span_index = next(i for i,v in enumerate(start_list) if v > expected_span[0])
            outter_span = entity_tags_list[outter_span_index]
            span_list_add.append((outter_span[0],expected_span[0]+1))
            span_list_add.append((expected_span[1]-1, outter_span[1]))
            span_list_remove.append(outter_span)
        # if it's the beginning that doesn't match, split into two, remove the original
        elif len(start_span_list) == 0:
            span_list_add.append(expected_span)
            span_list_add.append((end_span_list[0][0],expected_span[0]+1))
            span_list_remove.append(end_span_list[0])
        # similar for the end, just the other way around
        elif len(end_span_list) == 0:
            span_list_add.append(expected_span)
            span_list_add.append((start_span_list[0][1]-1,expected_span[1]))
            span_list_remove.append(start_span_list[0])
        # if it's just a longer expected span, break it down
        elif len(start_span_list) != 0 and len(end_span_list)!= 0:
            start_span_index = entity_tags_list.index(start_span_list[0])
            end_span_index = entity_tags_list.index(end_span_list[0])
            span_list_add.extend(entity_tags_list[start_span_index:end_span_index+1])
    # return the span to be added and to be removed
    return span_list_add, span_list_remove
        
    

In [39]:
def custom_named_entity_tagging(sentence_id, sentence, opinions, tokenizer, default_tag='O'):
    span_generator = tokenizer.span_tokenize(sentence)
    entity_tags = dict([(span, default_tag) for span in span_generator])
    for opinion in opinions:
        source = opinion.get('Source')[0]
        target = opinion.get('Target')[0]
        expression = opinion.get('Polar_expression')[0]
        if len(source)!=0:
            # if any tags not in the original sentence, break and return
            for text in opinion.get('Source')[0]:
                if text not in sentence:
                    return []
            span_list = opinion.get('Source')[1]
            cur_tag = 'B-SOURCE'
            for word in span_list:
                expected_span = tuple([int(num) for num in word.split(':')])
                actual_span, removed_span = get_span(entity_tags=entity_tags, expected_span=expected_span)
                for span in actual_span:
                    entity_tags[span] = cur_tag
                    cur_tag = swap_tag(cur_tag=cur_tag, next_tag='I-SOURCE')
                for span in removed_span:
                    del entity_tags[span]
        if len(target)!=0:
            # if any tags not in the original sentence, break and return
            for text in opinion.get('Target')[0]:
                if text not in sentence:
                    return []
            span_list = opinion.get('Target')[1]
            cur_tag = 'B-TARGET'
            for word in span_list:
                expected_span = tuple([int(num) for num in word.split(':')])
                actual_span, removed_span = get_span(entity_tags=entity_tags, expected_span=expected_span)
                for span in actual_span:
                    entity_tags[span] = cur_tag
                    cur_tag = swap_tag(cur_tag=cur_tag, next_tag='I-TARGET')
                for span in removed_span:
                    del entity_tags[span]
        if len(expression)!=0:
            for text in opinion.get('Polar_expression')[0]:
                if text not in sentence:
                    return []
            span_list = opinion.get('Polar_expression')[1]
            cur_tag = 'B-EXPRESSION'
            for word in span_list:
                expected_span = tuple([int(num) for num in word.split(':')])
                actual_span, removed_span = get_span(entity_tags=entity_tags, expected_span=expected_span)
                for span in actual_span:
                    entity_tags[span] = cur_tag
                    cur_tag = swap_tag(cur_tag=cur_tag, next_tag='I-EXPRESSION')
                for span in removed_span:
                    del entity_tags[span]

    tagged_words = [[sentence_id, sentence[word_span[0]: word_span[1]], tag] for word_span, tag in entity_tags.items()]
    return tagged_words


In [40]:
def sent_dict_to_list(sent_dict, tokenizer, tagger):
    sentence_id= sent_dict.get('sent_id', 'UNKNOWN_ID')
    sentence = sent_dict.get('text')
    opinions = sent_dict.get('opinions')
    tokenizer = tokenizer
    sent_list = tagger(
        sentence_id = sentence_id,
        sentence = sentence,
        opinions = opinions,
        tokenizer = tokenizer
    )
    return sent_list

In [41]:
def preprocess_data_NER(input_data):
    w_tokenizer = WhitespaceTokenizer()
    output_data = []
    for row in input_data:
        tagged_word_list = sent_dict_to_list(row, w_tokenizer, custom_named_entity_tagging)
        output_data.extend(tagged_word_list)
    output_df = pd.DataFrame(data=output_data, columns=['sentence_id','words','labels'])
    return output_df

In [42]:
train_df_NER = preprocess_data_NER(train_json_du)
test_df_NER = preprocess_data_NER(test_json_du)

In [43]:
train_df_NER = train_df_NER.append(preprocess_data_NER(train_json_mpqa), ignore_index=True)
test_df_NER = test_df_NER.append(preprocess_data_NER(test_json_mpqa), ignore_index=True)

In [44]:
train_df_NER = train_df_NER.append(preprocess_data_NER(train_json_open), ignore_index=True)
test_df_NER = test_df_NER.append(preprocess_data_NER(test_json_open), ignore_index=True)

In [45]:
# remove any possible parsing left-overs of empty string
train_data = train_df_NER[train_df_NER['words']!='']
eval_data = test_df_NER[test_df_NER['words']!='']

In [None]:
# train_data.to_csv('train_ner.csv', index=False)
# eval_data.to_csv('dev_ner.csv', index=False)

# Data analysis

In [101]:
train_data['labels'].value_counts().to_frame()

Unnamed: 0,labels
O,177315
I-TARGET,8666
I-EXPRESSION,5200
B-EXPRESSION,4698
B-TARGET,3951
I-SOURCE,1776
B-SOURCE,1271


In [95]:
df = [preprocess_data_NER(train_json_du),preprocess_data_NER(train_json_mpqa),preprocess_data_NER(train_json_open)]
df_eval = [preprocess_data_NER(test_json_du),preprocess_data_NER(test_json_mpqa),preprocess_data_NER(test_json_open)]

In [96]:
def calculate_max_length(x,field):
    return list(x).count(field)    

In [97]:
def data_preprocessing_all(df):
    column_names = ["total_sentence","source_count","source_max_count","source_avg_count","target_count","target_max_count",
                    "target_avg_count","exp_count","exp_max_count","exp_avg_count"]
    result = pd.DataFrame(columns = column_names)
    for train_data in df:
        total_sentence = train_data['sentence_id'].nunique()
        label_count = train_data['labels'].value_counts().to_dict()
        target_count  = label_count["B-TARGET"]
        source_count  = label_count["B-SOURCE"]
        exp_count  = label_count["B-EXPRESSION"]
        target_max_count = train_data.groupby('sentence_id').agg(lambda x: calculate_max_length(x,"B-TARGET")).reset_index()['labels'].values.max()
        source_max_count = train_data.groupby('sentence_id').agg(lambda x: calculate_max_length(x,"B-SOURCE")).reset_index()['labels'].values.max()
        exp_max_count = train_data.groupby('sentence_id').agg(lambda x: calculate_max_length(x,"B-EXPRESSION")).reset_index()['labels'].values.max()
        source_avg_count= round(source_count/total_sentence,2)
        target_avg_count= round(target_count/total_sentence,2)
        exp_avg_count= round(exp_count/total_sentence,2)
        df2 = {"total_sentence":total_sentence,"source_count":source_count,"source_max_count":source_max_count,"source_avg_count":source_avg_count,"target_count":target_count,"target_max_count":target_max_count,
                    "target_avg_count":target_avg_count,"exp_count":exp_count,"exp_max_count":exp_max_count,"exp_avg_count":exp_avg_count}
        result = result.append(df2, ignore_index = True)
    result["Dataset"] = ["Darmstadt_unis","MPQA","OpeNER_en"]
    result = result.set_index("Dataset")
    return result

In [58]:
data_preprocessing_all(df)

Unnamed: 0_level_0,total_sentence,source_count,source_max_count,source_avg_count,target_count,target_max_count,target_avg_count,exp_count,exp_max_count,exp_avg_count
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Darmstadt_unis,2230.0,55.0,1.0,0.02,707.0,4.0,0.32,770.0,5.0,0.35
MPQA,5628.0,1048.0,3.0,0.19,1293.0,5.0,0.23,1473.0,5.0,0.26
OpeNER_en,1640.0,168.0,3.0,0.1,1951.0,16.0,1.19,2455.0,13.0,1.5


In [59]:
data_preprocessing_all(df_eval)

Unnamed: 0_level_0,total_sentence,source_count,source_max_count,source_avg_count,target_count,target_max_count,target_avg_count,exp_count,exp_max_count,exp_avg_count
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Darmstadt_unis,225.0,8.0,2.0,0.04,85.0,3.0,0.38,90.0,3.0,0.4
MPQA,1990.0,293.0,2.0,0.15,426.0,4.0,0.21,486.0,4.0,0.24
OpeNER_en,224.0,32.0,3.0,0.14,258.0,8.0,1.15,326.0,11.0,1.46


# Data balancing (Optional)

In [21]:
categorical_mapping = {"B-SOURCE": "SOURCE","I-SOURCE": "SOURCE","B-TARGET": "TARGET","I-TARGET": "TARGET","I-EXPRESSION": "EXPRESSION","B-EXPRESSION": "EXPRESSION","O":"O"}
train_data['label_category'] = train_data['labels'].apply(lambda x: categorical_mapping[x])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [22]:
train_data_sentence = train_data.groupby('sentence_id').agg(lambda x: list(x)).reset_index()
train_data = train_data.groupby('sentence_id').agg({"labels": "nunique","label_category": "nunique"}).reset_index()

In [23]:
train_data.label_category.value_counts()

1    6380
3    1929
4    1055
2     134
Name: label_category, dtype: int64

In [24]:
unbalanced_data = train_data_sentence.merge(train_data,on="sentence_id")

In [25]:
unbalanced_data.label_category_y.value_counts()

1    6380
3    1929
4    1055
2     134
Name: label_category_y, dtype: int64

In [26]:
upsample_cate_3 = unbalanced_data.query("label_category_y == 3 or label_category_y == 2").sample(frac=2, replace=True, random_state=1)
upsample_cate_4 = unbalanced_data.query("label_category_y == 4").sample(frac=2, replace=True, random_state=1)
#upsample_cate_2 = unbalanced_data.query("label_category_y == 2").sample(frac=1, replace=True, random_state=1)
upsample_cate_1 = unbalanced_data.query("label_category_y == 1")

In [27]:
# combine dataset, and shuffle
balanced_data = pd.concat([upsample_cate_3,upsample_cate_4,upsample_cate_1]).sample(frac=1)

In [28]:
balanced_data.label_category_y.value_counts()

1    6380
3    3858
4    2110
2     268
Name: label_category_y, dtype: int64

In [29]:
balanced_data.head()

Unnamed: 0,sentence_id,words,labels_x,label_category_x,labels_y,label_category_y
418,../opener/en/kaf/hotel/english00053_37db4002a5...,"[The, hotel, is, working, together, with, diff...","[B-TARGET, I-TARGET, O, B-EXPRESSION, I-EXPRES...","[TARGET, TARGET, O, EXPRESSION, EXPRESSION, EX...",5,3
6779,non_fbis/09.53.15-23595-24,"[As, tanks, surrounded, Miraflores, ,, Mr, Cha...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...",1,1
1430,../opener/en/kaf/hotel/english00173_d11c0a69dd...,"[Front, desk, is, extremely, well, trained, an...","[B-TARGET, I-TARGET, O, B-EXPRESSION, I-EXPRES...","[TARGET, TARGET, O, EXPRESSION, EXPRESSION, EX...",5,3
5331,University_of_Phoenix_Online_18_04-04-2008-15,"[I, found, them, to, be, a, waste, of, time, a...","[O, O, B-TARGET, O, O, O, B-EXPRESSION, I-EXPR...","[O, O, TARGET, O, O, O, EXPRESSION, EXPRESSION...",4,3
891,../opener/en/kaf/hotel/english00103_9470bc3be0...,"[We, had, a, very, enjoyable, stay, at, the, h...","[O, O, O, B-EXPRESSION, I-EXPRESSION, B-TARGET...","[O, O, O, EXPRESSION, EXPRESSION, TARGET, O, O...",4,3


In [30]:
def data_preprocessing_all(df):
    column_names = ["sentence_id","words","labels"]
    result = pd.DataFrame(columns = column_names)
    for index,row in df.iterrows():
        for i in range(len(row['words'])):
            df2 = {'sentence_id': row['sentence_id'], 'words': row['words'][i], 'labels_x':row['labels_x'][i]}
            result = result.append(df2, ignore_index = True)
    return result

In [31]:
train_data_packed = balanced_data.drop(['label_category_x','labels_y','label_category_y'], axis=1)
w = train_data_packed.apply(lambda x: pd.Series(x['words']),axis=1).stack().reset_index(level=1, drop=True)
w.name = 'words'
train_data_final = train_data_packed.drop(['words','labels_x'], axis=1).join(w)

In [32]:
train_data_packed = balanced_data.drop(['label_category_x','labels_y','label_category_y'], axis=1)
l = train_data_packed.apply(lambda x: pd.Series(x['labels_x']),axis=1).stack().reset_index(level=1, drop=True)
l.name = 'labels_x'
train_data_final_2 = train_data_packed.drop(['words','labels_x'], axis=1).join(l)

In [33]:
train_data_final["labels"] = train_data_final_2['labels_x']

In [None]:
train_data = train_data_final

# Baseline model with NLTK POS tagging

In [42]:
eval_data_baseline = eval_data.groupby('sentence_id').agg(lambda x: list(x)).reset_index()

In [45]:
#NN	noun, singular (cat, tree)
#NNS	noun plural (desks)
#NNP	proper noun, singular (sarah)
#NNPS	proper noun, plural (indians or americans)
#PRP	personal pronoun (hers, herself, him, himself)
#PRP$	possessive pronoun (her, his, mine, my, our )
#VB	verb (ask)
#VBG	verb gerund (judging)
#VBD	verb past tense (pleaded)
#VBN	verb past participle (reunified)
#VBP	verb, present tense not 3rd person singular(wrap)
#VBZ	verb, present tense with 3rd person singular (bases)
pos_mapping = {"PRP": "B-SOURCE","PRP$": "B-SOURCE","NN": "B-TARGET","NNS": "B-TARGET","NNP": "B-TARGET","NNPS": "B-TARGET","VBZ": "B-EXPRESSION","VBP": "B-EXPRESSION","VBN": "B-EXPRESSION","VBD": "B-EXPRESSION","VBG": "B-EXPRESSION","VB": "B-EXPRESSION",}

In [52]:
eval_data_baseline

Unnamed: 0,sentence_id,words,labels,pos_tag
0,../opener/en/kaf/hotel/english00011_131b37a0eb...,"[Sri, Lanka, ·, we, will, come, again, !]","[B-TARGET, I-TARGET, O, B-SOURCE, B-EXPRESSION...","[NNP, NNP, NNP, PRP, MD, VB, RB, .]"
1,../opener/en/kaf/hotel/english00011_131b37a0eb...,"[We, had, a, wonderful, stay, in, Sri, Lanka, .]","[B-SOURCE, O, O, B-EXPRESSION, B-TARGET, O, O,...","[PRP, VBD, DT, JJ, NN, IN, NNP, NNP, .]"
2,../opener/en/kaf/hotel/english00011_131b37a0eb...,"[Everybody, was, very, kind, ·, country, is, b...","[O, O, O, O, O, O, O, O, O]","[NN, VBD, RB, NN, JJ, NN, VBZ, JJ, .]"
3,../opener/en/kaf/hotel/english00011_131b37a0eb...,"[The, staff, at, the, Club, Hotel, Dolphin, wa...","[B-TARGET, I-TARGET, O, O, O, O, O, O, B-EXPRE...","[DT, NN, IN, DT, NNP, NNP, NNP, VBD, RB, JJ, ,..."
4,../opener/en/kaf/hotel/english00011_131b37a0eb...,"[We, will, come, again, !]","[B-SOURCE, B-EXPRESSION, I-EXPRESSION, I-EXPRE...","[PRP, MD, VB, RB, .]"
...,...,...,...,...
2434,xbank/wsj_1038-4,"[In, over, -, the-counter, trading, yesterday,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]","[IN, IN, :, JJ, NN, NN, ,, NNP, NNP, VBD, CD, ..."
2435,xbank/wsj_1038-5,"[The, company, said, the, restructuring, 's, i...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[DT, NN, VBD, DT, NN, POS, JJ, NN, MD, VB, DT,..."
2436,xbank/wsj_1038-6,"[The, plan, may, include, selling, branches, ,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[DT, NN, MD, VB, VBG, NNS, ,, VBG, CC, VBG, NN..."
2437,xbank/wsj_1038-7,"[Initially, ,, the, company, said, it, will, c...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[RB, ,, DT, NN, VBD, PRP, MD, VB, PRP$, JJ, NN..."


In [49]:
def generate_pos_tagging(df):
    result = []
    for index,row in df.iterrows():
        tags = []
        pos_tag = nltk.pos_tag(row['words']) 
        for i in range(len(row['words'])):
            tags.append(pos_tag[i][1])
        result.append(tags)
    return result

In [53]:
def map_pos_tag(df):
    result = []
    for index,row in df.iterrows():
        tags = []
        for i in range(len(row['words'])):
            tag = row['pos_tag'][i]
            if tag in pos_mapping.keys(): 
                tags.append(pos_mapping[row['pos_tag'][i]])
            else:
                tags.append("O")
        result.append(tags)
    return result

In [60]:
def predict_o_only(df):
    result = []
    for index,row in df.iterrows():
        tags = []
        for i in range(len(row['words'])):
            tags.append("O")
        result.append(tags)
    return result

In [61]:
eval_data_baseline['pos_tag'] = generate_pos_tagging(eval_data_baseline)
eval_data_baseline['pos_tag_label'] = map_pos_tag(eval_data_baseline)
eval_data_baseline['o_only_label'] = predict_o_only(eval_data_baseline)

In [57]:
expression_label = 'EXPRESSION'
target_label = 'TARGET'
source_label = 'SOURCE'
def get_tag_report(df, preds_list):
    output_view = df.copy()
    output_view['prediction'] = [item for sublist in preds_list for item in sublist]
    output_view['target_label'] = output_view.apply(lambda row: target_label in row['labels'], axis=1)
    output_view['target_pred'] = output_view.apply(lambda row: target_label in row['prediction'], axis=1)
    output_view['source_label'] = output_view.apply(lambda row: source_label in row['labels'], axis=1)
    output_view['source_pred'] = output_view.apply(lambda row: source_label in row['prediction'], axis=1)
    output_view['expression_label'] = output_view.apply(lambda row: expression_label in row['labels'], axis=1)
    output_view['expression_pred'] = output_view.apply(lambda row: expression_label in row['prediction'], axis=1)
    
    target_report = metrics.classification_report(output_view['target_label'], output_view['target_pred'])
    source_report = metrics.classification_report(output_view['source_label'], output_view['source_pred'])
    expression_report = metrics.classification_report(output_view['expression_label'], output_view['expression_pred'])
    print("Target Report")
    print(target_report)
    print("Source Report")
    print(source_report)
    print("Expression Report")
    print(expression_report)
    

In [58]:
eval_data_baseline

Unnamed: 0,sentence_id,words,labels,pos_tag,pos_tag_label
0,../opener/en/kaf/hotel/english00011_131b37a0eb...,"[Sri, Lanka, ·, we, will, come, again, !]","[B-TARGET, I-TARGET, O, B-SOURCE, B-EXPRESSION...","[NNP, NNP, NNP, PRP, MD, VB, RB, .]","[B-TARGET, B-TARGET, B-TARGET, B-SOURCE, O, B-..."
1,../opener/en/kaf/hotel/english00011_131b37a0eb...,"[We, had, a, wonderful, stay, in, Sri, Lanka, .]","[B-SOURCE, O, O, B-EXPRESSION, B-TARGET, O, O,...","[PRP, VBD, DT, JJ, NN, IN, NNP, NNP, .]","[B-SOURCE, B-EXPRESSION, O, O, B-TARGET, O, B-..."
2,../opener/en/kaf/hotel/english00011_131b37a0eb...,"[Everybody, was, very, kind, ·, country, is, b...","[O, O, O, O, O, O, O, O, O]","[NN, VBD, RB, NN, JJ, NN, VBZ, JJ, .]","[B-TARGET, B-EXPRESSION, O, B-TARGET, O, B-TAR..."
3,../opener/en/kaf/hotel/english00011_131b37a0eb...,"[The, staff, at, the, Club, Hotel, Dolphin, wa...","[B-TARGET, I-TARGET, O, O, O, O, O, O, B-EXPRE...","[DT, NN, IN, DT, NNP, NNP, NNP, VBD, RB, JJ, ,...","[O, B-TARGET, O, O, B-TARGET, B-TARGET, B-TARG..."
4,../opener/en/kaf/hotel/english00011_131b37a0eb...,"[We, will, come, again, !]","[B-SOURCE, B-EXPRESSION, I-EXPRESSION, I-EXPRE...","[PRP, MD, VB, RB, .]","[B-SOURCE, O, B-EXPRESSION, O, O]"
...,...,...,...,...,...
2434,xbank/wsj_1038-4,"[In, over, -, the-counter, trading, yesterday,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]","[IN, IN, :, JJ, NN, NN, ,, NNP, NNP, VBD, CD, ...","[O, O, O, O, B-TARGET, B-TARGET, O, B-TARGET, ..."
2435,xbank/wsj_1038-5,"[The, company, said, the, restructuring, 's, i...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[DT, NN, VBD, DT, NN, POS, JJ, NN, MD, VB, DT,...","[O, B-TARGET, B-EXPRESSION, O, B-TARGET, O, O,..."
2436,xbank/wsj_1038-6,"[The, plan, may, include, selling, branches, ,...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[DT, NN, MD, VB, VBG, NNS, ,, VBG, CC, VBG, NN...","[O, B-TARGET, O, B-EXPRESSION, B-EXPRESSION, B..."
2437,xbank/wsj_1038-7,"[Initially, ,, the, company, said, it, will, c...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ...","[RB, ,, DT, NN, VBD, PRP, MD, VB, PRP$, JJ, NN...","[O, O, O, B-TARGET, B-EXPRESSION, B-SOURCE, O,..."


In [59]:
get_tag_report(eval_data, eval_data_baseline['pos_tag_label'])

Target Report
              precision    recall  f1-score   support

       False       0.95      0.71      0.81     51003
        True       0.06      0.31      0.10      2901

    accuracy                           0.69     53904
   macro avg       0.50      0.51      0.45     53904
weighted avg       0.90      0.69      0.77     53904

Source Report
              precision    recall  f1-score   support

       False       0.98      0.96      0.97     53050
        True       0.01      0.03      0.02       854

    accuracy                           0.94     53904
   macro avg       0.50      0.49      0.49     53904
weighted avg       0.97      0.94      0.95     53904

Expression Report
              precision    recall  f1-score   support

       False       0.97      0.85      0.91     52103
        True       0.03      0.13      0.05      1801

    accuracy                           0.83     53904
   macro avg       0.50      0.49      0.48     53904
weighted avg       0.93     

In [62]:
get_tag_report(eval_data, eval_data_baseline['o_only_label'])

Target Report
              precision    recall  f1-score   support

       False       0.95      1.00      0.97     51003
        True       0.00      0.00      0.00      2901

    accuracy                           0.95     53904
   macro avg       0.47      0.50      0.49     53904
weighted avg       0.90      0.95      0.92     53904

Source Report
              precision    recall  f1-score   support

       False       0.98      1.00      0.99     53050
        True       0.00      0.00      0.00       854

    accuracy                           0.98     53904
   macro avg       0.49      0.50      0.50     53904
weighted avg       0.97      0.98      0.98     53904

Expression Report
              precision    recall  f1-score   support

       False       0.97      1.00      0.98     52103
        True       0.00      0.00      0.00      1801

    accuracy                           0.97     53904
   macro avg       0.48      0.50      0.49     53904
weighted avg       0.93     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Train Model for NER Task

In [76]:
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)


# Configure the model
model_args = NERArgs()
model_args.labels_list = ["O", "I-TARGET", "I-SOURCE", "I-EXPRESSION","B-TARGET", "B-SOURCE", "B-EXPRESSION"]
model_args.train_batch_size = 32
model_args.evaluate_during_training = True
model_args.overwrite_output_dir = True
model_args.num_train_epochs = 10
model_args.max_seq_length = 256
model_args.wandb_project = 'NER-model'
model_args.logging_steps = 10
model = NERModel(
    "roberta", "roberta-base", args=model_args
)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForTokenClassification: ['lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able

In [77]:
model.train_model(train_data, eval_data=eval_data)

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

VBox(children=(Label(value=' 0.03MB of 0.03MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Running Epoch 0 of 10:   0%|          | 0/297 [00:00<?, ?it/s]

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/5 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/305 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/297 [00:00<?, ?it/s]

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/5 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/305 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/297 [00:00<?, ?it/s]

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/5 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/305 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/297 [00:00<?, ?it/s]

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/5 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/305 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/297 [00:00<?, ?it/s]

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/5 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/305 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/297 [00:00<?, ?it/s]

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/5 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/305 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/297 [00:00<?, ?it/s]

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/5 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/305 [00:00<?, ?it/s]

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/5 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/305 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/297 [00:00<?, ?it/s]

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/5 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/305 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/297 [00:00<?, ?it/s]

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/5 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/305 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/297 [00:00<?, ?it/s]

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/5 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/305 [00:00<?, ?it/s]

INFO:simpletransformers.ner.ner_model: Training of roberta model complete. Saved to outputs/.


(2970,
 defaultdict(list,
             {'global_step': [297,
               594,
               891,
               1188,
               1485,
               1782,
               2000,
               2079,
               2376,
               2673,
               2970],
              'train_loss': [0.20080578327178955,
               0.5730990171432495,
               0.22009192407131195,
               0.12208372354507446,
               0.07039044797420502,
               0.1164388507604599,
               0.03451358154416084,
               0.019080353900790215,
               0.02395833283662796,
               0.022110242396593094,
               0.00582613842561841],
              'eval_loss': [0.28000834482859394,
               0.26133145689544435,
               0.2415812126128003,
               0.26724727843953944,
               0.3212938595429105,
               0.3317930789970455,
               0.37567395498455103,
               0.3760283562833525,
               0.37518

## Evaluation of NER Task

In [32]:
# Evaluate the model
result, model_outputs, preds_list = model.eval_model(eval_data)

# Make predictions with the model
# predictions, raw_outputs = model.predict(eval_data)
result

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/5 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/307 [00:00<?, ?it/s]

INFO:simpletransformers.ner.ner_model:{'eval_loss': 0.3696765049820725, 'precision': 0.4823266219239374, 'recall': 0.5205214872042492, 'f1_score': 0.5006967022758941}


{'eval_loss': 0.3696765049820725,
 'precision': 0.4823266219239374,
 'recall': 0.5205214872042492,
 'f1_score': 0.5006967022758941}

In [82]:
def custom_eval(model, eval_data):
    new_data = eval_data.groupby(['sentence_id'], sort=False).agg(lambda x: " ".join(list(x))).reset_index()
    result = model.predict(new_data['words'])
    preds_list = [[list(word.values())[0] for word in pred] for pred in result[0]]
    return preds_list

In [250]:
preds_list = custom_eval(model, eval_data)

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/5 [00:00<?, ?it/s]

Running Prediction:   0%|          | 0/305 [00:00<?, ?it/s]

In [305]:
len(eval_data)

53904

In [84]:
expression_label = 'EXPRESSION'
target_label = 'TARGET'
source_label = 'SOURCE'
def get_tag_report(df, preds_list):
    output_view = df.copy()
    output_view['prediction'] = [item for sublist in preds_list for item in sublist]
    output_view['target_label'] = output_view.apply(lambda row: target_label in row['labels'], axis=1)
    output_view['target_pred'] = output_view.apply(lambda row: target_label in row['prediction'], axis=1)
    output_view['source_label'] = output_view.apply(lambda row: source_label in row['labels'], axis=1)
    output_view['source_pred'] = output_view.apply(lambda row: source_label in row['prediction'], axis=1)
    output_view['expression_label'] = output_view.apply(lambda row: expression_label in row['labels'], axis=1)
    output_view['expression_pred'] = output_view.apply(lambda row: expression_label in row['prediction'], axis=1)
    
    target_report = metrics.classification_report(output_view['target_label'], output_view['target_pred'])
    source_report = metrics.classification_report(output_view['source_label'], output_view['source_pred'])
    expression_report = metrics.classification_report(output_view['expression_label'], output_view['expression_pred'])
    print("Target Report")
    print(target_report)
    print("Source Report")
    print(source_report)
    print("Expression Report")
    print(expression_report)
    

In [252]:
get_tag_report(eval_data, preds_list)

Target Report
              precision    recall  f1-score   support

       False       0.98      0.98      0.98     51003
        True       0.66      0.58      0.62      2901

    accuracy                           0.96     53904
   macro avg       0.82      0.78      0.80     53904
weighted avg       0.96      0.96      0.96     53904

Source Report
              precision    recall  f1-score   support

       False       0.99      0.99      0.99     53050
        True       0.62      0.61      0.62       854

    accuracy                           0.99     53904
   macro avg       0.81      0.80      0.81     53904
weighted avg       0.99      0.99      0.99     53904

Expression Report
              precision    recall  f1-score   support

       False       0.99      0.99      0.99     52103
        True       0.65      0.61      0.63      1801

    accuracy                           0.98     53904
   macro avg       0.82      0.80      0.81     53904
weighted avg       0.98     

In [79]:
eval_data_dict = {'du': preprocess_data_NER(test_json_du), 'mpqa':preprocess_data_NER(test_json_mpqa), 'opener_en':preprocess_data_NER(test_json_open)}

In [85]:
def get_tag_reports(model, eval_data_dict):
    for name, eval_data in eval_data_dict.items():
        eval_data = eval_data[eval_data['words']!='']
        preds_list = custom_eval(model, eval_data)
        print(name)
        get_tag_report(eval_data, preds_list)
    

In [86]:
get_tag_reports(model, eval_data_dict)

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/1 [00:00<?, ?it/s]

Running Prediction:   0%|          | 0/29 [00:00<?, ?it/s]

du
Target Report
              precision    recall  f1-score   support

       False       0.99      0.99      0.99      3931
        True       0.57      0.54      0.55       111

    accuracy                           0.98      4042
   macro avg       0.78      0.76      0.77      4042
weighted avg       0.98      0.98      0.98      4042

Source Report
              precision    recall  f1-score   support

       False       1.00      1.00      1.00      4034
        True       0.00      0.00      0.00         8

    accuracy                           1.00      4042
   macro avg       0.50      0.50      0.50      4042
weighted avg       1.00      1.00      1.00      4042

Expression Report
              precision    recall  f1-score   support

       False       0.98      0.98      0.98      3883
        True       0.51      0.53      0.52       159

    accuracy                           0.96      4042
   macro avg       0.75      0.75      0.75      4042
weighted avg       0.96  

INFO:simpletransformers.ner.ner_model: Converting to features started.


  0%|          | 0/4 [00:00<?, ?it/s]

Running Prediction:   0%|          | 0/249 [00:00<?, ?it/s]

mpqa


INFO:simpletransformers.ner.ner_model: Converting to features started.


Target Report
              precision    recall  f1-score   support

       False       0.98      0.99      0.98     44521
        True       0.66      0.55      0.60      2324

    accuracy                           0.96     46845
   macro avg       0.82      0.77      0.79     46845
weighted avg       0.96      0.96      0.96     46845

Source Report
              precision    recall  f1-score   support

       False       0.99      0.99      0.99     46032
        True       0.64      0.62      0.63       813

    accuracy                           0.99     46845
   macro avg       0.82      0.81      0.81     46845
weighted avg       0.99      0.99      0.99     46845

Expression Report
              precision    recall  f1-score   support

       False       0.99      0.99      0.99     45925
        True       0.57      0.52      0.54       920

    accuracy                           0.98     46845
   macro avg       0.78      0.75      0.77     46845
weighted avg       0.98     

  0%|          | 0/1 [00:00<?, ?it/s]

Running Prediction:   0%|          | 0/28 [00:00<?, ?it/s]

opener_en
Target Report
              precision    recall  f1-score   support

       False       0.95      0.96      0.96      2551
        True       0.77      0.74      0.75       466

    accuracy                           0.92      3017
   macro avg       0.86      0.85      0.85      3017
weighted avg       0.92      0.92      0.92      3017

Source Report
              precision    recall  f1-score   support

       False       1.00      0.99      1.00      2984
        True       0.60      0.73      0.66        33

    accuracy                           0.99      3017
   macro avg       0.80      0.86      0.83      3017
weighted avg       0.99      0.99      0.99      3017

Expression Report
              precision    recall  f1-score   support

       False       0.93      0.93      0.93      2295
        True       0.78      0.78      0.78       722

    accuracy                           0.89      3017
   macro avg       0.85      0.85      0.85      3017
weighted avg      

## Data Transformation for Relationship Prediction

In [46]:
def process_relationship(sent_id, text, opinions):
    if len(opinions) == 0:
        return []
    correlation_dict = {}
    global_expression_list = [expression for opinion in opinions for expression in opinion.get('Polar_expression', [[]])[0]]
    for opinion in opinions:
        expression_list = opinion.get('Polar_expression', [])
        if len(expression_list) != 0 and len(expression_list[0]) != 0:
            target_list = opinion.get('Target')[0]
            source_list = opinion.get('Source')[0]
            for expression in expression_list[0]:
                for target in target_list:
                    correlation_dict[(target, expression)] = 1
                for source in source_list:
                    correlation_dict[(source, expression)] = 1
            for expression in global_expression_list:
                if expression not in expression_list:
                    for target in target_list:
                        correlation_dict[(target, expression)] = 0 if correlation_dict.get((target, expression)) != 1 else 1
                    for source in source_list:
                        correlation_dict[(source, expression)] = 0 if correlation_dict.get((source, expression)) != 1 else 1
            
                    
    relationship_table = [[sent_id, text, ind[0], ind[1], val] for ind, val in correlation_dict.items()]
    return relationship_table
        
    

In [47]:
relationship_table_columns = ['sent_id', 'text', 'reference', 'expression', 'labels']
def transform_to_relationship_df(json_data):
    relationship_table = []
    for row in json_data:
        row_relation = process_relationship(sent_id=row.get('sent_id', 'UNKNOWN_ID'), text=row.get('text', ''), opinions=row.get('opinions', []))
        relationship_table.extend(row_relation)
    return pd.DataFrame(data=relationship_table, columns=relationship_table_columns)
        

In [48]:
rel_df_train_du = transform_to_relationship_df(json_data=train_json_du)
rel_df_test_du = transform_to_relationship_df(json_data=test_json_du)
rel_df_train_mpqa = transform_to_relationship_df(json_data=train_json_mpqa)
rel_df_test_mpqa = transform_to_relationship_df(json_data=test_json_mpqa)
rel_df_train_open = transform_to_relationship_df(json_data=train_json_open)
rel_df_test_open = transform_to_relationship_df(json_data=test_json_open)

In [49]:
train_data_rel = rel_df_train_du.copy()
train_data_rel = train_data_rel.append(rel_df_train_mpqa, ignore_index=True)
train_data_rel = train_data_rel.append(rel_df_train_open, ignore_index=True)
train_data_rel['text'] = train_data_rel.apply(lambda row: row['text'] + " Reference: " + row['reference'] + "Expression: " + row['expression'], axis=1)

test_data_rel = rel_df_test_du.copy()
test_data_rel = test_data_rel.append(rel_df_test_mpqa, ignore_index=True)
test_data_rel = test_data_rel.append(rel_df_test_open, ignore_index=True)
test_data_rel['text'] = test_data_rel.apply(lambda row: row['text'] + " Reference: " + row['reference'] + "Expression: " + row['expression'], axis=1)

## Model Training for Relationship Prediction

In [50]:
# Preparing train data
train_df = train_data_rel[['text', 'labels']]

# Preparing eval data
eval_df = test_data_rel[['text', 'labels']]

# Optional model configuration
model_args = ClassificationArgs()

model_args.train_batch_size = 32
model_args.overwrite_output_dir = True
model_args.num_train_epochs = 10
model_args.max_seq_length = 256
model_args.wandb_project = 'visualization-demo'
model_args.logging_steps = 10

# Create a ClassificationModel
model = ClassificationModel(
    "roberta", "roberta-base", args=model_args
)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

In [51]:
# Train the model
model.train_model(train_df)

  0%|          | 0/10270 [00:00<?, ?it/s]

Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

[34m[1mwandb[0m: Currently logged in as: [33mskytianzhu[0m (use `wandb login --relogin` to force relogin)


Running Epoch 0 of 10:   0%|          | 0/321 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/321 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/321 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/321 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/321 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/321 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/321 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/321 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/321 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/321 [00:00<?, ?it/s]

(3210, 0.12527044025632209)

In [52]:
# Evaluate the model
result, model_outputs, wrong_predictions = model.eval_model(eval_df)

  0%|          | 0/1982 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/248 [00:00<?, ?it/s]

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Training loss,██▆▄▆▄▃▄▁▂▁▃▁▂▁▁▁▁▅▃▁▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁▄▁▁▁
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
lr,▂▅███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁

0,1
Training loss,0.00034
global_step,3210.0
lr,0.0


In [75]:
print(metrics.classification_report(eval_df['labels'], list(model_outputs[:,0] < model_outputs[:,1])))

              precision    recall  f1-score   support

           0       0.91      0.84      0.88       656
           1       0.93      0.96      0.94      1326

    accuracy                           0.92      1982
   macro avg       0.92      0.90      0.91      1982
weighted avg       0.92      0.92      0.92      1982



# Always predic true baseline

In [73]:
import sklearn
always_true = len(test_data_rel[['text', 'labels']]['labels'].values)
sklearn.metrics.classification_report([1]*always_true, test_data_rel[['text', 'labels']]['labels'].values)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


'              precision    recall  f1-score   support\n\n           0       0.00      0.00      0.00         0\n           1       1.00      0.67      0.80      1982\n\n    accuracy                           0.67      1982\n   macro avg       0.50      0.33      0.40      1982\nweighted avg       1.00      0.67      0.80      1982\n'

In [75]:
print(sklearn.metrics.classification_report([1]*always_true, test_data_rel[['text', 'labels']]['labels'].values))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.67      0.80      1982

    accuracy                           0.67      1982
   macro avg       0.50      0.33      0.40      1982
weighted avg       1.00      0.67      0.80      1982



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
