In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
import nltk
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [3]:
from google.colab import drive
drive.mount("/content/gdrive/")

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [4]:
cd gdrive/MyDrive/coursework

/content/gdrive/MyDrive/coursework


In [5]:
from scienceie_loader import load_tokenized_data, load_data_with_char_offsets, get_entity_span_from_B_index

In [6]:
data_root = os.path.join(os.getcwd(), 'original_datasets')
data_train = os.path.join(data_root, 'scienceie2017_train/train2')
data_dev = os.path.join(data_root, 'scienceie2017_dev/dev')
data_test = os.path.join(data_root, 'semeval_articles_test')

In [7]:
train_docs, train_rels, _ = load_tokenized_data(data_train)
test_docs, test_rels, _ = load_tokenized_data(data_test)
#dev_docs, dev_rels, _ = load_tokenized_data(data_dev)

In [8]:
train_tagged_words = [ tup for sent in train_docs for tup in sent ]
test_tagged_words = [ tup for sent in test_docs for tup in sent ]

In [9]:
ne_tags = {tag for word, tag in train_tagged_words} 
print(ne_tags)

{'I-Task', 'B-Task', 'B-Process', 'O', 'B-Material', 'I-Process', 'I-Material'}


In [10]:
def train_HMM_POS_tagger(train_set):
    tagger = nltk.tag.hmm.HiddenMarkovModelTagger.train(train_set)
    return tagger

In [11]:
tagger = train_HMM_POS_tagger(train_docs)

In [12]:
def tag_test_set(test_set, tagger):
    test_sents = [[token for token,tag in sent] for sent in test_set]
    predicted_tags = tagger.tag_sents(test_sents)
    return predicted_tags

In [13]:
predicted_tags = tag_test_set(test_docs,tagger)

In [14]:
test_sents_with_predicted_tags = tag_test_set(test_docs, tagger)

In [15]:
test_sents_with_predicted_tags[ : 2]

[[('Three-dimensional', 'B-Task'),
  ('digital', 'I-Task'),
  ('subtraction', 'I-Task'),
  ('angiographic', 'I-Task'),
  ('(', 'O'),
  ('3D-DSA', 'B-Material'),
  (')', 'I-Material'),
  ('images', 'I-Material'),
  ('from', 'I-Material'),
  ('diagnostic', 'I-Material'),
  ('cerebral', 'I-Material'),
  ('angiography', 'I-Material'),
  ('were', 'O'),
  ('obtained', 'O'),
  ('at', 'O'),
  ('least', 'O'),
  ('one', 'O'),
  ('day', 'O'),
  ('prior', 'O'),
  ('to', 'O'),
  ('embolization', 'O'),
  ('in', 'O'),
  ('all', 'O'),
  ('patients.', 'O'),
  ('The', 'O'),
  ('raw', 'B-Material'),
  ('data', 'I-Material'),
  ('of', 'I-Material'),
  ('3D-DSA', 'I-Material'),
  ('in', 'O'),
  ('a', 'O'),
  ('DICOM', 'B-Material'),
  ('file', 'I-Material'),
  ('were', 'O'),
  ('used', 'O'),
  ('for', 'O'),
  ('creating', 'B-Task'),
  ('a', 'I-Task'),
  ('3D', 'I-Task'),
  ('model', 'I-Task'),
  ('of', 'I-Task'),
  ('the', 'I-Task'),
  ('target', 'I-Task'),
  ('vessel', 'I-Task'),
  ('segment.', 'I-Task'),

In [16]:
#The code is taken from the lab6


def extract_spans(tagged_sents, ne_tags):
    
    spans = {}
    for ne_tag in ne_tags:
        if ne_tag == 'O':
            continue

        spans[ne_tag[2:]] = []  
        
    for sidx, sent in enumerate(tagged_sents):
        start = -1
        entity_type = None
        for i, (tok, lab) in enumerate(sent):
            if 'B-' in lab:
                start = i
                end = i + 1
                entity_type = lab[2:]
            elif 'I-' in lab:
                end = i + 1
            elif lab == 'O' and start >= 0:
                spans[entity_type].append((start, end, sidx))
                start = -1
                
    return spans

In [17]:
def cal_span_level_f1(test_sents, test_sents_with_pred, ne_tags):
   
    gold_spans = extract_spans(test_sents, ne_tags)


    pred_spans = extract_spans(test_sents_with_pred, ne_tags)
    
    f1_per_class = []
    
    ne_types = gold_spans.keys()  
    
    for ne_type in ne_types:
      
        true_pos = 0
        false_pos = 0
        
      
        for span in pred_spans[ne_type]:
            if span in gold_spans[ne_type]:
                true_pos += 1
            else:
                false_pos += 1
                
        false_neg = 0
        for span in gold_spans[ne_type]:
            if span not in pred_spans[ne_type]:
                false_neg += 1
                
   
                
        if true_pos + false_pos == 0:
            precision = 0
        else:
            precision = true_pos / float(true_pos + false_pos)
            
        if true_pos + false_neg == 0:
            recall = 0
        else:
            recall = true_pos / float(true_pos + false_neg)
        
        if precision + recall == 0:
            f1 = 0
        else:
            f1 = 2 * precision * recall / (precision + recall)
            
        f1_per_class.append(f1)
        print(f'F1-score for class {ne_type} = {f1}')
        
    print(f'Macro-average F1-score = {np.mean(f1_per_class)}')


In [18]:
cal_span_level_f1(test_docs, test_sents_with_predicted_tags, ne_tags)

F1-score for class Task = 0.06597222222222224
F1-score for class Process = 0.18242343541944075
F1-score for class Material = 0.21467391304347827
Macro-average F1-score = 0.15435652356171375
