In [1]:
import pandas as pd
import pickle as pkl
import re
from collections import Counter
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords 

import stanza
# stanza.download('en')
nlp = stanza.Pipeline(lang='en', processors='tokenize,mwt,pos,lemma')

def count_freq(preds, gts):
    preds_len = [len(x.split(' ')) for x in preds]
    gts_len = [len(x.split(' ')) for x in gts]
    print(Counter(preds_len))
    print(Counter(gts_len))

def evaluation_metrics(pred, gt):
    TP = len(set(pred) & set(gt)) 
    FP = len(set(pred)-set(gt))
    FN = len(set(gt)-set(pred))
    precision = round((TP/(TP+FP))*100, 2)
    recall = round((TP/(TP+FN))*100,2)
    f1_score = round((2 * precision * recall) / (precision + recall),2)
    return precision, recall, f1_score 

def lemma(li):
    new_list = []
    for t in li:
        doc = nlp(str(t))
        doc1 = ' '.join([word.lemma for sent in doc.sentences for word in sent.words])
        doc1 = re.sub('-',' ',doc1)
        doc1 = re.sub(' +', ' ',doc1)
        new_list.append(doc1)
    new_list = [s for s in new_list if len(s) >= 2]
    return new_list

def get_term_(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())

        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'

        terms = []
        term = []
        for token, label in zip(tokens, labels):
            if label == 'B':
                #Lưu vị trí B
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
            # Check b_pos = 0 không
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower().strip() for x in final_terms]
    return final_terms  

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/hanhtran/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
2021-06-09 16:14:55 INFO: Loading these models for language: en (English):
| Processor | Package  |
------------------------
| tokenize  | combined |
| pos       | combined |
| lemma     | combined |

2021-06-09 16:14:55 INFO: Use device: cpu
2021-06-09 16:14:55 INFO: Loading: tokenize
2021-06-09 16:14:55 INFO: Loading: pos
2021-06-09 16:14:56 INFO: Loading: lemma
2021-06-09 16:14:56 INFO: Done loading processors!


1. ADJ NOUN - 1

In [2]:
def get_term_adj_noun(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())

        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'
        terms = []
        term = []
        for i, (token, label) in enumerate(zip(tokens, labels)):
            if label == 'B': 
                #Lưu vị trí B
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                if (b_pos != 0) and (tokens[b_pos - 1] != '') and (tokens[b_pos - 1] != ' ') and (len(nlp(str(tokens[b_pos - 1])).sentences) > 0) and (len(nlp(str(tokens[b_pos])).sentences) > 0):
                    b_word = nlp(str(tokens[b_pos - 1])).sentences[0].words[0]
                    c_word = nlp(str(tokens[b_pos])).sentences[0].words[0]
                    if (c_word.upos == 'NOUN') and (c_word.text != 'None') and (b_word.text != 'None') and (b_word.upos == 'ADJ'):
                        terms.append(' '.join([b_word.text] + term))
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower().strip() for x in final_terms]
    return final_terms    

2. NOUN NOUN - 0

In [3]:
def get_term_noun_noun(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())

        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'

        terms = []
        term = []
        for i, (token, label) in enumerate(zip(tokens, labels)):
            if label == 'B': 
                #Lưu vị trí B
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                if b_pos != 0:
                    if (tokens[b_pos - 1] != '') and (tokens[b_pos - 1] != ' ') and (len(nlp(str(tokens[b_pos - 1])).sentences) > 0) and (len(nlp(str(tokens[b_pos])).sentences) > 0):
                        b_word = nlp(str(tokens[b_pos - 1])).sentences[0].words[0]
                        c_word = nlp(str(tokens[b_pos])).sentences[0].words[0]
                        if (c_word.upos == 'NOUN') and (c_word.text != 'None') and (b_word.text != 'None') and (b_word.upos == 'NOUN'):
                            terms.append(' '.join([b_word.text] + term))
                    if (tokens[i] != '') and (tokens[i] != ' ') and (len(nlp(str(tokens[i])).sentences) > 0):
                        a_word = nlp(str(tokens[i])).sentences[0].words[0]
                        if (a_word.text != 'None') and (a_word.upos == 'NOUN'):
                            terms.append(' '.join(term + [a_word.text]))
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower().strip() for x in final_terms]
    return final_terms    

3. ADJ NOUN NOUN - 1

In [4]:
def get_term_adj_noun_noun(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())

        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'

        terms = []
        term = []
        for i, (token, label) in enumerate(zip(tokens, labels)):
            if label == 'B': 
                #Lưu vị trí B
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                # Check b_pos = 0 không
                if b_pos != 0:
                    #ADJ NOUN NOUN
                    if (tokens[i] != '') and (tokens[i] != ' ') and (tokens[b_pos - 1] != '') and (tokens[b_pos - 1] != ' ') and (len(nlp(str(tokens[b_pos - 1])).sentences) > 0) and (len(nlp(str(tokens[i])).sentences) > 0) and (len(nlp(str(tokens[b_pos])).sentences) > 0):
                        a_word = nlp(str(tokens[i])).sentences[0].words[0]
                        b_word = nlp(str(tokens[b_pos - 1])).sentences[0].words[0]
                        c_word = nlp(str(tokens[b_pos])).sentences[0].words[0]
                        # Check vị trí b_pos - 1: terms.append()
                        if (c_word.text != 'None') and (c_word.upos == 'NOUN') and (b_word.text != 'None') and ((b_word.upos == 'ADJ') and (a_word.upos == 'NOUN')):
                            terms.append(' '.join([b_word.text] + term + [a_word.text]))
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
            # check b_pos - 1
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower().strip() for x in final_terms]
    return final_terms    

4. ADJ ADJ NOUN - 2

In [5]:
def get_term_adj_adj_noun(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())

        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'

        terms = []
        term = []
        for i, (token, label) in enumerate(zip(tokens, labels)):
            if label == 'B': 
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                if b_pos != 0:
                    #ADJ ADJ NOUN              
                    if (tokens[b_pos - 2] != '') and (tokens[b_pos - 2] != ' ') and (tokens[b_pos - 1] != '') and (tokens[b_pos - 1] != ' ') and (len(nlp(str(tokens[b_pos - 2])).sentences) > 0 and len(nlp(str(tokens[b_pos - 1])).sentences) > 0 and len(nlp(str(tokens[b_pos])).sentences) > 0):
                        b1_word = nlp(str(tokens[b_pos - 2])).sentences[0].words[0] 
                        b_word = nlp(str(tokens[b_pos - 1])).sentences[0].words[0]
                        c_word = nlp(str(tokens[b_pos])).sentences[0].words[0]
                        if (c_word.upos == 'NOUN') and (c_word.text != 'None') and (b_word.text != 'None') and (b1_word.text != 'None') and ((b_word.upos == 'ADJ') and (b1_word.upos == 'ADJ')):
                            terms.append(' '.join([b1_word.text] +[b_word.text] + term))
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower().strip() for x in final_terms]
    return final_terms    

5. NOUN ADJ NOUN - 2

In [6]:
def get_term_noun_adj_noun(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())

        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'

        terms = []
        term = []
        for i, (token, label) in enumerate(zip(tokens, labels)):
            if label == 'B': 
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                if b_pos != 0:
                    #NOUN ADJ NOUN
                    if (tokens[b_pos - 2] != '') and (tokens[b_pos - 2] != ' ') and (tokens[b_pos - 1] != '') and (tokens[b_pos - 1] != ' ') and (len(nlp(str(tokens[b_pos - 2])).sentences) > 0 and len(nlp(str(tokens[b_pos - 1])).sentences) > 0 and len(nlp(str(tokens[b_pos])).sentences) > 0):
                        b1_word = nlp(str(tokens[b_pos - 2])).sentences[0].words[0] 
                        b_word = nlp(str(tokens[b_pos - 1])).sentences[0].words[0]
                        c_word = nlp(str(tokens[b_pos])).sentences[0].words[0]
                        if (c_word.upos == 'NOUN') and (c_word.text != 'None') and (b_word.text != 'None') and (b1_word.text != 'None') and ((b_word.upos == 'ADJ') and (b1_word.upos == 'NOUN')):
                            terms.append(' '.join([b1_word.text] +[b_word.text] + term))
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower().strip() for x in final_terms]
    return final_terms    

6. NOUN NOUN NOUN - 0

In [7]:
def get_term_noun_noun_noun(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())

        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'

        terms = []
        term = []
        for i, (token, label) in enumerate(zip(tokens, labels)):
            if label == 'B': 
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                # NOUN NOUN NOUN
                if b_pos != 0 and i + 1 < len(tokens):
                    if (tokens[i] != '') and (tokens[i] != ' ') and (tokens[i+1] != '') and (tokens[i+1] != ' ') and (len(nlp(str(tokens[i])).sentences) > 0 and len(nlp(str(tokens[i+1])).sentences) > 0 and len(nlp(str(tokens[b_pos])).sentences) > 0):
                        a1_word = nlp(str(tokens[i+1])).sentences[0].words[0] 
                        a_word = nlp(str(tokens[i])).sentences[0].words[0]
                        c_word = nlp(str(tokens[b_pos])).sentences[0].words[0]
                        if (c_word.text != 'None') and (c_word.upos == 'NOUN') and (a_word.text != 'None') and (a1_word.text != 'None') and ((a_word.upos == 'NOUN') and (a1_word.upos == 'NOUN')):
                            terms.append(' '.join( term + [a_word.text] +[a1_word.text]))
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower().strip() for x in final_terms]
    return final_terms    

7. NOUN ADP NOUN - 0

In [8]:
def get_term_noun_adp_noun(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())

        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'

        terms = []
        term = []
        for i, (token, label) in enumerate(zip(tokens, labels)):
            if label == 'B': 
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                if b_pos != 0 and i+1 < len(tokens):
                    if (tokens[i] != '') and (tokens[i] != ' ') and (tokens[i+1] != '') and (tokens[i+1] != ' ') and (len(nlp(str(tokens[i])).sentences) > 0 and len(nlp(str(tokens[i+1])).sentences) > 0 and len(nlp(str(tokens[b_pos])).sentences) > 0):
                        a1_word = nlp(str(tokens[i+1])).sentences[0].words[0] 
                        a_word = nlp(str(tokens[i])).sentences[0].words[0]
                        c_word = nlp(str(tokens[b_pos])).sentences[0].words[0]
                        if (c_word.upos == 'NOUN')and (c_word.text != 'None') and (a_word.text != 'None') and (a1_word.text != 'None') and ((a_word.upos == 'ADP') and (a1_word.upos == 'NOUN')):
                            terms.append(' '.join(term + [a_word.text] + [a1_word.text]))                               
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
        
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower() for x in final_terms]
    return final_terms

8. NOUN ADP ADJ NOUN - 0

In [9]:
def get_term_noun_adp_adj_noun(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())
        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'
        terms = []
        term = []
        for i, (token, label) in enumerate(zip(tokens, labels)):
            if label == 'B': 
                #Lưu vị trí B
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                if b_pos != 0 and i + 2 < len(tokens):
                    if (tokens[i] != '') and (tokens[i] != ' ') and (tokens[i+1] != '') and (tokens[i+1] != ' ') and (tokens[i+2] != '') and (tokens[i+2] != ' ') and (tokens[b_pos] != '') and (tokens[b_pos] != ' ') and (len(nlp(str(tokens[i])).sentences) > 0 and len(nlp(str(tokens[i+1])).sentences) > 0 and len(nlp(str(tokens[i+2])).sentences) > 0 and len(nlp(str(tokens[b_pos])).sentences) > 0):
                        a2_word = nlp(str(tokens[i+2])).sentences[0].words[0] 
                        a1_word = nlp(str(tokens[i+1])).sentences[0].words[0] 
                        a_word = nlp(str(tokens[i])).sentences[0].words[0]
                        c_word = nlp(str(tokens[b_pos])).sentences[0].words[0]
                        if (c_word.upos == 'NOUN') and (c_word.text != 'None') and (a_word.text != 'None') and (a1_word.text != 'None') and (a2_word.text != 'None') and (a_word.upos == 'ADP') and (a1_word.upos == 'ADJ') and (a2_word.upos == 'NOUN'):
                            terms.append(' '.join(term + [a_word.text] + [a1_word.text] +[a2_word.text]))   
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower() for x in final_terms]
    return final_terms    

9. ADJ NOUN ADP NOUN - 0

In [10]:
def get_term_adj_noun_adp_noun(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())

        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'

        terms = []
        term = []
        for i, (token, label) in enumerate(zip(tokens, labels)):
            if label == 'B': 
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                if b_pos != 0 and i + 1 < len(tokens):
                    if (tokens[i+1] != '') and (tokens[i+1] != ' ') and (tokens[i] != '') and (tokens[i] != ' ') and (tokens[b_pos - 1] != '') and (tokens[b_pos - 1] != ' ') and len(nlp(str(tokens[b_pos - 1])).sentences) > 0 and (len(nlp(str(tokens[i])).sentences) > 0) and (len(nlp(str(tokens[i+1])).sentences) > 0) and (len(nlp(str(tokens[b_pos])).sentences) > 0):
                        a1_word = nlp(str(tokens[i+1])).sentences[0].words[0]
                        a_word = nlp(str(tokens[i])).sentences[0].words[0]
                        b_word = nlp(str(tokens[b_pos - 1])).sentences[0].words[0]
                        c_word = nlp(str(tokens[b_pos])).sentences[0].words[0]
                        if (c_word.text != 'None') and (c_word.upos == 'NOUN') and (b_word.text != 'None') and (a_word.text != 'None') and (a1_word.text != 'None') and (b_word.upos == 'ADJ') and (a_word.upos == 'ADP') and (a1_word.upos == 'NOUN'):
                            terms.append(' '.join([b_word.text] + term + [a_word.text] + [a1_word.text]))        
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower() for x in final_terms]
    return final_terms  

10. NOUN ADP NOUN NOUN - 0

In [11]:
def get_term_noun_adp_noun_noun(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())

        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'

        terms = []
        term = []
        for i, (token, label) in enumerate(zip(tokens, labels)):
            if label == 'B': 
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                if b_pos != 0 and i + 2 < len(tokens):
                    if (tokens[i+2] != '') and (tokens[i+2] != ' ') and (tokens[i+1] != '') and (tokens[i+1] != ' ') and (tokens[i] != '') and (tokens[i] != ' ') and (len(nlp(str(tokens[i])).sentences) > 0) and (len(nlp(str(tokens[i+1])).sentences) > 0) and (len(nlp(str(tokens[i+2])).sentences) > 0) and (len(nlp(str(tokens[b_pos])).sentences) > 0):
                            a2_word = nlp(str(tokens[i+2])).sentences[0].words[0]
                            a1_word = nlp(str(tokens[i+1])).sentences[0].words[0]
                            a_word = nlp(str(tokens[i])).sentences[0].words[0]
                            c_word = nlp(str(tokens[b_pos])).sentences[0].words[0]
                            if (c_word.text != 'None') and (c_word.upos == 'NOUN') and (a_word.text != 'None')and (a1_word.text != 'None') and (a2_word.text != 'None') and (a_word.upos == 'ADP') and (a1_word.upos == 'NOUN') and (a2_word.upos == 'NOUN'):
                                terms.append(' '.join(term + [a_word.text] + [a1_word.text] + [a2_word.text]))
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
            # check b_pos - 1
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower() for x in final_terms]
    return final_terms  

11. ADJ NOUN ADP ADJ NOUN - 1

In [12]:
def get_term_adj_noun_adp_adj_noun(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())

        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'

        terms = []
        term = []
        for i, (token, label) in enumerate(zip(tokens, labels)):
            if label == 'B': 
                #Lưu vị trí B
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                if (b_pos != 0) and (i + 2 < len(tokens)) and (tokens[i+2] != '') and (tokens[i+2] != ' ') and (tokens[i+1] != '') and (tokens[i+1] != ' ') and (tokens[i] != '') and (tokens[i] != ' ')and (tokens[b_pos] != '') and (tokens[b_pos] != ' ') and (tokens[b_pos - 1] != '') and (tokens[b_pos - 1] != ' ') and (len(nlp(str(tokens[b_pos - 1])).sentences) > 0) and (len(nlp(str(tokens[i])).sentences) > 0) and (len(nlp(str(tokens[i+1])).sentences) > 0) and (len(nlp(str(tokens[i+2])).sentences) > 0) and (len(nlp(str(tokens[b_pos])).sentences) > 0):
                    a2_word = nlp(str(tokens[i+2])).sentences[0].words[0]
                    a1_word = nlp(str(tokens[i+1])).sentences[0].words[0]
                    a_word = nlp(str(tokens[i])).sentences[0].words[0]
                    b_word = nlp(str(tokens[b_pos - 1])).sentences[0].words[0]
                    c_word = nlp(str(tokens[b_pos])).sentences[0].words[0]
                    if (c_word.text != 'None') and (c_word.upos == 'NOUN') and (b_word.text != 'None') and (a_word.text != 'None') and (a1_word.text != 'None') and (a2_word.text != 'None') and (b_word.upos == 'ADJ') and (a_word.upos == 'ADP') and (a1_word.upos == 'ADJ')and (a2_word.upos == 'NOUN'):
                        terms.append(' '.join([b_word.text] + term + [a_word.text] + [a1_word.text]  + [a2_word.text]))
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower() for x in final_terms]
    return final_terms  

12. NOUN PRON - 0

In [13]:
def get_term_noun_pron(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())

        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'

        terms = []
        term = []
        for i, (token, label) in enumerate(zip(tokens, labels)):
            if label == 'B': 
                #Lưu vị trí B
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                if (b_pos != 0) and (tokens[i] != '') and (tokens[i] != ' ') and (tokens[b_pos] != '') and (tokens[b_pos] != ' ') and (len(nlp(str(tokens[i])).sentences) > 0) and (len(nlp(str(tokens[b_pos])).sentences) > 0):
                        a_word = nlp(str(tokens[i])).sentences[0].words[0]
                        c_word = nlp(str(tokens[b_pos])).sentences[0].words[0]
                        # Check vị trí b_pos - 1: terms.append()
                        if (c_word.text != 'None') and (c_word.upos == 'NOUN')  and (a_word.text != 'None') and (a_word.upos == 'PROPN'):
                            terms.append(' '.join(term + [a_word.text]))
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
            # check b_pos - 1
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower() for x in final_terms]
    return final_terms  

13. ADV ADJ NOUN - 2

In [14]:
def get_term_adv_adj_noun(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())

        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'
        terms = []
        term = []
        for i, (token, label) in enumerate(zip(tokens, labels)):
            if label == 'B': 
                #Lưu vị trí B
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                if (b_pos != 0) and (tokens[b_pos - 2] != '') and (tokens[b_pos - 2] != ' ') and (tokens[b_pos - 1] != '') and (tokens[b_pos - 1] != ' ') and (len(nlp(str(tokens[b_pos - 2])).sentences) > 0 and len(nlp(str(tokens[b_pos - 1])).sentences) > 0 and len(nlp(str(tokens[b_pos])).sentences) > 0):
                        b1_word = nlp(str(tokens[b_pos - 2])).sentences[0].words[0] 
                        b_word = nlp(str(tokens[b_pos - 1])).sentences[0].words[0]
                        c_word = nlp(str(tokens[b_pos])).sentences[0].words[0]
                        if (c_word.text != 'None') and (c_word.upos == 'NOUN') and (b_word.text != 'None') and (b1_word.text != 'None') and ((b_word.upos == 'ADJ') and (b1_word.upos == 'ADV')):
                            terms.append(' '.join([b1_word.text] +[b_word.text] + term))                   
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
        
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower() for x in final_terms]
    return final_terms    

14. ADJ NOUN ADJ NOUN - 1

In [15]:
def get_term_adj_noun_adj_noun(predictions):
    all_term = []
    for sentence in predictions:
        tokens = []
        labels = []
        for d in sentence:
            tokens.extend(d.keys())
            labels.extend(d.values())

        for i, label in enumerate(labels):
            if labels[i] == 'I' and (i == 0 or labels[i - 1] == 'O'):
                labels[i] = 'O'

        terms = []
        term = []
        for i, (token, label) in enumerate(zip(tokens, labels)):
            if label == 'B': 
                b_pos = i
                term = [token]
            elif label == 'I':
                term.append(token)
            elif len(term) > 0:
                terms.append(' '.join(term))
                if (b_pos != 0) and (i + 1 < len(tokens)) and (tokens[b_pos - 1] != '') and (tokens[b_pos - 1] != ' ') and (tokens[b_pos] != '') and (tokens[b_pos] != ' ') and (tokens[i] != '') and (tokens[i] != ' ') and (tokens[i+1] != '') and (tokens[i+1] != ' ') and (len(nlp(str(tokens[b_pos - 1])).sentences) > 0) and (len(nlp(str(tokens[b_pos])).sentences) > 0) and (len(nlp(str(tokens[i])).sentences) > 0) and  (len(nlp(str(tokens[i+1])).sentences) > 0): 
                    a1_word = nlp(str(tokens[i+1])).sentences[0].words[0]
                    a_word = nlp(str(tokens[i])).sentences[0].words[0]
                    b_word = nlp(str(tokens[b_pos - 1])).sentences[0].words[0]
                    c_word = nlp(str(tokens[b_pos])).sentences[0].words[0]
                    # Check vị trí b_pos - 1: terms.append()
                    if (c_word.text != 'None') and (b_word.text != 'None') and (a_word.text != 'None') and (a1_word.text != 'None') and (c_word.upos == 'NOUN') and ((b_word.upos == 'ADJ') and (a_word.upos == 'ADJ') and (a1_word.upos == 'NOUN')):
                        terms.append(' '.join([b_word.text] + term + [a_word.text] + [a1_word.text]))
                                
                term = []
        if len(term) > 0:
            terms.append(' '.join(term))
            # check b_pos - 1
        
        all_term.append(terms)
    
    final_terms = []
    for i in all_term:
        final_terms.extend(i)

    final_terms = [x.lower() for x in final_terms]
    return final_terms

In [16]:
def term_evaluation(domain_path, preds_path, rule=None):
    groundtruth = pd.read_csv(domain_path, sep='	', engine='python',header=None)
    gt = list(groundtruth[0])
    predictions = pkl.load(open(preds_path, 'rb'))
    if rule == 'adj_noun':
        preds =  get_term_adj_noun(predictions)
    elif rule == 'noun_noun':
        preds =  get_term_noun_noun(predictions)
    elif rule == 'adj_adj_noun':
        preds = get_term_adj_adj_noun(predictions)
    elif rule == 'adj_noun_noun':
        preds = get_term_adj_noun_noun(predictions)
    elif rule == 'noun_adj_noun':
        preds = get_term_noun_adj_noun(predictions)
    elif rule == 'noun_noun_noun':
        preds = get_term_noun_noun_noun(predictions)
    elif rule == 'noun_adp_noun':
        preds = get_term_noun_adp_noun(predictions)
    elif rule == 'noun_adp_adj_noun':
        preds = get_term_noun_adp_adj_noun(predictions)
    elif rule == 'adj_noun_adp_noun':
        preds = get_term_adj_noun_adp_noun(predictions)
    elif rule == 'noun_adp_noun_noun':
        preds = get_term_noun_adp_noun_noun(predictions)
    elif rule == 'adj_noun_adp_adj_noun':
        preds = get_term_adj_noun_adp_adj_noun(predictions)
    elif rule == 'adv_adj_noun':
        preds = get_term_adv_adj_noun(predictions)
    elif rule == 'noun_pron':
        preds = get_term_noun_pron(predictions)
    elif rule == 'adj_noun_adj_noun':
        preds = get_term_adj_noun_adj_noun(predictions)
    else:
        preds =  get_term_(predictions)
    stop_words = set(stopwords.words('english'))
    pred_terms =  set(preds) - set(stop_words)
    pred_terms = [x for x in pred_terms if len(x)>1]
    pred_terms = [x.lower().strip() for x in pred_terms]
    pred_terms = [re.sub(' -','-', x) for x in pred_terms]
    pred_terms = [re.sub('- ','-', x) for x in pred_terms]
    pred_terms = [re.sub('\(','', x) for x in pred_terms]
    pred_terms = [re.sub('\/','', x) for x in pred_terms]
    precision, recall, f1 = evaluation_metrics(pred_terms, gt)
    return precision, recall, f1
    

In [17]:
path = '/Users/hanhtran/Documents/terminology-extraction/ACTER/'
domain_paths = [path+'en/htfl/annotations/htfl_en_terms.ann', path+'en/wind/annotations/wind_en_terms.ann',
                path+'en/equi/annotations/equi_en_terms.ann',path+'en/corp/annotations/corp_en_terms.ann',
                path+'en/htfl/annotations/htfl_en_terms_nes.ann', path+'en/wind/annotations/wind_en_terms_nes.ann',
                path+'en/equi/annotations/equi_en_terms_nes.ann',path+'en/corp/annotations/corp_en_terms_nes.ann',
               ]
preds_paths = ['ann_xlnet_htfl.pkl','ann_xlnet_wind.pkl','ann_xlnet_equi.pkl', 'ann_xlnet_corp.pkl',
              'nes_xlnet_htfl.pkl','nes_xlnet_wind.pkl','nes_xlnet_equi.pkl', 'nes_xlnet_corp.pkl']
results = []
for d, p in zip(domain_paths, preds_paths):
    pre, rec, f1 = term_evaluation(d,p)
    results.append([p,pre, rec, f1])
raw_res = pd.DataFrame(results,columns=['DOMAIN','NON_PATTERN_P','NON_PATTERN_R','NON_PATTERN_F1'])
raw_res

Unnamed: 0,DOMAIN,NON_PATTERN_P,NON_PATTERN_R,NON_PATTERN_F1
0,ann_xlnet_htfl.pkl,42.94,30.03,35.34
1,ann_xlnet_wind.pkl,37.76,52.61,43.96
2,ann_xlnet_equi.pkl,42.22,61.99,50.23
3,ann_xlnet_corp.pkl,42.54,59.33,49.55
4,nes_xlnet_htfl.pkl,43.63,36.83,39.94
5,nes_xlnet_wind.pkl,42.47,49.61,45.76
6,nes_xlnet_equi.pkl,46.77,55.17,50.62
7,nes_xlnet_corp.pkl,45.05,55.41,49.7


In [18]:
results = []
for d, p in zip(domain_paths, preds_paths):
    pre, rec, f1 = term_evaluation(d,p, 'adj_noun')
    results.append([p,pre, rec, f1])
adj_noun = pd.DataFrame(results,columns=['DOMAIN','ADJ_NOUN_P','ADJ_NOUN_R','ADJ_NOUN_F1'])
adj_noun

Unnamed: 0,DOMAIN,ADJ_NOUN_P,ADJ_NOUN_R,ADJ_NOUN_F1
0,ann_xlnet_htfl.pkl,40.83,34.22,37.23
1,ann_xlnet_wind.pkl,34.48,57.75,43.18
2,ann_xlnet_equi.pkl,34.81,64.59,45.24
3,ann_xlnet_corp.pkl,38.52,64.62,48.27
4,nes_xlnet_htfl.pkl,41.47,40.54,41.0
5,nes_xlnet_wind.pkl,39.42,52.22,44.93
6,nes_xlnet_equi.pkl,39.33,56.89,46.51
7,nes_xlnet_corp.pkl,41.79,59.42,49.07


In [19]:
results = []
for d, p in zip(domain_paths, preds_paths):
    pre, rec, f1  = term_evaluation(d,p, 'noun_noun')
    results.append([p,pre, rec, f1])
noun_noun = pd.DataFrame(results,columns=['DOMAIN','NOUN_NOUN_P','NOUN_NOUN_R','NOUN_NOUN_F1'])
noun_noun

Unnamed: 0,DOMAIN,NOUN_NOUN_P,NOUN_NOUN_R,NOUN_NOUN_F1
0,ann_xlnet_htfl.pkl,32.89,36.47,34.59
1,ann_xlnet_wind.pkl,25.93,60.86,36.37
2,ann_xlnet_equi.pkl,33.53,67.45,44.79
3,ann_xlnet_corp.pkl,32.93,64.4,43.58
4,nes_xlnet_htfl.pkl,35.28,41.43,38.11
5,nes_xlnet_wind.pkl,31.19,55.02,39.81
6,nes_xlnet_equi.pkl,38.06,59.68,46.48
7,nes_xlnet_corp.pkl,35.85,59.59,44.77


In [22]:
results = []
for d, p in zip(domain_paths, preds_paths):
    pre, rec, f1 = term_evaluation(d,p, 'adj_noun_noun')
    results.append([p,pre, rec, f1])
adj_noun_noun = pd.DataFrame(results,columns=['DOMAIN','ADJ_NOUN_NOUN_P','ADJ_NOUN_NOUN_R','ADJ_NOUN_NOUN_F1'])
adj_noun_noun

Unnamed: 0,DOMAIN,ADJ_NOUN_NOUN_P,ADJ_NOUN_NOUN_R,ADJ_NOUN_NOUN_F1
0,ann_xlnet_htfl.pkl,41.79,30.41,35.2
1,ann_xlnet_wind.pkl,36.67,52.7,43.25
2,ann_xlnet_equi.pkl,41.49,62.08,49.74
3,ann_xlnet_corp.pkl,42.01,59.55,49.27
4,nes_xlnet_htfl.pkl,42.79,37.1,39.74
5,nes_xlnet_wind.pkl,41.79,49.74,45.42
6,nes_xlnet_equi.pkl,46.18,55.24,50.31
7,nes_xlnet_corp.pkl,44.65,55.5,49.49


In [23]:
results = []
for d, p in zip(domain_paths, preds_paths):
    pre, rec, f1 = term_evaluation(d,p, 'adj_adj_noun')
    results.append([p,pre, rec, f1])
adj_adj_noun = pd.DataFrame(results,columns=['DOMAIN','ADJ_ADJ_NOUN_P','ADJ_ADJ_NOUN_R','ADJ_ADJ_NOUN_F1'])
adj_adj_noun

Unnamed: 0,DOMAIN,ADJ_ADJ_NOUN_P,ADJ_ADJ_NOUN_R,ADJ_ADJ_NOUN_F1
0,ann_xlnet_htfl.pkl,42.77,30.83,35.83
1,ann_xlnet_wind.pkl,37.21,52.8,43.65
2,ann_xlnet_equi.pkl,41.37,62.08,49.65
3,ann_xlnet_corp.pkl,41.52,59.44,48.89
4,nes_xlnet_htfl.pkl,43.45,37.45,40.23
5,nes_xlnet_wind.pkl,42.1,49.67,45.57
6,nes_xlnet_equi.pkl,45.98,55.24,50.19
7,nes_xlnet_corp.pkl,44.47,55.5,49.38


In [24]:
results = []
for d, p in zip(domain_paths, preds_paths):
    pre, rec, f1 = term_evaluation(d,p, 'noun_adj_noun')
    results.append([p,pre, rec, f1])
noun_adj_noun = pd.DataFrame(results,columns=['DOMAIN','NOUN_ADJ_NOUN_P','NOUN_ADJ_NOUN_R','NOUN_ADJ_NOUN_F1'])
noun_adj_noun

Unnamed: 0,DOMAIN,NOUN_ADJ_NOUN_P,NOUN_ADJ_NOUN_R,NOUN_ADJ_NOUN_F1
0,ann_xlnet_htfl.pkl,42.67,30.2,35.37
1,ann_xlnet_wind.pkl,37.19,52.8,43.64
2,ann_xlnet_equi.pkl,41.77,61.99,49.91
3,ann_xlnet_corp.pkl,41.98,59.33,49.17
4,nes_xlnet_htfl.pkl,43.33,36.94,39.88
5,nes_xlnet_wind.pkl,42.02,49.61,45.5
6,nes_xlnet_equi.pkl,46.32,55.17,50.36
7,nes_xlnet_corp.pkl,44.64,55.41,49.45


In [26]:
results = []
for d, p in zip(domain_paths, preds_paths):
    pre, rec, f1 = term_evaluation(d,p, 'noun_noun_noun')
    results.append([p,pre, rec, f1])
noun_noun_noun = pd.DataFrame(results,columns=['DOMAIN','NOUN_NOUN_NOUN_P','NOUN_NOUN_NOUN_R','NOUN_NOUN_NOUN_F1'])
noun_noun_noun

Unnamed: 0,DOMAIN,NOUN_NOUN_NOUN_P,NOUN_NOUN_NOUN_R,NOUN_NOUN_NOUN_F1
0,ann_xlnet_htfl.pkl,42.29,30.45,35.41
1,ann_xlnet_wind.pkl,36.66,52.89,43.3
2,ann_xlnet_equi.pkl,41.72,61.99,49.87
3,ann_xlnet_corp.pkl,42.08,59.33,49.24
4,nes_xlnet_htfl.pkl,43.19,37.06,39.89
5,nes_xlnet_wind.pkl,41.62,49.67,45.29
6,nes_xlnet_equi.pkl,46.4,55.17,50.41
7,nes_xlnet_corp.pkl,44.7,55.41,49.48


In [27]:
results = []
for d, p in zip(domain_paths, preds_paths):
    pre, rec, f1 = term_evaluation(d,p, 'noun_adp_noun')
    results.append([p,pre, rec, f1])
noun_adp_noun = pd.DataFrame(results,columns=['DOMAIN','NOUN_ADP_NOUN_P','NOUN_ADP_NOUN_R','NOUN_ADP_NOUN_F1'])
noun_adp_noun

Unnamed: 0,DOMAIN,NOUN_ADP_NOUN_P,NOUN_ADP_NOUN_R,NOUN_ADP_NOUN_F1
0,ann_xlnet_htfl.pkl,39.78,30.07,34.25
1,ann_xlnet_wind.pkl,34.89,52.8,42.02
2,ann_xlnet_equi.pkl,38.93,62.08,47.85
3,ann_xlnet_corp.pkl,38.09,60.19,46.66
4,nes_xlnet_htfl.pkl,41.0,36.91,38.85
5,nes_xlnet_wind.pkl,40.41,49.87,44.64
6,nes_xlnet_equi.pkl,43.08,55.3,48.43
7,nes_xlnet_corp.pkl,41.11,56.18,47.48


In [28]:
results = []
for d, p in zip(domain_paths, preds_paths):
    pre, rec, f1 = term_evaluation(d,p, 'noun_adp_adj_noun')
    results.append([p,pre, rec, f1])
noun_adp_adj_noun = pd.DataFrame(results,columns=['DOMAIN','NOUN_ADP_ADJ_NOUN_P','NOUN_ADP_ADJ_NOUN_R','NOUN_ADP_ADJ_NOUN_F1'])
noun_adp_adj_noun

Unnamed: 0,DOMAIN,NOUN_ADP_ADJ_NOUN_P,NOUN_ADP_ADJ_NOUN_R,NOUN_ADP_ADJ_NOUN_F1
0,ann_xlnet_htfl.pkl,41.85,30.03,34.97
1,ann_xlnet_wind.pkl,36.63,52.61,43.19
2,ann_xlnet_equi.pkl,41.29,61.99,49.57
3,ann_xlnet_corp.pkl,41.18,59.44,48.65
4,nes_xlnet_htfl.pkl,42.81,36.87,39.62
5,nes_xlnet_wind.pkl,41.65,49.61,45.28
6,nes_xlnet_equi.pkl,45.88,55.17,50.1
7,nes_xlnet_corp.pkl,43.75,55.75,49.03


In [29]:
results = []
for d, p in zip(domain_paths, preds_paths):
    pre, rec, f1 = term_evaluation(d,p, 'noun_adp_noun_noun')
    results.append([p,pre, rec, f1])
noun_adp_noun_noun = pd.DataFrame(results,columns=['DOMAIN','NOUN_ADP_NOUN_NOUN_P','NOUN_ADP_NOUN_NOUN_R','NOUN_ADP_NOUN_NOUN_F1'])
noun_adp_noun_noun

Unnamed: 0,DOMAIN,NOUN_ADP_NOUN_NOUN_P,NOUN_ADP_NOUN_NOUN_R,NOUN_ADP_NOUN_NOUN_F1
0,ann_xlnet_htfl.pkl,41.83,30.03,34.96
1,ann_xlnet_wind.pkl,36.84,52.61,43.33
2,ann_xlnet_equi.pkl,41.82,61.99,49.95
3,ann_xlnet_corp.pkl,41.57,59.33,48.89
4,nes_xlnet_htfl.pkl,42.79,36.87,39.61
5,nes_xlnet_wind.pkl,41.79,49.61,45.37
6,nes_xlnet_equi.pkl,46.32,55.17,50.36
7,nes_xlnet_corp.pkl,44.41,55.58,49.37


In [30]:
results = []
for d, p in zip(domain_paths, preds_paths):
    pre, rec, f1 = term_evaluation(d,p, 'adj_noun_adp_adj_noun')
    results.append([p,pre, rec, f1])
adj_noun_adp_adj_noun = pd.DataFrame(results,columns=['DOMAIN','ADJ_NOUN_ADP_ADJ_NOUN_P','ADJ_NOUN_ADP_ADJ_NOUN_R','ADJ_NOUN_ADP_ADJ_NOUN_F1'])
adj_noun_adp_adj_noun

Unnamed: 0,DOMAIN,ADJ_NOUN_ADP_ADJ_NOUN_P,ADJ_NOUN_ADP_ADJ_NOUN_R,ADJ_NOUN_ADP_ADJ_NOUN_F1
0,ann_xlnet_htfl.pkl,42.58,30.03,35.22
1,ann_xlnet_wind.pkl,37.52,52.61,43.8
2,ann_xlnet_equi.pkl,42.07,61.99,50.12
3,ann_xlnet_corp.pkl,42.37,59.33,49.44
4,nes_xlnet_htfl.pkl,43.44,36.87,39.89
5,nes_xlnet_wind.pkl,42.37,49.61,45.71
6,nes_xlnet_equi.pkl,46.7,55.17,50.58
7,nes_xlnet_corp.pkl,44.77,55.41,49.52


In [31]:
results = []
for d, p in zip(domain_paths, preds_paths):
    pre, rec, f1 = term_evaluation(d,p, 'adv_adj_noun')
    results.append([p,pre, rec, f1])
adv_adj_noun = pd.DataFrame(results,columns=['DOMAIN','ADV_ADJ_NOUN_P','ADV_ADJ_NOUN_R','ADV_ADJ_NOUN_F1'])
adv_adj_noun

Unnamed: 0,DOMAIN,ADV_ADJ_NOUN_P,ADV_ADJ_NOUN_R,ADV_ADJ_NOUN_F1
0,ann_xlnet_htfl.pkl,42.66,30.03,35.25
1,ann_xlnet_wind.pkl,37.32,52.61,43.67
2,ann_xlnet_equi.pkl,41.44,61.99,49.67
3,ann_xlnet_corp.pkl,42.21,59.33,49.33
4,nes_xlnet_htfl.pkl,43.46,36.87,39.89
5,nes_xlnet_wind.pkl,42.07,49.61,45.53
6,nes_xlnet_equi.pkl,46.03,55.17,50.19
7,nes_xlnet_corp.pkl,44.74,55.41,49.51


In [32]:
results = []
for d, p in zip(domain_paths, preds_paths):
    pre, rec, f1 = term_evaluation(d,p, 'noun_pron')
    results.append([p,pre, rec, f1])
noun_pron = pd.DataFrame(results,columns=['DOMAIN','NOUN_PRON_P','NOUN_PRON_R','NOUN_PRON_F1'])
noun_pron

Unnamed: 0,DOMAIN,NOUN_PRON_P,NOUN_PRON_R,NOUN_PRON_F1
0,ann_xlnet_htfl.pkl,42.43,30.37,35.4
1,ann_xlnet_wind.pkl,36.68,52.61,43.22
2,ann_xlnet_equi.pkl,41.6,61.99,49.79
3,ann_xlnet_corp.pkl,42.32,59.44,49.44
4,nes_xlnet_htfl.pkl,43.33,37.21,40.04
5,nes_xlnet_wind.pkl,41.56,49.61,45.23
6,nes_xlnet_equi.pkl,46.52,55.17,50.48
7,nes_xlnet_corp.pkl,44.95,55.41,49.63


In [33]:
results = []
for d, p in zip(domain_paths, preds_paths):
    pre, rec, f1 = term_evaluation(d,p, 'adj_noun_adj_noun')
    results.append([p,pre, rec, f1])
adj_noun_adj_noun = pd.DataFrame(results,columns=['DOMAIN','ADJ_NOUN_ADJ_NOUN_P','ADJ_NOUN_ADJ_NOUN_R','ADJ_NOUN_ADJ_NOUN_F1'])
adj_noun_adj_noun

Unnamed: 0,DOMAIN,ADJ_NOUN_ADJ_NOUN_P,ADJ_NOUN_ADJ_NOUN_R,ADJ_NOUN_ADJ_NOUN_F1
0,ann_xlnet_htfl.pkl,42.92,30.03,35.34
1,ann_xlnet_wind.pkl,37.71,52.61,43.93
2,ann_xlnet_equi.pkl,42.19,61.99,50.21
3,ann_xlnet_corp.pkl,42.47,59.33,49.5
4,nes_xlnet_htfl.pkl,43.66,36.87,39.98
5,nes_xlnet_wind.pkl,42.47,49.61,45.76
6,nes_xlnet_equi.pkl,46.75,55.17,50.61
7,nes_xlnet_corp.pkl,45.05,55.41,49.7


In [34]:
df_ls = [adj_noun, noun_noun, adj_adj_noun, adj_noun_noun, noun_adj_noun, 
        noun_noun_noun , noun_adp_noun, noun_adp_adj_noun, adj_noun_adp_noun, noun_adp_noun_noun,
        adj_noun_adp_adj_noun, adv_adj_noun, noun_pron, adj_noun_adj_noun]

NameError: name 'adj_noun_adp_noun' is not defined

In [None]:
for d in df_ls:
    raw_res = raw_res.merge(d, on='DOMAIN', how='left')
raw_res

In [None]:
raw_res.to_csv('en_iate_comparison.csv')