In [1]:
import  os
import pandas as pd
import numpy as np
from warnings import filterwarnings
filterwarnings('ignore')

### PubMed Data Wrangling

In [2]:
path = os.path.join('PubMed','docs_keys')

In [3]:
indices = set([f[:-4]  for f in os.listdir(path)])

In [4]:
pd.read_csv(r'{}\10933267.key'.format(path),sep='/n',header=None)

Unnamed: 0,0
0,Adult
1,Alleles
2,Biological Markers
3,Body Mass Index
4,Cardiovascular Diseases
5,Child
6,"Child, Preschool"
7,Ethnic Groups
8,Female
9,Fibrinogen


In [5]:
buff =open(r'{}\10933267.txt'.format(path),mode='r')

In [6]:
txt = buff.read()

### Metrics Definition

In [7]:
def precision_at_k(prediction,ground_truth,k):
    
    pak= sum([1 if  p==g else 0  for p in prediction[:k] for g in ground_truth])/k
    
    return pak    

In [8]:
def recall_at_k(prediction,ground_truth,k):
    
    rak =  sum([1 if  p==g else 0 for g in ground_truth for p in prediction[:k]])/len(ground_truth)
    
    return rak

In [9]:
def f1_at_k(prediction,ground_truth,k):
    
    pak = precision_at_k(prediction,ground_truth,k)
    rak = recall_at_k(prediction,ground_truth,k)
    
    
    try:
        f1ak = 2 * (pak*rak)/(pak+rak) 
    except :
        f1ak = 0
        
    return f1ak
           

### keyBoost test with one example

In [10]:
import pandas as pd
import numpy as np
from keyBoost_lib.keyBoost import *

In [11]:
import spacy

nlp = spacy.load('en_core_web_sm')

stopwords = nlp.Defaults.stop_words

In [12]:
keyboost = KeyBoost(transformers_model='distilbert-base-nli-mean-tokens')

In [14]:
keywords = keyboost.extract_keywords(text=txt,
                       language='en',
                       n_top=10,
                       keyphrases_ngram_max=2,
                       stopwords=stopwords,
                       consensus='statistical',
                       models=['yake','textrank'])

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus


In [15]:
keywords

['Plasma Fibrinogen',
 'Fibrinogen Level',
 'Fibrinogen',
 'heart rate',
 'Columbia University',
 'abstract',
 'children',
 'study',
 'studies',
 'medline']

In [15]:
keywords_stat = [k for k in keywords]

In [16]:
keywords_stat

['Plasma Fibrinogen',
 'Fibrinogen Level',
 'Fibrinogen',
 'heart rate',
 'Columbia University',
 'abstract',
 'children',
 'study',
 'studies',
 'medline']

In [17]:
gt = list(pd.read_csv(r'{}\10933267.key'.format(path),sep='/n',header=None)[0])

In [18]:
gt

['Adult',
 'Alleles',
 'Biological Markers',
 'Body Mass Index',
 'Cardiovascular Diseases',
 'Child',
 'Child, Preschool',
 'Ethnic Groups',
 'Female',
 'Fibrinogen',
 'Heart Rate',
 'Humans',
 'Linear Models',
 'Male',
 'Physical Fitness',
 'Questionnaires',
 'Risk Factors']

In [19]:
len(gt)

17

In [20]:
f1_at_k(prediction=keywords,
       ground_truth=gt,
       k=10)

0.07407407407407408

In [21]:
precision_at_k(prediction=keywords,
       ground_truth=gt,
       k=10)

0.1

In [22]:
recall_at_k(prediction=keywords,
       ground_truth=gt,
       k=10)

0.058823529411764705

In [23]:
f1_at_k(prediction=keywords_stat,
       ground_truth=gt,
       k=10)

0.07407407407407408

In [24]:
precision_at_k(prediction=keywords_stat,
       ground_truth=gt,
       k=10)

0.1

In [25]:
recall_at_k(prediction=keywords_stat,
       ground_truth=gt,
       k=10)

0.058823529411764705

### Statistical Consensus - YAKE and  Text Rank

In [18]:
import tqdm

In [19]:
f1s = list()

In [20]:
for i in tqdm.autonotebook.tqdm(indices) :
    gt = list(pd.read_csv(r'{}\{}.key'.format(path,i),sep='/n',header=None)[0])
    
    buff =open(r'{}\{}.txt'.format(path,i),mode='r')
    txt = buff.read()
    
    
    keywords = keyboost.extract_keywords(text=txt,
                       language='en',
                       n_top=10,
                       keyphrases_ngram_max=3,
                       stopwords=stopwords,
                       consensus='statistical',
                       models=['yake','textrank'])
    
    
    f1s.append(f1_at_k(prediction=keywords,ground_truth=gt,k=10))
    print('Current Micro Average F1@10 Score ',sum(f1s)/len(f1s))
    
    
    
    
    

  0%|          | 0/500 [00:00<?, ?it/s]

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.0
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.0
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.0
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No 

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.012656274116375984
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.012260765550239234
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.011889227200231984
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model

DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009804943792454942
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
No DSM behind data
[False, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009641528062580694
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
No DSM behind data
[False, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009483470225489206
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current

DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.00785925105344343
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[False, True]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.007769941382381573
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.007682638670219982
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[False, True]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.007597276018328649
*** statistical discrim

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.008214634467297609
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.008144423916295065
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.00952818787826351
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keywor

No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009988019804580823
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009919136909376817
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009851197615476976
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009203011593675101
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009150120722447082
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009097834318318813
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model

DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.010966217771966567
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.010912196994764763
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.01085870583302572
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, Fa

DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.010877888439964031
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.010830798013817
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.010784113539619514
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.011033819134906036
*** statistical discrimin

No DSM behind data
* keyword extraction model 2 *
No DSM behind data
[False, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.011571740648910077
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.011527404477841457
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.011483406750826795
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Dedup

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.011290038643875904
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.011250972766215433
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.011212176308400898
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.011060861306618819
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.011026078723893603
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.010991514213787353
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper

DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.010570722013925293
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[False, True]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.010540170794231868
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.010509795662260018
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper 

DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.010405441230924158
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.010377619195547355
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.010349945544359229
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Curr

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.011211853544950887
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.011184032568412548
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.011156349319480834
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.011072738538875736
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
No DSM behind data
[False, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.011047107199665375
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.011146430402562347
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Conso

DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.011211078178191756
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.011186706269108731
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.01116244009498919
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extrac

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.011326004415742337
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.01130284285251996
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.011279775826290327
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 

In [21]:
sum(f1s)/len(f1s)

0.011235998491582702

### Rank Consensus - YAKE and  Text Rank

In [29]:
f1s = list()

In [30]:
for i in tqdm.autonotebook.tqdm(indices) :
    gt = list(pd.read_csv(r'{}\{}.key'.format(path,i),sep='/n',header=None)[0])
    
    buff =open(r'{}\{}.txt'.format(path,i),mode='r')
    txt = buff.read()
    
    
    keywords = keyboost.extract_keywords(text=txt,
                       language='en',
                       n_top=10,
                       keyphrases_ngram_max=3,
                       stopwords=stopwords,
                       consensus='rank',
                       models=['yake','textrank'])
    
    
    f1s.append(f1_at_k(prediction=keywords,ground_truth=gt,k=10))
    print('Current Micro Average F1@10 Score ',sum(f1s)/len(f1s))
    
    
    

  0%|          | 0/500 [00:00<?, ?it/s]

Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0196078431372549
Current Micro Average F1@10 Score  0.014705882352941176
Current Micro Average F1@10 Score  0.039350912778904665
Current Micro Average F1@10 Score  0.03279242731575389
Current Micro Average F1@10 Score  0.028107794842074763
Current Micro Average F1@10 Score  0.024594320486815417
Current Micro Average F1@10 Score  0.021861618210502594
Current Micro Average F1@10 Score  0.019675456389452332
Current Micro Average F1@10 Score  0.017886778535865757
Current Micro Average F1@10 Score  0.016396213657876944
Current Micro Average F1@10 Score  0.015134966453424872
Current Micro Average F1@10 Score  0.014053897421037381
Current Micro Average F1@10 Score  0.013116970926301557
Current Micro Average F1@10 Score  0.012297160243407709
Current Micro Average F1@10 Score  0.011573797876148432
Current Micro Average F1@10 Score  0.010930809105251297
Current Micro Average F1@10 S

Current Micro Average F1@10 Score  0.019295796270400165
Current Micro Average F1@10 Score  0.01967997814141801
Current Micro Average F1@10 Score  0.019549647160349015
Current Micro Average F1@10 Score  0.019421031060609877
Current Micro Average F1@10 Score  0.01929409621707648
Current Micro Average F1@10 Score  0.019890310599505277
Current Micro Average F1@10 Score  0.01976198601499234
Current Micro Average F1@10 Score  0.019635306617460338
Current Micro Average F1@10 Score  0.019510240970215368
Current Micro Average F1@10 Score  0.019386758432429194
Current Micro Average F1@10 Score  0.019264829134112028
Current Micro Average F1@10 Score  0.01964442395202383
Current Micro Average F1@10 Score  0.019522408896421196
Current Micro Average F1@10 Score  0.01940190019952971
Current Micro Average F1@10 Score  0.019282870136955906
Current Micro Average F1@10 Score  0.019165291660511053
Current Micro Average F1@10 Score  0.019049138377720078
Current Micro Average F1@10 Score  0.0189343845320711

Current Micro Average F1@10 Score  0.019107524129078218
Current Micro Average F1@10 Score  0.019043404920591377
Current Micro Average F1@10 Score  0.018979714603131203
Current Micro Average F1@10 Score  0.019131502651228295
Current Micro Average F1@10 Score  0.019067942841755774
Current Micro Average F1@10 Score  0.0190048039581738
Current Micro Average F1@10 Score  0.0189420818328993
Current Micro Average F1@10 Score  0.018879772353185817
Current Micro Average F1@10 Score  0.018817871460224553
Current Micro Average F1@10 Score  0.018756375148263033
Current Micro Average F1@10 Score  0.018695279463741003
Current Micro Average F1@10 Score  0.018634580504443145
Current Micro Average F1@10 Score  0.018574274418668248
Current Micro Average F1@10 Score  0.01877242192054351
Current Micro Average F1@10 Score  0.018712060435268452
Current Micro Average F1@10 Score  0.018652085882591308
Current Micro Average F1@10 Score  0.018592494553892933
Current Micro Average F1@10 Score  0.0185332827877977

Current Micro Average F1@10 Score  0.018587226539440924
Current Micro Average F1@10 Score  0.01854555114361258
Current Micro Average F1@10 Score  0.018504062214879668
Current Micro Average F1@10 Score  0.018462758504578597
Current Micro Average F1@10 Score  0.018421638775169735
Current Micro Average F1@10 Score  0.018380701800113804
Current Micro Average F1@10 Score  0.018806745290112385
Current Micro Average F1@10 Score  0.018986376384603286
Current Micro Average F1@10 Score  0.01910214281957893
Current Micro Average F1@10 Score  0.01906006761513052
Current Micro Average F1@10 Score  0.01901817735663573
Current Micro Average F1@10 Score  0.01897647082734486
Current Micro Average F1@10 Score  0.01893494682115811
Current Micro Average F1@10 Score  0.018893604142509294
Current Micro Average F1@10 Score  0.018852441606251102
Current Micro Average F1@10 Score  0.01881145803754186
Current Micro Average F1@10 Score  0.018770652271733745
Current Micro Average F1@10 Score  0.01873002315426246


In [32]:
sum(f1s)/len(f1s)

0.0187905426550915


### YAKE!

In [74]:
import yake

In [78]:
kw_extractor = yake.KeywordExtractor()



In [None]:
f1s = list()

In [81]:
for i in tqdm.autonotebook.tqdm(indices) :
    gt = list(pd.read_csv(r'{}\{}.key'.format(path,i),sep='/n',header=None)[0])
    
    buff =open(r'{}\{}.txt'.format(path,i),mode='r')
    txt = buff.read()
    
    keywords = kw_extractor.extract_keywords(txt)
    
    keywords = [k[0] for k in keywords]
    
    f1s.append(f1_at_k(prediction=keywords,ground_truth=gt,k=10))
    
    print('Current Micro Average F1@10 Score ',sum(f1s)/len(f1s))

  0%|          | 0/500 [00:00<?, ?it/s]

Current Micro Average F1@10 Score  0.01611270843654652
Current Micro Average F1@10 Score  0.015665133202198006
Current Micro Average F1@10 Score  0.015241751223760222
Current Micro Average F1@10 Score  0.0174722314547139
Current Micro Average F1@10 Score  0.01892356075264621
Current Micro Average F1@10 Score  0.018450471733830055
Current Micro Average F1@10 Score  0.018000460228126885
Current Micro Average F1@10 Score  0.01757187784174291
Current Micro Average F1@10 Score  0.017163229519841915
Current Micro Average F1@10 Score  0.018340554240785945
Current Micro Average F1@10 Score  0.019579076903747904
Current Micro Average F1@10 Score  0.01915344479714469
Current Micro Average F1@10 Score  0.01874592469507778
Current Micro Average F1@10 Score  0.01835538459726366
Current Micro Average F1@10 Score  0.017980784911605218
Current Micro Average F1@10 Score  0.017621169213373113
Current Micro Average F1@10 Score  0.018980685929564433
Current Micro Average F1@10 Score  0.018615672738611273


Current Micro Average F1@10 Score  0.02279001868793221
Current Micro Average F1@10 Score  0.022666159890715187
Current Micro Average F1@10 Score  0.023035040598922623
Current Micro Average F1@10 Score  0.02291119629462734
Current Micro Average F1@10 Score  0.02278867652834591
Current Micro Average F1@10 Score  0.023894956072180406
Current Micro Average F1@10 Score  0.023768527733174158
Current Micro Average F1@10 Score  0.023643430218789032
Current Micro Average F1@10 Score  0.023519642626020503
Current Micro Average F1@10 Score  0.023397144487343313
Current Micro Average F1@10 Score  0.023726467685539102
Current Micro Average F1@10 Score  0.023604166305716736
Current Micro Average F1@10 Score  0.024459920275821733
Current Micro Average F1@10 Score  0.024335124764210396
Current Micro Average F1@10 Score  0.02460206868379855
Current Micro Average F1@10 Score  0.024477815811658154
Current Micro Average F1@10 Score  0.024354811712102083
Current Micro Average F1@10 Score  0.024788593209097

Current Micro Average F1@10 Score  0.02722061754608137
Current Micro Average F1@10 Score  0.027515133758291968
Current Micro Average F1@10 Score  0.027432505728987786
Current Micro Average F1@10 Score  0.02774957407510858
Current Micro Average F1@10 Score  0.027666739525630646
Current Micro Average F1@10 Score  0.02758439803894722
Current Micro Average F1@10 Score  0.027785150849627183
Current Micro Average F1@10 Score  0.027702946261314677
Current Micro Average F1@10 Score  0.027621226655824074
Current Micro Average F1@10 Score  0.02753998775389518
Current Micro Average F1@10 Score  0.0274592253264644
Current Micro Average F1@10 Score  0.027556145897457373
Current Micro Average F1@10 Score  0.02747580727968053
Current Micro Average F1@10 Score  0.02739593574689076
Current Micro Average F1@10 Score  0.02760638230994325
Current Micro Average F1@10 Score  0.02789952067917612
Current Micro Average F1@10 Score  0.027819118602290884
Current Micro Average F1@10 Score  0.027739178606307292
Cu

Current Micro Average F1@10 Score  0.027599940966367278
Current Micro Average F1@10 Score  0.027696762077008334
Current Micro Average F1@10 Score  0.027847080658968813
Current Micro Average F1@10 Score  0.027789306632705395
Current Micro Average F1@10 Score  0.02773177183636439
Current Micro Average F1@10 Score  0.027674474787115703
Current Micro Average F1@10 Score  0.027617414014358764
Current Micro Average F1@10 Score  0.027560588059596706
Current Micro Average F1@10 Score  0.027503995476312115
Current Micro Average F1@10 Score  0.02744763482984426
Current Micro Average F1@10 Score  0.02750511524032629
Current Micro Average F1@10 Score  0.027929174428911456
Current Micro Average F1@10 Score  0.027872292199932
Current Micro Average F1@10 Score  0.027815641199525638
Current Micro Average F1@10 Score  0.0279436197993422
Current Micro Average F1@10 Score  0.027887053767359726
Current Micro Average F1@10 Score  0.027980360879090463
Current Micro Average F1@10 Score  0.02803915623446096
C

In [82]:
sum(f1s)/len(f1s)

0.027915824578115093

## Text Rank

In [29]:
from gensim.summarization import keywords

In [30]:
f1s = list()

In [32]:
for i in tqdm.autonotebook.tqdm(indices) :
    gt = list(pd.read_csv(r'{}\{}.key'.format(path,i),sep='/n',header=None)[0])
    
    buff =open(r'{}\{}.txt'.format(path,i),mode='r')
    txt = buff.read()
    
    kw = keywords(txt,scores=True,words=10)
    
    keys = [k[0] for k in kw]
    print(keys)
    
    f1s.append(f1_at_k(prediction=keys,ground_truth=gt,k=10))
    
    print('Current Micro Average F1@10 Score ',sum(f1s)/len(f1s))

  0%|          | 0/500 [00:00<?, ?it/s]

['hcv', 'fractions', 'fraction', 'patients', 'patient', 'tested', 'test', 'cell', 'cells', 'samples', 'sample', 'sampling', 'rna', 'pcr', 'assay', 'assays', 'semen']
Current Micro Average F1@10 Score  0.0
['percent', 'reviews', 'review', 'reviewer', 'trained reviewers', 'training', 'participants', 'participant', 'cochrane', 'survey', 'surveys', 'surveyed', 'responded', 'respondents', 'respond', 'time', 'times', 'timely', 'increase', 'increasing', 'increased', 'barriers', 'barrier']
Current Micro Average F1@10 Score  0.0
['following', 'followed', 'concentration', 'concentrations', 'concentrate', 'danofloxacin', 'treatment', 'treatments', 'animals', 'animal', 'samples', 'sampling', 'sample', 'clinically', 'clinical', 'lung', 'lungs', 'infusion', 'infused', 'model', 'modeling']
Current Micro Average F1@10 Score  0.0
['sequence', 'sequences', 'sequenced', 'estimate', 'estimation', 'estimates', 'estimated', 'estimating', 'virus', 'infections', 'infects', 'infection', 'infected', 'cougars', 

['plants', 'plant', 'planted', 'transcripts', 'transcription', 'transcriptional', 'leaves', 'transcript levels', 'sequence', 'sequences', 'sequenced', 'sequencing', 'figure', 'pinene', 'level', 'volatile', 'volatiles', 'products', 'product', 'protein']
Current Micro Average F1@10 Score  0.0
['rna', 'enzyme', 'enzymes', 'rnase', 'protein', 'proteins', 'lane', 'purification', 'cells', 'cell', 'tag', 'tagged', 'agarose', 'antibody', 'antibodies']
Current Micro Average F1@10 Score  0.0
['dna', 'probes', 'probe', 'amplification', 'primers', 'primer', 'mycobacterium', 'pcr', 'specific', 'specificity', 'specifically', 'specificities', 'targeting', 'target', 'targets', 'sequencing', 'sequence', 'sequences', 'detection', 'detected', 'detect']
Current Micro Average F1@10 Score  0.0
['cells', 'cell', 'culture', 'cultures', 'htlv', 'concentrations', 'concentration', 'infection', 'infected', 'infections', 'inhibited', 'inhibit', 'inhibition', 'inhibiting', 'level', 'levels', 'different', 'differenc

['kinase', 'kinases', 'protein', 'proteins', 'sequence', 'sequences', 'sequencing', 'sequenced', 'sequencer', 'lane', 'lanes', 'antibody', 'antibodies', 'stresses', 'stress', 'stressed', 'plants', 'plant', 'peanut', 'peanuts', 'sty', 'fig']
Current Micro Average F1@10 Score  0.0
['pla', 'plas', 'sequence', 'sequenced', 'sequencing', 'cdna sequences', 'protein', 'proteins', 'atpla', 'atplas', 'gene', 'genes', 'cdnas', 'arabidopsis', 'plant', 'plants', 'enzyme', 'enzymes', 'expression', 'expressed', 'expressing', 'express']
Current Micro Average F1@10 Score  0.0
['disease', 'diseases', 'included', 'include', 'infection', 'infections', 'infected', 'infect', 'grouping includes', 'group including', 'species', 'isolation', 'isolates', 'isolated', 'isolate', 'rgm', 'patients', 'patient', 'groups', 'grouped', 'studies', 'study', 'studied', 'studying', 'clinical', 'clinically', 'clinic', 'clinics']
Current Micro Average F1@10 Score  0.0
['proteins', 'protein', 'fluorescent', 'fluorescence', 'ds

['antibiotics', 'antibiotic', 'cell', 'cells', 'times', 'time', 'labeling', 'labeled', 'method', 'methods', 'acid', 'acidic', 'periplasm', 'periplasmic', 'beta', 'mum', 'blar']
Current Micro Average F1@10 Score  0.0
['mprs', 'mpr', 'lysosome', 'lysosomal', 'lysosomes', 'lamp', 'hepatocytes', 'hepatocyte', 'containing', 'contain', 'contains', 'cells', 'cell', 'figure', 'endosomes', 'endosome', 'endosomal', 'proteins', 'protein', 'membrane', 'membranes']
Current Micro Average F1@10 Score  0.0
['proteins', 'ptdins', 'activate protein', 'domains', 'domain', 'kinase', 'kinases', 'plant', 'plants', 'gene', 'genes', 'actively', 'activation', 'active', 'activities', 'activates', 'activity', 'activated', 'activator', 'activating', 'arabidopsis', 'cells', 'cell', 'enzymes', 'enzyme']
Current Micro Average F1@10 Score  0.0
['conformational', 'conformations', 'conformation', 'data', 'temperatures', 'temperature', 'experiments', 'experiment', 'analysis', 'concentration', 'concentrations', 'values',

['cells', 'cell', 'inversin', 'protein', 'proteins', 'figure', 'figures', 'membrane', 'membranes', 'antibodies', 'antibody', 'stain', 'stained', 'staining', 'catenins', 'fractions', 'fractionation', 'fraction', 'fractionated', 'fractionations', 'buffer', 'buffers']
Current Micro Average F1@10 Score  0.0
['health', 'information', 'informed', 'electronic', 'including', 'include', 'includes', 'standards', 'standardization', 'standard', 'standardizing', 'library', 'libraries', 'data', 'access', 'accessible', 'digital', 'record', 'records']
Current Micro Average F1@10 Score  0.0
['abstract', 'children', 'study', 'studies', 'medline', 'fibrinogen', 'fitness', 'fitted', 'fit', 'level', 'levels', 'measured', 'measures', 'measurements', 'measure', 'measurement', 'family', 'families', 'analyses']
Current Micro Average F1@10 Score  0.0
['sequencing', 'sequence', 'sequences', 'sequenced', 'sequencer', 'dna', 'cot', 'cloning', 'cloned', 'clones', 'clone', 'genome', 'genomic', 'genomes', 'based', 'b

['protein', 'proteins', 'figures', 'figure', 'survivin', 'survivins', 'xincenp', 'xsurvivin', 'xaurora', 'interactions', 'interaction', 'interacts', 'interact', 'interacting', 'interacted', 'kinase', 'kinases', 'cell', 'cells', 'extracts', 'extract']
Current Micro Average F1@10 Score  0.0
['pcr', 'cdna', 'cdnas', 'cells', 'cell', 'single', 'dna', 'rtqpcr', 'rtqpcrs', 'precipitation', 'precipitated', 'amplification', 'reaction', 'reactions', 'quantitative']
Current Micro Average F1@10 Score  0.0
['antibiotic', 'antibiotics', 'concentrations', 'concentration', 'pharmacodynamics', 'pharmacodynamic', 'infection', 'infections', 'study', 'studies', 'gentamicin', 'clinical', 'surgery', 'patients', 'patient', 'doses', 'dosing', 'dose']
Current Micro Average F1@10 Score  0.0
['tumour', 'tumours', 'patients', 'fas', 'analysis', 'patient survival', 'fasl', 'expression', 'expressed', 'express', 'expressions', 'breast', 'immune', 'tamoxifen']
Current Micro Average F1@10 Score  0.0
['auditory', 'ada

['peptide', 'peptides', 'mass', 'masses', 'hcv', 'primers', 'primer', 'sequencing', 'sequences', 'sequence', 'sequenced', 'pcr', 'samples', 'sample', 'followed', 'following', 'follow', 'mutation', 'mutations', 'quasispecies']
Current Micro Average F1@10 Score  0.0
['gene', 'genes', 'infection', 'infected', 'infections', 'cells', 'cell', 'hsv', 'neurons', 'neuronal', 'neuron', 'expression', 'expressed', 'ganglia', 'microarray', 'microarrays', 'mouse', 'pcr']
Current Micro Average F1@10 Score  0.0
['thiamine', 'thdp', 'cells', 'cell', 'mitochondria', 'uptake', 'transporter', 'transporters', 'transport', 'mitochondrial', 'lymphoblasts', 'lymphoblast', 'concentration', 'concentrations', 'membrane', 'membranes']
Current Micro Average F1@10 Score  0.0
['dna', 'pfge', 'bacterial', 'strains', 'strain', 'methods', 'degradation', 'gel', 'method analysis', 'difficile', 'remained']
Current Micro Average F1@10 Score  0.0
['cells', 'cell', 'hisactophilin', 'proteins', 'protein', 'phosphorylation', '

['dna', 'structural', 'structures', 'structurally', 'structure', 'epsilona', 'adducts', 'adduct', 'base', 'bases', 'site based', 'conformational', 'conformation', 'conformations', 'apng', 'complexed', 'complex', 'complexes', 'figure', 'sites']
Current Micro Average F1@10 Score  0.0
['tsg', 'gag', 'protein', 'proteins', 'cells', 'cell', 'contains', 'containing', 'contained', 'fraction', 'fractionation', 'fractions', 'fractionated', 'antibody', 'antibodies', 'endosomes', 'endosomal', 'endosome', 'binds', 'binding', 'bind', 'expression', 'express', 'expressing', 'expressed']
Current Micro Average F1@10 Score  0.0
['informatics', 'researchers', 'research', 'different', 'genomics', 'genomic', 'genome', 'collaboration', 'collaborations', 'collaborative', 'collaborators', 'collaborating', 'data', 'involved', 'involving', 'involve', 'involves', 'neuroinformatics', 'clinical', 'levels', 'level']
Current Micro Average F1@10 Score  0.0
['nrtis', 'nrti', 'associated', 'association', 'amino', 'ddi'

['cells', 'cell', 'tat', 'protein', 'proteins', 'mvv', 'transfected', 'transfection', 'france', 'flag', 'virus', 'plasmid', 'plasmids', 'domain', 'domains']
Current Micro Average F1@10 Score  0.0
['genes', 'gene', 'resistance', 'resistant', 'isolated', 'isolates', 'isolate', 'isolation', 'plasmid', 'plasmids', 'dna', 'sequences', 'sequence', 'sequenced', 'sequencing', 'sequencer', 'pcr', 'mic', 'mics', 'staphylococcal', 'genetic', 'genetics']
Current Micro Average F1@10 Score  0.0
['genes', 'gene', 'nisin', 'lisr', 'mug', 'growth', 'pcr', 'plasmids', 'plasmid', 'sensitivity', 'sensitivities', 'sensitive', 'mutant', 'mutants', 'significant']
Current Micro Average F1@10 Score  0.0
['patient', 'patients', 'health', 'tests', 'testing', 'test', 'tested', 'care', 'data', 'medical', 'included', 'includes', 'including', 'include', 'variable', 'variables', 'study', 'studies', 'behaviors', 'behavioral', 'behavior']
Current Micro Average F1@10 Score  0.0
['mutants', 'mutant', 'site', 'sites', 'dn

['cells', 'cell', 'hsv', 'infections', 'infection', 'infecting', 'dna', 'infected protein', 'proteins', 'lane', 'lanes', 'effects', 'effective', 'effect', 'icp', 'viral', 'gene', 'genes']
Current Micro Average F1@10 Score  0.0
['adolescents', 'adolescent', 'adolescence', 'cells', 'cell', 'age', 'ages', 'university', 'differences', 'difference', 'study', 'studies', 'studying', 'studied', 'hiv', 'gender', 'subjects', 'subject', 'children']
Current Micro Average F1@10 Score  0.0
['fragment', 'fragments', 'aflp', 'typing', 'types', 'type', 'phage', 'phages', 'sequenced', 'sequences', 'sequence', 'sequencer', 'sequencing', 'dna', 'plasmid', 'plasmids', 'genes', 'gene', 'serovar', 'serovars based', 'base', 'bases']
Current Micro Average F1@10 Score  0.0
['agl', 'senescence', 'senescing', 'senesced', 'senesce', 'plants', 'plant', 'seed', 'seeds', 'tissues', 'tissue', 'overexpression', 'overexpressing', 'overexpressed', 'overexpress', 'abscission', 'activity', 'activation', 'activated', 'activ

['sequence', 'sequencing', 'sequences', 'sequenced', 'cleavage', 'dna', 'primer', 'amino', 'reaction', 'reactions', 'fragment', 'fragments', 'fragmentation', 'products', 'product', 'containing', 'contained', 'acid', 'acids', 'acidic']
Current Micro Average F1@10 Score  0.0
['cells', 'cell', 'shiv', 'gag', 'dna', 'vaccine', 'vaccines', 'vaccination', 'vaccinated', 'vaccinations', 'virus', 'sev', 'levels', 'level', 'macaque', 'macaques', 'showed']
Current Micro Average F1@10 Score  0.0
['cells', 'cell', 'rna', 'rnas', 'hcv', 'mutations', 'mutation', 'lines', 'line', 'replication', 'replicate', 'replicated', 'replicating', 'viral', 'subgenomes', 'subgenomic', 'subgenome', 'results', 'resulting', 'resulted', 'result', 'replicons', 'replicon']
Current Micro Average F1@10 Score  0.0
['responses', 'response', 'questionnaire', 'questionnaires', 'knowledge', 'respondents', 'respondent', 'surveys', 'survey', 'surveyed', 'analysis', 'tables', 'table', 'tabled', 'term', 'terms', 'sample', 'questio

['ertapenem', 'infection', 'infected', 'infections', 'study', 'studies', 'studied', 'animals', 'animal', 'concentration', 'concentrations', 'observed', 'observation', 'observations', 'model', 'models', 'modeled', 'modeling', 'fig', 'method']
Current Micro Average F1@10 Score  0.0
['strains', 'strain', 'resistance', 'resistant', 'resistances', 'dna', 'mug', 'plasmid', 'plasmids', 'profiling', 'profiles', 'profile', 'patterns', 'pattern', 'serovar', 'serovars', 'germany', 'salmonella']
Current Micro Average F1@10 Score  0.0
['haps', 'proteins', 'protein', 'sequence', 'sequences', 'sequencing', 'sequenced', 'cell', 'cells', 'enzymes', 'enzymic', 'enzyme', 'human', 'genes', 'gene', 'expression', 'expressed', 'express', 'substrate', 'substrates', 'hydrolases', 'hydrolase']
Current Micro Average F1@10 Score  0.0
['phytochromes', 'phytochrome', 'protein', 'proteins', 'light', 'lights', 'levels', 'level', 'plants', 'plant', 'seedlings', 'seedling', 'extracts', 'extractable', 'extracted', 'extr

['protein', 'proteins', 'cell', 'cells', 'antibodies', 'antibody', 'replication', 'replicate', 'replicating', 'transdominant', 'transdominance', 'expressed', 'expressing', 'expression', 'express', 'mutants', 'mutant', 'results', 'result', 'resulted', 'resulting', 'fragment', 'fragments', 'wild']
Current Micro Average F1@10 Score  0.0
['dna binding', 'enzyme', 'enzymes', 'subunit', 'subunits', 'dimerization', 'dimerized', 'dimer', 'dimers', 'dimerize', 'dimeric', 'loop', 'loops', 'bind', 'binds', 'activities', 'active', 'activity', 'mutant', 'mutants', 'structural', 'structure', 'structures', 'figure']
Current Micro Average F1@10 Score  0.0
['sheep', 'chromosomal', 'chromosome', 'chromosomes', 'hybridization', 'hybrid', 'hybridized', 'hybridize', 'hybridizing', 'hybrids', 'cell', 'cells', 'enjsrv', 'enjsrvs', 'dna', 'jsrv', 'genome', 'genomic', 'genomes', 'contain', 'containing', 'contains', 'contained', 'lines', 'line']
Current Micro Average F1@10 Score  0.0
['reviews', 'review', 'stud

['cholera', 'cholerae', 'water', 'waters', 'disease', 'diseases', 'environment', 'environments', 'temperature', 'temperatures', 'environmental', 'environmentally', 'furthermore', 'study', 'studies', 'studied', 'years', 'year', 'including', 'include', 'includes']
Current Micro Average F1@10 Score  0.0
['aldosterone', 'activation', 'active', 'activity', 'activate', 'activates', 'activated', 'activating', 'mapk', 'mapks', 'ras', 'cells', 'cell', 'protein', 'proteins', 'increases', 'increased', 'increasing', 'increase', 'signal', 'signaling', 'kinase', 'kinases', 'steroid', 'steroids']
Current Micro Average F1@10 Score  0.0
['hospital', 'hospitals', 'hospitalizations', 'competitiveness', 'competitive', 'mortality', 'competition resource', 'table', 'markets', 'market', 'marketing', 'clinical', 'resources', 'patients', 'patient', 'increase', 'increased', 'increasing', 'increases', 'care']
Current Micro Average F1@10 Score  0.0
['cftrs', 'cftr mutants', 'proteins', 'protein', 'pest', 'cells',

['spt', 'transcriptional', 'saga', 'complex', 'complexes', 'activated transcription', 'salsa', 'activating', 'activation', 'activator', 'activities', 'activity', 'activators', 'active', 'activate', 'protein', 'proteins', 'function', 'functions', 'functional', 'subunits', 'subunit', 'genes', 'gene']
Current Micro Average F1@10 Score  0.0
['patients', 'patient', 'pylori', 'studied', 'study', 'studies', 'gastritis', 'infection', 'infected', 'antral', 'standards', 'standard', 'levels', 'level', 'correlate', 'correlated', 'correlation', 'correlations', 'ulcer', 'ulcers']
Current Micro Average F1@10 Score  0.0
['icu', 'icus', 'patients', 'patient', 'brain', 'organs', 'organ', 'ventilation', 'ventilated', 'neurological', 'paediatric', 'paediatrics', 'decision', 'decisions', 'donated', 'donation', 'donate', 'ethics', 'ethical', 'ethically']
Current Micro Average F1@10 Score  0.0
['axonemal', 'axonemes', 'axoneme', 'calcium', 'dynein', 'calmodulin', 'increase', 'increases', 'increasing', 'incre

['differences', 'difference', 'different', 'differing', 'differed', 'differs', 'subjects', 'subject', 'languages differ', 'trained', 'training', 'trainings', 'japanese', 'durational', 'duration', 'durations', 'discrimination', 'discriminate', 'discriminated', 'language', 'speech', 'responses', 'response', 'responsible', 'words', 'word']
Current Micro Average F1@10 Score  0.0
['cell', 'cells', 'activation', 'activities', 'activity', 'active', 'activate', 'activates', 'activated', 'activator', 'activating', 'protein', 'proteins', 'eif', 'eifs', 'increase', 'increased', 'increases', 'phosphorylation', 'phosphorylated', 'phosphorylates', 'pma', 'kinase', 'antibodies', 'alphacd antibody']
Current Micro Average F1@10 Score  0.0
['protein', 'proteins', 'membrane', 'membranes', 'transport', 'snare', 'snares', 'fusion', 'vesicles', 'vesicle', 'ykt', 'sec', 'cell', 'cells', 'strains', 'strain']
Current Micro Average F1@10 Score  0.0
['hss', 'effect', 'effective', 'cells', 'cell', 'hemodynamic ef

['forces', 'transcription', 'transcript', 'transcriptional', 'pausing', 'pause', 'pauses', 'paused', 'mechanical force', 'arrest', 'arrested', 'arrests', 'enzyme', 'enzymes', 'mechanism', 'mechanically', 'mechanisms', 'rna', 'dna', 'complex', 'complexes', 'rnap']
Current Micro Average F1@10 Score  0.0
['patients', 'patient', 'samples', 'sample', 'parasites', 'parasitic', 'parasite', 'antibodies', 'antibody', 'ppfp', 'positive', 'negative', 'tests', 'test', 'tested', 'testing', 'results', 'result', 'analysis']
Current Micro Average F1@10 Score  0.0
['patients', 'patient', 'specifically', 'specificities', 'specificity', 'leishmaniasis', 'sera', 'specific pattern', 'antigen', 'antigens', 'antigenic', 'bands', 'band', 'gel', 'gels', 'serum', 'immunoblot', 'immunoblots', 'immunoblotting', 'patterns']
Current Micro Average F1@10 Score  0.0
['test', 'tested', 'tests', 'testing', 'antigen', 'antigenic', 'antigens', 'parasite', 'parasites', 'parasitic', 'specific', 'specificities', 'specificity

In [33]:
sum(f1s)/len(f1s)

0.0