In [1]:
import  os
import pandas as pd
import numpy as np
from warnings import filterwarnings
filterwarnings('ignore')

### PubMed Data Wrangling

In [2]:
path = os.path.join('PubMed','docs_keys')

In [3]:
indices = set([f[:-4]  for f in os.listdir(path)])

In [4]:
pd.read_csv(r'{}\10933267.key'.format(path),sep='/n',header=None)

Unnamed: 0,0
0,Adult
1,Alleles
2,Biological Markers
3,Body Mass Index
4,Cardiovascular Diseases
5,Child
6,"Child, Preschool"
7,Ethnic Groups
8,Female
9,Fibrinogen


In [5]:
buff =open(r'{}\10933267.txt'.format(path),mode='r')

In [6]:
txt = buff.read()

### Metrics Definition

In [7]:
def precision_at_k(prediction,ground_truth,k):
    
    pak= sum([1 if  p==g else 0  for p in prediction[:k] for g in ground_truth])/k
    
    return pak    

In [8]:
def recall_at_k(prediction,ground_truth,k):
    
    rak =  sum([1 if  p==g else 0 for g in ground_truth for p in prediction[:k]])/len(ground_truth)
    
    return rak

In [9]:
def f1_at_k(prediction,ground_truth,k):
    
    pak = precision_at_k(prediction,ground_truth,k)
    rak = recall_at_k(prediction,ground_truth,k)
    
    
    try:
        f1ak = 2 * (pak*rak)/(pak+rak) 
    except :
        f1ak = 0
        
    return f1ak
           

### keyBoost test with one example

In [10]:
import pandas as pd
import numpy as np
from keyBoost_lib.keyBoost import *

###### Stopwords

In [18]:
import spacy

nlp = spacy.load('en_core_web_sm')

stopwords = nlp.Defaults.stop_words

In [12]:
keyboost = KeyBoost(transformers_model='distilbert-base-nli-mean-tokens')

In [15]:
keywords = keyboost.extract_keywords(text=txt,
                       language='en',
                       n_top=10,
                       keyphrases_ngram_max=2,
                       stopwords=stopwords,
                       consensus='statistical',
                       models=['yake','keybert'])

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !


In [16]:
keywords

[('Plasma Fibrinogen', 8.241170639758252),
 ('Fibrinogen Level', 6.452818330088773),
 ('pediatric cardiology', 4.315605640589661),
 ('department medicine', 3.883835479365052),
 ('clinics pediatric', 3.3304989620674945),
 ('university biomarkers', 3.1481494279580726),
 ('cardiology department', 3.1167098531116206),
 ('biochemistry university', 2.9008247724993153),
 ('medicine columbia', 2.8882489425607347),
 ('surgery coronary', 2.835849651149981)]

In [19]:
keywords_stat = [k[0] for k in keywords]

In [20]:
keywords_stat

['Plasma Fibrinogen',
 'Fibrinogen Level',
 'pediatric cardiology',
 'department medicine',
 'clinics pediatric',
 'university biomarkers',
 'cardiology department',
 'biochemistry university',
 'medicine columbia',
 'surgery coronary']

In [21]:
gt = list(pd.read_csv(r'{}\10933267.key'.format(path),sep='/n',header=None)[0])

In [22]:
gt

['Adult',
 'Alleles',
 'Biological Markers',
 'Body Mass Index',
 'Cardiovascular Diseases',
 'Child',
 'Child, Preschool',
 'Ethnic Groups',
 'Female',
 'Fibrinogen',
 'Heart Rate',
 'Humans',
 'Linear Models',
 'Male',
 'Physical Fitness',
 'Questionnaires',
 'Risk Factors']

In [23]:
len(gt)

17

In [24]:
f1_at_k(prediction=keywords,
       ground_truth=gt,
       k=10)

0

In [25]:
precision_at_k(prediction=keywords,
       ground_truth=gt,
       k=10)

0.0

In [26]:
recall_at_k(prediction=keywords,
       ground_truth=gt,
       k=10)

0.0

In [27]:
f1_at_k(prediction=keywords_stat,
       ground_truth=gt,
       k=10)

0

In [28]:
precision_at_k(prediction=keywords_stat,
       ground_truth=gt,
       k=10)

0.0

In [29]:
recall_at_k(prediction=keywords_stat,
       ground_truth=gt,
       k=10)

0.0

### Statistical Consensus - YAKE and keyBert

In [13]:
import tqdm

In [34]:
f1s = list()

In [35]:
for i in tqdm.autonotebook.tqdm(indices) :
    gt = list(pd.read_csv(r'{}\{}.key'.format(path,i),sep='/n',header=None)[0])
    
    buff =open(r'{}\{}.txt'.format(path,i),mode='r')
    txt = buff.read()
    
    
    keywords = keyboost.extract_keywords(text=txt,
                       language='en',
                       n_top=10,
                       keyphrases_ngram_max=3,
                       stopwords=stopwords,
                       consensus='statistical',
                       models=['yake','keybert'])
    keywords = [k[0] for k in keywords]
    
    
    f1s.append(f1_at_k(prediction=keywords,ground_truth=gt,k=10))
    print('Current Micro Average F1@10 Score ',sum(f1s)/len(f1s))
    
    
    
    
    

  0%|          | 0/500 [00:00<?, ?it/s]

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.0
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.0
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
No DSM behind data
[False, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.0
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Cur

Done !
Current Micro Average F1@10 Score  0.0060426929392446635
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.005841269841269842
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.00565284178187404
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.0054761904761904765
*** statistical discriminativeness ch

DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.006087994963557182
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.007458647959753339
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.0073343371604241165
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper

DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009628781922634997
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009519363946241418
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009412404800778032
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[False, True]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009307822525213831
*** statistical discri

DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.008042604504373621
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.00797386429493453
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.007906289173791017
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Con

DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.006434083603498897
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.0063900145377215065
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.00634654505107034
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Co

No DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[False, True]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.007743727234583644
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.007699223055074542
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.008199445181834662
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[False, 

DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.007895558944658742
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
No DSM behind data
[False, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.007856664565621015
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.008263784302598807
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, 

DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009078533204853677
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009039232195308855
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009000269987570456
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extra

*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
No DSM behind data
[False, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.00891313052173403
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.00887884925049659
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.008844830670992773
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK


*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009610579285242381
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
No DSM behind data
[False, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009577209218279734
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009544070086036551
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[False, True]
Fallback to Rank Based Conse

Done !
Current Micro Average F1@10 Score  0.009791326986972411
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.00976034177498832
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009729552053300661
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.00969895597766135
*** statistical discriminativeness check

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.010583928902238037
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.010553161667057113
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[False, True]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.010522572792659847
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK

DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009785141815276677
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009758837670611954
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009732674566937391
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation 

No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009325719033669118
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009302462876477922
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009279322421561311
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009323505402672402
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
No DSM behind data
[False, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009301822831968513
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.009280240876441904
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
No DSM behind data

*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009263960550530885
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009243733562429288
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.0092235947093521
*** statistical discriminativeness check **
* keyword extrac

No DSM behind data
[True, False]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.00903729173882834
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.009018696488336924
*** statistical discriminativeness check **
* keyword extraction model 1 *
No DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[False, True]
Fallback to Rank Based Consensus
Current Micro Average F1@10 Score  0.0090001776043773
*** statistical discriminativeness check **
* keyword extraction model 1 *
DSM Params OK
KS OK
Proper DSM behind data
* keyword extraction model 2 *
DSM Params OK
KS OK
Proper DSM behind data
[True, True]
Consolidation + Rank Extraction
Deduplication
Done !
Current Micro Average F1@10 Score  0.0

In [36]:
sum(f1s)/len(f1s)

0.00895664917713968

### Rank Consensus - YAKE and  keyBert

In [39]:
f1s = list()

In [40]:
for i in tqdm.autonotebook.tqdm(indices) :
    gt = list(pd.read_csv(r'{}\{}.key'.format(path,i),sep='/n',header=None)[0])
    
    buff =open(r'{}\{}.txt'.format(path,i),mode='r')
    txt = buff.read()
    
    
    keywords = keyboost.extract_keywords(text=txt,
                       language='en',
                       n_top=10,
                       keyphrases_ngram_max=3,
                       stopwords=stopwords,
                       consensus='rank',
                       models=['yake','keybert'])
    
    keywords = [k[0] for k in keywords]
    
    
    f1s.append(f1_at_k(prediction=keywords,ground_truth=gt,k=10))
    print('Current Micro Average F1@10 Score ',sum(f1s)/len(f1s))
    
    
    

  0%|          | 0/500 [00:00<?, ?it/s]

Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@

Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@

Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@10 Score  0.0
Current Micro Average F1@

In [41]:
sum(f1s)/len(f1s)

0.0


### YAKE!

In [74]:
import yake

In [78]:
kw_extractor = yake.KeywordExtractor()



In [None]:
f1s = list()

In [81]:
for i in tqdm.autonotebook.tqdm(indices) :
    gt = list(pd.read_csv(r'{}\{}.key'.format(path,i),sep='/n',header=None)[0])
    
    buff =open(r'{}\{}.txt'.format(path,i),mode='r')
    txt = buff.read()
    
    keywords = kw_extractor.extract_keywords(txt)
    
    keywords = [k[0] for k in keywords]
    
    f1s.append(f1_at_k(prediction=keywords,ground_truth=gt,k=10))
    
    print('Current Micro Average F1@10 Score ',sum(f1s)/len(f1s))

  0%|          | 0/500 [00:00<?, ?it/s]

Current Micro Average F1@10 Score  0.01611270843654652
Current Micro Average F1@10 Score  0.015665133202198006
Current Micro Average F1@10 Score  0.015241751223760222
Current Micro Average F1@10 Score  0.0174722314547139
Current Micro Average F1@10 Score  0.01892356075264621
Current Micro Average F1@10 Score  0.018450471733830055
Current Micro Average F1@10 Score  0.018000460228126885
Current Micro Average F1@10 Score  0.01757187784174291
Current Micro Average F1@10 Score  0.017163229519841915
Current Micro Average F1@10 Score  0.018340554240785945
Current Micro Average F1@10 Score  0.019579076903747904
Current Micro Average F1@10 Score  0.01915344479714469
Current Micro Average F1@10 Score  0.01874592469507778
Current Micro Average F1@10 Score  0.01835538459726366
Current Micro Average F1@10 Score  0.017980784911605218
Current Micro Average F1@10 Score  0.017621169213373113
Current Micro Average F1@10 Score  0.018980685929564433
Current Micro Average F1@10 Score  0.018615672738611273


Current Micro Average F1@10 Score  0.02279001868793221
Current Micro Average F1@10 Score  0.022666159890715187
Current Micro Average F1@10 Score  0.023035040598922623
Current Micro Average F1@10 Score  0.02291119629462734
Current Micro Average F1@10 Score  0.02278867652834591
Current Micro Average F1@10 Score  0.023894956072180406
Current Micro Average F1@10 Score  0.023768527733174158
Current Micro Average F1@10 Score  0.023643430218789032
Current Micro Average F1@10 Score  0.023519642626020503
Current Micro Average F1@10 Score  0.023397144487343313
Current Micro Average F1@10 Score  0.023726467685539102
Current Micro Average F1@10 Score  0.023604166305716736
Current Micro Average F1@10 Score  0.024459920275821733
Current Micro Average F1@10 Score  0.024335124764210396
Current Micro Average F1@10 Score  0.02460206868379855
Current Micro Average F1@10 Score  0.024477815811658154
Current Micro Average F1@10 Score  0.024354811712102083
Current Micro Average F1@10 Score  0.024788593209097

Current Micro Average F1@10 Score  0.02722061754608137
Current Micro Average F1@10 Score  0.027515133758291968
Current Micro Average F1@10 Score  0.027432505728987786
Current Micro Average F1@10 Score  0.02774957407510858
Current Micro Average F1@10 Score  0.027666739525630646
Current Micro Average F1@10 Score  0.02758439803894722
Current Micro Average F1@10 Score  0.027785150849627183
Current Micro Average F1@10 Score  0.027702946261314677
Current Micro Average F1@10 Score  0.027621226655824074
Current Micro Average F1@10 Score  0.02753998775389518
Current Micro Average F1@10 Score  0.0274592253264644
Current Micro Average F1@10 Score  0.027556145897457373
Current Micro Average F1@10 Score  0.02747580727968053
Current Micro Average F1@10 Score  0.02739593574689076
Current Micro Average F1@10 Score  0.02760638230994325
Current Micro Average F1@10 Score  0.02789952067917612
Current Micro Average F1@10 Score  0.027819118602290884
Current Micro Average F1@10 Score  0.027739178606307292
Cu

Current Micro Average F1@10 Score  0.027599940966367278
Current Micro Average F1@10 Score  0.027696762077008334
Current Micro Average F1@10 Score  0.027847080658968813
Current Micro Average F1@10 Score  0.027789306632705395
Current Micro Average F1@10 Score  0.02773177183636439
Current Micro Average F1@10 Score  0.027674474787115703
Current Micro Average F1@10 Score  0.027617414014358764
Current Micro Average F1@10 Score  0.027560588059596706
Current Micro Average F1@10 Score  0.027503995476312115
Current Micro Average F1@10 Score  0.02744763482984426
Current Micro Average F1@10 Score  0.02750511524032629
Current Micro Average F1@10 Score  0.027929174428911456
Current Micro Average F1@10 Score  0.027872292199932
Current Micro Average F1@10 Score  0.027815641199525638
Current Micro Average F1@10 Score  0.0279436197993422
Current Micro Average F1@10 Score  0.027887053767359726
Current Micro Average F1@10 Score  0.027980360879090463
Current Micro Average F1@10 Score  0.02803915623446096
C

In [82]:
sum(f1s)/len(f1s)

0.027915824578115093

## keyBert

In [19]:
from keybert import *

In [20]:
f1s = list()

In [21]:
for i in tqdm.autonotebook.tqdm(indices) :
    gt = list(pd.read_csv(r'{}\{}.key'.format(path,i),sep='/n',header=None)[0])
    
    buff =open(r'{}\{}.txt'.format(path,i),mode='r')
    txt = buff.read()
    
    kb = KeyBERT('distilbert-base-nli-mean-tokens')

    keywords = kb.extract_keywords(txt,
                                      keyphrase_ngram_range = (1,3),
                                      top_n = 10,
                                      stop_words=stopwords)
    
    keywords = [k[0] for k in keywords]
    
    print(keywords)
    
    f1s.append(f1_at_k(prediction=keywords,ground_truth=gt,k=10))
    
    print('Current Micro Average F1@10 Score ',sum(f1s)/len(f1s))

  0%|          | 0/500 [00:00<?, ?it/s]

['petals chlorophyll degraded', 'lipid metabolism removed', 'lipoxygenase lox senescence', 'decline lipid occurs', 'remobilization nutrients floral', 'photooxidation lipids loss', 'studies extraction lipids', 'cycle detoxifying enzymes', 'antioxidants decline sepals', 'lipids loss chlorophyll']
Current Micro Average F1@10 Score  0.0
['ideal pcr amplification', 'efficiency cdna synthesis', 'pcr efficiency significantly', 'doubling dna pcr', 'similar pcr amplification', 'kinetics valid pcr', 'pcr reliable method', 'cdna synthesis improve', 'efficiencies pcr reactions', 'slope ideal pcr']
Current Micro Average F1@10 Score  0.0
['peanut genomic dna', 'family kinases peanut', 'isolated peanut seedlings', 'kinase peanut protein', 'immunohistochemistry microscopy peanut', 'dna isolated peanut', 'genes peanut seedlings', 'peanut sty kinase', 'specificity kinase peanut', 'gene genome peanut']
Current Micro Average F1@10 Score  0.0
['antibiotic pharmacodynamics surgical', 'antibiotic surgery dur

['cells recombinant baculoviruses', 'recombinant baculoviruses cell', 'transfected infectious dna', 'ul9 control cytomegalovirus', 'cells infected baculovirus', 'variants recombinant baculoviruses', 'generate recombinant baculoviruses', 'recombinant virus bearing', 'cells infected recombinant', 'mutation potentiating ul9']
Current Micro Average F1@10 Score  0.0
['replication needs herpesvirus', 'needs herpesvirus genes', 'herpesvirus colocalized cdk2', 'disease like herpesviruses', 'cytomegalovirus polyomavirus cdk2', 'disease herpesvirus colocalized', 'herpesvirus genes protect', 'report herpesvirus replication', 'herpesvirus genes', 'target cells herpesvirus']
Current Micro Average F1@10 Score  0.0
['pharyngitis pneumonia common', 'patients causing pneumonia', 'pneumonia tracheobronchitis abscess', 'dogmatis common pathogens', 'pneumonia common manifestation', 'causing pneumonia patients', 'immunoglobulin deficiency frequently', 'septica common pathogens', 'mild form pneumonia', 'pne

['repress lhc genes', 'lhcb4 mrnas reinhardtii', 'reduces lhc mrna', 'lhc mrnas repressed', 'lhc genes redox', 'lhcb4 lhcb5 mrnas', 'repressed lhc mrnas', 'reinhardtii cells photoautotrophically', 'repressing lhc mrnas', 'repress lhc mrnas']
Current Micro Average F1@10 Score  0.0
['808 blood donors', 'retested 400 serum', 'collected 808 donors', 'infections 808 blood', 'million blood donations', 'ghana thirty donors', 'ghanaian blood donation', 'female 762 donors', 'blood donations worldwide', '808 donors labeled']
Current Micro Average F1@10 Score  0.0
['decreased levels dehydroepiandrosterone', 'adrenal insufficiency dehydroepiandrosterone', 'illness dopamine dosage', 'adrenal axis sepsis', 'dehydroepiandrosterone sulphate severe', 'cortisol tumour necrosis', 'adrenal hpa hypofunction', 'critical illness adrenal', 'immunostimulating antiglucocorticoid effects', 'hormones immunostimulating antiglucocorticoid']
Current Micro Average F1@10 Score  0.0
['waxy barley cultivars', 'amylose b

['hisactophilin involved osmoprotection', 'electrophoresis preparation cytoskeletal', 'polymerization vitro cells', 'proteins gel electrophoresis', 'geneticin hyperosmotic shock', 'cells osmosensitive hisactophilin', 'shock hisactophilin immunoprecipitated', 'shock actin phosphorylation', 'stress immunoprecipitated hisactophilin', 'autoradiography fluorescence microscopy']
Current Micro Average F1@10 Score  0.0
['readmissions frequent outpatient', 'twice patients dr', 'frequent outpatient clinic', 'reviewed twice patients', 'saw twice patients', 'physician hospital outpatient', 'patients admitted emergency', 'patients followed outpatients', 'physicians different outpatient', 'outpatient clinic follow']
Current Micro Average F1@10 Score  0.0
['peptostreptococcus sp arcanobacterium', 'actinomyces novel bacterium', 'peptostreptococcus micros peptostreptococcus', 'profile novel bacterium', 'novel bacterium possess', 'sources novel bacterium', 'novel bacterium forms', 'streptococcus sp stra

['reoviruses infect l929', 'coxsackievirus adenovirus receptor', 'jam1 coxsackievirus adenovirus', 'coxsackievirus adenovirus', 'antibodies inhibit reovirus', 'reovirus infection cellular', 'reovirus induced myocarditis', 'apoptosis reovirus infected', 'apoptosis reovirus infection', 't3 reoviruses infect']
Current Micro Average F1@10 Score  0.0
['viruses overexpression atpase', 'containing viruses overexpression', 'monkey virus mpmv', 'viruses contain mutated', 'pfizer monkey virus', 'monkey virus pppy', 'mutation pronounced virus', 'viruses overexpression', 'exchangeable retroviruses viruses', 'virus recovered ultracentrifugation']
Current Micro Average F1@10 Score  0.0
['dideoxynucleotide technology dna', 'technology dna sequencing', 'ehrlichial genomic dna', 'examined dna platys', 'assay platys dna', 'platys dna analyzed', 'amplify platys dna', 'dna sequencing reactions', 'molecular tools phylogenetic', 'genomic dna digested']
Current Micro Average F1@10 Score  0.0
['nutrition harv

['aids vaccine dna', 'vector promising vaccine', 'promising aids vaccine', 'dna vaccines recombinant', 'vaccines recombinant viral', 'aids vaccines inducing', 'vaccine dna priming', 'efficacy aids vaccine', 'macaques vaccinated dna', 'promising vaccine tool']
Current Micro Average F1@10 Score  0.0
['ypt1p gtpgammas recombinant', 'binding ypt1p gtpgammas', 'gtpgammas recombinant', 'gtpgammas presence recombinant', 'gtpgammas uptake ypt1p', 'ypt1p mediated fusion', 'gtpgammas recombinant gst', 'activity ypt1p gtpgammas', 'ypt32p recombinant ypt1p', 'recombinant ypt1p ypt32p']
Current Micro Average F1@10 Score  0.0
['simulated bacteremia detected', 'processed simulated bacteremia', 'bacteremia detected child', 'simulated bacteremia assess', 'simulated bacteremia blood', 'detection simulated bacteremia', 'detecting odontogenic bacteremia', 'milliliter detection bacteremia', 'sample simulated bacteremia', 'estimating bacteremia simulated']
Current Micro Average F1@10 Score  0.0
['apoptosis 

['passaging recombinant viruses', 'recovery recombinant viruses', 'editing paramyxoviruses recombinant', 'compensatory mutation sequencing', 'amplify mrnas dna', 'eggs recombinant viruses', 'recombinant viruses transfections', 'recombinant virus possessing', 'interferon recombinant virus', 'mutation restoring defects']
Current Micro Average F1@10 Score  0.0
['chewing tobacco bacillus', 'brands chewing tobacco', 'chewing tobacco commercially', 'skoal spearmint tobacco', 'chewing tobacco sold', 'chewing tobacco brands', 'tobacco oral mucosa', 'tobacco bacillus species', 'chewing tobacco 106', 'constituents chewing tobacco']
Current Micro Average F1@10 Score  0.0
['rna packaging retroviruses', 'leukemia virus blv', 'viral rna assembling', 'rna packaging deltaretroviruses', 'virus blv deltaretroviruses', 'cell leukemia virus', 'hypothesized viral rna', 'leukemia virus spleen', 'bovine leukemia virus', 'leukemia virus types']
Current Micro Average F1@10 Score  0.0
['cancer women worldwide',

['seed desiccation effect', 'pollination transgenic plants', 'plants pollinated transgenic', 'seed desiccation consequences', 'seed desiccation genetic', 'seed desiccation delays', 'pollinated transgenic plants', 'seed desiccation controlled', 'floral overexpression phenotype', 'seed desiccation affected']
Current Micro Average F1@10 Score  0.0
['search complete genome', 'masterpuretm complete dna', 'purification genomic dna', 'coli extended genomes', 'organism transcriptome genome', 'extended genomes organisms', 'genome wide transcriptome', 'computational approaches genome', 'accurate catalogs genes', 'transcriptome genome array']
Current Micro Average F1@10 Score  0.0
['hepatitis frequently causes', 'spectrometry mutation detection', 'agent hepatitis frequently', 'myeloblastosis virus reverse', 'peptides detect mutations', 'hepatitis mass spectrometry', 'myeloblastosis virus', 'detection hepatitis virus', 'mass spectrometry mutation', 'mutation detection hepatitis']
Current Micro Ave

['counterlabeling antibiotic microliters', 'periplasmic concentration antibiotic', 'antibiotic microliters cells', 'dilutions antibiotics antibiotic', 'concentration inhibiting bacterial', 'antibiotic concentration inoculum', 'antibiotic microliters', 'antibiotic centrifugation pellet', 'antibiotic radiolabeled microliters', 'fluorescent antibiotic enterobacter']
Current Micro Average F1@10 Score  0.0
['protein database addition', 'examining genomes transcriptomes', 'build database proteins', 'database gene ontology', 'files gene ontology', 'gene ontology report', 'probes genes significantly', 'protein annotations analysis', 'annotating genes organisms', 'comprehensive understanding genes']
Current Micro Average F1@10 Score  0.0
['nucleic acids successful', 'strong telomerase inhibitors', 'oligonucleotide concentration remarkable', 'oligonucleotide aptamers potent', 'profiles powerful multivariate', 'powerful multivariate approach', 'nucleic acids allows', 'crystallography nmr spectros

['activation fission yeast', 'fission yeast tup1', 'fission yeast sty1', 'tup12delta mutant induction', 'role fission yeast', 'stress fission yeast', 'salt stress schizosaccharomyces', 'kinase atf1 transcription', 'derepression activation fission', 'transcriptional response oxidative']
Current Micro Average F1@10 Score  0.0
['hydrolysis seed proteins', 'enrichment seed protein', 'carboxylase embryo refix', 'cotyledon embryos cultured', 'experiments cultured embryos', 'hydrolysis seed protein', 'vitro biochemical data', 'reston embryos cultured', 'micrograms protein embryo', 'protein oil biosynthesis']
Current Micro Average F1@10 Score  0.0
['cd45 staining improves', 'reliable cd4 lymphocyte', 'efficient monitoring hiv', 'volumetric cd45 staining', 'cd4 value lymphocytes', 'cd45 antigen lymphocytes', 'scatter cd4 lymphocytes', 'identify lymphocytes cd45', 'cd4 values lymphocytes', 'optimally count blood']
Current Micro Average F1@10 Score  0.0
['rubella virus p150', 'viruses genes robo5

['asymptomatic schoolchildren germany', 'schoolchildren germany demonstrated', 'schoolchildren southern germany', 'schoolchildren germany', 'schoolchildren german nationality', '63 schoolchildren german', 'children born germany', 'immunoblotting infected children', 'infected schoolchildren estonia', 'schoolchildren german']
Current Micro Average F1@10 Score  0.0
['aldosterone induction proteins', 'aldosterone phosphorylation mapk', 'aldosterone activate mapk', 'kinase mapk extracellular', 'activation map kinase', 'aldosterone stimulated mapk', 'aldosterone activating mapk', 'kinase cascade aldosterone', 'aldosterone stimulates mapk', 'phosphorylation exogenous myelin']
Current Micro Average F1@10 Score  0.0
['resistance streptococcus pneumoniae', 'streptococcus pneumoniae haemophilus', 'antimicrobials tested pneumoniae', 'streptococcus pneumoniae', 'levofloxacin resistance pneumoniae', 'resistant pneumoniae ampicillin', 'antibiotic resistance streptococcus', 'penicillin resistant pneum

['streptococcus pneumoniae isolates', 'streptococcus pneumoniae blood', 'streptococcus pneumoniae', 'isolates streptococcus pneumoniae', 'patient counted pneumoniae', 'resistance streptococcus pneumoniae', 'site streptococcus pneumoniae', 'erythromycin resistant pneumoniae', 'pneumoniae tn1207 homologue', 'tn1207 pneumoniae genbank']
Current Micro Average F1@10 Score  0.0
['increased herbicide metabolism', 'phenylurea increased herbicide', 'increase herbicide metabolism', 'increased herbicide tolerance', 'transformants increased herbicide', 'increase herbicide tolerance', 'isoproturon increase herbicide', 'increase herbicide resistance', 'phenylurea herbicides transgenic', 'cases increased herbicide']
Current Micro Average F1@10 Score  0.0
['brandenburg ruptured abdominal', '32 cases salmonella', 'fever patient hypercholesterolemia', 'abscess caused salmonella', 'pilot pain leg', 'ruptured abdominal aorta', 'patients atherosclerosis diabetes', 'typhi salmonella bacteremia', 'negative g

['new antibacterial drugs', 'discovery new antibacterial', 'search new antibiotics', 'new antibiotics', 'new antibiotics carried', 'new antibacterial', 'bacterial growth new', 'methionine bacterial translation', 'synthesizing machineries bacterial', 'new class peptide']
Current Micro Average F1@10 Score  0.0
['genes computational dna', 'reporter plasmids dna', 'dna microarray experiments', 'thermodynamics protein dna', 'identified novel dna', 'university genome sequencing', 'genome determined dna', 'elegans dna microarray', 'computational dna pattern', 'dna sequencing microinjection']
Current Micro Average F1@10 Score  0.0
['salmonella isolates annually', '000 salmonella isolates', 'widespread salmonella serovars', 'bacteria widespread salmonella', 'salmonellosis salmonella serovars', 'receives 000 salmonella', 'years national salmonella', 'global salmonella survey', 'salmonella serovars predominate', 'human salmonellosis salmonella']
Current Micro Average F1@10 Score  0.0
['lymphocyte

['dna isolated tomato', 'tomato genomic dna', 'genomic tomato dna', 'novel tomato rna', 'tomato mrna extracted', 'analysis tomato genomic', 'virp1 mrna tomato', 'tomato hybridized virp1', 'hybridization experiments tomato', 'cloning dna sequencing']
Current Micro Average F1@10 Score  0.0
['trials hypothermia cardiac', 'therapy cardiac arrest', 'cardiac arrest improve', 'medicine studies hypothermia', 'cardiac arrest trials', 'hypothermic therapy cardiac', 'following cardiac arrest', 'hypothermia cardiac arrest', 'treatment cardiac arrest', 'hypothermia treatment cardiac']
Current Micro Average F1@10 Score  0.0
['cytomegalovirus promoter put535', 'encephalitis virus genomes', 'simian immunodeficiency virus', 'bovine immunodeficiency virus', 'virus bovine immunodeficiency', 'srlv hiv vpr', 'plasmid dnas virus', 'hiv3 ccaagcttcgagctattccttc3 cells', 'virus hiv simian', 'feline immunodeficiency virus']
Current Micro Average F1@10 Score  0.0
['antibiotic inhibits synthesis', 'antifungal ant

['enzymes arabidopsis recombinant', 'plant polyadenylation signals', 'ketoglutarate reductase polyadenylation', 'plant polyadenylation sites', 'cdna coding dna', 'mrna hybridization monofunctional', 'arabidopsis recombinant lkr', 'primers amplify dna', 'primers amplified dnas', 'genetic control mrnas']
Current Micro Average F1@10 Score  0.0
['simian immunodeficiency virus', 'simian immunodeficiency viruses', 'immunodeficiency virus mac251', 'feline immunodeficiency virus', 'hiv simian immunodeficiency', 'anti immunoglobulin antibodies', 'immunodeficiency viruses hiv', 'immunodeficiency virus hiv', 'packaging simian immunodeficiency', 'cd56 anti immunoglobulin']
Current Micro Average F1@10 Score  0.0
['sequencing facilitates genome', 'dna cloning strategies', 'cloning sequencing techniques', 'dna blotting experiments', 'detailed genetic map', 'chloroplast dna desired', 'analysis dna cloning', 'cloning techniques genome', 'dna cloned sequenced', 'analysis complete genome']
Current Micro 

['l74v mutation multivariate', 't69d mutation experience', 't69d mutation confers', 'v118i q151m mutations', 'transcriptase human immunodeficiency', 'observed t69d mutation', 'antiretroviral regimens mutations', 'selection t69d mutations', 't69d mutations multi', 'stavudine t69d mutation']
Current Micro Average F1@10 Score  0.0
['collected norwegian sheep', '38 sheep infected', '43 sheep infected', 'sheep infected phagocytophila', 'inoculation study lambs', 'flocks norway sheep', 'sheep flocks norway', 'norwegian sheep breeds', 'investigated norway lambs', 'phagocytophila infected lambs']
Current Micro Average F1@10 Score  0.0
['genotyping sampled sheep', 'sheep postmortem analysis', 'biopsy samples sheep', 'flocks infected sheep', 'neurologic disease sheep', 'sheep tested wyoming', 'asymptomatic sheep diagnosis', 'flocks sheep wyoming', 'samples collected sheep', 'infected sheep lambing']
Current Micro Average F1@10 Score  0.0
['year genomic investigators', 'decade human genome', 'mon

['haemophilus influenzae tested', 'tested multistep azithromycin', 'haemophilus influenzae amoxicillin', 'influenzae tested multistep', 'mutants selected amoxicillin', 'selected azithromycin mutations', 'mutants selected clarithromycin', '10 strains tested', 'lactam antibiotics tested', '10 influenzae strains']
Current Micro Average F1@10 Score  0.0
['patients proven pneumonia', 'radiographically confirmed pneumonia', 'confirmed legionella pneumonia', 'legionella pneumonia included', 'ld acute pneumonia', 'pneumonia showed radiological', 'pneumonia fulfilled epidemiological', 'patients diagnosed urinary', 'legionella urinary antigen', 'studies urinary antigen']
Current Micro Average F1@10 Score  0.0
['nonspecific antibodies inhibited', 'opsonophagocytosis assay antibodies', 'samples children antibody', 'specificity antibodies inhibited', 'antibodies samples children', 'antibody binding inhibited', 'antibodies inhibited type', 'methods measuring antibodies', 'probable antibodies removed

['carcinogenesis suggested prolonged', 'cervical carcinogenesis vitamin', 'cervical squamous carcinoma', 'dysplasias immortalized keratinocytes', 'carcinoma cells retinoic', 'cervical dysplasias vivo', 'cervical carcinoma cells', 'cervical dysplasias retinoids', 'including cervical cancer', 'differentiation cervical carcinoma']
Current Micro Average F1@10 Score  0.0
['lymphocytes reexposed antigen', 'zdv antiviral therapy', 'zdv lymphocytes previously', 'cells reencounter antigen', 'suppressed zdv antigen', 'cytotoxic lymphocytes previously', 'zdv inhibition antigen', 'antiviral drugs lymphocyte', 'zdv related immunosuppression', 'lymphocyte culture reactions']
Current Micro Average F1@10 Score  0.0
['improve memory drosophila', 'learning fly drosophila', 'memory tests flies', 'conditioning test flies', 'training test flies', 'drosophila extended memory', 'flies learned spatial', 'training test brain', 'trained flies test', 'learning experiment flies']
Current Micro Average F1@10 Score

['cancer research meeting', 'designing cancer therapy', 'advances cancer therapeutics', 'cancer research ultimate', 'cancer therapeutics continually', 'meeting british cancer', 'cancer therapeutics candidate', 'experimental cancer models', 'meeting contributions cancer', 'technology cancer research']
Current Micro Average F1@10 Score  0.0
['iranian breast cancer', 'cancer iranian women', 'women breast cancer', 'breast cancer patients', 'breast cancer iran', 'breast cancer study', 'breast cancer studies', 'breast cancer iranian', 'breast cancer diagnosis', 'breast cancer cases']
Current Micro Average F1@10 Score  0.0
['speechmagic findings tests', 'medicine vocabulary ibm', 'radiology cardiology search', 'training ibm software', 'cardiology search medical', 'speech file transcribed', 'viavoice 98 ibm', 'scored trials clinician', 'philips speechmagic findings', 'samples sample medical']
Current Micro Average F1@10 Score  0.0
['kinase vegf stimulated', 'tyrosine kinase vegf', 'endothelial

['peptides novel antimicrobial', 'novel antimicrobial peptides', 'antibacterial peptides cathelicidin', 'sphingomyelin affinity antimicrobial', 'amphipathic antimicrobial peptides', 'antibacterial peptides diverse', 'peptides possess antibacterial', 'antimicrobial peptides cathelicidins', 'affinity antimicrobial peptides', 'helical antimicrobial peptides']
Current Micro Average F1@10 Score  0.0
['qtbp42 destabilizes cgg', 'dna destabilizing tetraplex', 'telomeric dna destabilizing', 'tetraplex dna destabilization', 'tetraplex dna destabilizing', 'dna destabilization assay', 'destabilization tetraplex dna', 'dna oligonucleotides deoxyoligonucleotides', 'truncation mutations oligonucleotide', 'dna destabilization conditions']
Current Micro Average F1@10 Score  0.0
['immunoblots sera patients', 'leishmaniasis studied immunoblot', 'examined patients antibody', 'detection antibodies patients', 'immunosorbent assay elisa', 'specific antileishmanial antibodies', 'patients mucocutaneous leishm

KeyboardInterrupt: 

In [None]:
sum(f1s)/len(f1s)