In [1]:
from xmen.linkers import default_ensemble
from xmen.data import filter_and_apply_threshold
from xmen.evaluation import evaluate, evaluate_at_k, error_analysis

  from .autonotebook import tqdm as notebook_tqdm


## Load dataset in BigBIO format

In [None]:
# Clone forked repo until loader with SNOMED CT layer is on HF Hub
!git clone -b grascco_snomed git@github.com:phlobo/biomedical.git

In [2]:
import datasets

# Load the 'grascco_snomed_bigbio_kb' configuration via the loader script
# inside the local `biomedical/` checkout and keep only the 'train' split.
# The annotation JSON files are read from `data_dir`, relative to the
# notebook's working directory.
ds = datasets.load_dataset(
    path='biomedical/bigbio/hub/hub_repos/grascco/grascco.py',
    name='grascco_snomed_bigbio_kb',
    data_dir='../gemtex_oncology/annotation/json/',
)['train']

## Remove literals for evaluation

In [4]:
# Rebuild each document's entity list without entities of type 'Literal';
# `ds` itself is left untouched and the filtered copy is used from here on.
ds_no_literals = ds.map(
    lambda doc: {
        'entities': [
            entity
            for entity in doc['entities']
            if entity['type'] != 'Literal'
        ]
    }
)

Map: 100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.20 examples/s]


## Predict using xMEN ensemble linker

In [5]:
# Build xMEN's default ensemble linker from the index stored under
# `xmen_index/index/`, with 10 for both the n-gram and SapBERT `k` settings
# and CUDA disabled.
linker = default_ensemble(
    'xmen_index/index/',
    k_ngram=10,
    k_sapbert=10,
    cuda=False,
)

In [6]:
# Run the ensemble linker over the literal-free dataset; the result is the
# input to both evaluation cells below.
predictions = linker.predict_batch(ds_no_literals)

Map: 100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  1.01 examples/s]


## Evaluation

In [7]:
# Report Recall@k for k = 1..4 against the gold annotations. The return value
# is bound to `_` so that only the printed metrics show up as cell output.
_ = evaluate_at_k(ds_no_literals, predictions, eval_k=list(range(1, 5)))

Recall@1 0.375
Recall@2 0.44642857142857145
Recall@3 0.5
Recall@4 0.5


In [8]:
# Build the per-mention error-analysis table comparing gold annotations with
# the linker's predictions.
# NOTE(review): 'nen' presumably selects the entity-normalization task — confirm against xmen docs.
ea_df = error_analysis(ds_no_literals, predictions, tasks=['nen'])

In [9]:
# Display the error-analysis table: one row per gold mention, with the gold
# concept, the top prediction and its score.
ea_df

Unnamed: 0,_word_len,_abbrev,gt_start,gt_end,gt_text,gold_type,gold_concept,pred_index,pred_index_score,pred_top,pred_top_score,document_id
0,1,False,215,226,[Verbrennung],Concept,"{'db_name': 'SNOMED CT', 'db_id': '125666000'}",2,1.0,48333001,1.0,Albers.txt
1,1,False,227,229,[1.],Concept,"{'db_name': 'SNOMED CT', 'db_id': '258351006'}",-1,,68085002,0.948133,Albers.txt
2,2,False,232,241,[3. Grades],Concept,"{'db_name': 'SNOMED CT', 'db_id': '258353009'}",1,0.934421,61026006,0.934421,Albers.txt
3,1,False,243,247,[Kopf],Concept,"{'db_name': 'SNOMED CT', 'db_id': '302548004'}",-1,,69536005,1.0,Albers.txt
4,1,False,250,254,[Hals],Concept,"{'db_name': 'SNOMED CT', 'db_id': '49928004'}",-1,,45048000,1.0,Albers.txt
5,1,False,255,257,[5%],Concept,"{'db_name': 'SNOMED CT', 'db_id': '113341005'}",-1,,260304006,0.847881,Albers.txt
6,1,True,260,263,[KOF],Concept,"{'db_name': 'SNOMED CT', 'db_id': '301898006'}",-1,,34763001,0.831992,Albers.txt
7,1,False,267,281,[Handamputation],Concept,"{'db_name': 'SNOMED CT', 'db_id': '46028000'}",0,1.0,46028000,1.0,Albers.txt
8,1,True,282,284,[LI],Concept,"{'db_name': 'SNOMED CT', 'db_id': '7771000'}",-1,,267372009,1.0,Albers.txt
9,1,False,287,292,[Akute],Concept,"{'db_name': 'SNOMED CT', 'db_id': '272118002'}",0,0.917403,272118002,0.917403,Albers.txt
