In [2]:
import os
import json
import pickle
import random
from collections import defaultdict, Counter

from indra.literature.adeft_tools import universal_extract_text
from indra.databases.hgnc_client import get_hgnc_name, get_hgnc_id

from adeft.discover import AdeftMiner
from adeft.gui import ground_with_gui
from adeft.modeling.label import AdeftLabeler
from adeft.modeling.classify import AdeftClassifier
from adeft.disambiguate import AdeftDisambiguator, load_disambiguator

from indra_db_lite.api import get_entrez_pmids_for_hgnc
from indra_db_lite.api import get_entrez_pmids_for_uniprot
from indra_db_lite.api import get_plaintexts_for_text_ref_ids
from indra_db_lite.api import get_text_ref_ids_for_agent_text
from indra_db_lite.api import get_text_ref_ids_for_pmids


from adeft_indra.grounding import AdeftGrounder
from adeft_indra.s3 import model_to_s3
from adeft_indra.model_building.escape import escape_filename

In [3]:
def get_text_ref_ids_for_entity(ns, id_):
    """Return the text ref ids linked to an HGNC or UniProt entity.

    The PMIDs for the entity are looked up with the namespace-specific
    ``get_entrez_pmids_for_*`` helper and then mapped to text ref ids with
    ``get_text_ref_ids_for_pmids``.

    Parameters
    ----------
    ns : str
        Namespace of the entity. Only 'HGNC' and 'UP' are supported.
    id_ : str
        Identifier of the entity within ``ns``.

    Returns
    -------
    list
        The values of the mapping returned by ``get_text_ref_ids_for_pmids``.

    Raises
    ------
    ValueError
        If ``ns`` is not one of the supported namespaces.
    """
    if ns == 'HGNC':
        pmids = get_entrez_pmids_for_hgnc(id_)
    elif ns == 'UP':
        pmids = get_entrez_pmids_for_uniprot(id_)
    else:
        # Without this branch `pmids` would be unbound and the function would
        # fail with an opaque UnboundLocalError on the return line.
        raise ValueError(
            f"Unsupported namespace {ns!r}; expected 'HGNC' or 'UP'"
        )
    return list(get_text_ref_ids_for_pmids(pmids).values())

In [4]:
# Grounder whose .ground() method is used below to propose an initial
# grounding for each mined longform.
adeft_grounder = AdeftGrounder()

In [5]:
# Shortforms handled by this model. Their escaped forms, sorted and joined
# with ':', give the model name, which also names the results directory.
shortforms = ['DP', 'DPs']
escaped_shortforms = sorted(map(escape_filename, shortforms))
model_name = ':'.join(escaped_shortforms)
results_dir = os.path.join('../../', 'results', model_name)
results_path = os.path.abspath(results_dir)

In [6]:
# Step 1: for every shortform, fetch the texts that mention it and feed them
# to a dedicated AdeftMiner. All fetched texts are pooled in `all_texts`.
miners = {}
all_texts = {}
for shortform in shortforms:
    text_ref_ids = get_text_ref_ids_for_agent_text(shortform)
    text_dict = get_plaintexts_for_text_ref_ids(
        text_ref_ids, contains=shortforms
    ).flatten()
    miners[shortform] = miner = AdeftMiner(shortform)
    miner.process_texts(text_dict.values())
    all_texts.update(text_dict)

# Step 2: keep only the candidate longforms whose count*score exceeds 2.
longform_dict = {
    shortform: [
        (longform, count, score)
        for longform, count, score in miners[shortform].get_longforms()
        if count*score > 2
    ]
    for shortform in shortforms
}

# Step 3: add up the counts of each longform across shortforms.
combined_longforms = Counter()
for rows in longform_dict.values():
    combined_longforms.update({longform: count for longform, count, _score in rows})

# Step 4: take the grounder's first suggestion (if any) as the initial
# grounding and standard name of each longform.
grounding_map = {}
names = {}
for longform in combined_longforms:
    groundings = adeft_grounder.ground(longform)
    if not groundings:
        continue
    top_match = groundings[0]
    grounding = top_match['grounding']
    grounding_map[longform] = grounding
    names[grounding] = top_match['name']

# Longforms and counts as parallel tuples, most frequent first.
longforms, counts = zip(*combined_longforms.most_common())
pos_labels = []

In [7]:
# Show the mined longforms with their combined counts, most frequent first.
list(zip(longforms, counts))

[('double positive', 347),
 ('dermal papilla', 119),
 ('degree of polymerization', 86),
 ('desmoplakin', 81),
 ('dipyridamole', 58),
 ('disability pension', 50),
 ('dihydroartemisinin piperaquine', 42),
 ('dipyridyl', 36),
 ('dietary patterns', 27),
 ('d penicillamine', 26),
 ('depotentiation', 21),
 ('dental pulp', 20),
 ('diabetic prone', 18),
 ('depersonalization', 18),
 ('distal pancreatectomy', 16),
 ('disease progression', 11),
 ('dendropanoxide', 11),
 ('dracorhodin perchlorate', 10),
 ('dermatophagoides pteronyssinus', 10),
 ('digital pathology', 10),
 ('d prostanoid receptor', 10),
 ('delayed puberty', 9),
 ('dynamic programming', 9),
 ('dried plum', 9),
 ('double product', 9),
 ('development pressure', 8),
 ('disopyramide', 8),
 ('degree of polymerisation', 8),
 ('dexamethasone palmitate', 7),
 ('donepezil', 7),
 ('dysregulation profile', 6),
 ('dirichlet process', 6),
 ('dimerization partner', 6),
 ('date pits', 6),
 ('deformational plagiocephaly', 6),
 ('developmental proso

In [8]:
# Curation step: ground_with_gui receives the automatic groundings as a
# starting point and returns the (grounding_map, names, pos_labels) triple.
# NOTE(review): presumably this waits on manual input in a GUI served on the
# given port, so the result is not reproducible from code alone — confirm.
grounding_map, names, pos_labels = ground_with_gui(longforms, counts, 
                                                   grounding_map=grounding_map,
                                                   names=names, pos_labels=pos_labels, no_browser=True, port=8890)

In [9]:
# Bundle the three curation outputs into one list so they can be shown together.
result = [grounding_map, names, pos_labels]

In [10]:
# Display the bundled curation outputs.
result

[{'d panthenol': 'CHEBI:CHEBI:27373',
  'd penicillamine': 'CHEBI:CHEBI:7959',
  'd prostanoid receptor': 'ungrounded',
  'd pteronyssinus': 'ungrounded',
  'd type prostanoid': 'HGNC:9591',
  'dandelion polysaccharide': 'ungrounded',
  'dantonic pill': 'ungrounded',
  'date pits': 'ungrounded',
  'days present': 'ungrounded',
  'deafferentation pain': 'MESH:D010146',
  'death to preservation': 'ungrounded',
  'dechlorane plus': 'MESH:C526360',
  'decomposition product': 'ungrounded',
  'decoy peptide': 'CHEBI:CHEBI:16670',
  'deep potential': 'ungrounded',
  'deep pyramidal': 'ungrounded',
  'deformational plagiocephaly': 'MESH:D049068',
  'degradation products': 'ungrounded',
  'degree of polymerisation': 'MESH:D058105',
  'degree of polymerization': 'MESH:D058105',
  'delayed pollination': 'ungrounded',
  'delayed puberty': 'MESH:D011628',
  'deliberate practice': 'ungrounded',
  'delusional parasitosis': 'ungrounded',
  'denatured protein': 'ungrounded',
  'dendropanoxide': 'MESH:C

In [11]:
# Hard-coded curation result: (grounding_map, names, pos_labels).
#   grounding_map: longform -> grounding string, or 'ungrounded'
#   names:         grounding string -> standard name
#   pos_labels:    groundings treated as positive labels
# NOTE(review): some spelling variants of the same longform are grounded
# inconsistently — confirm whether this is intended:
#   - 'd prostanoid receptor' is 'ungrounded', while 'd type prostanoid' and
#     'prostaglandin d2 receptor' both map to HGNC:9591
#   - 'depersonalisation' is 'ungrounded', 'depersonalization' is MESH:D003861
#   - 'd pteronyssinus' and 'dermatophagoides pteronissinus' are 'ungrounded',
#     'dermatophagoides pteronyssinus' is MESH:D040002
#   - 'polymerization degree' is 'ungrounded', 'degree of polymerization' is
#     MESH:D058105
grounding_map, names, pos_labels = [{'d panthenol': 'CHEBI:CHEBI:27373',
  'd penicillamine': 'CHEBI:CHEBI:7959',
  'd prostanoid receptor': 'ungrounded',
  'd pteronyssinus': 'ungrounded',
  'd type prostanoid': 'HGNC:9591',
  'dandelion polysaccharide': 'ungrounded',
  'dantonic pill': 'ungrounded',
  'date pits': 'ungrounded',
  'days present': 'ungrounded',
  'deafferentation pain': 'MESH:D010146',
  'death to preservation': 'ungrounded',
  'dechlorane plus': 'MESH:C526360',
  'decomposition product': 'ungrounded',
  'decoy peptide': 'CHEBI:CHEBI:16670',
  'deep potential': 'ungrounded',
  'deep pyramidal': 'ungrounded',
  'deformational plagiocephaly': 'MESH:D049068',
  'degradation products': 'ungrounded',
  'degree of polymerisation': 'MESH:D058105',
  'degree of polymerization': 'MESH:D058105',
  'delayed pollination': 'ungrounded',
  'delayed puberty': 'MESH:D011628',
  'deliberate practice': 'ungrounded',
  'delusional parasitosis': 'ungrounded',
  'denatured protein': 'ungrounded',
  'dendropanoxide': 'MESH:C546793',
  'dental plaque': 'ungrounded',
  'dental prophylaxis': 'MESH:D003777',
  'dental pulp': 'MESH:D003782',
  'depersonalisation': 'ungrounded',
  'depersonalization': 'MESH:D003861',
  'deposition precipitation': 'ungrounded',
  'depotentiation': 'ungrounded',
  'depression patients': 'ungrounded',
  'deproteinated polysaccharide': 'CHEBI:CHEBI:18154',
  'depsipeptide': 'CHEBI:CHEBI:23643',
  'dermal papilla': 'UBERON:UBERON:0000412',
  'dermatofibrosarcoma protuberans': 'MESH:D018223',
  'dermatophagoides pteronissinus': 'ungrounded',
  'dermatophagoides pteronyssinus': 'MESH:D040002',
  'dermatopontin': 'HGNC:3011',
  'desalted duck egg white peptides': 'ungrounded',
  'desmoplakin': 'HGNC:3052',
  'deuteroporphyrin': 'MESH:C028839',
  'development pressure': 'ungrounded',
  'developmental prosopagnosia': 'MESH:D020238',
  'dexamethasone palmitate': 'CHEBI:CHEBI:31471',
  'diabetic placebo': 'ungrounded',
  'diabetic prone': 'ungrounded',
  'diabetic rats treated with placebo': 'ungrounded',
  'diaphragm pacing': 'ungrounded',
  'diastolic pressure': 'EFO:0006336',
  'dietary patterns': 'ungrounded',
  'differentiation phosphorylation': 'ungrounded',
  'differentiation privacy': 'ungrounded',
  'digital panoramic': 'ungrounded',
  'digital pathology': 'ungrounded',
  'dihydroartemisinin piperaquine': 'CHEBI:CHEBI:91231',
  'dimerization partner': 'ungrounded',
  'dipeptidase': 'ungrounded',
  'diphenyl': 'MESH:C010574',
  'diphosphate': 'CHEBI:CHEBI:18361',
  'dipole potential': 'ungrounded',
  'dipyridamole': 'CHEBI:CHEBI:4653',
  'dipyridyl': 'CHEBI:CHEBI:30351',
  'direct programming': 'ungrounded',
  'dirichlet process': 'ungrounded',
  'dirigent protein': 'ungrounded',
  'disability pension': 'MESH:D010415',
  'disc proper': 'ungrounded',
  'discharge planning': 'ungrounded',
  'discriminatory power': 'ungrounded',
  'disease progression': 'ungrounded',
  'disgust propensity': 'ungrounded',
  'disopyramide': 'CHEBI:CHEBI:4657',
  'distal pancreatectomy': 'MESH:D010180',
  'distal phase': 'ungrounded',
  'distal protection': 'ungrounded',
  'docetaxel cisplatin': 'CHEBI:CHEBI:27899',
  'dominant positive': 'ungrounded',
  'dominant power': 'ungrounded',
  'donepezil': 'CHEBI:CHEBI:53289',
  'dopamine nanoparticle': 'ungrounded',
  'dorsal prostate': 'MESH:D011467',
  'dorsopeduncular': 'ungrounded',
  'double pancake': 'ungrounded',
  'double perovskite': 'MESH:C059910',
  'double poling': 'ungrounded',
  'double population': 'ungrounded',
  'double positive': 'CL:CL:0000809',
  'double precipitation': 'ungrounded',
  'double prioritized': 'ungrounded',
  'double product': 'ungrounded',
  'dracorhodin perchlorate': 'PUBCHEM:74787691',
  'dried period': 'ungrounded',
  'dried plum': 'ungrounded',
  'dried polar': 'ungrounded',
  'driving pressure': 'ungrounded',
  'drug product': 'ungrounded',
  'during pregnancy': 'ungrounded',
  'dynamic programming': 'ungrounded',
  'dysregulation profile': 'ungrounded',
  'polymerization degree': 'ungrounded',
  'prostaglandin d2 receptor': 'HGNC:9591'},
 {'CHEBI:CHEBI:27373': 'pantothenol',
  'CHEBI:CHEBI:7959': 'D-penicillamine',
  'HGNC:9591': 'PTGDR',
  'MESH:D010146': 'Pain',
  'MESH:C526360': 'dechlorane plus',
  'CHEBI:CHEBI:16670': 'peptide',
  'MESH:D049068': 'Plagiocephaly, Nonsynostotic',
  'MESH:D058105': 'Polymerization',
  'MESH:D011628': 'Puberty, Delayed',
  'MESH:C546793': 'dendropanoxide',
  'MESH:D003777': 'Dental Prophylaxis',
  'MESH:D003782': 'Dental Pulp',
  'MESH:D003861': 'Depersonalization',
  'CHEBI:CHEBI:18154': 'polysaccharide',
  'CHEBI:CHEBI:23643': 'depsipeptide',
  'UBERON:UBERON:0000412': 'dermal papilla',
  'MESH:D018223': 'Dermatofibrosarcoma',
  'MESH:D040002': 'Dermatophagoides pteronyssinus',
  'HGNC:3011': 'DPT',
  'HGNC:3052': 'DSP',
  'MESH:C028839': 'deuteroporphyrin-IX',
  'MESH:D020238': 'Prosopagnosia',
  'CHEBI:CHEBI:31471': 'Dexamethasone palmitate',
  'EFO:0006336': 'diastolic blood pressure',
  'CHEBI:CHEBI:91231': 'piperaquine',
  'MESH:C010574': 'diphenyl',
  'CHEBI:CHEBI:18361': 'diphosphate(4-)',
  'CHEBI:CHEBI:4653': 'dipyridamole',
  'CHEBI:CHEBI:30351': "2,2'-bipyridine",
  'MESH:D010415': 'Pensions',
  'CHEBI:CHEBI:4657': 'disopyramide',
  'MESH:D010180': 'Pancreatectomy',
  'CHEBI:CHEBI:27899': 'cisplatin',
  'CHEBI:CHEBI:53289': 'donepezil',
  'MESH:D011467': 'Prostate',
  'MESH:C059910': 'perovskite',
  'CL:CL:0000809': 'double-positive, alpha-beta thymocyte',
  'PUBCHEM:74787691': 'Dracorhodin perchlorate'},
 ['CHEBI:CHEBI:30351',
  'CHEBI:CHEBI:4653',
  'CHEBI:CHEBI:7959',
  'CHEBI:CHEBI:91231',
  'CL:CL:0000809',
  'HGNC:3052',
  'UBERON:UBERON:0000412']]

In [12]:
# Longforms to leave out when grounding_dict is built (none for this model).
excluded_longforms = []

In [13]:
# Per-shortform grounding map: for each shortform keep only its mined
# longforms that were curated (present in grounding_map) and not excluded.
grounding_dict = {
    shortform: {
        longform: grounding_map[longform]
        for longform, _, _ in longform_rows
        if longform in grounding_map and longform not in excluded_longforms
    }
    for shortform, longform_rows in longform_dict.items()
}
result = [grounding_dict, names, pos_labels]

# makedirs(exist_ok=True) also creates a missing parent `results` directory
# and avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(results_path, exist_ok=True)
with open(os.path.join(results_path, f'{model_name}_preliminary_grounding_info.json'), 'w') as f:
    json.dump(result, f)

In [14]:
# Extra entities to collect training texts for (none for this model).
# As used in later cells: keys are 'NS:ID' strings (split on the first ':'),
# values are pairs whose first item is the entity name and whose second item
# is passed as `contains` when fetching texts.
additional_entities = {}

In [15]:
# Entities with agent texts considered unambiguous (none for this model).
# As used in later cells: keys are entity labels; value[0] is the entity name
# and value[1] is an iterable of agent texts to fetch texts for.
unambiguous_agent_texts = {}

In [16]:
# Label the pooled texts with the curated groundings. Each corpus row is
# unpacked below as (text, label, text_ref_id).
labeler = AdeftLabeler(grounding_dict)
text_id_pairs = ((text, text_ref_id) for text_ref_id, text in all_texts.items())
corpus = labeler.build_from_texts(text_id_pairs)

# Group the ids of the labeled texts by the label they received.
agent_text_text_ref_id_map = defaultdict(list)
for text, label, id_ in corpus:
    ids_for_label = agent_text_text_ref_id_map[label]
    ids_for_label.append(id_)

# Text ref ids known for each additional entity, keyed by its 'NS:ID' label.
entity_text_ref_id_map = {}
for entity in additional_entities:
    entity_trids = get_text_ref_ids_for_entity(*entity.split(':', maxsplit=1))
    entity_text_ref_id_map[entity] = set(entity_trids)

In [17]:
# Pairwise overlap (number of shared text ref ids) between the additional
# entities, including each entity paired with itself.
intersection1 = []
for entity1, trids1 in entity_text_ref_id_map.items():
    for entity2, trids2 in entity_text_ref_id_map.items():
        shared = trids1.intersection(trids2)
        intersection1.append((entity1, entity2, len(shared)))

In [18]:
# Overlap between the texts labeled with each grounding of the shortforms and
# the texts known for each additional entity.
intersection2 = []
for entity1, trids1 in agent_text_text_ref_id_map.items():
    # The inner value must be bound as `trids2`, the name the body reads.
    # It was previously bound as `pmids2`, so the body silently used whatever
    # `trids2` was left in the notebook namespace by an earlier cell.
    for entity2, trids2 in entity_text_ref_id_map.items():
        intersection2.append((entity1, entity2, len(set(trids1) & trids2)))

In [19]:
# Display the entity/entity overlaps (empty here: no additional entities).
intersection1

[]

In [20]:
# Display the label/entity overlaps (empty here: no additional entities).
intersection2

[]

In [21]:
def _rows_for_entity(text_dict, entity):
    """Build (text, entity, trid) corpus rows, skipping texts shorter than 5."""
    return [(text, entity, trid) for trid, text in text_dict.items() if len(text) >= 5]


# Part 1: add texts found through unambiguous agent texts, labeled with the
# entity those agent texts stand for. Texts already in `all_texts`, or already
# used for the same entity, are not fetched again.
all_used_trids = set()
for entity, agent_texts in unambiguous_agent_texts.items():
    entity_agent_texts = agent_texts[1]
    used_trids = set()
    for agent_text in entity_agent_texts:
        candidate_trids = set(get_text_ref_ids_for_agent_text(agent_text))
        new_trids = list(candidate_trids - all_texts.keys() - used_trids)
        text_dict = get_plaintexts_for_text_ref_ids(
            new_trids, contains=entity_agent_texts
        ).flatten()
        corpus.extend(_rows_for_entity(text_dict, entity))
        used_trids.update(new_trids)
    all_used_trids |= used_trids

# Part 2: add texts known for each additional entity, skipping anything that
# is already in `all_texts` or was consumed in part 1.
for entity, trids in entity_text_ref_id_map.items():
    _, contains = additional_entities[entity]
    new_trids = list(set(trids) - all_texts.keys() - all_used_trids)
    text_dict = get_plaintexts_for_text_ref_ids(new_trids, contains=contains).flatten()
    corpus.extend(_rows_for_entity(text_dict, entity))

In [22]:
# Register the names of the extra entities (first item of each value), then
# treat every extra entity as a positive label alongside the curated ones.
for extra_entities in (additional_entities, unambiguous_agent_texts):
    names.update({key: value[0] for key, value in extra_entities.items()})

extra_labels = additional_entities.keys() | unambiguous_agent_texts.keys()
pos_labels = list(set(pos_labels) | extra_labels)

In [23]:
%%capture

# Train the classifier with cross-validation over a single-point parameter
# grid; random_state is fixed at 1729.
classifier = AdeftClassifier(shortforms, pos_labels=pos_labels, random_state=1729)
param_grid = {'C': [100.0], 'max_features': [10000]}
# NOTE(review): the third element of each corpus row is a text ref id (see
# where the corpus is built), so the name `pmids` is misleading; it is not
# used by the cv call below.
texts, labels, pmids = zip(*corpus)
# NOTE(review): cv=5 and n_jobs=5 are hard-coded; n_jobs presumably sets the
# number of parallel workers — confirm it suits the machine running this.
classifier.cv(texts, labels, param_grid, cv=5, n_jobs=5)

INFO: [2021-10-06 15:58:35] /adeft/Py/adeft/adeft/modeling/classify.py - Beginning grid search in parameter space:
{'C': [100.0], 'max_features': [10000]}
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(


INFO: [2021-10-06 15:59:19] /adeft/Py/adeft/adeft/modeling/classify.py - Best f1 score of 0.9281930999977457 found for parameter values:
{'logit__C': 100.0, 'tfidf__max_features': 10000}


In [24]:
# Inspect the statistics stored on the classifier (label distribution and
# f1/precision summaries, per the output below).
classifier.stats

{'label_distribution': {'ungrounded': 209,
  'CL:CL:0000809': 266,
  'MESH:D003782': 15,
  'UBERON:UBERON:0000412': 87,
  'CHEBI:CHEBI:16670': 2,
  'HGNC:3052': 62,
  'MESH:D020238': 5,
  'CHEBI:CHEBI:91231': 21,
  'CHEBI:CHEBI:18361': 4,
  'MESH:D003861': 14,
  'CHEBI:CHEBI:18154': 1,
  'EFO:0006336': 7,
  'CHEBI:CHEBI:4653': 51,
  'MESH:D010415': 17,
  'MESH:D010180': 11,
  'MESH:C059910': 2,
  'CHEBI:CHEBI:30351': 25,
  'MESH:D040002': 9,
  'MESH:D011467': 2,
  'MESH:D058105': 67,
  'CHEBI:CHEBI:31471': 6,
  'MESH:C010574': 1,
  'CHEBI:CHEBI:27899': 2,
  'CHEBI:CHEBI:7959': 22,
  'MESH:C028839': 3,
  'MESH:D003777': 1,
  'MESH:C546793': 4,
  'HGNC:3011': 4,
  'HGNC:9591': 8,
  'MESH:D018223': 2,
  'PUBCHEM:74787691': 7,
  'MESH:D011628': 4,
  'CHEBI:CHEBI:4657': 8,
  'CHEBI:CHEBI:23643': 3,
  'MESH:D010146': 1,
  'MESH:D049068': 4,
  'CHEBI:CHEBI:53289': 4,
  'CHEBI:CHEBI:27373': 1,
  'MESH:C526360': 5},
 'f1': {'mean': 0.928193, 'std': 0.0137},
 'precision': {'mean': 0.935745, 'std

In [25]:
# Combine the trained classifier, the per-shortform grounding dictionary and
# the grounding names into a single disambiguator object.
disamb = AdeftDisambiguator(classifier, grounding_dict, names)

In [26]:
# Save the disambiguator under results_path using model_name.
# NOTE(review): exact file layout is decided by AdeftDisambiguator.dump — confirm.
disamb.dump(model_name, results_path)

In [27]:
# Print the model summary (disambiguations produced and metrics).
print(disamb.info())

Disambiguation model for DP, and DPs

Produces the disambiguations:
	2,2'-bipyridine*	CHEBI:CHEBI:30351
	D-penicillamine*	CHEBI:CHEBI:7959
	DPT	HGNC:3011
	DSP*	HGNC:3052
	Dental Prophylaxis	MESH:D003777
	Dental Pulp	MESH:D003782
	Depersonalization	MESH:D003861
	Dermatofibrosarcoma	MESH:D018223
	Dermatophagoides pteronyssinus	MESH:D040002
	Dexamethasone palmitate	CHEBI:CHEBI:31471
	Dracorhodin perchlorate	PUBCHEM:74787691
	PTGDR	HGNC:9591
	Pain	MESH:D010146
	Pancreatectomy	MESH:D010180
	Pensions	MESH:D010415
	Plagiocephaly, Nonsynostotic	MESH:D049068
	Polymerization	MESH:D058105
	Prosopagnosia	MESH:D020238
	Prostate	MESH:D011467
	Puberty, Delayed	MESH:D011628
	cisplatin	CHEBI:CHEBI:27899
	dechlorane plus	MESH:C526360
	dendropanoxide	MESH:C546793
	depsipeptide	CHEBI:CHEBI:23643
	dermal papilla*	UBERON:UBERON:0000412
	deuteroporphyrin-IX	MESH:C028839
	diastolic blood pressure	EFO:0006336
	diphenyl	MESH:C010574
	diphosphate(4-)	CHEBI:CHEBI:18361
	dipyridamole*	CHEBI:CHEBI:4653
	disopyramid

In [29]:
# NOTE(review): presumably uploads the model to S3 (judging by the helper's
# name) — confirm destination and credentials before re-running. The
# execution count jumps from 27 to 29 here, so a cell was run or removed in
# between; check with Restart & Run All.
model_to_s3(disamb)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [26]:
from adeft.disambiguate import load_disambiguator

In [27]:
# NOTE(review): this rebinds `disamb` (the DP/DPs model built above) to a
# different, previously saved model named "BAL". The execution counts of
# these trailing cells repeat earlier ones, so they look like leftovers from
# another session — confirm whether they belong in this notebook.
disamb = load_disambiguator("BAL")

In [28]:
# Display the loaded disambiguator object.
disamb

<adeft.disambiguate.AdeftDisambiguator at 0x7f4f001b33a0>

In [29]:
# Print the summary of the loaded model. Use `disamb` by name rather than the
# IPython output-history variable `_28`, which only exists when the preceding
# display cell happened to run with execution count 28 and so breaks on any
# fresh Restart & Run All.
print(disamb.info())

Disambiguation model for BAL

Produces the disambiguations:
	Bronchoalveolar Lavage	MESH:D018893
	CEL*	HGNC:1848
	Liver, Artificial	MESH:D019164
	benzaldehyde lyase*	MESH:C059416
	betaine aldehyde*	CHEBI:CHEBI:15710
	dimercaprol*	CHEBI:CHEBI:64198

Class level metrics:
--------------------
Grounding             	Count	F1     
Bronchoalveolar Lavage	1259	 0.9929
                   CEL*	  36	    1.0
     Liver, Artificial	  18	0.83619
            Ungrounded	  17	   0.65
           dimercaprol*	   8	    0.4
    benzaldehyde lyase*	   3	    0.2
      betaine aldehyde*	   2	    0.2

Global Metrics:
-----------------
	F1 score:	0.90773
	Precision:	1.0
	Recall:		0.83293

* Positive labels
See Docstring for explanation



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
