# Drug Name Normalization
In this notebook, we evaluate our normalizer with three drug name gold standards.

In [1]:
import sys
# Append the directory two levels up (relative to the current working
# directory) to the module search path.
# NOTE(review): presumably this makes the local `preon` checkout importable
# for the imports in the next cell — confirm against the repository layout.
sys.path.append('../../')

In [2]:
from preon.normalization import PrecisionOncologyNormalizer
from preon.drug import load_ebi_drugs, load_charite_drug_goldstandard, load_database_drug_goldstandard, load_ctg_drug_goldstandard
from preon.tests.utils import f1_score

Let's first load the reference drug names from EBI and fit the normalizer.

In [3]:
# Reference vocabulary: drug names together with their identifiers
# (ChEMBL ids, going by the variable name), as returned by the EBI loader.
drug_names, chembl_ids = load_ebi_drugs()

# Build the normalizer first, then fit it on the reference vocabulary.
# The result of `fit` is what gets bound to `normalizer`.
normalizer = PrecisionOncologyNormalizer()
normalizer = normalizer.fit(drug_names, chembl_ids)

Now, we can evaluate it using the provided gold standards.

In [4]:
# Ordered (dataset name, loader function) pairs; each loader is called later
# to obtain one gold standard. Dict literals preserve insertion order, so the
# resulting list of tuples is charite, database, ctg.
goldstandards = list({
    "charite": load_charite_drug_goldstandard,
    "database": load_database_drug_goldstandard,
    "ctg": load_ctg_drug_goldstandard,
}.items())

In [5]:
# Evaluate the fitted normalizer on every gold standard and report its F1 score.
# The loop targets are named gold_* on purpose: `for`-loop variables live in
# the notebook's global namespace, so reusing `drug_names` / `chembl_ids` here
# would overwrite the reference data the normalizer was fitted on and leave
# those names pointing at the last gold standard after the cell runs.
for dataset_name, load_dataset in goldstandards:
    # Each loader returns the gold-standard names and their expected ids.
    gold_names, gold_chembl_ids = load_dataset()
    df_eval = normalizer.evaluate(gold_names, gold_chembl_ids)
    print(f"{dataset_name}: f1_score={f1_score(df_eval)}")

charite: f1_score=0.9144050104384134
database: f1_score=0.9496402877697843
ctg: f1_score=0.9397590361445782
