# Cancer Type Normalization
In this notebook, we evaluate our normalizer with two cancer type gold standards.

In [1]:
import sys
# Extend the module search path with the directory two levels above the
# kernel's current working directory so the `preon` imports below can resolve
# (presumably the root of a source checkout — TODO confirm). The path is
# relative, so this only works when the kernel is started from this
# notebook's directory.
sys.path.append('../../')

In [2]:
from preon.normalization import PrecisionOncologyNormalizer
from preon.cancer import load_do_cancers, load_do_flat_mapping, apply_do_flat_mapping_to_ontology, apply_do_flat_mapping_to_goldstandard, \
    load_database_cancer_goldstandard, load_ncbi_cancer_goldstandard
from preon.tests.utils import f1_score

Let's first load the reference cancer types from the Disease Ontology, flatten the hierarchy to two levels, and fit the normalizer.

In [3]:
# Load the reference cancer types and their identifiers (`doids`) from
# Disease Ontology.
cancer_types, doids = load_do_cancers()

# reduce the cancer type hierarchy to just two levels
do_flat_mapping = load_do_flat_mapping()
cancer_types, doids = apply_do_flat_mapping_to_ontology(cancer_types, doids, do_flat_mapping)

# Fit on the flattened reference terms. `do_flat_mapping` stays in scope
# because the gold standards are reduced with the same mapping before
# evaluation. NOTE(review): assumes fit() returns the fitted normalizer, since
# evaluate() is later called on this result — confirm.
normalizer = PrecisionOncologyNormalizer().fit(cancer_types, doids)

Now, we can evaluate it using the provided gold standards.

In [4]:
# Gold standards to evaluate against, as (label, loader) pairs in evaluation
# order. The loaders are stored uncalled, so each dataset is only read when
# the evaluation loop invokes its loader.
goldstandards = list({
    "database": load_database_cancer_goldstandard,
    "ncbi": load_ncbi_cancer_goldstandard,
}.items())

In [5]:
# Evaluate the fitted normalizer on every gold standard and report its F1 score.
for dataset_name, load_dataset in goldstandards:
    # Dedicated, stage-suffixed names keep the gold standard data apart from
    # the reference `cancer_types` / `doids` the normalizer was fitted on, so
    # those notebook-level variables are not overwritten by this loop.
    gold_cancer_types_raw, gold_doids_raw = load_dataset()

    # reduce cancer type hierarchy in gold standard as well
    gold_cancer_types, gold_doids = apply_do_flat_mapping_to_goldstandard(
        gold_cancer_types_raw, gold_doids_raw, do_flat_mapping
    )

    # n_grams=3 is forwarded to evaluate() unchanged; presumably the n-gram
    # size used during matching — TODO confirm against the normalizer.
    df_eval = normalizer.evaluate(gold_cancer_types, gold_doids, n_grams=3)
    print(f"{dataset_name}: f1_score={f1_score(df_eval)}")

database: f1_score=0.934131736526946
ncbi: f1_score=0.8208955223880596
