# going to analyze the results of logmap matching
- first lets load all the data frames 

In [1]:
import polars as pl
from mapnet.utils import format_mappings, load_known_mappings_df, load_biomappings_df, quality_check_mappings
import obonet
from indra.databases import mesh_client

In [None]:
g = obonet.read_obo(
    "https://raw.githubusercontent.com/DiseaseOntology/"
    "HumanDiseaseOntology/main/src/ontology/HumanDO.obo"
)
maps_path = "/home/buzgalbraith/workspace/mapnet/mapnet/logmap-matcher/output/logmap2_mappings.tsv"
raw_maps = pl.read_csv(
    maps_path,
    separator="\t",
    has_header=False,
    new_columns=["source iri", "target iri", "confidence"],
)
predicted_maps = format_mappings(
    df=raw_maps,
    source_prefix="DOID",
    target_prefix="MESH",
    matching_source="LogMap",
    source_name_func=lambda x: g.nodes[x]["name"],
    target_name_func=lambda x: mesh_client.get_mesh_name(x),
)

known_mappings = load_known_mappings_df(known_mappings_path="/home/buzgalbraith/workspace/mapnet/knownMaps/doid_to_mesh_provided_maps.tsv",
    source_name_func=lambda x: g.nodes[x]["name"],
    target_name_func=lambda x: mesh_client.get_mesh_name(x),)

biomappings_maps = load_biomappings_df(
    source_prefix="DOID",
    target_prefix="MESH",
)
evidence = known_mappings.select(['source identifier', 'target identifier']).vstack(biomappings_maps.select(['source identifier', 'target identifier']))



INFO: [2025-04-14 19:25:03] root - Will decode content from https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/main/src/ontology/HumanDO.obo using utf-8 charset.


How many mappings are present directly from the known mappings?

In [21]:
predictions_in_known_maps = predicted_maps.join(known_mappings, on=['source identifier', 'target identifier'], how='inner')
print(f'out of {len(predicted_maps)} total mappings, there are {len(predictions_in_known_maps)} that is {len(predictions_in_known_maps)/len(predicted_maps)}')
predictions_in_known_maps.head()

out of 3115 total mappings, there are 2429 that is 0.7797752808988764


source prefix,source identifier,source name,relation,target prefix,target identifier,target name,type,confidence,source,SrcEntity,TgtEntity,Score
str,str,str,str,str,str,str,str,f64,str,str,str,str
"""DOID""","""DOID:0001816""","""angiosarcoma""","""skos:exactMatch""","""MESH""","""D006394""","""Hemangiosarcoma""","""semapv:SemanticSimilarityThres…",0.86,"""LogMap""","""http://purl.obolibrary.org/obo…","""http://purl.bioontology.org/on…","""1.0"""
"""DOID""","""DOID:0014667""","""disease of metabolism""","""skos:exactMatch""","""MESH""","""D008659""","""Metabolic Diseases""","""semapv:SemanticSimilarityThres…",0.81,"""LogMap""","""http://purl.obolibrary.org/obo…","""http://purl.bioontology.org/on…","""1.0"""
"""DOID""","""DOID:0040091""","""autoimmune pancreatitis""","""skos:exactMatch""","""MESH""","""D000081012""","""Autoimmune Pancreatitis""","""semapv:SemanticSimilarityThres…",0.77,"""LogMap""","""http://purl.obolibrary.org/obo…","""http://purl.bioontology.org/on…","""1.0"""
"""DOID""","""DOID:0050052""","""Rocky Mountain spotted fever""","""skos:exactMatch""","""MESH""","""D012373""","""Rocky Mountain Spotted Fever""","""semapv:SemanticSimilarityThres…",0.61,"""LogMap""","""http://purl.obolibrary.org/obo…","""http://purl.bioontology.org/on…","""1.0"""
"""DOID""","""DOID:0050061""","""erysipeloid""","""skos:exactMatch""","""MESH""","""D004887""","""Erysipeloid""","""semapv:SemanticSimilarityThres…",0.79,"""LogMap""","""http://purl.obolibrary.org/obo…","""http://purl.bioontology.org/on…","""1.0"""


How many mappings are present directly from biomappings?

In [22]:
predictions_in_biomappings = predicted_maps.join(biomappings_maps, on=['source identifier', 'target identifier'], how='inner')
print(f'out of {len(predicted_maps)} total mappings, there are {len(predictions_in_biomappings)} that is {len(predictions_in_biomappings)/len(predicted_maps)}')
predictions_in_biomappings.head()

out of 3115 total mappings, there are 56 that is 0.017977528089887642


source prefix,source identifier,source name,relation,target prefix,target identifier,target name,type,confidence,source,source prefix_right,source name_right,relation_right,target prefix_right,target name_right,type_right,source_right,prediction_type,prediction_source,prediction_confidence
str,str,str,str,str,str,str,str,f64,str,str,str,str,str,str,str,str,str,str,str
"""DOID""","""DOID:0081082""","""acute myelomonocytic leukemia""","""skos:exactMatch""","""MESH""","""D015479""","""Leukemia, Myelomonocytic, Acut…","""semapv:SemanticSimilarityThres…",0.89,"""LogMap""","""doid""","""acute myelomonocytic leukemia""","""skos:exactMatch""","""mesh""","""Leukemia, Myelomonocytic, Acut…","""semapv:ManualMappingCuration""","""orcid:0000-0001-9439-5346""",,,
"""DOID""","""DOID:9675""","""pulmonary emphysema""","""skos:exactMatch""","""MESH""","""D011656""","""Pulmonary Emphysema""","""semapv:SemanticSimilarityThres…",0.87,"""LogMap""","""doid""","""pulmonary emphysema""","""skos:exactMatch""","""mesh""","""Pulmonary Emphysema""","""semapv:ManualMappingCuration""","""orcid:0000-0001-9439-5346""",,,
"""DOID""","""DOID:0081041""","""B-cell prolymphocytic leukemia""","""skos:exactMatch""","""MESH""","""D054403""","""Leukemia, Prolymphocytic, B-Ce…","""semapv:SemanticSimilarityThres…",0.87,"""LogMap""","""doid""","""B-cell prolymphocytic leukemia""","""skos:exactMatch""","""mesh""","""Leukemia, Prolymphocytic, B-Ce…","""semapv:ManualMappingCuration""","""orcid:0000-0001-9439-5346""",,,
"""DOID""","""DOID:0080940""","""hereditary angioedema type III""","""skos:exactMatch""","""MESH""","""D056828""","""Hereditary Angioedema Type III""","""semapv:SemanticSimilarityThres…",0.73,"""LogMap""","""doid""","""hereditary angioedema type III""","""skos:exactMatch""","""mesh""","""Hereditary Angioedema Type III""","""semapv:ManualMappingCuration""","""orcid:0000-0001-9439-5346""",,,
"""DOID""","""DOID:14453""","""farmer's lung""","""skos:exactMatch""","""MESH""","""D005203""","""Farmer's Lung""","""semapv:SemanticSimilarityThres…",0.96,"""LogMap""","""doid""","""farmer's lung""","""skos:exactMatch""","""mesh""","""Farmer's Lung""","""semapv:ManualMappingCuration""","""orcid:0000-0003-4423-4370""",,,


Now lets look at the novel mappings, here we are defining a novel mapping as a map that involves classes that have no maps to them in the evidence

In [30]:
novel_predictions = predicted_maps.join(evidence, on=['target identifier'], how='anti')
novel_predictions = novel_predictions.join(evidence, on=['source identifier'], how='anti')

print(f'out of {len(predicted_maps)} total mappings, there are {len(novel_predictions)} that is {len(novel_predictions)/len(predicted_maps)}')
novel_predictions.head()

out of 3115 total mappings, there are 223 that is 0.07158908507223113


source prefix,source identifier,source name,relation,target prefix,target identifier,target name,type,confidence,source
str,str,str,str,str,str,str,str,f64,str
"""DOID""","""DOID:12369""","""prolapse of urethra""","""skos:exactMatch""","""MESH""","""D011391""","""Prolapse""","""semapv:SemanticSimilarityThres…",0.54,"""LogMap"""
"""DOID""","""DOID:0110859""","""polycystic kidney disease 2""","""skos:exactMatch""","""MESH""","""D016891""","""Polycystic Kidney, Autosomal D…","""semapv:SemanticSimilarityThres…",0.78,"""LogMap"""
"""DOID""","""DOID:0081075""","""Marsili syndrome""","""skos:exactMatch""","""MESH""","""D000699""","""Pain Insensitivity, Congenital""","""semapv:SemanticSimilarityThres…",0.92,"""LogMap"""
"""DOID""","""DOID:9771""","""transient neonatal thrombocyto…","""skos:exactMatch""","""MESH""","""D054098""","""Thrombocytopenia, Neonatal All…","""semapv:SemanticSimilarityThres…",0.57,"""LogMap"""
"""DOID""","""DOID:0080522""","""anaplastic thyroid carcinoma""","""skos:exactMatch""","""MESH""","""D065646""","""Thyroid Carcinoma, Anaplastic""","""semapv:SemanticSimilarityThres…",0.81,"""LogMap"""


# next check if any of the target or source identfiers are present in matches from either biomappings or the known mappings. 

In [31]:
targets_mapped_wrong = predicted_maps.join(evidence, on=['target identifier'], how='inner')
targets_mapped_wrong = targets_mapped_wrong.join(evidence, on=['source identifier'], how='anti')

print(f'out of {len(predicted_maps)} total mappings, there are {len(targets_mapped_wrong)} that is {len(targets_mapped_wrong)/len(predicted_maps)}')
targets_mapped_wrong.head()

out of 3115 total mappings, there are 260 that is 0.08346709470304976


source prefix,source identifier,source name,relation,target prefix,target identifier,target name,type,confidence,source,source identifier_right
str,str,str,str,str,str,str,str,f64,str,str
"""DOID""","""DOID:0110921""","""familial hemophagocytic lympho…","""skos:exactMatch""","""MESH""","""D051359""","""Lymphohistiocytosis, Hemophago…","""semapv:SemanticSimilarityThres…",0.67,"""LogMap""","""DOID:0050120"""
"""DOID""","""DOID:0110594""","""primary ciliary dyskinesia 1""","""skos:exactMatch""","""MESH""","""D007619""","""Kartagener Syndrome""","""semapv:SemanticSimilarityThres…",0.66,"""LogMap""","""DOID:0050144"""
"""DOID""","""DOID:0060941""","""interstitial lung disease 1""","""skos:exactMatch""","""MESH""","""D054990""","""Idiopathic Pulmonary Fibrosis""","""semapv:SemanticSimilarityThres…",0.74,"""LogMap""","""DOID:0050156"""
"""DOID""","""DOID:0050179""","""Powassan encephalitis""","""skos:exactMatch""","""MESH""","""D004675""","""Encephalitis, Tick-Borne""","""semapv:SemanticSimilarityThres…",0.54,"""LogMap""","""DOID:0050175"""
"""DOID""","""DOID:0110081""","""arrhythmogenic right ventricul…","""skos:exactMatch""","""MESH""","""D019571""","""Arrhythmogenic Right Ventricul…","""semapv:SemanticSimilarityThres…",0.85,"""LogMap""","""DOID:0050431"""


In [32]:
sources_mapped_wrong = predicted_maps.join(evidence, on=['source identifier'], how='inner')
sources_mapped_wrong = sources_mapped_wrong.join(evidence, on=['target identifier'], how='anti')

print(f'out of {len(predicted_maps)} total mappings, there are {len(sources_mapped_wrong)} that is {len(sources_mapped_wrong)/len(predicted_maps)}')
sources_mapped_wrong.head()

out of 3115 total mappings, there are 168 that is 0.05393258426966292


source prefix,source identifier,source name,relation,target prefix,target identifier,target name,type,confidence,source,target identifier_right
str,str,str,str,str,str,str,str,f64,str,str
"""DOID""","""DOID:0050428""","""nonepidermolytic palmoplantar …","""skos:exactMatch""","""MESH""","""D015776""","""Keratoderma, Palmoplantar, Dif…","""semapv:SemanticSimilarityThres…",0.71,"""LogMap""","""C563422"""
"""DOID""","""DOID:0050445""","""X-linked hypophosphatemic rick…","""skos:exactMatch""","""MESH""","""D063730""","""Rickets, Hypophosphatemic""","""semapv:SemanticSimilarityThres…",0.89,"""LogMap""","""D053098"""
"""DOID""","""DOID:0050597""","""intestinal schistosomiasis""","""skos:exactMatch""","""MESH""","""D012555""","""Schistosomiasis mansoni""","""semapv:SemanticSimilarityThres…",0.82,"""LogMap""","""D012554"""
"""DOID""","""DOID:0050773""","""paraganglioma""","""skos:exactMatch""","""MESH""","""D010236""","""Paraganglioma, Extra-Adrenal""","""semapv:SemanticSimilarityThres…",0.71,"""LogMap""","""D010235"""
"""DOID""","""DOID:0050835""","""generalized dystonia""","""skos:exactMatch""","""MESH""","""D020821""","""Dystonic Disorders""","""semapv:SemanticSimilarityThres…",0.75,"""LogMap""","""C538001"""


In [33]:
known_mappings

SrcEntity,TgtEntity,Score,source identifier,target identifier
str,str,str,str,str
"""http://purl.obolibrary.org/obo…","""http://purl.bioontology.org/on…","""1.0""","""DOID:0001816""","""D006394"""
"""http://purl.obolibrary.org/obo…","""http://purl.bioontology.org/on…","""1.0""","""DOID:0014667""","""D008659"""
"""http://purl.obolibrary.org/obo…","""http://purl.bioontology.org/on…","""1.0""","""DOID:0040091""","""D000081012"""
"""http://purl.obolibrary.org/obo…","""http://purl.bioontology.org/on…","""1.0""","""DOID:0050052""","""D012373"""
"""http://purl.obolibrary.org/obo…","""http://purl.bioontology.org/on…","""1.0""","""DOID:0050061""","""D004887"""
…,…,…,…,…
"""http://purl.obolibrary.org/obo…","""http://purl.bioontology.org/on…","""1.0""","""DOID:998""","""D016603"""
"""http://purl.obolibrary.org/obo…","""http://purl.bioontology.org/on…","""1.0""","""DOID:9986""","""C537131"""
"""http://purl.obolibrary.org/obo…","""http://purl.bioontology.org/on…","""1.0""","""DOID:9988""","""D009494"""
"""http://purl.obolibrary.org/obo…","""http://purl.bioontology.org/on…","""1.0""","""DOID:999""","""D004802"""
