In [116]:
import pandas as pd
from pprint import pprint
import requests
import yaml

In [117]:
# Source of data/supp_5.tsv: downloaded from
# https://docs.google.com/spreadsheets/d/1ZrLgcGYOi5m760LoTJ7BshDC_y8mWKW1OvzKMrWh7vk/edit#gid=1272950057
# The file is read as tab-separated text (sep='\t').
mappings = pd.read_csv('data/supp_5.tsv', sep='\t')
# Preview the first rows to check which columns were loaded.
mappings.head()

Unnamed: 0,PL COMMENTS,Variable / Field Name,Field Label,Nicole and Liz feedback/Relevant standards,HPO ID,HPO Label,Mondo ID,Mondo Label,MAxO ID,MAxO Label,...,Matrix Ranking?,Field Annotation,NIH-Endorsed CDEs,"Link to NIH CDE Repository NIH-endorsed CDEs have been reviewed and approved by an expert panel, and meet established criteria. They are designated with a gold ribbon.","Notes Bundles are indivisible. They are not considered valid and reliable if not used in their entirety, as intended.",Link to NIH CDE Repository - Bundle,"Additional Notes (e.g., present in more than one bundle; related but not exact conditions; etc.)",Related CDE at NIH CDE Repository (not Gold Ribbon),Link to https://cde.nlm.nih.gov/home site,Notes
0,changed to _status field but left annotations ...,auto_other_status,[auto_other_specify],,HP:0002960,Autoimmunity,MONDO:0007179,autoimmune disease,,,...,,,Comorbidity or Underlying Condition Specify Ot...,https://cde.nlm.nih.gov/deView?tinyId=G0hugkm3M,Part of a bundle: Form: Project 5 - Comorbidit...,https://cde.nlm.nih.gov/formView?tinyId=_YC_9QoeC,,Other autoimmune disease text,https://cde.nlm.nih.gov/deView?tinyId=mygR2IK66U,
1,can we put something in the legend about origi...,auto_alopecia_status,Alopecia areata,,HP:0002232,Patchy alopecia,MONDO:0005340,alopecia areata,,,...,,,,,,,,Scarring chronic alopecia PhenX,https://cde.nlm.nih.gov/deView?tinyId=X1pQ5qinjl,
2,,auto_arthropathy_status,Arthropathy,,HP:0001367,Abnormal joint morphology,MONDO:0006816,arthropathy,,,...,,,,,,,,Joints muscles elbows assessment status,https://cde.nlm.nih.gov/deView?tinyId=7kJKwKTTL,
3,,auto_atopic_status,Atopic dermatitis/eczema,,HP:0001047,Atopic dermatitis,MONDO:0004980,atopic eczema,,,...,,,,,,,,Atopic allergy disorder condition diagnosed in...,https://cde.nlm.nih.gov/deView?tinyId=JgLOcN_vFr,https://cde.nlm.nih.gov/cde/search?q=dermatitis
4,,auto_celiac_status,Celiac disease,,HP:0002608,Celiac disease,MONDO:0005130,celiac disease,,,...,,,,,,,,Celiac disease gluten sensitivity diagnosis ev...,https://cde.nlm.nih.gov/deView?tinyId=qieRB4328M,


In [126]:
# Rename the spreadsheet headers that later cells use to identifier-safe names.
# The row loop further down walks the frame with itertuples() and reads
# attributes such as `ms.field_name`, which only works for column names that
# are valid Python identifiers.
split_columns = {
    'Variable / Field Name': 'field_name',
    'Field Label': 'field_label',
    # the row loop selects columns by the substring 'ICD' in their name
    'ICD code': 'ICD_code',
    'HPO ID': 'HPO_ID',
    'HPO Label': 'HPO_label',
    # NOTE: renamed to upper-case MONDO_* here; the output records built later
    # use the keys 'Mondo_ID' / 'Mondo_label'.
    'Mondo ID': 'MONDO_ID',
    'Mondo Label': 'MONDO_label',
    'MAxO ID': 'MAxO_ID',
    'MAxO Label': 'MAxO_label',
    'Related CDE at NIH CDE Repository (not Gold Ribbon)': 'Related_CDE_at_NIH_CDE_Repository'
                 }
# rename() returns a new frame; `mappings` is rebound to the renamed copy.
mappings = mappings.rename(columns=split_columns)

In [128]:
# Flatten the wide sheet into one record per (row, ICD column) pair.
# A pair is kept only when the column name contains 'ICD' and the cell holds a
# string, which skips non-string cells such as missing values.
data = [
    {
        'field_name': row.field_name,
        'field_label': row.field_label,
        'Related_CDE_at_NIH_CDE_Repository': row.Related_CDE_at_NIH_CDE_Repository,
        'ICD_code': cell,
        'HPO_ID': row.HPO_ID,
        'HPO_label': row.HPO_label,
        'Mondo_ID': row.MONDO_ID,
        'Mondo_label': row.MONDO_label,
        'MAxO_ID': row.MAxO_ID,
        'MAxO_label': row.MAxO_label,
    }
    for row in mappings.itertuples()
    for column, cell in row._asdict().items()
    if 'ICD' in column and isinstance(cell, str)
]

In [129]:
# Assemble the collected records into a frame: one row per dict in `data`.
mapping_view = pd.DataFrame(data)
# Quick size check, then preview the first rows.
print(mapping_view.shape)
mapping_view.head()

(133, 10)


Unnamed: 0,field_name,field_label,Related_CDE_at_NIH_CDE_Repository,ICD_code,HPO_ID,HPO_label,Mondo_ID,Mondo_label,MAxO_ID,MAxO_label
0,auto_alopecia_status,Alopecia areata,Scarring chronic alopecia PhenX,ICD9:704.01,HP:0002232,Patchy alopecia,MONDO:0005340,alopecia areata,,
1,auto_arthropathy_status,Arthropathy,Joints muscles elbows assessment status,ICD10:M00-M02,HP:0001367,Abnormal joint morphology,MONDO:0006816,arthropathy,,
2,auto_atopic_status,Atopic dermatitis/eczema,Atopic allergy disorder condition diagnosed in...,ICD10:L20.81,HP:0001047,Atopic dermatitis,MONDO:0004980,atopic eczema,,
3,auto_celiac_status,Celiac disease,Celiac disease gluten sensitivity diagnosis ev...,ICD10:K90.0,HP:0002608,Celiac disease,MONDO:0005130,celiac disease,,
4,auto_dermatomyositis_status,Dermatomyositis,,ICD10:M33.90,,,MONDO:0016367,dermatomyositis,,


In [134]:
# get bioportal api key from secrets.yaml
# The file must define an `api_key` entry: get_icd_label reads
# secrets['api_key'] on every request. safe_load parses plain YAML data only.
with open('secrets.yaml', 'r') as f:
    secrets = yaml.safe_load(f)

In [135]:
def get_icd_label(code, timeout=30):
    """
    Look up the preferred label for an ICD code using the BioPortal search API.

    :param code: prefixed ICD code of the form "<ontology>:<local id>",
        e.g. "ICD10:K90.0" or "ICD9:704.01". A prefix containing "ICD9" is
        searched in the "ICD9CM" ontology; any other prefix is sent to
        BioPortal unchanged.
    :param timeout: seconds to wait for BioPortal before ``requests`` raises
        a timeout error (default 30).
    :return: the ``prefLabel`` of the first search hit whose ``@id`` ends in
        the local id, or None when ``code`` is not a "<prefix>:<id>" string
        or no hit matches the local id exactly.
    """
    # Non-string cells (e.g. NaN) and codes without both a prefix and a local
    # id cannot be looked up; bail out before touching the network.
    if not isinstance(code, str):
        return None
    ontology_id, separator, local_id = code.partition(":")
    ontology_id = ontology_id.strip()
    local_id = local_id.strip()
    if not separator or not ontology_id or not local_id:
        return None

    if 'ICD9' in ontology_id:
        ontology_id = 'ICD9CM'
    # NOTE(review): only ICD9 prefixes are remapped; an "ICD10" prefix is
    # searched in BioPortal's "ICD10" ontology as-is. Confirm that is the
    # intended code system for the ICD10 codes in the sheet.
    bp_url = "https://data.bioontology.org"
    parameters = {
        'apikey': secrets['api_key'],
        'q': local_id,
        'ontologies': [ontology_id]
    }
    # Without a timeout a stalled connection would block the whole .apply().
    r = requests.get(url=bp_url + "/search", params=parameters, timeout=timeout)
    results = r.json()
    # The search is free-text, so accept only the hit whose IRI ends in the
    # exact local id. A payload without 'collection' yields None.
    for res in results.get('collection') or []:
        if res.get('@id', '').split("/")[-1] == local_id:
            return res.get('prefLabel')
    return None
              
    


In [136]:
# Query BioPortal once per distinct ICD code instead of once per row: every
# lookup is a network round-trip, and a code may appear on more than one row.
icd_labels = {code: get_icd_label(code) for code in mapping_view['ICD_code'].unique()}
# Fill the column from the cached lookups; codes with no label stay None.
mapping_view['ICD_label'] = mapping_view['ICD_code'].apply(icd_labels.get)

In [137]:
# Preview the frame to check the ICD_label column added by the previous cell.
mapping_view.head()

Unnamed: 0,field_name,field_label,Related_CDE_at_NIH_CDE_Repository,ICD_code,HPO_ID,HPO_label,Mondo_ID,Mondo_label,MAxO_ID,MAxO_label,ICD_label
0,auto_alopecia_status,Alopecia areata,Scarring chronic alopecia PhenX,ICD9:704.01,HP:0002232,Patchy alopecia,MONDO:0005340,alopecia areata,,,Alopecia areata
1,auto_arthropathy_status,Arthropathy,Joints muscles elbows assessment status,ICD10:M00-M02,HP:0001367,Abnormal joint morphology,MONDO:0006816,arthropathy,,,
2,auto_atopic_status,Atopic dermatitis/eczema,Atopic allergy disorder condition diagnosed in...,ICD10:L20.81,HP:0001047,Atopic dermatitis,MONDO:0004980,atopic eczema,,,
3,auto_celiac_status,Celiac disease,Celiac disease gluten sensitivity diagnosis ev...,ICD10:K90.0,HP:0002608,Celiac disease,MONDO:0005130,celiac disease,,,Coeliac disease
4,auto_dermatomyositis_status,Dermatomyositis,,ICD10:M33.90,,,MONDO:0016367,dermatomyositis,,,


In [138]:
# Write the combined mapping table as a tab-separated file; index=False
# leaves the frame's row index out of the output.
mapping_view.to_csv('data/CDE_ICD_Ontology_Mappings.tsv', sep='\t', index=False)