# In [None]:
import pandas as pd
import xmltodict
from tqdm import tqdm
from IPython.display import display, HTML


def runProcess():
    """
    Compute the disorder alignment statistics, export them and display them.

    Steps: parse '../xml/en_product1.xml', keep the active entities, keep the
    disorders among them, collect the alignment amounts and percentages from
    getAlignmentsData, then write the resulting table to
    '../output_tables/GeneticDisorders.xlsx' and render it as HTML.
    :return: None
    """
    print('--> Computing data ...')

    ## SELECT ACTIVES ENTITIES
    xml_data_cross = getData('../xml/en_product1.xml')
    actives_entities_cross = getActivesEntities(xml_data_cross)
    n_actives_entities_cross = len(actives_entities_cross)

    ## SELECT DISORDERS
    disorders = getDisorders(actives_entities_cross)
    n_disorders = len(disorders)
    if n_actives_entities_cross:
        percent_disorders = str(round((n_disorders * 100) / n_actives_entities_cross, 2))
    else:
        # No active entity at all: report 0.0 rather than raising ZeroDivisionError.
        percent_disorders = '0.0'

    ### GET STATISTICS MAPPINGS
    (
        n_disorders_non_aligned, percent_disorders_non_aligned,
        n_disorders_ICD_OMIM_aligned, percent_disorders_ICD_OMIM_aligned,
        n_disorders_OMIM_aligned, percent_disorders_OMIM_aligned,
        n_disorders_OMIM_exact_aligned, percent_disorders_OMIM_exact_aligned,
        n_disorders_ICD10_aligned, percent_disorders_ICD10_aligned,
        n_disorders_ICD10_exact_aligned, percent_disorders_ICD10_exact_aligned,
        n_disorders_ICD11_aligned, percent_disorders_ICD11_aligned,
        n_disorders_ICD11_exact_aligned, percent_disorders_ICD11_exact_aligned,
    ) = getAlignmentsData(disorders)

    # One (label, amount, percent) tuple per table row, in display order.
    # NOTE(review): the 'at least OMIM or ICD-10' amount comes from a counter
    # that getAlignmentsData also increments for ICD-11 references.
    rows = [
        ('Actives Clinical Entities', n_actives_entities_cross, '-'),
        ('Actives Disorders', n_disorders, percent_disorders),
        ('Non Aligned Disorder',
         n_disorders_non_aligned, percent_disorders_non_aligned),
        ('Disorders Aligned with at least OMIM or ICD-10',
         n_disorders_ICD_OMIM_aligned, percent_disorders_ICD_OMIM_aligned),
        ('Disorders Aligned with at least one OMIM',
         n_disorders_OMIM_aligned, percent_disorders_OMIM_aligned),
        ('Disorders Aligned with at least one OMIM by an Exact relationship',
         n_disorders_OMIM_exact_aligned, percent_disorders_OMIM_exact_aligned),
        ('Disorders Aligned with at least one ICD-10',
         n_disorders_ICD10_aligned, percent_disorders_ICD10_aligned),
        ('Disorders Aligned with at least one ICD-10 by an Exact relationship',
         n_disorders_ICD10_exact_aligned, percent_disorders_ICD10_exact_aligned),
        ('Disorders Aligned with at least one ICD-11',
         n_disorders_ICD11_aligned, percent_disorders_ICD11_aligned),
        ('Disorders Aligned with at least one ICD-11 by an Exact relationship',
         n_disorders_ICD11_exact_aligned, percent_disorders_ICD11_exact_aligned),
    ]

    # Column-oriented dict so the DataFrame keeps the Alignment / Amount / % order.
    dict_results = {
        'Alignment': [label for label, _, _ in rows],
        'Amount': [amount for _, amount, _ in rows],
        '%': [percent for _, _, percent in rows],
    }

    myDataframe = pd.DataFrame(dict_results)
    with pd.ExcelWriter('../output_tables/GeneticDisorders.xlsx', engine='xlsxwriter') as writer:
        myDataframe.to_excel(writer,index=False)
    display(HTML(myDataframe.to_html(index=False)))

def getData(xmlfile):
    """
    Load an XML file and convert its content to a dict with the xmltodict package
    :param xmlfile: path of the XML file; it is read as ISO-8859-1 text
    :return: xml parsed as dict
    """
    with open(xmlfile, "r", encoding='ISO-8859-1') as handle:
        raw_xml = handle.read()
    # The whole document is already in memory, so parsing happens after the file is closed.
    return xmltodict.parse(raw_xml)


def getActivesEntities(xml_data):
    """
    Drop the inactive entities, i.e. those carrying a DisorderFlag whose 'Value' is '8192'
    :param xml_data: parsed XML dict; xml_data["JDBOR"]["DisorderList"]["Disorder"] is iterated
    :return: list of the entities that have no DisorderFlag with 'Value' equal to '8192'
    """
    kept = []
    for candidate in tqdm(xml_data["JDBOR"]["DisorderList"]["Disorder"]):
        flags = candidate['DisorderFlagList']['DisorderFlag']
        # A lone flag is a dict rather than a sequence; wrap it so both shapes share one test.
        if isinstance(flags, dict):
            flags = [flags]
        flag_values = [flag['Value'] for flag in flags]
        if '8192' not in flag_values:
            kept.append(candidate)
    return kept


def getDisorders(actives_entities):
    """
    Select the disorders, i.e. the entities whose DisorderGroup '@id' is '36547'
    :param actives_entities: iterable of entity dicts exposing ['DisorderGroup']['@id']
    :return: list of the matching entities, in their original order
    """
    return [
        candidate
        for candidate in actives_entities
        if candidate['DisorderGroup']['@id'] == '36547'
    ]


def _formatPercent(count, total):
    """
    Express count as a percentage of total, rounded to 2 decimals, as a string
    :param count: numerator
    :param total: denominator
    :return: the percentage as a string, or '0.0' when total is 0 (avoids ZeroDivisionError)
    """
    if not total:
        return '0.0'
    return str(round((count * 100) / total, 2))


def getAlignmentsData(disorders):
    """
    Count how many disorders are aligned with OMIM, ICD-10 and ICD-11

    Each disorder is counted at most once per statistic:
    - non aligned: its ExternalReferenceList '@count' is '0';
    - aligned with a source: at least one ExternalReference has that 'Source';
    - exactly aligned with a source: at least one ExternalReference of that
      source has a DisorderMappingRelation '@id' of '21527' (the id this file
      treats as the exact relationship). Every reference is inspected, so an
      exact mapping is found even when it is not the first one of its source.
    A disorder whose references all come from other sources is counted
    neither as aligned nor as non aligned.

    :param disorders: sequence of disorder dicts exposing 'ExternalReferenceList'
    :return: flat tuple of 16 values, alternating amount and percentage
             (string, relative to len(disorders), '0.0' when there is no
             disorder), in this order: non aligned, aligned with any tracked
             source, OMIM, OMIM exact, ICD-10, ICD-10 exact, ICD-11, ICD-11 exact
    """
    # Listed in the order the per-source statistics are returned.
    tracked_sources = ('OMIM', 'ICD-10', 'ICD-11')
    exact_relation_id = '21527'

    aligned_counts = dict.fromkeys(tracked_sources, 0)
    exact_counts = dict.fromkeys(tracked_sources, 0)
    n_disorders_ICD_OMIM_aligned = 0
    n_disorders_non_aligned = 0

    for disorder in tqdm(disorders):
        reference_list = disorder['ExternalReferenceList']
        if reference_list['@count'] == '0':
            n_disorders_non_aligned += 1
            continue

        references = reference_list['ExternalReference']
        # A lone reference is a dict rather than a sequence; handle both shapes alike.
        if isinstance(references, dict):
            references = [references]

        # Sets make each disorder count once per source, however many references it has.
        sources_seen = set()
        sources_exact = set()
        for reference in references:
            source = reference['Source']
            if source not in aligned_counts:
                continue
            sources_seen.add(source)
            if reference['DisorderMappingRelation']['@id'] == exact_relation_id:
                sources_exact.add(source)

        if sources_seen:
            n_disorders_ICD_OMIM_aligned += 1
        for source in sources_seen:
            aligned_counts[source] += 1
        for source in sources_exact:
            exact_counts[source] += 1

    n_disorders_total = len(disorders)
    results = [
        n_disorders_non_aligned,
        _formatPercent(n_disorders_non_aligned, n_disorders_total),
        n_disorders_ICD_OMIM_aligned,
        _formatPercent(n_disorders_ICD_OMIM_aligned, n_disorders_total),
    ]
    for source in tracked_sources:
        results.append(aligned_counts[source])
        results.append(_formatPercent(aligned_counts[source], n_disorders_total))
        results.append(exact_counts[source])
        results.append(_formatPercent(exact_counts[source], n_disorders_total))
    return tuple(results)


# Run the whole report only when this file is executed as a script.
if __name__ == '__main__':
    runProcess()