In [None]:
import pandas as pd
import xmltodict
from tqdm import tqdm
from IPython.display import display, HTML
import configuration

def runProcess():
    """
    Run the whole counting pipeline.

    Parses '../xml/en_product1.xml', splits its entities into active and
    inactive ones, counts them per nomenclature level, then, for each of the
    two resulting tables, writes it to an Excel file under
    '../output_tables/' and displays it as HTML.

    :return: None
    """
    print('--> Computing data ...')

    # Separate the active entities from the inactive ones
    parsed_xml = getData('../xml/en_product1.xml')
    total_count, actives, inactives = getActivesEntities(parsed_xml)
    # Count the entities per nomenclature level
    summary_counts, detailed_counts = countEntities(total_count, inactives, actives)

    # Each result goes through the same steps: DataFrame -> Excel -> HTML
    reports = (
        (summary_counts, '../output_tables/CountNomenclatureFull.xlsx'),
        (detailed_counts, '../output_tables/CountNomenclatureDetails.xlsx'),
    )
    for counts, xlsx_path in reports:
        table = pd.DataFrame(counts)
        with pd.ExcelWriter(xlsx_path, engine='xlsxwriter') as xlsx_writer:
            table.to_excel(xlsx_writer, index=False)
        display(HTML(table.to_html(index=False)))

def getData(xmlfile):
    """
    Parse an XML file into a dictionary with the xmltodict package.

    :param xmlfile: path of the XML file; it is read as ISO-8859-1 text
    :return: xml parsed as dict
    """
    with open(xmlfile, "r", encoding='ISO-8859-1') as xml_handle:
        raw_xml = xml_handle.read()
        return xmltodict.parse(raw_xml)


def getActivesEntities(xml_data):
    """
    Split the disorders of the parsed XML into active and inactive entities.

    An entity is inactive when one of its DisorderFlag values is '8192'.
    Every other entity, including one that carries no flag at all, is
    considered active.

    :param xml_data: dict produced by xmltodict; the entities are read from
        xml_data["JDBOR"]["DisorderList"]["Disorder"]
    :return: tuple (n_full_entities, actives_entities, inactives_entities)
        where n_full_entities is the number of entities visited and the two
        lists hold the entity dicts in their original order
    """
    inactive_flag = '8192'

    # An empty <DisorderList/> is parsed as None or as a dict without a
    # "Disorder" key; both cases mean "no entity".
    disorders = (xml_data["JDBOR"]["DisorderList"] or {}).get("Disorder") or []
    # xmltodict yields a dict instead of a list when there is a single child
    if isinstance(disorders, dict):
        disorders = [disorders]

    n_full_entities = 0
    inactives_entities = []
    actives_entities = []
    for entity in tqdm(disorders):
        n_full_entities += 1

        # Same single-child / empty-element handling for the flags
        flag_list = entity.get('DisorderFlagList') or {}
        flags = flag_list.get('DisorderFlag') or []
        if isinstance(flags, dict):
            flags = [flags]

        flag_values = {flag.get('Value') for flag in flags}
        if inactive_flag in flag_values:
            inactives_entities.append(entity)
        else:
            actives_entities.append(entity)
    return n_full_entities, actives_entities, inactives_entities



# Report layout: (DisorderGroup id, group label, ((DisorderType id, type label), ...)).
# The order of this table is the order of the rows in the details report.
_NOMENCLATURE = (
    ('36540', 'Orphanet actives group of disorders', (
        ('36561', 'Orphanet actives categories'),
        ('21436', 'Orphanet actives clinical groups'),
    )),
    ('36547', 'Orphanet actives disorders', (
        ('21408', 'Orphanet actives biological anomalies'),
        ('21422', 'Orphanet actives clinical syndromes'),
        ('21394', 'Orphanet actives diseases'),
        ('21401', 'Orphanet actives malformation syndromes'),
        ('21415', 'Orphanet actives morphological anomalies'),
        ('21429', 'Orphanet actives particular clinical situations in a disease or syndrome anomalies'),
    )),
    ('36554', 'Orphanet actives subtypes of disorder', (
        ('21450', 'Orphanet actives clinical subtype of disorders'),
        ('21443', 'Orphanet actives etiological subtype of disorders'),
        ('21457', 'Orphanet actives histopathological subtype of disorders'),
    )),
)


def _formatPercent(amount, total):
    """
    Express amount as a percentage of total, rounded to 2 decimals.

    :param amount: numerator of the ratio
    :param total: denominator of the ratio
    :return: the percentage as a string, or '-' when total is 0 (the
        percentage is undefined and would otherwise raise ZeroDivisionError)
    """
    if not total:
        return '-'
    return str(round((amount * 100) / total, 2))


def countEntities(n_entities, inactives_entities, actives_entities):
    """
    Count the active entities per disorder group and per disorder type.

    Each active entity is classified by entity['DisorderGroup']['@id'] and,
    inside a known group, by entity['DisorderType']['@id']. Entities whose
    group is unknown are ignored; entities whose type is unknown for their
    group are only counted at group level.

    :param n_entities: total number of entities (actives + inactives)
    :param inactives_entities: list of inactive entities (only its length is used)
    :param actives_entities: list of active entity dicts to classify
    :return: tuple (dict_results_full, dict_results_details); both dicts have
        the keys 'Nomenclature', 'Amount' and '%' holding same-length lists.
        Percentages of the first are relative to n_entities, percentages of
        the second are relative to the number of active entities.
    """
    # Only counters are kept: the entities themselves are never needed again
    group_counts = {group_id: 0 for group_id, _, _ in _NOMENCLATURE}
    type_counts = {
        (group_id, type_id): 0
        for group_id, _, types in _NOMENCLATURE
        for type_id, _ in types
    }

    for entity in tqdm(actives_entities):
        group_id = entity['DisorderGroup']['@id']
        if group_id not in group_counts:
            continue
        group_counts[group_id] += 1
        # A type id is only meaningful within its own group
        type_key = (group_id, entity['DisorderType']['@id'])
        if type_key in type_counts:
            type_counts[type_key] += 1

    n_inactives = len(inactives_entities)
    n_actives = len(actives_entities)

    dict_results_full = {}
    dict_results_full['Nomenclature'] = ['Orphanet clinical entities',
                                         'Orphanet actives clinical entities',
                                         'Orphanet inactives clinical entities'
                                        ]
    dict_results_full['Amount'] = [n_entities, n_actives, n_inactives]
    dict_results_full['%'] = ['-',
                              _formatPercent(n_actives, n_entities),
                              _formatPercent(n_inactives, n_entities)
                             ]

    # Flatten the nomenclature table: each group row followed by its types
    labels = []
    amounts = []
    for group_id, group_label, types in _NOMENCLATURE:
        labels.append(group_label)
        amounts.append(group_counts[group_id])
        for type_id, type_label in types:
            labels.append(type_label)
            amounts.append(type_counts[(group_id, type_id)])

    dict_results_details = {}
    dict_results_details['Nomenclature'] = labels
    dict_results_details['Amount'] = amounts
    dict_results_details['%'] = [_formatPercent(amount, n_actives) for amount in amounts]

    return dict_results_full, dict_results_details


# Run the whole pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    runProcess()