# In [None]:
import pandas as pd
import xmltodict
from tqdm import tqdm
from IPython.display import display, HTML


def runProcess(cross_xml='../xml/en_product1.xml',
               linear_xml='../xml/en_product7.xml',
               output_xlsx='../output_tables/DisordersByPreferentialParent.xlsx'):
    """
    Count the active disorders attached to each preferential parent and export the table.

    Steps, each delegated to a helper of this file: parse ``cross_xml``, keep
    the active entities, keep the disorders among them, parse ``linear_xml``
    to learn each entity's preferential parent, group the disorders by that
    parent, then write the resulting two-column table to ``output_xlsx`` and
    display it as HTML.

    :param cross_xml: path of the XML file the active disorders are read from
    :param linear_xml: path of the XML file holding the preferential-parent associations
    :param output_xlsx: path of the Excel workbook that receives the table
    :return: None
    """
    print('--> Computing data ...')

    # Active entities from the first export.
    xml_data_cross = getData(cross_xml)
    actives_entities_cross = getActivesEntities(xml_data_cross)

    # Restrict the active entities to disorders.
    disorders_cross = getDisorders(actives_entities_cross)

    # Preferential parent of every entity that has one, from the second export.
    xml_data_linear = getData(linear_xml)
    dict_preferential_parents_entities = getEntitiesWithPreferentialParent(xml_data_linear)

    # Group the active disorders by their preferential parent.
    dict_preferential_parents, dict_filtered_preferential_parents = filterDisorderWithPreferentialParent(
        disorders_cross, dict_preferential_parents_entities)
    dict_results = generateMatrixResults(dict_preferential_parents, dict_filtered_preferential_parents)

    # Export as a workbook and render the same table in the notebook output.
    myDataframe = pd.DataFrame(dict_results)
    with pd.ExcelWriter(output_xlsx, engine='xlsxwriter') as writer:
        myDataframe.to_excel(writer, index=False)
    display(HTML(myDataframe.to_html(index=False)))


def getData(xmlfile):
    """
    Parse an XML file into nested dictionaries with the xmltodict package.

    The file is opened in text mode and decoded as ISO-8859-1 before parsing.

    :param xmlfile: path of the XML file to read
    :return: the parsed document, as returned by ``xmltodict.parse``
    """
    with open(xmlfile, mode="r", encoding='ISO-8859-1') as xml_handle:
        raw_xml = xml_handle.read()
        return xmltodict.parse(raw_xml)


def getActivesEntities(xml_data):
    """
    Keep the entities that do not carry the flag value '8192'.

    This script treats '8192' as the marker of an inactive entity: an entity
    is dropped as soon as one of its DisorderFlag values equals it.

    :param xml_data: parsed XML document; entities are read from
        ``xml_data["JDBOR"]["DisorderList"]["Disorder"]``
    :return: list of the entities kept, in document order
    """
    kept_entities = []
    for disorder in tqdm(xml_data["JDBOR"]["DisorderList"]["Disorder"]):
        flags = disorder['DisorderFlagList']['DisorderFlag']
        # A lone flag arrives as a dict instead of a list of dicts; wrap it so
        # both shapes go through the same membership test.
        if isinstance(flags, dict):
            flags = [flags]
        flag_values = [flag['Value'] for flag in flags]
        if '8192' not in flag_values:
            kept_entities.append(disorder)
    return kept_entities


def getDisorders(actives_entities):
    """
    Keep only the entities classified as disorders.

    An entity qualifies when its ``DisorderGroup`` has the ``@id`` '36547',
    the group this script treats as "disorder".

    :param actives_entities: iterable of entity dicts, each with a
        ``DisorderGroup`` entry holding an ``@id``
    :return: list of the matching entities, in input order
    """
    return [
        candidate
        for candidate in tqdm(actives_entities)
        if candidate['DisorderGroup']['@id'] == '36547'
    ]


def getEntitiesWithPreferentialParent(xml_data):
    """
    Map each entity's OrphaCode to its preferential parent.

    Only entities whose association list has a non-zero ``@count`` and
    contains an association of type "Preferential parent" appear in the
    result. If an entity has several such associations, the first one in
    document order is used.

    :param xml_data: parsed XML document; entities are read from
        ``xml_data["JDBOR"]["DisorderList"]["Disorder"]``
    :return: dict keyed by the entity's OrphaCode; each value is a dict with
        the keys 'Preferential parent' (name of the target disorder) and
        'OrphaCode' (OrphaCode of the target disorder)
    """
    dict_preferential_parents_entities = {}
    for entity in tqdm(xml_data["JDBOR"]["DisorderList"]["Disorder"]):
        association_list = entity['DisorderDisorderAssociationList']
        if association_list['@count'] == '0':
            continue
        associations = association_list['DisorderDisorderAssociation']
        # A single association is parsed as a dict, repeated ones as a list
        # (the same dual shape getActivesEntities handles for flags).
        # Normalise so that several associations no longer raise TypeError.
        if isinstance(associations, dict):
            associations = [associations]
        for association in associations:
            if association['DisorderDisorderAssociationType']['Name']['#text'] == "Preferential parent":
                target = association['TargetDisorder']
                dict_preferential_parents_entities[entity['OrphaCode']] = {
                    'Preferential parent': target['Name']['#text'],
                    'OrphaCode': target['OrphaCode'],
                }
                break
    return dict_preferential_parents_entities

def filterDisorderWithPreferentialParent(actives_disorders, dict_preferential_parents_entities):
    """
    Group the given disorders by the OrphaCode of their preferential parent.

    Disorders with no entry in ``dict_preferential_parents_entities`` are
    skipped.

    :param actives_disorders: iterable of disorder dicts, each with an 'OrphaCode'
    :param dict_preferential_parents_entities: dict keyed by OrphaCode whose
        values hold the keys 'Preferential parent' (parent label) and
        'OrphaCode' (parent OrphaCode)
    :return: tuple ``(dict_preferential_parents, dict_filtered_preferential_parents)``:
        the first maps each parent OrphaCode to its label, the second maps
        each parent OrphaCode to the list of OrphaCodes of its disorders
    """
    dict_filtered_preferential_parents = {}
    dict_preferential_parents = {}
    for disorder in tqdm(actives_disorders):
        # Only a missing key means "no preferential parent for this disorder";
        # any other exception is a real problem and must not be hidden.
        try:
            disorder_orpha = disorder['OrphaCode']
            parent_entry = dict_preferential_parents_entities[disorder_orpha]
            preferential_parent_label = parent_entry['Preferential parent']
            preferential_parent_orpha = parent_entry['OrphaCode']
        except KeyError:
            continue
        dict_filtered_preferential_parents.setdefault(preferential_parent_orpha, []).append(disorder_orpha)
        dict_preferential_parents[preferential_parent_orpha] = preferential_parent_label
    return dict_preferential_parents, dict_filtered_preferential_parents

def generateMatrixResults(dict_preferential_parents, dict_filtered_preferential_parents):
    """
    Build the column-oriented table of disorder counts per preferential parent.

    :param dict_preferential_parents: dict mapping a parent OrphaCode to its label
    :param dict_filtered_preferential_parents: dict mapping a parent OrphaCode
        to the list of OrphaCodes of its disorders
    :return: dict with two parallel lists: 'Preferential parent' (labels) and
        'Disorders' (number of disorders per parent), in the iteration order
        of ``dict_filtered_preferential_parents``
    """
    parent_labels = []
    disorder_counts = []
    for parent_code, child_codes in tqdm(dict_filtered_preferential_parents.items()):
        parent_labels.append(dict_preferential_parents[parent_code])
        disorder_counts.append(len(child_codes))
    return {'Preferential parent': parent_labels, 'Disorders': disorder_counts}

# Run the whole pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    runProcess()