#### Check whether a metabolite is in a libsbml model

Note: this takes way too much time -> maybe still include it in the toolbox, since it is already implemented.

In [None]:
from refinegems.curation.polish import get_set_of_curies

# @TODO make it a bit more sophisticated
def getAnnotationDict_libsbml(entity):
    """Return the CURIE annotations of a libSBML entity, or None.

    The resource URIs of *all* CVTerms attached to ``entity`` are gathered
    and converted in a single ``get_set_of_curies`` call, so annotations
    stored in a second or later CVTerm are not lost.

    Args:
        entity: Object exposing ``getCVTerms()``; each returned term must
            expose ``getNumResources()`` and ``getResourceURI(i)``.

    Returns:
        The first element of ``get_set_of_curies(uris)`` (presumably a
        mapping of database prefix to identifiers - confirm against
        ``refinegems.curation.polish``), or ``None`` when the entity has
        no resource URIs or when anything goes wrong while reading them.
    """
    try:
        uris = [
            cvterm.getResourceURI(idx)
            for cvterm in entity.getCVTerms()
            for idx in range(cvterm.getNumResources())
        ]
        if not uris:
            return None
        return get_set_of_curies(uris)[0]
    except Exception:
        # Deliberately best-effort: an entity whose annotations cannot be
        # read is treated like an entity without annotations.
        return None
    
def hasAnnotation_libmodel(id, idtype, entitytype, libmodel):
    """List the IDs of model entities annotated with a given identifier.

    Args:
        id: Identifier to look for inside the annotations.
        idtype: Annotation key (database prefix) under which ``id`` is
            expected.
        entitytype: Either ``'reaction'`` or ``'species'``; selects which
            list of the model is searched.
        libmodel: Model object exposing ``getListOfReactions()`` and
            ``getListOfSpecies()``.

    Returns:
        list: ``getId()`` of every entity whose annotations contain ``id``
        under ``idtype``, in model order. Empty if nothing matches.

    Raises:
        ValueError: If ``entitytype`` is neither ``'reaction'`` nor
            ``'species'``.
    """
    if entitytype == 'reaction':
        candidates = libmodel.getListOfReactions()
    elif entitytype == 'species':
        candidates = libmodel.getListOfSpecies()
    else:
        raise ValueError(f'Unknown entity type: {entitytype}')

    def _carries_id(entity):
        # Entities without (readable) annotations can never match.
        annotations = getAnnotationDict_libsbml(entity)
        if not annotations:
            return False
        return idtype in annotations.keys() and id in annotations[idtype]

    return [entity.getId() for entity in candidates if _carries_id(entity)]


### Slower than the existing implementation in kegg.py

In [None]:
from Bio import KEGG
import re 

# @NEW : better version of the KEGG parser in get_locus_ec
def bioservices_parse_KEGG_gene(locus_tag):
    """Collect orthology IDs, EC numbers and database links of a KEGG gene.

    NOTE(review): the code instantiates ``KEGG()`` and calls ``.get`` and
    ``.parse`` on it, which looks like the bioservices KEGG interface,
    while the visible import is ``from Bio import KEGG`` - confirm the
    intended import.

    Args:
        locus_tag: Query passed to ``KEGG().get``; stored unchanged under
            the key ``'orgid:locus'``.

    Returns:
        dict: Always holds ``'orgid:locus'`` and ``'ec-code'`` (a list of
        EC number strings, or ``None`` if none were found). If the entry
        has an ``ORTHOLOGY`` section, ``'kegg.orthology'`` holds its keys.
        Every ``DBLINKS`` item is added under a normalised, lower-case
        database name.
    """
    gene_info = dict()
    gene_info['orgid:locus'] = locus_tag

    # retrieve KEGG gene entry
    k = KEGG()
    gene_entry = k.parse(k.get(locus_tag))

    # skip if no usable entry was found; a non-dict result (None, or e.g.
    # an integer status code) cannot be inspected via .keys() below
    if not isinstance(gene_entry, dict) or not gene_entry:
        gene_info['ec-code'] = None
        return gene_info

    # extract orthology and EC numbers
    if 'ORTHOLOGY' in gene_entry:
        orthology = gene_entry['ORTHOLOGY']
        # get KEGG orthology IDs
        gene_info['kegg.orthology'] = list(orthology.keys())
        # get EC numbers: everything between 'EC:' and the last ']'
        ec_pattern = re.compile(r'(?<=EC:).*(?=\])')
        ec_groups = [
            hit.group(0)
            for hit in map(ec_pattern.search, orthology.values())
            if hit
        ]
        if ec_groups:
            # one description may list several EC numbers separated by spaces
            gene_info['ec-code'] = [
                ec for group in ec_groups for ec in group.split(' ')
            ]

    if 'ec-code' not in gene_info:
        gene_info['ec-code'] = None

    # get more information about connections to other databases
    if 'DBLINKS' in gene_entry:
        for dbname, ids in gene_entry['DBLINKS'].items():
            # remove a trailing 'ID' if 'NCBI' is part of the name
            conform_dbname = re.sub(pattern=r'(NCBI)(.*)(ID$)', repl=r'\1\2', string=dbname)
            # remove special characters except underscore
            conform_dbname = re.sub(r'[^\w]', '', conform_dbname)
            gene_info[conform_dbname.lower()] = ids

    return gene_info

# @NEW : better version of the KEGG parser in get_locus_ec_kegg
def bioservices_parse_KEGG_ec(ec):
    """Collect reaction IDs, equations and cross-references of a KEGG EC entry.

    NOTE(review): the code instantiates ``KEGG()`` and calls ``.get`` and
    ``.parse`` on it, which looks like the bioservices KEGG interface,
    while the visible import is ``from Bio import KEGG`` - confirm the
    intended import.

    Args:
        ec: Query passed to ``KEGG().get``; stored unchanged under the key
            ``'ec-code'``.

    Returns:
        dict: With the keys

        * ``'ec-code'``: the query as given,
        * ``'id'``: entries of ``ALL_REAC`` with trailing ``;`` stripped and
          one ``'(other)'`` marker removed, or ``None`` without ``ALL_REAC``,
        * ``'equation'``: space-joined items of ``REACTION``; only filled
          when no reaction ID was found, otherwise ``None``,
        * ``'reference'``: dict that may hold ``'kegg.orthology'``,
          ``'kegg.pathway'``, ``'brenda'`` and ``'cas'``.

        If no entry could be parsed, ``'id'`` and ``'equation'`` are
        ``None`` and ``'reference'`` is empty.
    """
    ec_info = dict()
    ec_info['ec-code'] = ec

    # retrieve KEGG entry
    k = KEGG()
    ec_entry = k.parse(k.get(ec))

    # A missing entry (None, or e.g. an integer status code) is handled like
    # an entry without any sections, matching the gene parser's behaviour of
    # returning a result instead of crashing on .keys().
    if not isinstance(ec_entry, dict):
        ec_entry = {}

    # retrieve reaction information from entry
    if 'ALL_REAC' in ec_entry:
        reaction_ids = [token.rstrip(';') for token in ec_entry['ALL_REAC']]
        if '(other)' in reaction_ids:
            reaction_ids.remove('(other)')
        ec_info['id'] = reaction_ids
    else:
        ec_info['id'] = None

    # retrieve reaction equation, only as a fallback for missing reaction IDs
    if not ec_info['id'] and 'REACTION' in ec_entry:
        ec_info['equation'] = [" ".join(parts) for parts in ec_entry['REACTION']]
    else:
        ec_info['equation'] = None

    # retrieve database links from entry
    refs = dict()
    if 'ORTHOLOGY' in ec_entry:
        refs['kegg.orthology'] = list(ec_entry['ORTHOLOGY'].keys())
    if 'PATHWAY' in ec_entry:
        refs['kegg.pathway'] = list(ec_entry['PATHWAY'].keys())
    # @TODO extend as needed
    if 'DBLINKS' in ec_entry:
        for dbname, ids in ec_entry['DBLINKS'].items():
            if 'BRENDA' in dbname:
                refs['brenda'] = ids
            if 'CAS' == dbname:
                refs['cas'] = ids
    ec_info['reference'] = refs

    return ec_info

