In [1]:
import pandas as pd

rxns = pd.read_csv('/workspaces/ECFERS/src/frenda_brenda/Files/KEGG_Filtered/Reactions_M3_plusCustom.csv')

## 1. Retrieve coordinates from KEGG
### 1a. Get the (x,y) coordinates for each species in the KEGG map -->

In [2]:
orthology_ids = {'R00315': ['K00925'],
 'R01353': ['K00925', 'K00932', 'K19697'],
 'R00235': ['K01895', 'K01913'],
 'R00236': ['K01895', 'K01913'],
 'R00316': ['K01895', 'K01913'],
 'R00925': ['K01895', 'K01908'],
 'R00742': ['K01946',
  'K01961',
  'K01962',
  'K01963',
  'K01964',
  'K02160',
  'K11262',
  'K11263',
  'K15036',
  'K15037',
  'K18472',
  'K18603',
  'K18604',
  'K18605',
  'K19312',
  'K22568'],
 'R01324': ['K01681', 'K01682', 'K27802'],
 'R01325': ['K01681', 'K01682', 'K27802'],
 'R01900': ['K01681', 'K01682', 'K27802'],
 'R00754': ['K00001',
  'K00121',
  'K04022',
  'K04072',
  'K13951',
  'K13952',
  'K13953',
  'K13954',
  'K13980',
  'K18857'],
 'R02124': ['K00001',
  'K00121',
  'K11149',
  'K11154',
  'K13369',
  'K13951',
  'K13952',
  'K13953',
  'K13980',
  'K15734'],
 'R04880': ['K00001',
  'K00121',
  'K04072',
  'K13951',
  'K13952',
  'K13953',
  'K13954',
  'K13980',
  'K18857'],
 'R05233': ['K00001', 'K00121', 'K04072', 'K13953', 'K13954'],
 'R05234': ['K00001', 'K00121', 'K04072', 'K13953', 'K13954'],
 'R08557': ['K11440'],
 'R08558': ['K11440'],
 'R00746': ['K00002', 'K12957', 'K13979'],
 'R00352': ['K01648', 'K15230', 'K15231'],
 'R00351': ['K01647', 'K01659', 'K05942', 'K27797'],
 'R03815': ['K00382'],
 'R07618': ['K00382'],
 'R00209': ['K00161', 'K00162', 'K00163', 'K00382', 'K00627'],
 'R01221': ['K00281', 'K00282', 'K00283', 'K00382', 'K00605', 'K02437'],
 'R01933': ['K00382', 'K00658', 'K15791'],
 'R08549': ['K00164', 'K00382', 'K00658', 'K01616'],
 'R00704': ['K03777', 'K03778'],
 'R01082': ['K01675', 'K01676', 'K01677', 'K01678', 'K01679', 'K01774'],
 'R01736': ['K01069'],
 'R00267': ['K00031'],
 'R00268': ['K00031'],
 'R01899': ['K00031'],
 'R00342': ['K00024', 'K00025', 'K00026'],
 'R00214': ['K00027', 'K00028'],
 'R00217': ['K01003'],
 'R00216': ['K00029'],
 'R00230': ['K00625', 'K04020', 'K13788', 'K15024'],
 'R00921': ['K00625', 'K13788', 'K13923', 'K15024'],
 'R00341': ['K01610'],
 'R00345': ['K01595'],
 'R00200': ['K00873', 'K12406'],
 'R01138': ['K00873', 'K12406'],
 'R01858': ['K00873', 'K12406'],
 'R02320': ['K00873', 'K12406'],
 'R00199': ['K01007'],
 'R02164': ['K00233',
  'K00234',
  'K00235',
  'K00236',
  'K00237',
  'K00239',
  'K00240',
  'K00241',
  'K00242',
  'K00244',
  'K00245',
  'K00246',
  'K00247',
  'K18859',
  'K18860',
  'K25801',
  'K25995',
  'K25996'],
 'R00405': ['K01902', 'K01903'],
 'R02404': ['K01902', 'K01903'],
 'R00220': ['K01752', 'K01754', 'K17989'],
 'R00519': ['K00122', 'K00123', 'K00124', 'K00126', 'K00127', 'K22515'],
 'R00344': ['K01958', 'K01959', 'K01960'],
 'R00226': ['K01652', 'K01653', 'K11258'],
 'R08648': ['K01652', 'K01653', 'K11258'],
 'R00945': ['K00600'],
 'R09099': ['K00600'],
 'R00497': ['K01920', 'K21456'],
 'R00371': ['K00639'],
 'R03425': ['K00281', 'K00282', 'K00283'],
 'R00479': ['K01637']}

In [3]:
import xml.etree.ElementTree as ET
from Bio.KEGG.KGML import KGML_parser
from Bio.Graphics.KGML_vis import KGMLCanvas

def map_reaction_to_entry_id(xml_file_path, reaction_orthology_dict):
    # Parse the XML file
    pathway = KGML_parser.read(open(xml_file_path, 'r'))

    # Initialize dictionary to hold the reaction ID -> entry ID mapping
    reaction_entry_map = {}

    # Loop through each entry in the pathway
    for entry in pathway.entries.values():
        # Check if the entry is of type 'ortholog'
        if entry.type == "ortholog":
            # Extract the orthology IDs (split by space, remove 'ko:' prefix)
            orthology_ids_in_entry = entry.name.replace('ko:', '').split()

            # Convert to set for comparison
            orthology_ids_in_entry_set = set(orthology_ids_in_entry)

            # Loop through the reaction dictionary
            for reaction_id, orthology_ids in reaction_orthology_dict.items():
                if len(orthology_ids)==0:
                    continue
                # Convert orthology_ids from the reaction dictionary to a set
                orthology_ids_set = set(orthology_ids)

                # Check if all orthology IDs in the entry are present in orthology_ids_set
                # CHANGING THIS AS AN EXPERIMENT: CHECK IF ANY OF THE ORTHOLOGY IDS IN THE ENTRY ARE PRESENT IN ORTHOLOGY_IDS_SET
                # if not orthology_ids_set.isdisjoint(orthology_ids_in_entry_set):
                # if orthology_ids_set.issubset(orthology_ids_in_entry_set):
                if not orthology_ids_set.isdisjoint(orthology_ids_in_entry_set):
                    # Add the mapping to the dictionary
                    if reaction_id not in reaction_entry_map:
                        reaction_entry_map[reaction_id] = []
                    reaction_entry_map[reaction_id].append(entry.id)  # Save the entry ID

    return reaction_entry_map

In [4]:
xml_file = '/workspaces/ECFERS/ko01100.xml'
reaction_entry_map = map_reaction_to_entry_id(xml_file, orthology_ids)

In [5]:
pathway = KGML_parser.read(open(xml_file, 'r'))
reaction_dict = reaction_entry_map
    
# Initialize dictionary to hold the cpd ID and Reaction ID mapping
cpd_coordinates_map = {}

# Loop through the reaction dictionary
for reaction_id, entry_ids in reaction_dict.items():
    for entry_id in entry_ids:
        # Search for the reaction in the pathway
        reaction_entry = next((reaction for reaction in pathway.reactions if reaction.id == entry_id), None)

        if reaction_entry:
            newrns = []
            for rn in reaction_entry._names:
                newrns.append(rn.replace('rn:',''))
            if reaction_id in newrns:
                # Collect substrates
                sloop_counter = 0
                for substrate in reaction_entry._substrates:
                    substrate_entry = next((sub for sub in pathway.entries.values() if sub.id == substrate), None)
                    x, y = substrate_entry.graphics[0].x, substrate_entry.graphics[0].y
                    
                    rxnentry = str(reaction_entry)
                    # Split the string into lines and find the line containing 'Substrates'
                    substrates_line = next(line for line in rxnentry.splitlines() if 'Substrates:' in line)
                    # Extract the part of the line after 'Substrates:'
                    substrates_part = substrates_line.split('Substrates: ')[1]
                    # Split the substrates by comma and strip the 'cpd:' prefix
                    cpd_id = [substrate.strip().replace('cpd:', '') for substrate in substrates_part.split(',')][sloop_counter]
                    sloop_counter += 1

                    cpd_coordinates_map[f"{cpd_id}_{reaction_id}_{entry_id}"] = (x, y)

                # Collect products
                ploop_counter = 0
                for product in reaction_entry._products:
                    product_entry = next((prod for prod in pathway.entries.values() if prod.id == product), None)
                    x, y = product_entry.graphics[0].x, product_entry.graphics[0].y
                    
                    rxnentry = str(reaction_entry)
                    # Split the string into lines and find the line containing 'Substrates'
                    products_line = next(line for line in rxnentry.splitlines() if 'Products:' in line)
                    # Extract the part of the line after 'Substrates:'
                    products_part = products_line.split('Products: ')[1]
                    # Split the substrates by comma and strip the 'cpd:' prefix
                    cpd_id = [product.strip().replace('cpd:', '') for product in products_part.split(',')][ploop_counter]
                    ploop_counter += 1

                    cpd_coordinates_map[f"{cpd_id}_{reaction_id}_{entry_id}"] = (x, y)

In [6]:
data = pd.read_csv('/workspaces/ECFERS/kegg_labels_add.csv',dtype='str',encoding='us-ascii',encoding_errors='ignore')
data = data.where(data.notnull(), None)

In [7]:
# Create a new dictionary with replaced keys
cpd_coordinates_map_tran = {}
for key, value in cpd_coordinates_map.items():
    new_key = key
    # Check for each "KEGG ID" in the key string
    for _, row in data.iterrows():
        kegg_id = row['KEGG ID']
        id_value = row['ID']
        # Replace any occurrence of the "KEGG ID" within the key string
        if kegg_id in new_key:
            new_key = new_key.replace(kegg_id, id_value)
    # Add the modified key-value pair to the new dictionary
    cpd_coordinates_map_tran[new_key] = value

## 2. Apply KEGG coordinates to SBMLNetwork layout

In [8]:
import tellurium as te
import sbmlnetwork

r = te.loada('/workspaces/ECFERS/src/frenda_brenda/Files/KEGG_Filtered/M3a_renamed.txt')
net = sbmlnetwork.load(r.getSBML())

# net = sbmlnetwork.load('/workspaces/ECFERS/src/frenda_brenda/Files/KEGG_Filtered/M3_renamed_fin.sbml')

In [9]:
net.auto_layout(max_num_connected_edges=1000)

In [10]:
df = pd.read_csv('/workspaces/ECFERS/src/frenda_brenda/Files/KEGG_Filtered/Reactions_M3_plusCustom.csv')

net_reactions = net.get_reactions_list()

label_to_reaction_id = dict(zip(df['Label'], df['Reaction ID']))
M3_reaction_labels = net.get_reaction_ids()
M3_reactionIDs = [label_to_reaction_id.get(item, item) for item in M3_reaction_labels]

# label_to_ID_dict = {}
# i = 0
# for label in M3_reaction_labels:
#     label_to_ID_dict[label] = M3_reactionIDs[i]
#     i = i + 1

In [11]:
label_to_ID_dict = {}
i = 0
for net_reaction in net_reactions:
    label_to_ID_dict[net_reaction.get_reaction_id()] = {'id': M3_reactionIDs[i], 'reaction_object': net_reaction}
    i = i+1

In [12]:
# Translate label_to_reaction_id dictionary
reaction_id_to_label = {v: k for k, v in label_to_reaction_id.items()}

# Process cpd_coordinates_map_tran to update reaction labels
translated_cpd_coordinates = {}
for key, coords in cpd_coordinates_map_tran.items():
    species, reaction, entry_id = key.rsplit("_", 2)
    reaction_label = reaction_id_to_label.get(reaction, reaction)  # Translate if possible
    size = net.get_species(species).get_size()
    updatedcoords = (coords[0]-(size[0]/2), coords[1]-(size[1]/2))

    translated_cpd_coordinates[(species, reaction_label, entry_id)] = updatedcoords

In [13]:
# Organize species mapping
species_mapping = {}
for (species, reaction, entry_id), coords in translated_cpd_coordinates.items():
    species_mapping.setdefault(species, {}).setdefault(reaction, {})[entry_id] = coords

# Organize species mapping
reaction_mapping = {}
for (species, reaction, entry_id), coords in translated_cpd_coordinates.items():
    reaction_mapping.setdefault(reaction, {}).setdefault(species, {})[entry_id] = coords

In [14]:
entry_mapping = {}

for species, reaction, entry_id in translated_cpd_coordinates.keys():
    if entry_id not in entry_mapping:
        entry_mapping[entry_id] = {}
    if reaction not in entry_mapping[entry_id]:
        entry_mapping[entry_id][reaction] = []
    entry_mapping[entry_id][reaction].append(species)

In [15]:
specieslist = net.get_species_list()

# Dictionary to store already set coordinates and their aliases
set_coordinates = {}
reaction_aliases = {}

multipleentryids = []
special_metabs = []

for spc in specieslist:
    try:
        allcoords = species_mapping[spc.get_species_id()]
    except KeyError:
        spc.hide()
        continue

    # Extract all coordinate values
    coordinates = {coord for subdict in allcoords.values() for coord in subdict.values()}

    # Check if all coordinates are the same
    all_same = len(coordinates) == 1

    if all_same:  # SETTING COORDINATES FOR ALL SPECIES WHICH ONLY HAVE ONE POSITION
        coordinate = next(iter(coordinates))
        if coordinate not in set_coordinates:
            spc.set_position(coordinate)
            set_coordinates[coordinate] = spc.get_species_id()  # Store the species ID associated with the coordinate
        else:
            # Species has already been set for this coordinate, no further action needed
            pass
    else:  # appears in more than one position, will need to make an alias
        special_metabs.append(spc.get_species_id())

        multiple_entries = False

        for reaction, entry_coords in allcoords.items():
            if len(entry_coords) > 1:
                multiple_entries = True

        if len(allcoords) == 1:
            # this means there is only one reaction, but >1 entry ID for that reaction. need to make an alias for the REACTION here
            multipleentryids.append(reaction)
        else:
            # multiple reactions
            if multiple_entries:
                # at least one of the reactions contains >1 entry ID
                multipleentryids.append(reaction)
            else:
                print(spc)
                print(allcoords)

Species(id=Pyr, index=0)
{'R346': {'1556': (2369.0, 1695.0)}, 'R406': {'4482': (2369.0, 1695.0)}, 'R347': {'3083': (2369.0, 1695.0)}, 'R350': {'3083': (2369.0, 1695.0)}, 'R181': {'1304': (2369.0, 1695.0)}, 'R62': {'2209': (2369.0, 1695.0)}, 'R63': {'2205': (2369.0, 1695.0)}, 'R270': {'2409': (2079.0, 3087.0)}, 'R272': {'2409': (2079.0, 3087.0)}, 'R273': {'3008': (2369.0, 1695.0)}, 'R345': {'3008': (2369.0, 1695.0)}, 'R351': {'3083': (2369.0, 1695.0)}}


In [16]:
duplicatereactions = []
for rxn in multipleentryids:
    for v in reaction_mapping[rxn].values():
        if(len(v)) > 1:
            duplicatereactions.append(rxn)

In [17]:
duplicatereactions = list(set(duplicatereactions))

In [18]:
for metabolite, entry_coords in reaction_mapping['R64'].items():
    entry_ids = list(entry_coords.keys())  # Get keys as a list

id_index_assignments = {}

In [19]:
# making alias reactions and setting coordinates for R64

for spc in net.get_reaction('R64').get_species_list():
    if spc.get_species_id() in reaction_mapping['R64'].keys():
        # print(f'setting position of {spc} with entry ID {entry_ids[0]} to {reaction_mapping['R64'][spc.get_species_id()][entry_ids[0]]}')
        spc.set_position(reaction_mapping['R64'][spc.get_species_id()][entry_ids[0]])
        id_index_assignments[entry_ids[0]] = 0
    else:
        print(f'hiding {spc}')
        spc.hide()

aliasreaction = net.get_reaction('R64').create_alias()

for spc in aliasreaction.get_species_list():
    if spc.get_species_id() in reaction_mapping['R64'].keys():
        if spc.get_species_id() == 'AcCoA':
            aliasreaction.assign_species(net.get_species_list('AcCoA')[0])
        else:
            # print(f'setting position of {spc} with entry ID {entry_ids[1]} to {reaction_mapping['R64'][spc.get_species_id()][entry_ids[1]]}')
            spc.set_position(reaction_mapping['R64'][spc.get_species_id()][entry_ids[1]])
            id_index_assignments[entry_ids[1]] = 1
    else:
        print(f'hiding {spc}')
        spc.hide()

hiding Species(id=ATP, index=0)
hiding Species(id=CO2, index=0)
hiding Species(id=eEC6412, index=0)
hiding Species(id=ADP, index=0)
hiding Species(id=PO4, index=0)
hiding Species(id=ATP, index=1)
hiding Species(id=CO2, index=1)
hiding Species(id=eEC6412, index=1)
hiding Species(id=ADP, index=1)
hiding Species(id=PO4, index=1)


In [20]:
# SETTING REACTION 227
net.get_reactions_list('R227').get_species_list()[0].hide()
net.get_reactions_list('R227').get_species_list()[1].set_position(reaction_mapping['R227']['Glycine']['7890']) # 7890 is the entry ID for index = 0 reaction
net.get_reactions_list('R227').get_species_list()[2].set_position(reaction_mapping['R227']['_510CH2THF']['7890'])
net.get_reactions_list('R227').get_species_list()[3].hide()
net.get_reactions_list('R227').get_species_list()[4].set_position(reaction_mapping['R227']['Serine']['7890'])
net.get_reactions_list('R227').get_species_list()[5].set_position(reaction_mapping['R227']['THF']['7890'])

alias227 = net.get_reactions_list('R227').create_alias()

alias227[0].assign_species(net.get_reactions_list('R227').get_species_list()[0])
# alias227.get_species_list()[0].hide()
alias227.get_species_list()[1].hide() # 5411 is the entry ID for index = 0 reaction
alias227.get_species_list()[2].set_position(reaction_mapping['R227']['_510CH2THF']['5411'])
alias227.get_species_list()[3].hide()
alias227.get_species_list()[4].hide()
alias227.get_species_list()[5].set_position(reaction_mapping['R227']['THF']['5411'])

True

In [21]:
net.get_reactions_list('R269').get_species_list()

alias_H = net.get_species("H").create_alias(net.get_reactions_list('R269')[0])
alias_H.hide()
net.get_reactions_list('R269').get_species_list()[0].hide() # NAD
net.get_reactions_list('R269').get_species_list()[1].set_position(reaction_mapping['R269']['Mal']['4357']) # 4357 is the entry ID for index = 0 reaction
net.get_reactions_list('R269').get_species_list()[2].hide() # enzyme
net.get_reactions_list('R269').get_species_list()[3].hide() # NADH
net.get_reactions_list('R269').get_species_list()[4].set_position(reaction_mapping['R269']['Oxa']['4357'])
# net.get_reactions_list('R269').get_species_list()[4].hide()
net.get_reactions_list('R269')[0].assign_species(alias_H)

alias269 = net.get_reactions_list('R269').create_alias()

alias269.get_species_list()[0].hide()
alias269.get_species_list()[1].set_position(reaction_mapping['R269']['Mal']['5622']) # 5622 is the entry ID for index = 1 reaction
alias269.get_species_list()[2].hide()
alias269.get_species_list()[3].hide()
alias269.get_species_list()[4].set_position(reaction_mapping['R269']['Oxa']['5622'])
# alias269.get_species_list()[4].hide()
alias269[0].assign_species(alias_H)

True

In [22]:
net.get_reactions_list('R308').get_species_list()

# for entry ID 3007
net.get_reactions_list('R308').get_species_list()[0].hide() # PO4
net.get_reactions_list('R308')[0].assign_species(net.get_reactions_list('R269').get_species_list()[4])
# net.get_reactions_list('R308').get_species_list()[1].set_position(reaction_mapping['R308']['Oxa']['4357']) # assign this reaction to have Oxa set earlier
net.get_reactions_list('R308').get_species_list()[2].hide() # enzyme
net.get_reactions_list('R308').get_species_list()[3].hide() # H2O
net.get_reactions_list('R308').get_species_list()[4].hide()
net.get_reactions_list('R308').get_species_list()[5].set_position(reaction_mapping['R308']['PEP']['3007'])

alias308 = net.get_reactions_list('R308').create_alias()

# for entry ID 2412
alias308.get_species_list()[0].hide()
alias308[0].assign_species(alias269.get_species_list()[4])
# alias308.get_species_list()[1].set_position(reaction_mapping['R269']['Mal']['5622']) # 5622 is the entry ID for index = 1 reaction
alias308.get_species_list()[2].hide()
alias308.get_species_list()[3].hide()
alias308[0].assign_species(net.get_reactions_list('R308').get_species_list()[2])
alias308.get_species_list()[4].set_position(reaction_mapping['R308']['CO2']['2412'])
alias308.get_species_list()[5].set_position(reaction_mapping['R308']['PEP']['2412'])

True

In [23]:
net.get_reactions_list('R307').get_species_list()[0].hide()
net.get_reactions_list('R307')[0].assign_species(net.get_reactions_list('R269').get_species_list()[4])
# net.get_reactions_list('R307').get_species_list()[1].set_position(reaction_mapping['R307']['Oxa']['4357']) # assign this reaction to have Oxa set earlier
net.get_reactions_list('R307').get_species_list()[2].hide()
net.get_reactions_list('R307').get_species_list()[3].hide()
net.get_reactions_list('R307').get_species_list()[4].hide()
net.get_reactions_list('R307')[0].assign_species(net.get_reactions_list('R308').get_species_list()[5])
# net.get_reactions_list('R307').get_species_list()[4].set_position(reaction_mapping['R307']['PEP']['4357'])

alias307 = net.get_reactions_list('R307').create_alias()

alias307.get_species_list()[0].hide()
alias307[0].assign_species(alias269.get_species_list()[4])
# alias307.get_species_list()[1].set_position(reaction_mapping['R269']['Mal']['5622']) # 5622 is the entry ID for index = 1 reaction
alias307.get_species_list()[2].hide()
alias307.get_species_list()[3].hide()
alias307.get_species_list()[4].hide()
alias307[0].assign_species(alias308.get_species_list()[5])
# alias307.get_species_list()[4].set_position(reaction_mapping['R269']['Oxa']['5622'])

True

In [24]:
net.get_species('Pyr').set_position(reaction_mapping['R346']['Pyr']['1556'])

True

In [25]:
# setting for Pyr
j1_reaction = net.get_reaction("R270")
alias_species = net.get_species("Pyr").create_alias(j1_reaction)
alias_species.set_position(reaction_mapping['R270']['Pyr']['2409'])

j2_reaction = net.get_reaction("R272")
j2_reaction.assign_species(alias_species)

True

## Set fluxes before setting reaction curves

In [26]:
# net.show_fluxes(60*60)
# net.fluxes.set_colors(["#AA0000", "#FFCCCC"])

## Grouping biosynthetic modules

In [27]:
pathway_dict = {
    'Glycolysis / Gluconeogenesis': [
        'R347', 'R348', 'R349', 'R350',  # Pyruvate kinase
        'R351',  # Pyruvate, water dikinase
        'R307'   # Phosphoenolpyruvate carboxykinase (atp)
    ],
    'TCA Cycle': [
        'R157',  # Citrate synthase
        'R69', 'R70', 'R71',  # Aconitate hydratase
        'R255', 'R256', 'R257',  # Isocitrate dehydrogenase (nadp+)
        'R258',  # Isocitrate lyase
        'R299', 'R300',  # Oxoglutarate dehydrogenase (succinyl-transferring)
        'R268', 'R269',  # Malate dehydrogenase
        'R270', 'R271', 'R272', 'R273',  # Malate dehydrogenase (oxaloacetate-decarboxylating)
        'R204',  # Fumarate hydratase
        'R371',  # Succinate dehydrogenase
        'R372', 'R373'  # Succinate-coa ligase (adp-forming)
    ],
    'Pyruvate Metabolism & Acetyl-CoA Formation': [
        'R346',  # Pyruvate dehydrogenase (acetyl-transferring)
        'R345',  # Pyruvate carboxylase
        'R56', 'R57',  # Acetate kinase
        'R58', 'R59', 'R60', 'R61',  # Acetate-coa ligase
        'R64',  # Acetyl-coa carboxylase
        'R304', 'R305'  # Phosphate acetyltransferase
    ],
    'Redox & Energy Metabolism': [
        'R83', 'R84', 'R85', 'R86', 'R87', 'R88', 'R89',  # Alcohol dehydrogenase
        'R90',  # Alcohol dehydrogenase (nadp+)
        'R181',  # D-lactate dehydrogenase
        'R197',  # Formate dehydrogenase
        'R169', 'R170', 'R171', 'R172', 'R173', 'R174',  # Dihydrolipoyl dehydrogenase
        'R175',  # Dihydrolipoyllysine-residue acetyltransferase
        'R176', 'R177'  # Dihydrolipoyllysine-residue succinyltransferase
    ],
    'Amino Acid Metabolism': [
        'R406',  # L-serine ammonia-lyase
        'R62', 'R63',  # Acetolactate synthase
        'R227', 'R228',  # Glycine hydroxymethyltransferase
        'R224',  # Glycine c-acetyltransferase
        'R225'  # Glycine dehydrogenase (aminomethyl-transferring)
    ],
    'Cofactor & Detoxification Pathways': [
        'R245',  # Hydroxyacylglutathione hydrolase
        'R219'  # Glutathione synthase
    ],
    'Anaplerotic & Carbon Fixation Pathways': [
        'R123',  # Atp citrate synthase
        'R308'  # Phosphoenolpyruvate carboxylase
    ]
}

In [28]:
# colors = ['red', 'blue', 'orange', 'yellow', 'green', 'grey', 'purple']
# for i, mod in enumerate(pathway_dict.keys()):
#     net.group_reactions(pathway_dict[mod], colors[i])

## Setting reaction curves

In [29]:
from Bio.KEGG.KGML import KGML_parser

xml_file_path = '/workspaces/ECFERS/ko01100.xml'
pathway = KGML_parser.read(open(xml_file_path, 'r'))

In [30]:
net.get_reactions_list().hide()

[True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True,
 True]

In [31]:
# Organize species mapping
reaction_mapping = {}
for (species, reaction, entry_id), coords in translated_cpd_coordinates.items():
    reaction_mapping.setdefault(reaction, {}).setdefault(entry_id, {})[species] = coords

In [32]:
def get_species(entry_mapping, entry_id):
    species = set()  # Use a set to avoid duplicates
    if entry_id in entry_mapping:
        for reaction in entry_mapping[entry_id].values():
            species.update(reaction)  # Add species from each reaction
    return list(species)

In [33]:
def get_unique_coordinates(entry_mapping, species_mapping, entry_id):
    species_list = get_species(entry_mapping, entry_id)  # Get species for entry ID
    coordinates = []  # Use a list to maintain order
    seen = set()  # Track unique coordinates
    
    for species in species_list:
        if species in species_mapping:
            for reaction_data in species_mapping[species].values():
                if entry_id in reaction_data:  # Check if entry ID exists in reaction data
                    coord = reaction_data[entry_id]
                    if coord not in seen:  # Ensure uniqueness while maintaining order
                        coordinates.append(coord)
                        seen.add(coord)
    
    return coordinates, species_list  # Return ordered list of unique coordinates

In [34]:
from scipy.spatial.distance import euclidean

def find_curve_origin(involved_species_coords, starting_point):
    # Compute distances
    distances = [euclidean(starting_point, p) for p in involved_species_coords]

    # Find the closest point
    closest_point = distances.index(min(distances))

    return closest_point

In [35]:
for reaction in reaction_mapping.keys():
    for i, entry_id in enumerate(reaction_mapping[reaction]):

        for graphicsid in range(len(pathway.entries[int(entry_id)].graphics)):

            linecoords = pathway.entries[int(entry_id)].graphics[graphicsid].coords

            rxn = net.get_reactions_list(reaction)[i]

            rxn.set_position(([(linecoords[0][0] + linecoords[-1][0]) / 2, 
                                    (linecoords[0][1] + linecoords[-1][1]) / 2]))
            
            first_point = linecoords[0]
            last_point = linecoords[-1]

            involved_species_coords = get_unique_coordinates(entry_mapping, species_mapping, entry_id)

            starting_index = find_curve_origin(involved_species_coords[0], first_point)
            ending_index = find_curve_origin(involved_species_coords[0], last_point)

            if len(linecoords) == 2:
                middle_point = ([(linecoords[0][0] + linecoords[-1][0]) / 2, 
                                    (linecoords[0][1] + linecoords[-1][1]) / 2])
            else:
                middle_point = linecoords[int(len(linecoords)/2)]

            for j, k in enumerate([starting_index, ending_index]):
                curve = rxn.get_curves_list(rxn.get_species(involved_species_coords[1][k])[0])[0]

                curve.show()

                terminate = False  # Flag to stop processing further coordinates
                current_index = 0

                # Precompute coverage regions for all involved species coordinates
                coverage_regions = [
                    (coord[0] - 18, coord[0] + 18, coord[1] - 18, coord[1] + 18) 
                    for coord in involved_species_coords[0]
                ]

                if j == 1:
                    linecoords.reverse()

                for j in range(len(linecoords) - 1):
                    if terminate:
                        break

                    line_start, line_end = linecoords[j], linecoords[j + 1]

                    if linecoords[j] == middle_point:
                        break

                    for x_min, x_max, y_min, y_max in coverage_regions:
                        # Check if line_end is inside a coverage region, but line_start is not
                        if ((x_min <= line_end[0] <= x_max and y_min <= line_end[1] <= y_max) and
                            not (x_min < line_start[0] < x_max and y_min < line_start[1] < y_max)):

                            # If the last point is not inside any coverage region, add a segment
                            if not any(x_min <= linecoords[-1][0] <= x_max and y_min <= linecoords[-1][1] <= y_max 
                                    for x_min, x_max, y_min, y_max in coverage_regions):
                                
                                curve.add_segment(line_start, line_end, line_start, line_end)
                                break  # Stop checking once a segment is added

                            # Adjust the segment endpoint to stay within coverage bounds
                            adjusted_x, adjusted_y = line_end
                            if line_start[0] < x_min or line_start[0] > x_max:
                                adjusted_x = x_max if line_start[0] >= x_max else x_min
                            if line_start[1] < y_min or line_start[1] > y_max:
                                adjusted_y = y_min if line_start[1] <= y_min else y_max

                            # Remove the first segment if needed
                            if current_index == 0:
                                curve.remove_segment(0)

                            # Add the adjusted segment
                            curve.add_segment(line_start, (adjusted_x, adjusted_y), line_start, (adjusted_x, adjusted_y))
                            terminate = True
                            break  # Stop checking once a segment is added

                    if not terminate:
                        if current_index == 0:
                            curve_segment = curve.get_segment(current_index)
                            curve_segment.set_start(line_start)
                            curve_segment.set_end(line_end)
                            curve_segment.set_control_point_1(line_start)
                            curve_segment.set_control_point_2(line_end)

                            curve.add_segment(line_start, line_end, line_start, line_end)
                        else:
                            curve.add_segment(line_start, line_end, line_start, line_end)

                    current_index += 1


In [37]:
species = net.get_species_list()

for s in species:
    s.set_shape('circle')
    spcname = s.get_species_id()
    for _, row in data.iterrows():
        label = row['Label']
        id_value = row['ID']
        # Replace any occurrence of the "KEGG ID" within the key string
        if spcname == id_value:
            s.set_text(label)
species.move_texts_by((30, 30))

[[True],
 [True],
 [True],
 [True],
 [True, True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [True],
 [Tr

In [38]:
net.draw('/workspaces/ECFERS/example_v3.pdf')

In [39]:
# net.save('/workspaces/ECFERS/example_v3.xml')