In [2]:
import os
# Point Gurobi at its license file through the GRB_LICENSE_FILE environment variable.
# The path is relative, so it is resolved against the notebook's working directory.
os.environ['GRB_LICENSE_FILE'] = '../content/licenses/gurobi.lic'

In [4]:
import tensorflow
from cobra.io import read_sbml_model, write_sbml_model
from dnngior.gapfill_class import Gapfill
from dnngior.NN_Predictor import NN
import pandas as pd
import numpy as np
import cobra

In [4]:
# Load one draft reconstruction and show its summary as a quick sanity check.
# NOTE(review): this path has no "../models/" prefix, unlike the models_dir used by
# the later cells — confirm it resolves from the notebook's working directory.
path_to_draft_model = "Draft_models/MAG001.sbml"
draft_model = read_sbml_model(path_to_draft_model)
draft_model.summary()

Metabolite,Reaction,Flux,C-Number,C-Flux

Metabolite,Reaction,Flux,C-Number,C-Flux


**Print draft model information**


In [6]:
# Exploratory scan: for every draft model, list the metabolites whose name contains
# "o2" or "oxygen". The match is a deliberately broad, case-insensitive substring
# test, so false positives such as "CO2" or "Co2+" show up too; the output is only
# meant to help decide which keywords a stricter filter should use.
models_dir = "../models/Draft_models/"
model_files = sorted([f for f in os.listdir(models_dir) if f.endswith(".sbml")])

oxygen_substrings = ("o2", "oxygen")

for filename in model_files:
    model_path = os.path.join(models_dir, filename)
    try:
        model = cobra.io.read_sbml_model(model_path)
    except Exception as e:
        # Keep scanning the remaining models; one unreadable file should not stop the survey.
        print(f"Error loading model {filename}: {e}")
        continue

    print(f"\n------ Model: {filename} (ID: {model.id}) ------")
    for metabolite in model.metabolites:
        met_name_lower = (metabolite.name or "").lower()
        # any() reports a metabolite once even when several substrings match
        # (two independent `if` statements would print the same line twice).
        if any(substring in met_name_lower for substring in oxygen_substrings):
            print(metabolite.name, met_name_lower, metabolite.id)



------ Model: MAG033.sbml (ID: MAG033) ------
CO2 [c0] co2 [c0] cpd00011_c0
O2 [e0] o2 [e0] cpd00007_e0
O2 [c0] o2 [c0] cpd00007_c0
CO2 [e0] co2 [e0] cpd00011_e0
H2O2 [c0] h2o2 [c0] cpd00025_c0
Co2+ [c0] co2+ [c0] cpd00149_c0

------ Model: MAG034.sbml (ID: MAG034) ------
CO2 [c0] co2 [c0] cpd00011_c0
O2 [c0] o2 [c0] cpd00007_c0
H2O2 [c0] h2o2 [c0] cpd00025_c0
heptosyl-kdo2-lipidA [c0] heptosyl-kdo2-lipida [c0] cpd15485_c0
heptosyl-heptosyl-kdo2-lipidA [c0] heptosyl-heptosyl-kdo2-lipida [c0] cpd15484_c0
Lauroyl-KDO2-lipid IV(A) [c0] lauroyl-kdo2-lipid iv(a) [c0] cpd03736_c0
kdo2-lipid iva [c0] kdo2-lipid iva [c0] cpd03586_c0
O2 [e0] o2 [e0] cpd00007_e0
CO2 [e0] co2 [e0] cpd00011_e0
Co2+ [c0] co2+ [c0] cpd00149_c0

------ Model: MAG035.sbml (ID: MAG035) ------
CO2 [c0] co2 [c0] cpd00011_c0
O2 [c0] o2 [c0] cpd00007_c0
H2O2 [c0] h2o2 [c0] cpd00025_c0
O2- [c0] o2- [c0] cpd00532_c0
Co2+ [e0] co2+ [e0] cpd00149_e0
Co2+ [c0] co2+ [c0] cpd00149_c0
O2 [e0] o2 [e0] cpd00007_e0
CO2 [e0] co2 [e0]

**Construct a complete, oxygen-free medium and print the oxygen-related metabolites found in each draft model.**

In [12]:
import os
import cobra
import re

# Blacklist of oxygen-related keywords. Each term is matched case-insensitively and
# only as a stand-alone token, so "o2" hits "O2 [e0]" but not "CO2", "Co2+" or "H2O2".
# NOTE(review): hydrogen peroxide appears in these models under the name "H2O2",
# which none of the terms below match — confirm whether it should be blacklisted too.
black_list = [
    "o2",
    "o2-",
    "oxygen",
    "superoxide",
    "peroxide"
]


def is_oxygen_related(met_name, blacklist):
    """Return True if ``met_name`` contains any blacklist term as a stand-alone token.

    Parameters
    ----------
    met_name : str
        Metabolite name, e.g. ``"O2 [e0]"``. ``None`` is treated as an empty name.
    blacklist : iterable of str
        Lower-case keywords to look for. Empty strings are ignored.

    Returns
    -------
    bool
        True when at least one term occurs in the lower-cased name without a word
        character (letter, digit or underscore) directly before or after it.

    Notes
    -----
    The token edges are expressed with ``(?<!\\w)`` / ``(?!\\w)`` rather than ``\\b``.
    For terms made only of word characters the two are equivalent, but ``\\b`` after
    a term ending in punctuation (such as ``"o2-"``) demands that a word character
    follows, so ``"O2- [c0]"`` could never match that term.
    """
    lower_name = (met_name or "").lower()
    return any(
        re.search(rf"(?<!\w){re.escape(term)}(?!\w)", lower_name) is not None
        for term in blacklist
        if term  # an empty term would "match" almost every name
    )

# Single pass over the draft models. Each SBML file is parsed once and used for two things:
#   * all_mets    - the oxygen-free medium: tidy compound ID -> metabolite name
#   * oxygen_hits - the oxygen-related metabolites of every model, reported below so
#                   the blacklist can be adjusted later if needed
all_mets = {}
oxygen_hits = []  # one (file name, model ID, [(name, compartment, id), ...]) per model
for mf in model_files:
    model_path = os.path.join(models_dir, mf)
    try:
        model = cobra.io.read_sbml_model(model_path)
    except Exception as e:
        print(f"[Error] Failed to load model {mf}: {e}")
        continue

    hits = []
    for met in model.metabolites:
        # Oxygen-related metabolites (in any compartment) are reported and never
        # added to the medium.
        if is_oxygen_related(met.name, black_list):
            hits.append((met.name, met.compartment, met.id))
            continue
        # Metabolites in the 'c0' compartment are not medium components.
        if met.compartment == 'c0':
            continue
        # Drop the "_e0" / "_c0" compartment tags to get a cleaner ID
        tidy_id = met.id.replace("_e0", "").replace("_c0", "")
        # First occurrence wins: keep the name from the first model that has this ID
        if tidy_id not in all_mets:
            all_mets[tidy_id] = met.name
    oxygen_hits.append((mf, model.id, hits))

# Write the merged metabolites dictionary into the oxygen-free medium file
tsv_path = "../models/complete_media_without_oxygen.tsv"
with open(tsv_path, "w") as f:
    f.write("id\tname\tconcentration\tminflux\tmaxflux\n")
    for met_id, met_name in all_mets.items():
        f.write(f"{met_id}\t{met_name}\t1\t-1000\t1000\n")

print(f"\n[Oxygen-free medium file saved] => {tsv_path}")

# ---------------------------------------------
# Report the oxygen-related metabolites collected above, model by model.
print("\n=== Checking oxygen-related metabolites in all draft models ===")
for mf, model_id, hits in oxygen_hits:
    print(f"\n------ Model: {mf} (ID: {model_id if model_id else 'N/A'}) ------")
    for hit_name, hit_compartment, hit_id in hits:
        print(f"{hit_name} [{hit_compartment}]  => {hit_id}")

print("\n[Check completed]")


[Oxygen-free medium file saved] => ../models/complete_media_without_oxygen.tsv

=== Checking oxygen-related metabolites in all draft models ===

------ Model: MAG001.sbml (ID: MAG001) ------
O2 [c0] [c0]  => cpd00007_c0
O2- [c0] [c0]  => cpd00532_c0
O2 [e0] [e0]  => cpd00007_e0

------ Model: MAG002.sbml (ID: MAG002) ------
O2 [c0] [c0]  => cpd00007_c0
O2- [c0] [c0]  => cpd00532_c0
O2 [e0] [e0]  => cpd00007_e0

------ Model: MAG003.sbml (ID: MAG003) ------
O2- [c0] [c0]  => cpd00532_c0
O2 [c0] [c0]  => cpd00007_c0
O2 [e0] [e0]  => cpd00007_e0

------ Model: MAG004.sbml (ID: MAG004) ------
O2 [c0] [c0]  => cpd00007_c0
O2- [c0] [c0]  => cpd00532_c0
O2 [e0] [e0]  => cpd00007_e0

------ Model: MAG005.sbml (ID: MAG005) ------
O2 [c0] [c0]  => cpd00007_c0
O2- [c0] [c0]  => cpd00532_c0
O2 [e0] [e0]  => cpd00007_e0

------ Model: MAG006.sbml (ID: MAG006) ------
O2 [c0] [c0]  => cpd00007_c0
O2- [c0] [c0]  => cpd00532_c0
O2 [e0] [e0]  => cpd00007_e0

------ Model: MAG007.sbml (ID: MAG007) -----

In [7]:
# Print the generated oxygen-free medium file for visual inspection.
!cat ../models/complete_media_without_oxygen.tsv

id	name	concentration	minflux	maxflux
cpd00067	H+ [e0]	1	-1000	1000
cpd00106	Fumarate [e0]	1	-1000	1000
cpd00108	Galactose [e0]	1	-1000	1000
cpd00036	Succinate [e0]	1	-1000	1000
cpd11576	L-methionine R-oxide [e0]	1	-1000	1000
cpd04097	Pb [e0]	1	-1000	1000
cpd00033	Glycine [e0]	1	-1000	1000
cpd11597	ARSENOBETAINE [e0]	1	-1000	1000
cpd00012	PPi [e0]	1	-1000	1000
cpd01030	Salicin [e0]	1	-1000	1000
cpd00129	L-Proline [e0]	1	-1000	1000
cpd00971	Na+ [e0]	1	-1000	1000
cpd01012	Cd2+ [e0]	1	-1000	1000
cpd00635	Cbl [e0]	1	-1000	1000
cpd00276	GLUM [e0]	1	-1000	1000
cpd00082	D-Fructose [e0]	1	-1000	1000
cpd00080	Glycerol-3-phosphate [e0]	1	-1000	1000
cpd00023	L-Glutamate [e0]	1	-1000	1000
cpd00058	Cu2+ [e0]	1	-1000	1000
cpd00107	L-Leucine [e0]	1	-1000	1000
cpd00314	D-Mannitol [e0]	1	-1000	1000
cpd00013	NH3 [e0]	1	-1000	1000
cpd00244	Ni2+ [e0]	1	-1000	1000
cpd11578	hexanesulfonate [e0]	1	-1000	1000
cpd00179	Maltose [e0]	1	-1000	1000
cpd00205	K+ [e0]	1	-1000	1000
cpd00100	Glycerol [e0]	1	-1000	1000


In [5]:
# Sanity check on the oxygen-free medium: O2 (cpd00007) and O2- (cpd00532) must be
# absent, while cpd00001 is expected to be present (presumably H2O — TODO confirm).
to_check = ["cpd00007_c0", "cpd00532_c0", "cpd00001"]


def _strip_compartment(compound_id, suffixes=("_c0", "_e0")):
    """Return ``compound_id`` without a trailing compartment suffix, if it has one."""
    for suffix in suffixes:
        if compound_id.endswith(suffix):
            return compound_id[: -len(suffix)]
    return compound_id


# Open and read the "complete_media_without_oxygen.tsv" file
tsv_path = "../models/complete_media_without_oxygen.tsv"

with open(tsv_path, "r") as f:
    next(f, None)  # skip the header row
    # The first tab-separated field of every non-empty row is the compound ID.
    medium_ids = {
        _strip_compartment(line.strip().split("\t")[0])
        for line in f
        if line.strip()
    }

# The medium file stores IDs with the compartment tag removed, so the IDs being
# checked are normalised the same way before comparing; comparing "cpd00007_c0"
# literally would report "Not Found" even if oxygen were in the medium.
found = {key: _strip_compartment(key) in medium_ids for key in to_check}

# Output the checking results
for key in to_check:
    print(f"{key}: {'Found' if found[key] else 'Not Found'}")


cpd00007_c0: Not Found
cpd00532_c0: Not Found
cpd00001: Found


In [6]:
from cobra import Reaction

def makeExchangeReaction(model, met):
    """Add an exchange reaction for ``met`` to ``model`` and return the model.

    The reaction gets the ID ``'EX_' + met.id``, bounds of -1000 and 1000, and
    consumes one unit of ``met`` with no product.

    Parameters
    ----------
    model : cobra model
        Model to extend; it is modified in place.
    met : cobra metabolite
        Metabolite the exchange reaction is created for.

    Returns
    -------
    The same ``model`` object that was passed in.
    """
    new_reaction = Reaction('EX_' + met.id)
    # Separate the metabolite name from the word "exchange" (it used to be glued on).
    new_reaction.name = met.name + ' exchange'
    new_reaction.lower_bound = -1000
    new_reaction.upper_bound = 1000
    new_reaction.add_metabolites({met: -1.0})

    model.add_reactions([new_reaction])
    return model

def fix_exchange_reactions(model_in):
    """Give every 'e0' metabolite of ``model_in`` an ``EX_<metabolite id>`` reaction.

    Metabolites that already have such a reaction are left alone; the missing ones
    are created through ``makeExchangeReaction``. ``model_in`` itself is modified,
    and a copy of the result is returned.
    """
    for candidate in model_in.metabolites:
        if candidate.compartment != 'e0':
            continue
        if model_in.reactions.has_id('EX_' + candidate.id):
            continue
        model_in = makeExchangeReaction(model_in, candidate)
    return model_in.copy()

# gapfilling

In [7]:
# BLOCK X: Gapfilling all models with "complete_media_without_oxygen.tsv"

import os
import cobra
import dnngior
import traceback

# Define input and output directories
models_dir = "../models/Draft_models"  # Folder containing draft model files
output_dir = "../models/Gapfilled_models"  # Folder to store the gapfilled models
os.makedirs(output_dir, exist_ok=True)

# Medium used for every gapfilling run
medium_path = '../models/complete_media_without_oxygen.tsv'

# Objective values at or below this threshold are reported as "no growth"
min_growth = 1e-9

# Collect all draft model files ending with .sbml
model_files = sorted([f for f in os.listdir(models_dir) if f.endswith(".sbml")])

# Iterate over each model and perform gapfilling
for mf in model_files:
    model_path = os.path.join(models_dir, mf)
    # splitext removes only the trailing extension, never a ".sbml" inside the name
    prefix = os.path.splitext(mf)[0]
    print(f"\n=== Gapfilling {prefix} with no-oxygen medium ===")

    try:
        # Use DNNGIOR to perform gapfilling
        gapfill_no_oxygen = dnngior.Gapfill(
            model_path,
            medium_file=medium_path,
            objectiveName='bio1'
        )

        # gapfilledModel contains the model after gapfilling
        model_gapfilled_no_oxygen = gapfill_no_oxygen.gapfilledModel.copy()

        # Make sure every 'e0' metabolite has an exchange reaction
        model_gapfilled_no_oxygen = fix_exchange_reactions(model_gapfilled_no_oxygen)

        # Manually ensure that the bio1 reaction is set as the objective function.
        # Best effort: a failure here is reported but does not stop the model being saved.
        try:
            bio_rxn = model_gapfilled_no_oxygen.reactions.get_by_id("bio1")
            bio_rxn.objective_coefficient = 1.0
            model_gapfilled_no_oxygen.objective = bio_rxn
            print("  [Info] 'bio1' objective function has been set with objective_coefficient = 1.0")
        except Exception as e:
            print("  [Warning] 'bio1' not found or unable to set objective_coefficient:", e)

        # Save the gapfilled model to the output folder
        out_path = os.path.join(output_dir, f"{prefix}_gapfilled_noO2.xml")
        cobra.io.write_sbml_model(model_gapfilled_no_oxygen, out_path)

        # Check whether biomass can be produced after gapfilling
        print("Objective function (bio1) summary:")
        solution = model_gapfilled_no_oxygen.optimize()
        growth = solution.objective_value
        print("  FBA solution objective:", growth)
        # A saved model is not necessarily a growing one: flag non-optimal solver
        # states and zero/undefined growth instead of letting them pass unnoticed.
        # (`not growth > min_growth` is also True when growth is NaN.)
        if solution.status != "optimal" or growth is None or not growth > min_growth:
            print(f"  [Warning] {prefix}: no growth on the no-oxygen medium "
                  f"(solver status: {solution.status})")
        # Detailed flux distribution
        print(model_gapfilled_no_oxygen.summary())

        print(f"[Saved] Gapfilled model => {out_path}\n")

    except Exception as e:
        # One failing model must not abort the whole batch; report it and move on.
        print(f"[Error] Gapfilling failed for {mf}: {e}")
        traceback.print_exc()

print("\n[All gapfilling done!]")



=== Gapfilling MAG230 with no-oxygen medium ===
Gap-filling database =  ModelSEED


No objective coefficients in model. Unclear what should be optimized


Loading medium from: ../models/complete_media_without_oxygen.tsv
#reactions not found in NN-keys:  6 / 831
Flux through biomass reaction is 1.00000000
Flux through biomass reaction is 1.00000000


 condition is currently:  76788 




 condition is currently:  38394 




 condition is currently:  19197 




 condition is currently:  9598 




 condition is currently:  4799 




 condition is currently:  2399 




 condition is currently:  1199 




 condition is currently:  599 




 condition is currently:  299 




 condition is currently:  149 




 condition is currently:  74 




 condition is currently:  37 




 condition is currently:  18 




 condition is currently:  9 




 condition is currently:  4 




 condition is currently:  2 




 condition is currently:  1 


Objective value is 0.005973.
Read LP format model from file /var/folders/6w/knrbtrj125ggkrx091kd2g840000gn/T/tmphriznp1j.lp
Reading time = 0.00 seconds
: 960 rows, 1930 columns, 9186 nonzeros
NN gapfilling added

No objective coefficients in model. Unclear what should be optimized


Loading medium from: ../models/complete_media_without_oxygen.tsv
#reactions not found in NN-keys:  6 / 753
Flux through biomass reaction is 1.00000000
Flux through biomass reaction is 1.00000000


 condition is currently:  76925 




 condition is currently:  38462 




 condition is currently:  19231 




 condition is currently:  9615 




 condition is currently:  4807 




 condition is currently:  2403 




 condition is currently:  1201 




 condition is currently:  600 




 condition is currently:  300 




 condition is currently:  150 




 condition is currently:  75 




 condition is currently:  37 




 condition is currently:  18 




 condition is currently:  9 




 condition is currently:  4 




 condition is currently:  2 




 condition is currently:  1 


Objective value is 0.079816.
Read LP format model from file /var/folders/6w/knrbtrj125ggkrx091kd2g840000gn/T/tmp7bpxl0ai.lp
Reading time = 0.00 seconds
: 918 rows, 1818 columns, 8686 nonzeros
NN gapfilling added

No objective coefficients in model. Unclear what should be optimized


Loading medium from: ../models/complete_media_without_oxygen.tsv
#reactions not found in NN-keys:  4 / 732
Flux through biomass reaction is 1.00000000
Flux through biomass reaction is 1.00000000


 condition is currently:  76916 




 condition is currently:  38458 




 condition is currently:  19229 




 condition is currently:  9614 




 condition is currently:  4807 




 condition is currently:  2403 




 condition is currently:  1201 




 condition is currently:  600 




 condition is currently:  300 




 condition is currently:  150 




 condition is currently:  75 




 condition is currently:  37 




 condition is currently:  18 




 condition is currently:  9 




 condition is currently:  5 




 condition is currently:  2 




 condition is currently:  1 


Objective value is 0.262988.
Read LP format model from file /var/folders/6w/knrbtrj125ggkrx091kd2g840000gn/T/tmp23jf7y78.lp
Reading time = 0.00 seconds
: 931 rows, 1786 columns, 8510 nonzeros
NN gapfilling added

No objective coefficients in model. Unclear what should be optimized


Loading medium from: ../models/complete_media_without_oxygen.tsv
#reactions not found in NN-keys:  6 / 1039
Flux through biomass reaction is 1.00000000
Flux through biomass reaction is 1.00000000


 condition is currently:  76457 




 condition is currently:  38228 




 condition is currently:  19114 




 condition is currently:  9557 




 condition is currently:  4778 




 condition is currently:  2389 




 condition is currently:  1194 




 condition is currently:  597 




 condition is currently:  298 




 condition is currently:  149 




 condition is currently:  74 




 condition is currently:  37 




 condition is currently:  18 




 condition is currently:  9 




 condition is currently:  4 




 condition is currently:  2 




 condition is currently:  1 


Objective value is 0.024388.
Read LP format model from file /var/folders/6w/knrbtrj125ggkrx091kd2g840000gn/T/tmp3irgtk1o.lp
Reading time = 0.00 seconds
: 1180 rows, 2314 columns, 10988 nonzeros
NN gapfilling ad

No objective coefficients in model. Unclear what should be optimized


Loading medium from: ../models/complete_media_without_oxygen.tsv
#reactions not found in NN-keys:  7 / 762
Flux through biomass reaction is 1.00000000
Flux through biomass reaction is 1.00000000


 condition is currently:  76898 




 condition is currently:  38449 




 condition is currently:  19224 




 condition is currently:  9612 




 condition is currently:  4806 




 condition is currently:  2403 




 condition is currently:  1201 




 condition is currently:  600 




 condition is currently:  300 




 condition is currently:  150 




 condition is currently:  75 




 condition is currently:  37 




 condition is currently:  18 




 condition is currently:  9 




 condition is currently:  5 




 condition is currently:  3 




 condition is currently:  2 




 condition is currently:  1 


Objective value is 0.337400.
Read LP format model from file /var/folders/6w/knrbtrj125ggkrx091kd2g840000gn/T/tmpkyon4bv2.lp
Reading time = 0.00 seconds
: 917 rows, 1814 columns, 

No objective coefficients in model. Unclear what should be optimized


Loading medium from: ../models/complete_media_without_oxygen.tsv
#reactions not found in NN-keys:  5 / 900
Flux through biomass reaction is 1.00000000
Flux through biomass reaction is 1.00000000


 condition is currently:  76719 




 condition is currently:  38359 




 condition is currently:  19179 




 condition is currently:  9589 




 condition is currently:  4794 




 condition is currently:  2397 




 condition is currently:  1198 




 condition is currently:  599 




 condition is currently:  299 




 condition is currently:  149 




 condition is currently:  74 




 condition is currently:  37 




 condition is currently:  18 




 condition is currently:  9 




 condition is currently:  4 




 condition is currently:  2 




 condition is currently:  1 


Objective value is 0.023146.
Read LP format model from file /var/folders/6w/knrbtrj125ggkrx091kd2g840000gn/T/tmp4gzcq4r0.lp
Reading time = 0.00 seconds
: 1022 rows, 2042 columns, 9840 nonzeros
NN gapfilling adde

No objective coefficients in model. Unclear what should be optimized


Loading medium from: ../models/complete_media_without_oxygen.tsv
#reactions not found in NN-keys:  5 / 819
Flux through biomass reaction is 1.00000000
Flux through biomass reaction is 1.00000000


 condition is currently:  76799 




 condition is currently:  38399 




 condition is currently:  19199 




 condition is currently:  9599 




 condition is currently:  4799 




 condition is currently:  2399 




 condition is currently:  1199 




 condition is currently:  599 




 condition is currently:  299 




 condition is currently:  149 




 condition is currently:  74 




 condition is currently:  37 




 condition is currently:  18 




 condition is currently:  9 




 condition is currently:  5 




 condition is currently:  2 




 condition is currently:  1 


Objective value is 1.000000.
Read LP format model from file /var/folders/6w/knrbtrj125ggkrx091kd2g840000gn/T/tmpbgno2sk9.lp
Reading time = 0.00 seconds
: 1017 rows, 2020 columns, 9518 nonzeros
NN gapfilling adde

No objective coefficients in model. Unclear what should be optimized


Loading medium from: ../models/complete_media_without_oxygen.tsv
#reactions not found in NN-keys:  7 / 809
Flux through biomass reaction is 1.00000000
Flux through biomass reaction is 1.00000000


 condition is currently:  76829 




 condition is currently:  38414 




 condition is currently:  19207 




 condition is currently:  9603 




 condition is currently:  4801 




 condition is currently:  2400 




 condition is currently:  1200 




 condition is currently:  600 




 condition is currently:  300 




 condition is currently:  150 




 condition is currently:  75 




 condition is currently:  37 




 condition is currently:  18 




 condition is currently:  9 




 condition is currently:  5 




 condition is currently:  2 




 condition is currently:  1 


Objective value is 0.336539.
Read LP format model from file /var/folders/6w/knrbtrj125ggkrx091kd2g840000gn/T/tmpckyygzxf.lp
Reading time = 0.00 seconds
: 973 rows, 1914 columns, 9230 nonzeros
NN gapfilling added

No objective coefficients in model. Unclear what should be optimized


Loading medium from: ../models/complete_media_without_oxygen.tsv
#reactions not found in NN-keys:  6 / 1239
Flux through biomass reaction is 1.00000000
Flux through biomass reaction is 1.00000000


 condition is currently:  76099 




 condition is currently:  38049 




 condition is currently:  19024 




 condition is currently:  9512 




 condition is currently:  4756 




 condition is currently:  2378 




 condition is currently:  1189 




 condition is currently:  594 




 condition is currently:  297 




 condition is currently:  148 




 condition is currently:  74 




 condition is currently:  37 




 condition is currently:  18 




 condition is currently:  9 




 condition is currently:  4 




 condition is currently:  2 




 condition is currently:  1 


Objective value is 0.045338.
Read LP format model from file /var/folders/6w/knrbtrj125ggkrx091kd2g840000gn/T/tmp2c0gahpl.lp
Reading time = 0.01 seconds
: 1305 rows, 2690 columns, 12964 nonzeros
NN gapfilling ad

No objective coefficients in model. Unclear what should be optimized


Loading medium from: ../models/complete_media_without_oxygen.tsv
#reactions not found in NN-keys:  6 / 736
Flux through biomass reaction is 1.00000000
Flux through biomass reaction is 1.00000000


 condition is currently:  76942 




 condition is currently:  38471 




 condition is currently:  19235 




 condition is currently:  9617 




 condition is currently:  4808 




 condition is currently:  2404 




 condition is currently:  1202 




 condition is currently:  601 




 condition is currently:  300 




 condition is currently:  150 




 condition is currently:  75 




 condition is currently:  37 




 condition is currently:  18 




 condition is currently:  9 




 condition is currently:  4 




 condition is currently:  2 




 condition is currently:  1 


Objective value is 0.072273.
Read LP format model from file /var/folders/6w/knrbtrj125ggkrx091kd2g840000gn/T/tmpdghs2slf.lp
Reading time = 0.00 seconds
: 902 rows, 1796 columns, 8518 nonzeros
NN gapfilling added

No objective coefficients in model. Unclear what should be optimized


Loading medium from: ../models/complete_media_without_oxygen.tsv
#reactions not found in NN-keys:  7 / 896
Flux through biomass reaction is 1.00000000
Flux through biomass reaction is 1.00000000


 condition is currently:  76667 




 condition is currently:  38333 




 condition is currently:  19166 




 condition is currently:  9583 




 condition is currently:  4791 




 condition is currently:  2395 




 condition is currently:  1197 




 condition is currently:  598 




 condition is currently:  299 




 condition is currently:  149 




 condition is currently:  74 




 condition is currently:  37 




 condition is currently:  18 




 condition is currently:  9 




 condition is currently:  5 




 condition is currently:  2 




 condition is currently:  1 


Objective value is 0.162300.
Read LP format model from file /var/folders/6w/knrbtrj125ggkrx091kd2g840000gn/T/tmpgqruwd1z.lp
Reading time = 0.00 seconds
: 1033 rows, 2088 columns, 9884 nonzeros
NN gapfilling adde

**Extract & Unify Exchange Reactions from All Gapfilled Models**

In [9]:
import os
import cobra
import numpy as np

gapfilled_dir = "../models/Gapfilled_models"  # folder containing gapfilled models
# NOTE: this rebinds model_files, which the earlier cells use for the draft-model
# (.sbml) list; from here on it holds the gapfilled .xml file names instead.
model_files = sorted([f for f in os.listdir(gapfilled_dir) if f.endswith(".xml")])

def extract_exchange_reactions(model):
    """
    Collect the import-capable exchange reactions of ``model``.

    A reaction is kept when its ID starts with "EX_", its lower_bound is
    negative, and it involves exactly one metabolite.

    Returns a dict of the form {rxn_id: [met_id, met_name, met_formula]}.
    """
    importable = {}
    for rxn in model.reactions:
        # Exchange reactions are recognised by their "EX_" prefix;
        # adapt this filter if your exchange IDs differ.
        if not rxn.id.startswith("EX_"):
            continue
        # Only reactions that are allowed to run in the import direction.
        if not rxn.lower_bound < 0:
            continue
        participants = list(rxn.metabolites)
        # Anything other than a single metabolite is not a plain exchange.
        if len(participants) != 1:
            continue
        only_met = participants[0]
        importable[rxn.id] = [only_met.id, only_met.name, only_met.formula]

    return importable

# 1) Merge the exchange reactions of all gapfilled models into one dictionary
#    (rxn_id -> [met_id, met_name, met_formula]); the first model that provides
#    a reaction ID determines the stored entry.
all_exchange_dict = {}

for mf in model_files:
    model_path = os.path.join(gapfilled_dir, mf)
    print(f"Extracting exchange rxns from {mf}...")
    try:
        model = cobra.io.read_sbml_model(model_path)
    except Exception as load_error:
        print(f"[Error] Cannot load {mf}: {load_error}")
        continue

    ex_rxns = extract_exchange_reactions(model)

    # setdefault leaves entries that are already present untouched
    for rxn_id, info_list in ex_rxns.items():
        all_exchange_dict.setdefault(rxn_id, info_list)

# 2) Dump the merged table as a tab-separated file, one exchange reaction per row
medium_file = "../models/group_medium_exchanges.tsv"
with open(medium_file, "w") as out:
    out.write("rxn_id\tmet_id\tmet_name\tmet_formula\n")
    out.writelines(
        f"{rxn_id}\t{met_id}\t{met_name}\t{met_form}\n"
        for rxn_id, (met_id, met_name, met_form) in all_exchange_dict.items()
    )

print(f"\n[Unified exchange reactions saved to: {medium_file}]")
print(f"Total unique exchange reactions: {len(all_exchange_dict)}")

Extracting exchange rxns from MAG001.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG002.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG003.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG004.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG005.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG006.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG007.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG008.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG009.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG010.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG011.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG012.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG013.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG014.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG015.sbml_gapfilled_noO2.xml...
Extracting exchange rxns from MAG016.sbm