In [None]:
import cobra
from cobra.core import Metabolite
from cobra.io import load_model, read_sbml_model, load_json_model, write_sbml_model
from cobra.flux_analysis import gapfill
import os, re
import pandas as pd

# Show the working directory; every model path below is built from it.
print(os.getcwd())

# Path fragments appended to os.getcwd() to locate the model files.
# Each starts with "/", so the f-strings below contain a harmless "//".
ext_dir = '/../../../data/external'
uni_dir = '/../2.gapfill_w_universal'
man_dir = '/../3.manual_check'
mas_dir = '/../4.mass_balance'
# Import model
# Alternative inputs are kept commented out; only the last line is active.
# model = read_sbml_model(f"{os.getcwd()}/{ext_dir}/Salb-GEM.xml")
# model = read_sbml_model(f"{os.getcwd()}/{uni_dir}/Salb-GEM-Uni-gapfill.xml")
# model = read_sbml_model(f"{os.getcwd()}/{man_dir}/Salb-GEM-Man-gapfill.xml")
model = read_sbml_model(f"{os.getcwd()}/{mas_dir}/Salb-GEM-Biosustain.xml")


# 1. Find metabolites that don't have a formula or charge

In [None]:
# IDs of all metabolites whose formula is unset.
no_formula = [
    met.id
    for met in model.metabolites
    if met.formula is None
]
print(no_formula)
len(no_formula)

In [None]:
# In this model a charge counts as missing whenever the metabolite notes
# carry no "CHARGE" entry.

no_charge = [
    met.id
    for met in model.metabolites
    if "CHARGE" not in met.notes
]
print(no_charge)
len(no_charge)

# 2. Fix metabolite formula and charge using PubChem and MetaNetX

In [None]:
import json, requests

def get_pubchem_formula_charge(query):
    """Look up a compound by name on PubChem and return its formula and charge.

    The PubChem PUG REST "compound/name" endpoint is queried for the
    ``MolecularFormula`` and ``Charge`` properties; the first returned record
    is used.

    Parameters
    ----------
    query : str
        Compound name to search for.

    Returns
    -------
    tuple
        ``(formula, charge)``. Any charge suffix in the formula (everything
        from the first "-" or "+" onwards) is removed. ``(None, None)`` is
        returned, after printing a message, when the name is empty, the
        request fails, the status code is not 200, or no record is returned.
    """
    # Imported locally so the function does not rely on a file-level import.
    from urllib.parse import quote

    if not query:
        print(f"Failed to retrieve data for {query}")
        return None, None

    # Percent-encode the name so characters such as "/", "?" or "#" cannot
    # alter the structure of the request URL.
    encoded_name = quote(str(query), safe="")
    url = (
        "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/"
        f"{encoded_name}/property/MolecularFormula,Charge/JSON"
    )

    try:
        # A timeout keeps one unresponsive request from hanging the whole loop.
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            print(f"Failed to retrieve data for {query}")
            return None, None
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"Failed to retrieve data for {query}: {e}")
        return None, None

    records = data.get('PropertyTable', {}).get('Properties', [])
    if not records:
        # A 200 response without any property record is treated as a miss.
        print(f"Failed to retrieve data for {query}")
        return None, None

    properties = records[0]
    formula = properties.get('MolecularFormula')
    if formula is not None:
        # The charge is returned separately, so drop a trailing "-n"/"+n".
        formula = formula.split("-")[0].split("+")[0]
    charge = properties.get('Charge')
    return formula, charge

# Query PubChem by metabolite name for every metabolite lacking a formula.
# Metabolites for which no formula comes back are collected in still_failed.
still_failed = []
for met_id in no_formula:
    met = model.metabolites.get_by_id(met_id)
    formula, charge = get_pubchem_formula_charge(met.name)
    print(f"Metabolite: {met.id}, Formula: {formula}, Charge: {charge}")

    if formula is None:
        still_failed.append(met.id)
    else:
        met.formula = formula

    # The charge is only written when PubChem actually returned one.
    if charge is not None:
        met.charge = charge

In [None]:
import requests
from bs4 import BeautifulSoup

def get_metanetx_formula(metanetx_id):
    """Scrape the formula and charge of a compound from its MetaNetX page.

    The page ``https://www.metanetx.org/chem_info/<metanetx_id>`` is fetched
    and the table cells following the ``formula`` and ``charge`` label cells
    are read.

    Parameters
    ----------
    metanetx_id : str
        MetaNetX chemical identifier used to build the page URL.

    Returns
    -------
    tuple
        ``(formula, charge)``. ``charge`` is an ``int``, or ``None`` when the
        page has no charge row, the cell is empty, or its text is not an
        integer. ``(None, None)`` is returned, after printing a message, when
        the request fails, no formula is found, or any other error occurs.
    """
    # Construct the URL using the MetaNetX ID
    url = f"https://www.metanetx.org/chem_info/{metanetx_id}"

    try:
        # A timeout keeps one unresponsive request from hanging the notebook.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise error for bad status codes

        # Parse HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Both results start as None so a missing table row can never leave
        # a name unbound further down.
        formula = None
        charge = None

        # Locate the table cell containing the formula
        formula_header = soup.find('td', string='formula')
        if formula_header:
            formula_cell = formula_header.find_next_sibling('td')
            if formula_cell:
                formula = formula_cell.text.strip("* ")

        # Locate the table cell containing the charge
        charge_header = soup.find('td', string='charge')
        if charge_header:
            charge_cell = charge_header.find_next_sibling('td')
            if charge_cell:
                charge_text = charge_cell.text.strip()
                if charge_text != '':
                    try:
                        charge = int(charge_text)
                    except ValueError:
                        # Keep the formula even if the charge is unparsable.
                        charge = None

        if formula:
            return formula, charge
        print(f"Formula not found for {metanetx_id}")
        return None, None

    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
    except Exception as e:
        print(f"Error: {e}")
    return None, None

# Second attempt for metabolites PubChem could not resolve: use the MetaNetX
# identifier from the metabolite annotation. Metabolites that still end up
# without a formula are collected in failed_again.
failed_again = []
for m in still_failed:
    met = model.metabolites.get_by_id(m)
    print(met, met.name)

    # The annotation value may be a single ID or a list of IDs.
    metanetx_id = met.annotation.get('metanetx.chemical')
    if isinstance(metanetx_id, list):
        metanetx_id = metanetx_id[0] if metanetx_id else None

    formula, charge = None, None
    if metanetx_id:
        try:
            formula, charge = get_metanetx_formula(metanetx_id)
        except Exception as e:
            print(f"Error: {e}")

    if formula is None:
        # A missing annotation or a failed lookup is recorded as a failure,
        # and the metabolite's existing charge is left untouched.
        failed_again.append(met.id)
        print("FAILED: ", met, met.annotation)
        continue

    met.formula = formula
    # Only overwrite the charge when MetaNetX actually provided one.
    if charge is not None:
        met.charge = charge


In [None]:
# Print every metabolite that still has no formula, with its name and
# annotation, so it can be curated by hand.
for met in model.metabolites:
    if met.formula is not None:
        continue
    print(met, met.name, met.annotation)


In [None]:
# Hand-entered values: (metabolite ID, formula, charge).
for met_id, formula, charge in (
    ("agdppdtundec_c", 'C79H127O28P2', -3),
    ("dolichol_c", 'C25H44O', 0),
):
    curated = model.metabolites.get_by_id(met_id)
    curated.formula = formula
    curated.charge = charge

# For these two metabolites only the formula is set; the charge is untouched.
for met_id in ("ficytb5_c", "focytb5_c"):
    model.metabolites.get_by_id(met_id).formula = 'C34H30FeN4O4'



# 3. Check if reactions are balanced using BiGG and MetaNetX

In [None]:
# (reaction, imbalance) pairs for every reaction that fails the balance check.
imbalanced_reactions = []

# Scan all reactions; IDs starting with 'EX_' or 'DM_' or containing 'PSEUDO'
# are never reported.
for reaction in model.reactions:
    try:
        imbalance = reaction.check_mass_balance()
    except Exception as e:
        print(f"Error checking mass balance for {reaction.id}: {e}")
        imbalance = None
    if not imbalance:
        continue
    if reaction.id.startswith(('EX_', 'DM_')) or 'PSEUDO' in reaction.id:
        continue
    imbalanced_reactions.append((reaction, imbalance))

# Report the findings in the cell output
for reaction, imbalance in imbalanced_reactions:
    print(f"Reaction: {reaction.id}, Imbalance: {imbalance}")

# Persist the same list, one tab-separated line per reaction
try:
    with open('imbalanced_reactions.txt', 'w', encoding='utf-8') as out:
        for reaction, imbalance in imbalanced_reactions:
            try:
                rxn_str = reaction.build_reaction_string()
            except Exception as e:
                # Keep the entry and record why the equation is unavailable.
                rxn_str = f"Error building reaction string: {e}"
            line = f"Reaction: {reaction.id}\t{rxn_str}\tImbalance: {imbalance}\n"
            out.write(line)
    print("imbalanced_reactions.txt written successfully.")
except Exception as e:
    print(f"Failed to write imbalanced_reactions.txt: {e}")


In [None]:
# Number of imbalanced reactions found (shown as the cell output).
len(imbalanced_reactions)

In [None]:
import requests

# Metabolite IDs skipped when looking up alternative formulas/charges for
# the metabolites of an imbalanced reaction.
exclude_metabolites = {
    "nadh_c", "nad_c",
    "h_c", "h2o_c", "o2_c",
    "atp_c", "adp_c", "pi_c",
    "coa_c", "co2_c", "nh4_c", "h2o2_c", "ppi_c",
    "glu__L_c", "succ_c", "accoa_c", "pyr_c",
    "donor_c", "acceptor_c",
}

def get_metanetx_id(met):
    """Return the MetaNetX chemical ID stored in a metabolite's annotation.

    Parameters
    ----------
    met : object
        Object that may expose an ``annotation`` mapping.

    Returns
    -------
    The value under the ``"metanetx.chemical"`` key; when that value is a
    list, its first element. ``None`` when ``met`` has no ``annotation``
    attribute or the key is absent.
    """
    if not hasattr(met, "annotation"):
        return None

    annotation = met.annotation
    if "metanetx.chemical" not in annotation:
        return None

    found = annotation["metanetx.chemical"]
    # Several IDs may be annotated; only the first one is used.
    return found[0] if isinstance(found, list) else found

def get_bigg_formula_charge(bigg_id):
    """Fetch the formula and charge of a metabolite from the BiGG API.

    The compartment suffix (``_c``, ``_e``, ``_m`` or ``_p``) is removed from
    ``bigg_id`` and the universal metabolite endpoint is queried.

    Parameters
    ----------
    bigg_id : str
        Metabolite ID, with or without one of the compartment suffixes above.

    Returns
    -------
    tuple
        ``(formula, charge)``; either element may be ``None`` when the
        response does not provide it. ``(None, None)`` is returned, after
        printing a message, on a non-200 status or any error.

    Notes
    -----
    The singular ``formula``/``charge`` keys are read first. When they are
    absent, the first entries of the ``formulae``/``charges`` lists are used
    instead. If several values are listed, the first one is an arbitrary
    choice, so the result should be treated as a candidate to verify.
    """
    # Remove compartment suffix (e.g., "_c") and remember the compartment.
    compartment = None
    base_id = bigg_id
    if bigg_id.endswith(("_c", "_e", "_m", "_p")):
        base_id = bigg_id[:-2]
        compartment = bigg_id[-1]
    url = f"http://bigg.ucsd.edu/api/v2/universal/metabolites/{base_id}"
    try:
        resp = requests.get(url, timeout=10)
        if resp.status_code != 200:
            print(f"    BIGG API error for {base_id}: status {resp.status_code}")
            return None, None

        data = resp.json()

        formula = data.get("formula", None)
        if formula is None:
            # Fall back to the list-valued key.
            formulae = data.get("formulae") or []
            if formulae:
                formula = formulae[0]

        # Try to get charge from compartments_in_models. This is only
        # attempted when a compartment suffix was actually recognised;
        # otherwise the last character of the ID is not a compartment.
        charge = None
        if compartment is not None:
            for comp in data.get("compartments_in_models", []):
                if comp.get("bigg_id", None) == compartment and "charge" in comp:
                    charge = comp["charge"]
                    break
        # If not found, try the top-level keys (singular, then list-valued).
        if charge is None:
            charge = data.get("charge", None)
        if charge is None:
            charges = data.get("charges") or []
            if charges:
                charge = charges[0]
        return formula, charge
    except Exception as e:
        print(f"    BIGG API error for {base_id}: {e}")
        return None, None

# For each imbalanced reaction whose only imbalance is an equal amount of 'H'
# and 'charge', look up alternative formulas/charges for its metabolites and
# test, one candidate at a time, whether adopting it balances the reaction.
# A candidate that balances the reaction is only kept after confirmation
# through input(), so this cell blocks until each prompt is answered.
for reaction, imbalance in imbalanced_reactions:
    # Only reactions off by the same amount in 'H' and 'charge' are examined.
    if set(imbalance.keys()) == {'charge', 'H'} and imbalance['charge'] == imbalance['H']:
        print(f"\nReaction {reaction.id} compounds (excluding common cofactors):")
        # Candidate updates as (metabolite, source, formula, charge) tuples.
        mets_to_check = []
        for met in reaction.metabolites:
            if met.id not in exclude_metabolites:
                print(f"  {met.id}: {met.formula}, charge={met.charge}")
                metanetx_id = get_metanetx_id(met)
                met_mnx_formula, met_mnx_charge = None, None
                if metanetx_id:
                    try:
                        met_mnx_formula, met_mnx_charge = get_metanetx_formula(metanetx_id)
                        print(f"    MetaNetX: {metanetx_id}, formula={met_mnx_formula}, charge={met_mnx_charge}")
                        # Any difference from the model values is queued as a candidate
                        # (this includes a failed lookup that returned None/None).
                        if met_mnx_formula != met.formula or met_mnx_charge != met.charge:
                            print(f"    DIFFERENCE: Model formula/charge = {met.formula}/{met.charge}, MetaNetX = {met_mnx_formula}/{met_mnx_charge}")
                            mets_to_check.append((met, "metanetx", met_mnx_formula, met_mnx_charge))
                    except Exception as e:
                        print(f"    Could not retrieve MetaNetX info for {metanetx_id}: {e}")
                else:
                    print("    No MetaNetX ID found in annotation.")

                # BiGG is queried for every metabolite, regardless of the MetaNetX outcome.
                bigg_formula, bigg_charge = get_bigg_formula_charge(met.id)
                if bigg_formula is not None or bigg_charge is not None:
                    # A missing BiGG charge is not counted as a difference.
                    if (bigg_formula != met.formula or (bigg_charge is not None and bigg_charge != met.charge)):
                        print(f"    BIGG: formula={bigg_formula}, charge={bigg_charge}")
                        print(f"    DIFFERENCE: Model formula/charge = {met.formula}/{met.charge}, BIGG = {bigg_formula}/{bigg_charge}")
                        mets_to_check.append((met, "bigg", bigg_formula, bigg_charge))

        # Try updating each possible alternative (MetaNetX, then BIGG) for each metabolite
        for met, source, new_formula, new_charge in mets_to_check:
            # Candidates carrying no information at all are skipped.
            if new_formula is None and new_charge is None:
                continue
            # Remember the current values so the trial update can be undone.
            old_formula = met.formula
            old_charge = met.charge
            if new_formula is not None:
                met.formula = new_formula
            if new_charge is not None:
                met.charge = new_charge
            try:
                new_imbalance = reaction.check_mass_balance()
            except Exception as e:
                print(f"    Error checking mass balance after update: {e}")
                # NOTE(review): a failed check leaves new_imbalance falsy, so the
                # branch below treats it as balanced and prompts the user - confirm.
                new_imbalance = None
            if not new_imbalance:
                print(f"\n  Updating {met.id} to {source} formula/charge would balance reaction {reaction.id}!")
                user_input = input(f"    Update {met.id} in the model to formula={new_formula}, charge={new_charge}? (y/n): ")
                if user_input.strip().lower() == "y":
                    # The trial values are already set on the metabolite; keep them.
                    print(f"    Updated {met.id} in the model.")
                else:
                    # Revert to old values
                    met.formula = old_formula
                    met.charge = old_charge
                    print(f"    Did not update {met.id}.")
            else:
                # Revert to old values
                met.formula = old_formula
                met.charge = old_charge





# 3b. Manual Correction of Imbalanced Reactions

In [None]:
# Inspect reaction '3OXCOADH': if it is imbalanced, print the reaction, its
# annotation, and the formula and charge of each reactant and product.
for reaction in model.reactions.get_by_any('3OXCOADH'):
    try:
        imbalance = reaction.check_mass_balance()
    except Exception as e:
        # Report the failure instead of hiding it; a bare except would also
        # swallow KeyboardInterrupt.
        print(f"Error checking mass balance for {reaction.id}: {e}")
        imbalance = None
    if imbalance:
        print(reaction, reaction.name)
        print(reaction.annotation)
        for m in reaction.reactants:
            print(m, m.formula, m.charge)
        for m in reaction.products:
            print(m, m.formula, m.charge)

In [None]:
# Set formula 'R' and charge 0 on the 'acceptor_c' metabolite.
acceptor = model.metabolites.get_by_id('acceptor_c')
acceptor.formula = 'R'
acceptor.charge = 0

# 4. Manually Add Extra Reactions

In [None]:
from cobra.core import Reaction

# Drop the existing DHPDH_1 / DHPDH_2 reactions; replacements with the same
# IDs are defined below.
# NOTE(review): attribute access fails if either reaction is absent from the
# loaded model - confirm both are always present in the input.
model.remove_reactions([model.reactions.DHPDH_1, model.reactions.DHPDH_2])

# Replacement for DHPDH_1 (annotated subsystem: uracil degradation).
rxn13121 = Reaction("DHPDH_1", name="Dihydropyrimidine dehydrogenase", lower_bound=0, upper_bound=100)

# NOTE(review): "ec-code" is 1.3.1.2 but the biocyc ID reads "1.3.1.3-RXN" - verify.
rxn13121.annotation = {"ec-code": "1.3.1.2", 
                       "biocyc": "1.3.1.3-RXN", 
                       "metanetx.reaction": "MNXR97814", 
                       "seed.reaction":"rxn00719", 
                       "rhea": "18093", 
                       "subsystem": "uracil degradation"}


# Replacement for DHPDH_2 (annotated subsystem: thymine degradation).
rxn13122 = Reaction("DHPDH_2", name="Dihydropyrimidine dehydrogenase", lower_bound=0, upper_bound=100)

rxn13122.annotation = {"ec-code": "1.3.1.2", 
                       "biocyc": "RXN-11209", 
                       "seed.reaction":"rxn01027", 
                       "rhea": "58284",
                       "subsystem": "thymine degradation"}

# New reaction 3HMPPYRAT (annotated subsystem: thymine degradation).
# The name previously contained a mis-encoded em dash ("â€”"); it is written
# here as the intended "—".
rxn26140 = Reaction("3HMPPYRAT", name="(R)-3-amino-2-methylpropionate—pyruvate transaminase", lower_bound=0, upper_bound=100)

rxn26140.annotation = {"ec-code": "2.6.1.40", 
                       "biocyc": "2.6.1.40-RXN",
                       "metanetx.reaction": "MNXR115181", 
                       "seed.reaction":"rxn15297", 
                       "rhea": "18396",
                       "subsystem": "thymine degradation"}

# New reaction OSHSL, gene rule XNR_4037.
# NOTE(review): the "seed.reaction" and "rhea" values below are identical to
# those given for 3HMPPYRAT earlier in this cell, and "kegg.reaction" is the
# same as for AHSERL - likely copy-paste leftovers; verify against the databases.
rxn25148 = Reaction("OSHSL", name="O-succinyl-L-homoserine succinate-lyase", lower_bound=0, upper_bound=100)
rxn25148.gene_reaction_rule = 'XNR_4037'
rxn25148.annotation = {"ec-code": "2.5.1.48",
                       "biocyc": "RXN-18602",
                       "kegg.reaction": "R00651", 
                       "seed.reaction":"rxn15297", 
                       "rhea": "18396",
                       "subsystem": "Cysteine and methionine metabolism"}

# New reaction AHSERL, gene rule XNR_0869.
rxn25149 = Reaction("AHSERL", name="O-acetyl-L-homoserine acetate-lyase", lower_bound=0, upper_bound=100)
rxn25149.gene_reaction_rule = 'XNR_0869'
rxn25149.annotation = {"ec-code": "2.5.1.49",
                       "biocyc": "O-ACETYLHOMOSERINE-THIOL-LYASE-RXN",
                       "kegg.reaction": "R00651", 
                       "metanetx.reaction":"MNXR95634", 
                       "rhea": "10049",
                       "subsystem": "Cysteine and methionine metabolism"}

# New reaction ARGJ, gene rule XNR_5266.
# NOTE(review): "kegg.reaction", "seed.reaction", "rhea" and "subsystem" are
# identical to the OSHSL entry above - likely copy-paste leftovers; verify.
rxn2311 = Reaction("ARGJ", name="amino acid acetyltransferase", lower_bound=0, upper_bound=100)
rxn2311.gene_reaction_rule = 'XNR_5266'
rxn2311.annotation = {"ec-code": "2.3.1.1",
                       "biocyc": "RXN0-6948",
                       "kegg.reaction": "R00651", 
                       "seed.reaction":"rxn15297", 
                       "rhea": "18396",
                       "subsystem": "Cysteine and methionine metabolism"}

# New reaction METGL, gene rule XNR_5540.
rxn44111 = Reaction("METGL", name="methionine gamma-lyase", lower_bound=0, upper_bound=100)
rxn44111.gene_reaction_rule = 'XNR_5540'
rxn44111.annotation = {"ec-code": "4.4.1.11",
                       "biocyc": "METHIONINE-GAMMA-LYASE-RXN",
                       "kegg.reaction": "R00654", 
                       "seed.reaction":"rxn00456", 
                       "metanetx.reaction":"MNXR101474", 
                       "rhea": "23801",
                       "subsystem": "Cysteine and methionine metabolism"}

# Two cytosolic metabolites used by the reactions added in this cell.
acmet_c = cobra.Metabolite(
    'acmet_c',
    name='N-acetyl-L-methionine',
    formula='C7H12NO3S',
    charge=-1,
    compartment='c',
)
acmet_c.annotation = dict([
    ('pubchem.compound', '448580'),
    ('chebi', 'CHEBI:71670'),
    ('kegg.compound', 'C02712'),
])

ch4s_c = cobra.Metabolite(
    'ch4s_c',
    name='Methanethiol',
    formula='CH4S',
    charge=0,
    compartment='c',
)
ch4s_c.annotation = dict([
    ('pubchem.compound', '878'),
    ('chebi', 'CHEBI:14586'),
    ('kegg.compound', 'C00409'),
])

# Register the new metabolites first, then the reactions that use them.
model.add_metabolites([acmet_c, ch4s_c])
model.add_reactions([rxn13121, rxn13122, rxn26140, rxn25148, rxn25149, rxn2311, rxn44111])

# Stoichiometry per reaction ID, applied in the listed order.
# NOTE(review): confirm whether build_reaction_from_string resets the 0/100
# bounds passed to the Reaction constructors according to the arrow used.
reaction_equations = {
    "DHPDH_1": "56dura_c + nadp_c <-- ura_c + nadph_c + h_c",
    "DHPDH_2": "56dthm_c + nadp_c <-- thym_c + nadph_c + h_c",
    "3HMPPYRAT": "3aib_c + pyr_c --> ala__L_c + 2mop_c",
    "OSHSL": "suchms_c + ch4s_c --> met__L_c + succ_c + h_c",
    "AHSERL": "achms_c + ch4s_c --> met__L_c + ac_c + h_c",
    "ARGJ": "met__L_c + accoa_c --> acmet_c + coa_c + h_c",
    "METGL": "met__L_c + h2o_c --> nh4_c + ch4s_c + 2obut_c",
}
for rxn_id, equation in reaction_equations.items():
    model.reactions.get_by_id(rxn_id).build_reaction_from_string(equation)
                                                

In [None]:
from cobra.io import write_sbml_model, save_json_model

# Save the curated model as SBML and JSON. Both paths are relative, so the
# files are written to the current working directory.
# NOTE(review): the model was loaded from <cwd>/../4.mass_balance - confirm
# the notebook runs from that directory so the intended file is overwritten.
write_sbml_model(model, 'Salb-GEM-Biosustain.xml')
save_json_model(model, 'Salb-GEM-Biosustain.json')


In [None]:
%%bash

# Run a memote snapshot report on the SBML model in ../4.mass_balance.
memote report snapshot ../4.mass_balance/Salb-GEM-Biosustain.xml