In [26]:
import pandas as pd
from cobra.io import read_sbml_model

# Load the metabolic network from SBML.
kokel = read_sbml_model("working_model.xml")

columns = ['#reaction_ID', 'reactant_IDs(atom)', 'product_IDs(atom)', 'reversibility']

# Collect one record per reaction and build the frame in a single call.
# Growing a DataFrame with `df.loc[i] = ...` re-allocates on every row and
# needs a hand-maintained counter; the resulting rows are the same.
reaction_records = [
    [rxn.id, rxn.reactants, rxn.products, rxn.reversibility]
    for rxn in kokel.reactions
]
df = pd.DataFrame(reaction_records, columns=columns)

No objective in listOfObjectives
No objective coefficients in model. Unclear what should be optimized


[<Metabolite 13dpg[h] at 0x2087e9f4a30>, <Metabolite h[h] at 0x2087fed5000>, <Metabolite nadph[h] at 0x2087ff3e1d0>]
[<Metabolite atp[h] at 0x2087e837460>, <Metabolite ru5p_D[h] at 0x2087ea09b10>]
[<Metabolite co2[h] at 0x2087e8187f0>, <Metabolite h2o[h] at 0x2087ff3d7b0>, <Metabolite rb15bp[h] at 0x2087ea09ae0>]
[<Metabolite fdp_B[h] at 0x2087edefb80>, <Metabolite h2o[h] at 0x2087ff3d7b0>]
[<Metabolite 2pg[c] at 0x2087e8aec20>]
[<Metabolite 3pg[c] at 0x2087e8ae1a0>]
[<Metabolite adhlam[m] at 0x2087e9f6a70>, <Metabolite coa[m] at 0x2087e8351b0>]
[<Metabolite h[m] at 0x2087e837520>, <Metabolite pyr[m] at 0x2087ea09d20>, <Metabolite thmpp[m] at 0x2087ea09b70>]
[<Metabolite 2ahethmpp[m] at 0x2087e9f7790>, <Metabolite lpam[m] at 0x2087ff3c970>]
[<Metabolite adp[c] at 0x2087e8ae830>, <Metabolite h[c] at 0x2087fed63b0>, <Metabolite pep[c] at 0x2087ea0a1d0>]
[<Metabolite adp[m] at 0x2087e837100>, <Metabolite h[m] at 0x2087e837520>, <Metabolite pep[m] at 0x2087ea0a410>]
[<Metabolite dhlam[m] a

Unnamed: 0,#reaction_ID,reactant_IDs(atom),product_IDs(atom),reversibility
0,GAPDH_nadp_hi,"[13dpg[h], h[h], nadph[h]]","[g3p[h], nadp[h], pi[h]]",False
1,PRUK,"[atp[h], ru5p_D[h]]","[adp[h], h[h], rb15bp[h]]",False
2,RBPCh,"[co2[h], h2o[h], rb15bp[h]]","[3pg[h], h[h]]",False
3,BFBPh,"[fdp_B[h], h2o[h]]","[f6p_B[h], pi[h]]",False
4,ENO,[2pg[c]],"[h2o[c], pep[c]]",True
...,...,...,...,...
90,EX_fadh2[m],[fadh2[m]],[],True
91,EX_o2[m],[o2[m]],[],True
92,EX_nad[m],[nad[m]],[],True
93,EX_nadh[m],[nadh[m]],[],True


In [16]:
import re 
import string
from collections import defaultdict, Counter

# ---- CONFIG ----
# Input file of carbon atom mappings; each usable line must match `pat` below.
MAPPING_FILE = "all_mapping.C.sorted.txt"
# Tab-separated output written by build_modreactions().
OUTPUT_FILE  = "modreactions.tsv"
# Metabolite base names treated as the CoA family when detecting maskable carbons.
COA_FAMILY   = {"coa", "accoa", "succoa"}
# Letter given to masked carbons; it is excluded from the pool of unique letters.
RESERVED_MASK_LETTER = "w"
# ----------------

# --- parse mapping file ---
# One mapping line has the shape "<rxn> <left_species>:C#<i> = <right_species>:C#<j>".
pat = re.compile(r"(?P<rxn>\S+)\s+(?P<Lspec>\S+):C#(?P<Lidx>\d+)\s*=\s*(?P<Rspec>\S+):C#(?P<Ridx>\d+)")
# A species token is "<name>[<c>]" where <c> is a single lowercase letter.
species_pat = re.compile(r"([A-Za-z0-9_]+)\[([a-z])\]")  # e.g. accoa[m]

def bname(spec):
    """Return the part of a species token that precedes the first '['."""
    head, _, _ = spec.partition("[")
    return head
def comp(spec):
    """Return the compartment code found inside the brackets of a species token.

    Raises IndexError when the token contains no '['.
    """
    after_open = spec.split("[")[1]
    return after_open.partition("]")[0]

def pretty(spec, suffix=""):
    """Format a species token for output, e.g. accoa[m] -> accoaM.

    Underscores are dropped from the base name, an 'n' is prepended when the
    base name starts with a digit, the compartment code is upper-cased and
    appended, and `suffix` (for example '.ex') is added at the very end.
    """
    stem = "".join(bname(spec).split("_"))
    lead = "n" if stem[:1].isdigit() else ""
    return f"{lead}{stem}{comp(spec).upper()}{suffix}"

# Every mapping edge in file order, plus the same edges grouped by reaction id.
edges_all = []
by_rxn = defaultdict(list)
with open(MAPPING_FILE) as fh:
    for raw_line in fh:
        hit = pat.match(raw_line.strip())
        if hit is None:
            # Lines that do not look like a mapping are skipped.
            continue
        rxn_id, l_spec, l_idx, r_spec, r_idx = hit.group("rxn", "Lspec", "Lidx", "Rspec", "Ridx")
        source = (l_spec, int(l_idx))
        target = (r_spec, int(r_idx))
        edges_all.append((rxn_id, source, target))
        by_rxn[rxn_id].append((source, target))

# --- detect maskable (CoA-internal) nodes ---
# For every carbon node "spec:C#idx", collect the base names of the metabolites
# it is mapped to or from anywhere in the mapping file.
nbrs = defaultdict(set)
for _rxn_id, (src_spec, src_idx), (dst_spec, dst_idx) in edges_all:
    nbrs[f"{src_spec}:C#{src_idx}"].add(bname(dst_spec))
    nbrs[f"{dst_spec}:C#{dst_idx}"].add(bname(src_spec))

# A node is maskable when it belongs to a CoA-family metabolite and all of its
# mapping partners are CoA-family metabolites as well.
maskable = {
    node
    for node, partners in nbrs.items()
    if bname(node.split(":")[0]) in COA_FAMILY
    and partners
    and partners.issubset(COA_FAMILY)
}

In [17]:
# --- label assignment: one unique letter per reactant carbon, with CoA masking for large reactions ---
# The full lowercase alphabet as a list of single characters.
ALL_LETTERS = list(string.ascii_lowercase)
# Letters that may be handed out as unique labels: the alphabet minus the reserved mask letter.
UNIQUE = [ch for ch in ALL_LETTERS if ch != RESERVED_MASK_LETTER]

def build_labels_basic(pairs):
    """Assign one letter per reactant carbon and copy it to the mapped product carbon.

    Parameters
    ----------
    pairs : iterable of ((Ls, Li), (Rs, Ri))
        Carbon mappings of one reaction: reactant species/index to product
        species/index.

    Returns
    -------
    defaultdict(dict)
        ``labels[species][carbon_index] -> letter`` for both reactant and
        product species.

    Raises
    ------
    ValueError
        If more unique letters are required than ``UNIQUE`` provides.

    Notes
    -----
    Reads the module-level ``UNIQUE``, ``RESERVED_MASK_LETTER`` and ``maskable``.
    Masking is switched on as soon as the reactant carbons outnumber the unique
    letters. The previous threshold of 26 ignored that one letter is reserved,
    so a reaction with exactly 26 reactant carbons always ran out of letters.
    """
    # Keep (species, index) tuples instead of re-parsing "spec:C#idx" strings,
    # so species ids containing ':' or '#' cannot break the ordering.
    reactants = {(Ls, Li) for (Ls, Li), _ in pairs}
    need_mask = len(reactants) > len(UNIQUE)

    labels = defaultdict(dict)
    free_letters = iter(UNIQUE)

    # Deterministic order: by species id, then by carbon index.
    for spec, idx in sorted(reactants):
        if need_mask and f"{spec}:C#{idx}" in maskable:
            ltr = RESERVED_MASK_LETTER
        else:
            ltr = next(free_letters, None)
            if ltr is None:
                raise ValueError(
                    f"reaction needs more than {len(UNIQUE)} unique atom letters "
                    f"({len(reactants)} reactant carbons, masking={'on' if need_mask else 'off'})"
                )
        labels[spec][idx] = ltr

    # Each product carbon inherits the letter of the reactant carbon it maps from.
    for (Ls, Li), (Rs, Ri) in pairs:
        labels[Rs][Ri] = labels[Ls][Li]
    return labels

# --- duplicate-aware labeling (e.g., T_FBAp) ---
def build_labels_with_duplicates(pairs, left_specs, right_specs):
    """
    Label carbons for a reaction in which a species is listed more than once on a side.

    Strategy:
      - For each mapped reactant carbon (Ls, Li), draw one fresh letter per
        occurrence of Ls in `left_specs` (occurrence 0, 1, ...).
      - Sort that carbon's product targets and hand the occurrence letters out
        round-robin over the sorted targets.
      - On the product side, a target carbon goes into the first occurrence bin
        of Rs that does not yet hold that carbon index; once every bin holds it,
        the last bin is overwritten.

    Parameters
    ----------
    pairs : iterable of ((Ls, Li), (Rs, Ri))
        Carbon mappings of one reaction.
    left_specs, right_specs : list of str
        Species tokens of each side, in order and including repeats.

    Returns
    -------
    (L_occ_letters, R_occ_letters, L_max, R_max)
        The first two map ``species -> occurrence -> {carbon_index: letter}``;
        the last two map ``species -> highest carbon index seen in pairs``.

    Raises
    ------
    ValueError
        If more letters are required than the module-level ``UNIQUE`` provides.
    """
    from itertools import cycle

    # How often each species is listed on each side.
    left_mult = Counter(left_specs)
    right_mult = Counter(right_specs)

    # Product carbons reached from each reactant carbon, in sorted order.
    prod_targets = defaultdict(list)  # (Ls, Li) -> list of (Rs, Ri)
    for (Ls, Li), (Rs, Ri) in pairs:
        prod_targets[(Ls, Li)].append((Rs, Ri))
    for targets in prod_targets.values():
        targets.sort()

    # One shared stream of letters, so no letter is handed out twice.
    letter_iter = iter(UNIQUE)

    L_occ_letters = defaultdict(lambda: defaultdict(dict))  # spec -> occ -> {idx: letter}
    R_occ_letters = defaultdict(lambda: defaultdict(dict))

    for (Ls, Li), targets in prod_targets.items():
        k = left_mult.get(Ls, 1)  # occurrences of this species on the left

        # Draw one letter per left-side occurrence of this carbon.
        letters_for_occs = []
        for occ in range(k):
            ltr = next(letter_iter, None)
            if ltr is None:
                raise ValueError(
                    f"reaction needs more than {len(UNIQUE)} unique atom letters"
                )
            L_occ_letters[Ls][occ][Li] = ltr
            letters_for_occs.append(ltr)

        # Round-robin over occurrences; with k == 1 this always yields 0.
        rr = cycle(range(k))
        for (Rs, Ri) in targets:
            ltr = letters_for_occs[next(rr)]
            p_k = right_mult.get(Rs, 1)
            # Number of product occurrence bins that already hold carbon Ri.
            # Indexing the defaultdict here also creates the (possibly empty) bins.
            already = sum(Ri in R_occ_letters[Rs][j] for j in range(p_k))
            occ_p = already if already < p_k else p_k - 1
            R_occ_letters[Rs][occ_p][Ri] = ltr

    # Highest carbon index per species, used by callers to iterate 1..max.
    L_max = defaultdict(int)
    R_max = defaultdict(int)
    for (Ls, Li), (Rs, Ri) in pairs:
        L_max[Ls] = max(L_max[Ls], Li)
        R_max[Rs] = max(R_max[Rs], Ri)

    return L_occ_letters, R_occ_letters, L_max, R_max

def letters_string(spec, labels):
    """Concatenate the letters of `spec` in ascending carbon-index order.

    Indices are scanned from 1 up to the largest key; missing indices are
    skipped. Returns "" when `spec` has no entry or an empty entry in `labels`.
    """
    index_to_letter = labels.get(spec)
    if not index_to_letter:
        return ""
    collected = []
    for position in range(1, max(index_to_letter) + 1):
        if position in index_to_letter:
            collected.append(index_to_letter[position])
    return "".join(collected)

In [18]:
# ---- main assembly from your pandas df ----
def _side_basic(specs, labels, suffix=""):
    """Render one reaction side when no species is repeated: 'name(letters) + ...'."""
    parts = []
    for sp in specs:
        name = pretty(sp, suffix=suffix)
        letters = letters_string(sp, labels)
        parts.append(f"{name}({letters})" if letters else name)
    return " + ".join(parts)


def _side_with_duplicates(specs, occ_map, max_map, suffix=""):
    """Render one reaction side with one entry per occurrence of each species."""
    seen = Counter()  # occurrences of each species printed so far
    parts = []
    for sp in specs:
        name = pretty(sp, suffix=suffix)
        occ = seen[sp]
        seen[sp] += 1
        letters = ""
        if sp in occ_map and occ in occ_map[sp]:
            by_index = occ_map[sp][occ]
            letters = "".join(by_index.get(i, "") for i in range(1, max_map.get(sp, 0) + 1))
        parts.append(f"{name}({letters})" if letters else name)
    return " + ".join(parts)


def _normalize_reversibility(value):
    """Map true/1/yes to '1' and false/0/no/blank to '0'; otherwise return str(value)."""
    text = str(value).strip().lower()
    if text in {"true", "1", "yes"}:
        return "1"
    if text in {"false", "0", "no", ""}:
        return "0"
    return str(value)


def build_modreactions(df):
    """Write the atom-labelled reaction table to OUTPUT_FILE.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns '#reaction_ID', 'reactant_IDs(atom)',
        'product_IDs(atom)' and 'reversibility'. The two species columns are
        converted with str() and scanned with `species_pat`, so order and
        repeats of the species tokens are kept.

    Side effects
    ------------
    Writes a header plus one tab-separated line per row to OUTPUT_FILE (lines
    joined with a newline, none after the last line) and prints a summary.
    Reads the module-level `by_rxn`, `species_pat` and `OUTPUT_FILE`.
    """
    rows = ["\t".join(["#reaction.ID", "reactant.IDs(atom)", "product.IDs(atom)", "reversibility"])]

    for _, r in df.iterrows():
        rxn = r["#reaction_ID"]

        # Species tokens per side, in DataFrame order and including repeats.
        left_specs = [f"{m}[{c}]" for (m, c) in species_pat.findall(str(r["reactant_IDs(atom)"]))]
        right_specs = [f"{m}[{c}]" for (m, c) in species_pat.findall(str(r["product_IDs(atom)"]))]

        pairs = by_rxn.get(rxn, [])

        has_duplicates = (
            any(n > 1 for n in Counter(left_specs).values())
            or any(n > 1 for n in Counter(right_specs).values())
        )

        # Pick the renderer and the label data for each side once, so that the
        # exchange rule below reuses exactly the path chosen here.
        if pairs and has_duplicates:
            Locc, Rocc, Lmax, Rmax = build_labels_with_duplicates(pairs, left_specs, right_specs)
            render = _side_with_duplicates
            left_args, right_args = (Locc, Lmax), (Rocc, Rmax)
        else:
            labels = build_labels_basic(pairs) if pairs else {}
            render = _side_basic
            left_args = right_args = (labels,)

        left_str = render(left_specs, *left_args)
        right_str = render(right_specs, *right_args)

        # Exchange rule: mirror the non-empty side with '.ex', except for Biomass.
        if rxn.lower() == "biomass":
            right_str = "biomass"  # explicit override
        elif not left_str and right_str:
            left_str = render(right_specs, *right_args, suffix=".ex")
        elif left_str and not right_str:
            right_str = render(left_specs, *left_args, suffix=".ex")

        rev = _normalize_reversibility(r["reversibility"])
        rows.append("\t".join([rxn, left_str, right_str, rev]))

    with open(OUTPUT_FILE, "w") as fh:
        fh.write("\n".join(rows))
    print(f"Wrote {OUTPUT_FILE} with {len(df)} reactions.")

In [19]:
build_modreactions(df)

Wrote modreactions.tsv with 95 reactions.
