In [2]:
import sys
import os
from cobra.io import read_sbml_model

sys.path.append(os.path.abspath("../../src"))

model = read_sbml_model("../../models/fixed_model_V2/MAG013.sbml_gapfilled_noO2_v3.xml")


print(f"模型中包含 {len(model.reactions)} 个反应，{len(model.metabolites)} 个代谢物，{len(model.genes)} 个基因。")


Set parameter Username
Set parameter LicenseID to value 2663970
Academic license - for non-commercial use only - expires 2026-05-12
模型中包含 835 个反应，853 个代谢物，367 个基因。


## 测试

In [54]:
print(f"模型中包含 {len(model.reactions)} 个反应，{len(model.metabolites)} 个代谢物，{len(model.genes)} 个基因。")


模型中包含 836 个反应，900 个代谢物，367 个基因。


In [3]:
model.objective = 'EX_cpd00211_e0'
solution = model.optimize()
print("flux：", solution.objective_value)


flux： 0.0


In [4]:
model.objective = 'bio1'
solution = model.optimize()
print("Biomass flux：", solution.objective_value)


Biomass flux： 17.706736080016622


## 断点查询

In [5]:
for rxn in model.reactions:
    if "cpd00211" in rxn.build_reaction_string():
        print(rxn.id, ":", rxn.reaction)


rxn00994_c0 : cpd00120_c0 + cpd00142_c0 <=> cpd00211_c0 + cpd00279_c0
rxn00875_c0 : cpd00029_c0 + cpd00120_c0 <=> cpd00022_c0 + cpd00211_c0
EX_cpd00211_e0 : cpd00211_e0 <=> 
TR_cpd00211_c0 : cpd00211_c0 --> cpd00211_e0


In [6]:
for rxn_id in ["rxn00994_c0", "rxn00875_c0", "TR_cpd00211_c0", "EX_cpd00211_e0"]:
    rxn = model.reactions.get_by_id(rxn_id)
    print(f"{rxn.id} bounds: {rxn.bounds}")


rxn00994_c0 bounds: (-1000.0, 1000.0)
rxn00875_c0 bounds: (-1000.0, 1000.0)
TR_cpd00211_c0 bounds: (0.0, 1000.0)
EX_cpd00211_e0 bounds: (-1000.0, 1000.0)


In [7]:
from graph_analysis import  trace_flux_break, build_flux_graph

model.objective = "EX_cpd00211_e0"
solution = model.optimize()

G = build_flux_graph(model, solution)

trace_flux_break(G, "cpd00211_c0") 


 Starting from: cpd00211_c0
 Path blocked at:
  - rxn00875_c0: cpd00120_c0 ➝ cpd00211_c0 | flux=0.0


In [8]:
from graph_analysis import find_root_blockers

blocked_roots = find_root_blockers(model, solution, "cpd00211_c0")

print("\n 阻断根节点:")
for b in blocked_roots:
    met = model.metabolites.get_by_id(b)
    print(f"{b} - {met.name if hasattr(met, 'name') else ''}")


 阻断根节点:
cpd03734_c0 - Acetamide_c0
cpd00120_c0 - Butyryl-CoA_c0
cpd00891_c0 - L-methylmalonyl-CoA_c0
cpd00790_c0 - O-Acetyl-L-homoserine_c0
cpd02835_c0 - UDP-2,3-bis(3-hydroxytetradecanoyl)glucosamine_c0
cpd02685_c0 - N-acetyl-LL-2,6-diaminopimelate_c0


In [9]:
blocked_roots

{'cpd00120_c0',
 'cpd00790_c0',
 'cpd00891_c0',
 'cpd02685_c0',
 'cpd02835_c0',
 'cpd03734_c0'}

## 社区检测


In [10]:
import networkx as nx

G = nx.read_graphml("cpd00211_community.graphml")

graph_rxns = set()
for u, v, data in G.edges(data=True):
    if 'reaction' in data:
        rxn_id = data['reaction']
        base_rxn = rxn_id.split("_")[0] 
        graph_rxns.add(base_rxn)


In [11]:
def compare_with_model(model, community_rxns):
    model_rxn_ids = set()
    for rxn in model.reactions:
        base_id = rxn.id.split("_")[0] 
        model_rxn_ids.add(base_id)
    
    missing = community_rxns - model_rxn_ids
    return missing

missing_rxns = compare_with_model(model, graph_rxns)
print(f"模型中缺失的反应数: {len(missing_rxns)}")
print("缺失的反应 ID 示例：", list(missing_rxns)[:10])


模型中缺失的反应数: 31329
缺失的反应 ID 示例： ['rxn35196', 'rxn34578', 'rxn27280', 'rxn42949', 'rxn43923', 'rxn47105', 'rxn42631', 'rxn21454', 'rxn39971', 'rxn38609']


In [14]:
import pandas as pd

def parse_equation_reactants(equation):
    """
    通用解析 equation，提取反应物（无论是 <=>、=>、<=）
    """
    try:
        if '<=>' in equation:
            arrow = '<=>'
        elif '=>' in equation:
            arrow = '=>'
        elif '<=' in equation:
            arrow = '<='
        else:
            return []

        lhs = equation.split(arrow)[0]
        compounds = []

        for token in lhs.split('+'):
            token = token.strip()
            if 'cpd' in token:
                cpd = token.split('cpd')[1].split('[')[0]
                compartment = token.split('[')[-1].replace(']', '')
                comp_code = '_c0' if compartment == '0' else '_e0'
                cpd_full = 'cpd' + cpd + comp_code
                compounds.append(cpd_full)

        return compounds

    except Exception as e:
        print(" equation 解析失败：", equation, "| 错误：", e)
        return []


def filter_blocker_related_reactions(reaction_file, blocked_metabolites, missing_reactions):
    df = pd.read_csv(reaction_file, sep='\t')
    matched = []

    for _, row in df.iterrows():
        rxn_id = row['id']
        if rxn_id not in missing_reactions:
            continue
        reactants = parse_equation_reactants(str(row['equation']))
        if any(met in reactants for met in blocked_metabolites):
            matched.append({
                'reaction_id': rxn_id,
                'reactants': reactants,
                'equation': row['equation'],
                'name': row.get('name', '')
            })

    return pd.DataFrame(matched)


blocked_roots = blocked_roots


reaction_file_path = '../../data/database/reactions.tsv'

result_df = filter_blocker_related_reactions(
    reaction_file=reaction_file_path,
    blocked_metabolites=blocked_roots,
    missing_reactions = missing_rxns
)

# 保存结果
result_df.to_csv("recommended_gapfill_from_blockers_v1.csv", index=False)
print("recommended_gapfill_from_blockers saved")


recommended_gapfill_from_blockers saved


In [16]:
len(result_df)

55

In [None]:
import re
from cobra import Reaction, Metabolite

def build_reaction_from_equation(model, rxn_id, equation, name=""):
    rxn = Reaction(rxn_id)
    rxn.name = name
    rxn.lower_bound = -1000
    rxn.upper_bound = 1000

    # 判断方向符号
    if "<=>" in equation:
        lhs, rhs = equation.split("<=>")
    elif "=>" in equation:
        lhs, rhs = equation.split("=>")
        rxn.lower_bound = 0
    elif "<=" in equation:
        rhs, lhs = equation.split("<=")
        rxn.lower_bound = 0
    else:
        return None

    def parse_side(side, sign):
        met_dict = {}
        for m in side.split("+"):
            m = m.strip()
            match = re.match(r"\((\d+)\)\s*(cpd\d+)\[(\d)\]", m)
            if match:
                coef, met_base, comp = match.groups()
                met_id = f"{met_base}_{'c0' if comp == '0' else 'e0'}"
                if met_id in model.metabolites:
                    met = model.metabolites.get_by_id(met_id)
                else:
                    met = Metabolite(met_id, compartment="c" if comp == "0" else "e")
                met_dict[met] = sign * int(coef)
        return met_dict

    # 添加底物和产物
    mets = {}
    mets.update(parse_side(lhs, -1))
    mets.update(parse_side(rhs, 1))
    rxn.add_metabolites(mets)
    return rxn

def test_gapfill_reaction_activity(model, gapfill_df):
    results = []

    for _, row in gapfill_df.iterrows():
        rxn_id = row['reaction_id']
        rxn_name = row.get('name', '')
        equation = str(row['equation']).strip('"').strip()

        rxn = build_reaction_from_equation(model, rxn_id, equation, rxn_name)
        if rxn is None or not rxn.metabolites:
            results.append({
                "reaction_id": rxn_id,
                "name": rxn_name,
                "status": "build_failed",
                "reaction_flux": None
            })
            continue

        model.add_reactions([rxn])
        model.objective = rxn

        try:
            flux = model.optimize().objective_value
            results.append({
                "reaction_id": rxn_id,
                "name": rxn_name,
                "status": "tested",
                "reaction_flux": flux
            })
        except Exception as e:
            results.append({
                "reaction_id": rxn_id,
                "name": rxn_name,
                "status": "optimize_error",
                "reaction_flux": None
            })

        model.reactions.get_by_id(rxn_id).remove_from_model()

    return pd.DataFrame(results)

In [47]:
gapfill_df = pd.read_csv("recommended_gapfill_from_blockers_v1.csv")
results = test_gapfill_reaction_activity(model, gapfill_df)
#results.to_csv("tested_gapfill_results.csv", index=False)
results

Unnamed: 0,reaction_id,name,status,reaction_flux
0,rxn00871,butanoyl-CoA:phosphate butanoyltransferase,tested,0.0
1,rxn02425,R03383,tested,0.0
2,rxn03086,R04467,tested,0.0
3,rxn09630,thiazole phosphate synthesis (ribose 5-phospha...,tested,0.0
4,rxn09631,thiazole phosphate synthesis (xylulose 5-phosp...,tested,0.0
5,rxn10074,fatty acid oxidation (Butanoyl-CoA ),tested,0.0
6,rxn13069,Acyl-coenzyme A oxidase,tested,0.0
7,rxn13713,4-hydroxybutyrate CoA transferase,tested,0.0
8,rxn14157,butanoyl-CoA:carbon-dioxide ligase (ADP-forming),tested,0.0
9,rxn15529,Acyl-CoA:methylmalonyl-CoA C-acyltransferase (...,tested,0.0


In [49]:
def try_gapfill_reactions(model, gapfill_df, objective_rxn, flux_threshold=1e-6):
    """
    对每一条 gapfill 反应：
    - 添加到模型
    - 设置统一的 objective_rxn
    - 观察是否能恢复目标通量

    返回一个包含 reaction_id、reaction_flux 的 DataFrame
    """
    results = []

    for _, row in gapfill_df.iterrows():
        rxn_id = row['reaction_id']
        rxn_name = row.get('name', '')
        equation = str(row['equation']).strip()

        rxn = build_reaction_from_equation(model, rxn_id, equation, rxn_name)
        if rxn is None:
            results.append({
                "reaction_id": rxn_id,
                "name": rxn_name,
                "status": "build_failed",
                "flux": None
            })
            continue

        # 添加并设置目标函数
        model.add_reactions([rxn])
        model.objective = objective_rxn

        try:
            flux = model.optimize().objective_value
            results.append({
                "reaction_id": rxn_id,
                "name": rxn_name,
                "status": "tested",
                "flux": flux
            })
        except Exception as e:
            results.append({
                "reaction_id": rxn_id,
                "name": rxn_name,
                "status": "optimize_error",
                "flux": None,
                "error": str(e)
            })

        # 清理
        model.reactions.get_by_id(rxn_id).remove_from_model()

    return pd.DataFrame(results)

result = try_gapfill_reactions( model, gapfill_df, "EX_cpd00211_e0" )
result

Unnamed: 0,reaction_id,name,status,flux
0,rxn00871,butanoyl-CoA:phosphate butanoyltransferase,tested,0.0
1,rxn02425,R03383,tested,0.0
2,rxn03086,R04467,tested,0.0
3,rxn09630,thiazole phosphate synthesis (ribose 5-phospha...,tested,0.0
4,rxn09631,thiazole phosphate synthesis (xylulose 5-phosp...,tested,0.0
5,rxn10074,fatty acid oxidation (Butanoyl-CoA ),tested,0.04966
6,rxn13069,Acyl-coenzyme A oxidase,tested,0.0
7,rxn13713,4-hydroxybutyrate CoA transferase,tested,0.0
8,rxn14157,butanoyl-CoA:carbon-dioxide ligase (ADP-forming),tested,0.0
9,rxn15529,Acyl-CoA:methylmalonyl-CoA C-acyltransferase (...,tested,0.0


In [50]:
gapfill_df = pd.read_csv("recommended_gapfill_from_blockers_v1.csv")

# === STEP 3: 按 reaction_id 选择你要添加的反应（例如 rxn10074）===
target_rxn_id = "rxn10074"
row = gapfill_df[gapfill_df["reaction_id"] == target_rxn_id].iloc[0]
rxn_id = row["reaction_id"]
equation = row["equation"]
name = row.get("name", "")

# === STEP 4: 构建并添加到模型 ===
rxn = build_reaction_from_equation(model, rxn_id, equation, name)
if rxn:
    model.add_reactions([rxn])
    print(f"已添加反应: {rxn.id}")
else:
    print("失败")

已添加反应: rxn10074


In [None]:
model.objective = "EX_cpd00211_e0"
solution = model.optimize()


print(f"Butyrate export flux = {solution.objective_value}")


Butyrate export flux = 0.049659949958719


In [53]:
blocked_roots = find_root_blockers(model, solution, "cpd00211_c0")

print("\n 阻断根节点:")
for b in blocked_roots:
    met = model.metabolites.get_by_id(b)
    print(f"{b} - {met.name if hasattr(met, 'name') else ''}")


 阻断根节点:


In [55]:
from cobra.io import write_sbml_model


write_sbml_model(model, "../../models/fixed_model_V2/MAG013.sbml_gapfilled_noO2.xml")
