## 2.4 Model Validation and application
### 2.4.3 Heterotrophic metabolism is redox-limited

This notebook recreates the analyses and figures from section 2.4.3. Flux sampling of the model[1] using Methane, Propane, Isopropanol and Acetone. This analysis is divided in two notebooks, one dedicated to performing the simulations and other dedicted to plotting and data analysis (This).

The notebook is divided in three sections:
1. Summary of reactions with significant changes [1]
3. Summary of the Top 20 reactions ranked by PageRank Centrality [2]

[1] Megchelenbrink W, Huynen M, Marchiori E (2014) optGpSampler: An Improved Tool for Uniformly Sampling the Solution-Space of Genome-Scale Metabolic Networks. PLoS ONE 9(2): e86587. https://doi.org/10.1371/journal.pone.0086587

[2] Beguerisse-Díaz, M., Bosque, G., Oyarzún, D. et al. Flux-dependent graphs for metabolic networks. npj Syst Biol Appl 4, 32 (2018). https://doi.org/10.1038/s41540-018-0067-y

In [1]:
# Set up the modeling environment
from plotly.subplots import make_subplots
from tqdm import tqdm

import plotly.graph_objects as go
import plotly.express as pl
import plotly.io as pio

import pandas as pd
import numpy as np
import json

# Load style and theme templates for the figures
with open('theme_template.json', 'r') as template:
    style = json.load(template)
style["layout"]["height"] = 70 * (600 / 158.75)
style["layout"]["width"] = 160 * (600 / 158.75)

pio.templates["paper"] = go.layout.Template(
    data=style["data"],
    layout=style["layout"]
)
pio.templates.default = "simple_white+paper"

# Load Simulation results
summary_tests = {}
for substrate in ["Propane", "Isopropanol", "Acetone"]:
    summary_tests[substrate] = pd.read_excel(
        "data_files/summary_sampling.xlsx", sheet_name=substrate, index_col="Unnamed: 0"
    )
    
pagerank = {}
for substrate in ["Methane", "Propane", "Isopropanol", "Acetone"]:
    pagerank[substrate]= pd.read_csv(f"data_files/pagerank_{substrate}.csv")
    pagerank[substrate].drop(columns="Unnamed: 0", inplace=True)

# Dataframe used to group reactions by pathway
rxn_colors = pd.read_csv("data_files/reaction_pathways.csv", index_col="rxn_id")

# Model in table format
model_csv = pd.read_excel("../model_files/iMFP2023.xls", sheet_name="reactions", index_col="rxn_id")

**Section 1** Summary of reactions with significant changes

In [2]:
logf_cutoff = 0.5
cutoff = 0.2

# Identify significant flux changes in each condition
diff_propane_ks = set(summary_tests["Propane"].query("abs(log2F)>@logf_cutoff").query("kstest>@cutoff").index)
diff_propanol_ks = set(summary_tests["Isopropanol"].query("abs(log2F)>@logf_cutoff").query("kstest>@cutoff").index)
diff_acetone_ks = set(summary_tests["Acetone"].query("abs(log2F)>@logf_cutoff").query("kstest>@cutoff").index)

# Merge all significant reactions from the three conditions
significant_rxns = diff_propane_ks.union(diff_propanol_ks).union(diff_acetone_ks)

# Remove exchange and transport reactions from the list of significant reactions
false_positives = ['2PPOHtex', 'ACETONEtex', 'C3H8tex', 'CH4tex', 'EX_2ppoh_e',
                   'N2tex_r', 'N2trpp', 'NIT1c', 'EX_acetone_e', 'MTHGXLtabcpp',
                   'NITR_NOpp', 'NO2tpp', 'NO3t7p', 'NORpp', 'EX_c3h8_e',
                   "ACtex_r", "HAO2pp", "SUCCtex", 'EX_h2o_e',
                   'FALDtpp', "H2Otex", 'EX_o2_e', 'O2tex', 'FACOAL120t2pp',
                   'EX_for_e','FORtex', 'PPA', 'H2Otpp', "FALDHpp", "FALTHFL"]

for id in false_positives:
    if id in significant_rxns:
        significant_rxns.remove(id)
significant_rxns = set(significant_rxns)
not_significant_rxns = ~summary_tests[substrate].index.isin(significant_rxns)



In [3]:
# Use this command to explore significant reactions in each condition
summary_tests["Propane"].loc[significant_rxns, ["median", "log2F"]].join(model_csv.loc[:, "Reaction"]).join(rxn_colors.iloc[:, 0])

Unnamed: 0,median,log2F,Reaction,Level_1
C3H8OXq8,1.149050e+00,1.000000,c3h8_p + mql8_p + o2_p --> 2ppoh_p + h2o_p + m...,C3 metabolism
LDH_D,1.130620e+00,1.000000,lac__D_c + nad_c <=> h_c + nadh_c + pyr_c,C3 metabolism
UPPN,9.602538e-07,-0.345781,cala_c + h2o_c + 2.0 h_c --> ala_B_c + co2_c +...,Cofactors Biosynthesis
AKGDH,5.256019e-02,1.525241,akg_c + coa_c + nad_c --> co2_c + nadh_c + suc...,TCA cycle
PC1MT,8.613562e-07,0.821944,amet_c + pc1_c --> ahcys_c + dscl_c,Cofactors Biosynthesis
...,...,...,...,...
IPPOHDHpp,1.140987e+00,1.000000,2ppoh_p + 2.0 ficytC_p --> acetone_p + 2.0 foc...,C3 metabolism
DURIPP,4.022523e-02,-0.607358,duri_c + pi_c <=> 2dr1p_c + ura_c,Nucleotides/sides Degradation
GAPDy,4.495533e-01,-2.261278,13dpg_c + h_c + nadph_c --> g3p_c + nadp_c + pi_c,CO<sub>2</sub> Fixation
NDPK8,6.496544e-06,2.453512,atp_c + dadp_c --> adp_c + datp_c,Nucleotides/sides Biosynthesis


In [31]:
# Volcano plots
# Set a color for each pathway
c = style["layout"]["colorway"]
color_pathway = dict(
    zip(rxn_colors["Level_1"].unique(), c)
)

# Set the color to each reaction based on the pathway
for id in rxn_colors.index:
    pathway = rxn_colors.loc[id, "Level_1"]
    rxn_colors.loc[id, "color"] = color_pathway[pathway]

styling = {
    "Propane": {"color": "#B82E2E", "symbol": "diamond"},
    "Isopropanol": {"color": "#66AA00", "symbol": "square"},
    "Acetone": {"color": "#DD4477", "symbol": "x"}
}

sum_pathways = {}
fig_sampling = {}
for substrate in ["Propane", "Isopropanol", "Acetone"]:
    
    # Create a figure object
    fig_sampling[substrate] = make_subplots(rows=1, cols=2)
    # Create a dataframe to store log2 FC values by pathway
    sum_pathways[substrate] = pd.DataFrame()
    
    # Recover significant reactions
    log2F_significant = summary_tests[substrate].query("log2F>-6").query("abs(log2F)>@logf_cutoff").query("kstest>@cutoff")["log2F"]
    kstest_significant = summary_tests[substrate].query("log2F>-6").query("abs(log2F)>@logf_cutoff").query("kstest>@cutoff")["kstest"]
    
    for id in log2F_significant.index:
        if "EX_" not in id:
            if id not in false_positives:
                
                # Plot each reaction
                fig_sampling[substrate].add_trace(
                    go.Scatter(
                        x=[log2F_significant[id]],
                        y=[kstest_significant[id]],
                        mode="markers", name=substrate, showlegend=False,
                        marker={"color": rxn_colors.loc[id, "color"],
                                "symbol": styling[substrate]["symbol"],
                                "line": {"width": 0.5, "color": "rgb(0,0,0)"}},
                        text=[id]
                    ),
                    row=1, col=1
                )
                
                # Acumulator of log2 FC by pathway
                try:
                    sum_pathways[substrate].loc[rxn_colors.loc[id, "Level_1"], "sum"] += log2F_significant[id]
                except KeyError:
                    sum_pathways[substrate].loc[rxn_colors.loc[id, "Level_1"], "sum"] = log2F_significant[id]
    
    # Recover non-significant reactions
    log2F = summary_tests[substrate].query("log2F>-6").query("abs(log2F)<@logf_cutoff").query("kstest<@cutoff")["log2F"]
    kstest = summary_tests[substrate].query("log2F>-6").query("abs(log2F)<@logf_cutoff").query("kstest<@cutoff")["kstest"]
    
    # Plot non-significant reactions
    fig_sampling[substrate].add_trace(
        go.Scatter(
            x=log2F,
            y=kstest,
            mode="markers", name=substrate, showlegend=False,
            marker={"color": "rgb(0,0,0)", "symbol": styling[substrate]["symbol"]},
        ),
        row=1, col=1
    )

    # Bar plot of log2 FC by pathway
    sum_pathways[substrate].sort_values("sum", ascending=True, inplace=True)
    sum_pathways[substrate] = sum_pathways[substrate].join(pd.DataFrame(color_pathway, index=["color"]).transpose())
    for path in sum_pathways[substrate].index:
        fig_sampling[substrate].add_trace(
            go.Bar(
                x=[sum_pathways[substrate].loc[path, "sum"]], y=[path], orientation="h",
                marker={"color": sum_pathways[substrate].loc[path, "color"]}, showlegend=False
            ),
            row=1, col=2
        )
    
    # Figure styling
    fig_sampling[substrate].update_yaxes(
        title_text="KS-value",
        row=1, col=1
    )
    fig_sampling[substrate].update_xaxes(
        title_text="log2 FC",
        row=1, col=1
    )
    fig_sampling[substrate].update_yaxes(
        side="right",
        tickfont_size= 10,
        row=1, col=2
    )
    fig_sampling[substrate].update_xaxes(
        title_text="sum(log2 FC)",
        range= [-15, 15],
        row=1, col=2
    )
    fig_sampling[substrate].show()



**Section 2** Plot of most central reactions

In [35]:
# Remove Exchanges and transport reactions
false_positives = ['2PPOHtex', 'ACETONEtex', 'C3H8tex', 'CH4tex', 'MEOHDHpp', "EX_co2_e", "PPA",
                   "C3H8OXq8", "IPPOHDHpp", "atpm", "CO2tpp_r", "H2Otex_r", "CO2tex_r", "ATPS1",
                   'NITR_NOpp', 'NO2tpp', 'NO3t7p', 'NORpp', 'PMMOipp', "ACTNMOq8", "H2Otpp_r",
                   "ACtex_r", "SUCCtex", 'EX_h2o_e', 'ACOHDHpp', 'MTHGXLtabcpp', "BIOMASS_Mfumariolicum_Pic",
                   'MDH', 'FALDtpp', 'ACOHDHpp', "H2Otex", "NDPK7", "RNDR3", 'EX_o2_e', "EX_h_e"]

# Recover PageRank values for top 20 reactions
no_top_rxns = 20
substrate = "Propane"
top_reactions = pagerank[substrate].drop(columns=false_positives).mean(0).sort_values(ascending=False).head(no_top_rxns).index

# Pre-process dataset in the format requered for plotting functions
top_pagerank = pd.DataFrame()
for substrate in ["Methane", "Propane", "Isopropanol", "Acetone"]:
    for id in tqdm(top_reactions):
        tmp = pd.DataFrame(pagerank[substrate][id].to_list(), columns=["PR-Centrality"])
        tmp.loc[:, "Substrate"] = substrate
        tmp.loc[:, "Reaction"] = id
        top_pagerank = top_pagerank.append(tmp)

# Draw figure
fig_prcentrality = pl.box(top_pagerank, y="PR-Centrality", x="Reaction", color="Substrate", points=False)
fig_prcentrality.update_layout(
    {"yaxis": {"title": {"text": "PR-Centrality"},
               "tickfont": {"size": 14},
               "range": [-0.01, 0.15]},
     "xaxis": {"tickfont": {"size": 14},
               "tickangle": 20,
               "title": {"font": {"size": 14}}
               },
     "height": 100 * (600 / 158.75),
     "width": 250 * (600 / 158.75),
     "legend": {"title": {"text": None},
                "font": {"size": 14},
                "x": 1, "y": 1}
     }
)
fig_prcentrality.show()

100%|██████████| 20/20 [00:00<00:00, 606.39it/s]
100%|██████████| 20/20 [00:00<00:00, 582.19it/s]
100%|██████████| 20/20 [00:00<00:00, 508.84it/s]
100%|██████████| 20/20 [00:00<00:00, 459.98it/s]
