Imports

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from src.gem.yeast8model import Yeast8Model

# Initialise model

In [None]:
# Exchange rate applied to reactions r_1714 / r_1714_REV below
# (presumably the glucose exchange pair -- confirm against the model).
glc_exch_rate = 16.89
wt_ec = Yeast8Model("../data/gemfiles/ecYeastGEM_batch_8-6-0.xml")

# Bounds per reaction ID: r_1714 may carry flux in [-rate, 0],
# r_1714_REV in [0, +rate].
glc_exch_bounds = {
    "r_1714": (-glc_exch_rate, 0),
    "r_1714_REV": (0, glc_exch_rate),
}
for glc_rxn_id, glc_bounds in glc_exch_bounds.items():
    wt_ec.model.reactions.get_by_id(glc_rxn_id).bounds = glc_bounds

# Ablate and store fluxes in each round

In [None]:
# Run the ablation and keep its result on the model wrapper.
wt_ec.ablation_result = wt_ec.ablate()

# Fluxes recorded per ablation round; the "original" entry is the baseline
# that every other round is compared against.
ablation_fluxes = wt_ec.ablation_fluxes
original_fluxes = ablation_fluxes["original"]

# Same keys as ablation_fluxes minus the baseline; each value is replaced
# below by (round fluxes - baseline fluxes).
ablation_fluxes_diff = ablation_fluxes.copy()
ablation_fluxes_diff.pop("original")
for biomass_component in list(ablation_fluxes_diff):
    flux_change = ablation_fluxes[biomass_component] - original_fluxes
    ablation_fluxes_diff[biomass_component] = flux_change
    # Report the extremes scaled by 1e5 to keep the printed numbers readable.
    print(f"{biomass_component}")
    print(f"min {1e5 * flux_change.min()} * 1e-5")
    print(f"max {1e5 * flux_change.max()} * 1e-5")

# Load subsystems lookup

In [None]:
# Read easy_subsystems.csv and flatten it into a plain dict
# (reaction_id -> subsystem) so that lookups are fast.
subsystem_df = pd.read_csv("easy_subsystems.csv", index_col=0)
subsystem_dict = {
    reaction_id: subsystem
    for reaction_id, subsystem in zip(
        subsystem_df["reaction_id"].to_list(),
        subsystem_df["subsystem"].to_list(),
    )
}

# Number of occurrences of subsystems in top N positive/negative fluxes

In [None]:
# Choose component
biomass_component = "lipid"

# Rank that component's flux differences and keep n of them:
# ascending=False keeps the n largest values, ascending=True would keep
# the n smallest.
n = 100
ranked_diff = ablation_fluxes_diff[biomass_component].copy()
s = ranked_diff.sort_values(ascending=False).iloc[:n]

In [None]:
def plot_subsystem_freqs(s, ax):
    """Plot how often each subsystem occurs among reactions using the given enzymes.

    Each index label of ``s`` is turned into an enzyme metabolite ID by
    removing ``"draw_"`` and appending ``"[c]"``. The reactions attached to
    those metabolites in the module-level ``wt_ec`` model are collected
    (each reaction counted once), reactions whose ID starts with ``"r_"``
    are mapped to a subsystem through the module-level ``subsystem_dict``,
    and the number of reactions per subsystem is drawn as horizontal bars.

    Parameters
    ----------
    s : pandas.Series
        Only the index is used; its labels must be strings.
    ax : matplotlib.axes.Axes
        Axes to draw on.

    Raises
    ------
    KeyError
        If an enzyme metabolite is not in the model, or if the first six
        characters of an ``r_`` reaction ID are not a key of
        ``subsystem_dict``.
    """
    # Extract protein IDs
    enz_metabolite_ids = [
        rxn_idx.replace("draw_", "") + "[c]" for rxn_idx in s.index
    ]

    # Unique IDs of all reactions in which these enzymes participate
    participating_rxn_ids = {
        rxn.id
        for enz_metabolite_id in enz_metabolite_ids
        for rxn in wt_ec.model.metabolites.get_by_id(enz_metabolite_id)._reaction
    }

    # Subsystems that correspond to the participating reactions.
    # Only the first six characters of the ID are used as lookup key
    # (presumably to strip isozyme/direction suffixes -- confirm against the
    # lookup table). Sorting makes the result independent of set iteration
    # order.
    subsystem_list = [
        subsystem_dict[rxn_id[:6]]
        for rxn_id in sorted(participating_rxn_ids)
        if rxn_id.startswith("r_")
    ]

    # Frequency table & plot. Nothing is drawn when no reaction qualified,
    # so an empty selection leaves the axes blank.
    if subsystem_list:
        subsystem_freqs = pd.Series(subsystem_list).value_counts()
        # `ax` must be passed by keyword: the first positional parameter of
        # `plot.barh` is `x`, not the target axes.
        subsystem_freqs.plot.barh(ax=ax)
    ax.invert_yaxis()
    ax.set_xlabel('Number of occurences of subsystem')

In [None]:
# Draw the subsystem frequencies for the selection `s` on a fresh
# single-axes figure.
fig, ax = plt.subplots(nrows=1, ncols=1)
plot_subsystem_freqs(s=s, ax=ax)

Loop

In [None]:
n = 100

# One figure per biomass component, each showing the subsystem frequencies
# for the n largest flux differences of that component.
for biomass_component, fluxes in ablation_fluxes_diff.items():
    fig, ax = plt.subplots()
    s = fluxes.copy().sort_values(ascending=False).iloc[:n]
    plot_subsystem_freqs(s, ax)
    ax.set_title(biomass_component)

# Sum of fluxes belonging to each subsystem, from all positive or negative fluxes

In [None]:
def plot_subsystem_sumfluxes(s, ax):
    """Plot, per subsystem, the summed values of ``s`` over participating reactions.

    Each index label of ``s`` is turned into an enzyme metabolite ID by
    removing ``"draw_"`` and appending ``"[c]"``. For every reaction attached
    to that metabolite in the module-level ``wt_ec`` model whose ID starts
    with ``"r_"``, the enzyme's value from ``s`` is credited to the
    reaction's subsystem (looked up in the module-level ``subsystem_dict``).
    Reactions with other IDs are left out of the plot. The per-subsystem
    sums are drawn as horizontal bars, largest first.

    An enzyme's value is added once per participating reaction, so an enzyme
    used by several reactions contributes several times.

    Parameters
    ----------
    s : pandas.Series
        Values indexed by string labels (one per enzyme).
    ax : matplotlib.axes.Axes
        Axes to draw on.

    Raises
    ------
    KeyError
        If an enzyme metabolite is not in the model, or if the first six
        characters of an ``r_`` reaction ID are not a key of
        ``subsystem_dict``.
    """
    subsystems = []
    enz_usage_fluxes = []
    # Iterate label/value pairs directly instead of indexing `s` by integer
    # position, which pandas deprecates for label-indexed Series.
    for rxn_idx, enz_usage_flux in s.items():
        # Extract protein id
        enz_metabolite_id = rxn_idx.replace("draw_", "") + "[c]"
        enz_metabolite = wt_ec.model.metabolites.get_by_id(enz_metabolite_id)
        for participating_rxn in enz_metabolite._reaction:
            rxn_id = participating_rxn.id
            # Reactions that are not "r_" reactions are not assigned to any
            # subsystem and are therefore skipped here.
            if not rxn_id.startswith("r_"):
                continue
            subsystems.append(subsystem_dict[rxn_id[:6]])
            enz_usage_fluxes.append(enz_usage_flux)

    # Nothing is drawn when no reaction qualified, so an empty selection
    # leaves the axes blank.
    if subsystems:
        participating_rxn_df = pd.DataFrame(
            {
                'subsystem': subsystems,
                'enz_usage_flux': enz_usage_fluxes,
            }
        )

        # Sum per subsystem, largest first
        table = participating_rxn_df.groupby('subsystem')[['enz_usage_flux']].sum()
        table = table.sort_values(ascending=False, by='enz_usage_flux')

        # Draw
        table.plot.barh(ax=ax)
    ax.invert_yaxis()

In [None]:
# Choose component and take a private copy of its flux differences
biomass_component = "lipid"
s = ablation_fluxes_diff[biomass_component].copy()

In [None]:
# Split the flux differences by sign; entries equal to zero are dropped.
is_negative = s < 0
is_positive = s > 0
s_negative, s_positive = s[is_negative], s[is_positive]

In [None]:
# Plot the decreases with their sign flipped so the summed values are
# positive.
fig, ax = plt.subplots(nrows=1, ncols=1)
plot_subsystem_sumfluxes(s=-s_negative, ax=ax)

Loop

In [None]:
# One figure per biomass component: increases on the left axes, decreases
# (sign flipped) on the right axes.
for biomass_component, fluxes in ablation_fluxes_diff.items():
    fig, (ax_increase, ax_decrease) = plt.subplots(nrows=1, ncols=2)
    s = fluxes.copy()
    s_positive = s[s > 0]
    s_negative = s[s < 0]

    plot_subsystem_sumfluxes(s_positive, ax_increase)
    ax_increase.set_title(f'{biomass_component}, flux increases')

    plot_subsystem_sumfluxes(-s_negative, ax_decrease)
    ax_decrease.set_title(f'{biomass_component}, flux decreases')