In [1]:
import numpy as np
import pandas as pd

import cobra
from cameo import pfba
from cameo.flux_analysis.simulation import room, lmoma

import escher

In [2]:
def show_escher(result):
    escher.Builder(
    map_name="e_coli_core.Core metabolism",
    reaction_data=result.fluxes.to_dict(),
).display_in_browser()

In [3]:
def prepare_dataframe(
    solution, sample="WT", author="iML1515", glucose_flux_id="EX_glc__D_e"
):
    # Prepare basic DataFrame
    df = pd.DataFrame(solution.fluxes)
    df.index = df.index.rename("ID")
    df = df.reset_index()
    df = df.assign(sample_id=sample)

    # rename some columns to be more compatible with other simulations
    df = df.assign(author=author)
    df = df.assign(BiGG_ID=df.ID)
    df = df.rename({"fluxes": "flux"}, axis=1)

    # calculate normalized fluxes
    glucose_uptake = -1 * df[df["ID"] == glucose_flux_id]["flux"].values[0]
    df = df.assign(normalized_flux=lambda x: x.flux * 100 / glucose_uptake)
    return df

## Define IDs of knocked-out reactions

In [4]:
knockouts = [
    {"gene": "fbaA", "id": "FBA"},
    {"gene": "fbaB", "id": "FBA"},
    {"gene": "fbp", "id": "FBP"},
    {"gene": "gnd", "id": "GND"},
    {"gene": "pfkA", "id": "PFK"},
    {"gene": "pfkB", "id": "PFK"},
    {"gene": "pgi", "id": "PGI"},
    {"gene": "pgl", "id": "PGL"},
    {"gene": "ppsA", "id": "PPS"},
    {"gene": "pts", "id": "GLCptspp"},
    {"gene": "pykA", "id": "PYK"},
    {"gene": "pykF", "id": "PYK"},
    {"gene": "rpe", "id": "RPE"},
    {"gene": "rpiA", "id": "RPI"},
    {"gene": "rpiB", "id": "RPI"},
    {"gene": "sdhCD", "id": "SUCDi"},
    {"gene": "sucAB", "id": "AKGDH"},
    {"gene": "talA", "id": "TALA"},
    {"gene": "tktA", "id": "TKT1"},
    {"gene": "tktB", "id": "TKT1"},
    {"gene": "tpi", "id": "TPI"},
    {"gene": "zwf", "id": "G6PDH2r"},
]

## Dilution simulation

### iML1515 model

In [5]:
model = cobra.io.load_json_model('../../../DataAnalysis/DataIntegrationProject/models/iML1515.json')

In [6]:
# Minimize glucose consumption from medium - equiv to max exchange. 
model.objective = "EX_glc__D_e"
model.objective_direction = "max"
# make glucose uptake to be through Pts
model.reactions.GLCptspp.bounds = (0.0,1000)

# Simulate D 0.2 h-1
model.reactions.BIOMASS_Ec_iML1515_core_75p37M.bounds = (0.19,0.21)
# Glucose uptake rate should more or less match Ishii, 2007
# GUR for WT @ aerobic glucose = 2.87 mmol/h/gDW
model.reactions.GLCptspp.bounds = (2.86,2.88)

dilution_result = pfba(model)
all_data = pd.DataFrame()

In [7]:
# write WT information
df = prepare_dataframe(dilution_result, author="iML1515", sample="WT", glucose_flux_id="EX_glc__D_e")

df.to_csv('../data/simulation_results/COBRA/WT_02.csv')

In [8]:
all_data = pd.concat([all_data, df])

In [9]:
for reaction in knockouts:
    with model:
        ko_gene = reaction["gene"]
        # Simulate knockout
        model.reactions.get_by_id(reaction["id"]).knock_out()
        lmoma_result = lmoma(model, reference=dilution_result.fluxes)
        df = prepare_dataframe(lmoma_result, author="iML1515", sample=ko_gene, glucose_flux_id="EX_glc__D_e")
        df.to_csv(f"../data/simulation_results/COBRA/delta_{ko_gene}.csv")

        all_data = pd.concat([all_data, df], sort=False)

In [10]:
all_data.to_csv("../data/simulation_results/COBRA/iML1515/knockouts_all.csv")

## Make calculations for the e_coli_core model

In [11]:
model = cobra.io.load_json_model('../../../DataAnalysis/DataIntegrationProject/models/e_coli_core.json')

In [12]:
model.objective = "EX_glc__D_e"
model.objective_direction = "max"

# Simulate D 0.2 h-1
model.reactions.BIOMASS_Ecoli_core_w_GAM.bounds = (0.19,0.21)
# Glucose uptake rate should more or less match Ishii, 2007
# GUR for WT @ aerobic glucose = 2.87 mmol/h/gDW
model.reactions.GLCpts.bounds = (2.86,2.88)

dilution_result = pfba(model)
all_data = pd.DataFrame()

In [13]:
# write WT information
df = prepare_dataframe(dilution_result, author="Ec_core", sample="WT", glucose_flux_id="EX_glc__D_e")

df.to_csv('../data/simulation_results/COBRA/core_model/WT_02.csv')

In [14]:
all_data = pd.concat([all_data, df])

In [15]:
for reaction in knockouts:
    with model:        
        ko_gene = reaction["gene"]
        print(f"Worinkg on gene {ko_gene}")
        try:
            # Simulate knockout, it may not grow at all
            model.reactions.get_by_id(reaction["id"]).knock_out()
            lmoma_result = lmoma(model, reference=dilution_result.fluxes)
            df = prepare_dataframe(lmoma_result, author="Ec_core", sample=ko_gene, glucose_flux_id="EX_glc__D_e")
        except:
            print(f"Unable to grow {ko_gene}! Filling flux data with zeros")
            df = prepare_dataframe(dilution_result, sample=ko_gene, author="Ec_core", glucose_flux_id="EX_glc__D_e" )
            df.flux = 0
        
        df.to_csv(f"../data/simulation_results/COBRA/core_model/delta_{ko_gene}.csv")

        all_data = pd.concat([all_data, df], sort=False)

Worinkg on gene fbaA
Worinkg on gene fbaB
Worinkg on gene fbp
Worinkg on gene gnd
Worinkg on gene pfkA
Worinkg on gene pfkB
Worinkg on gene pgi
Worinkg on gene pgl
Worinkg on gene ppsA
Worinkg on gene pts
Unable to grow pts! Filling flux data with zeros
Worinkg on gene pykA
Worinkg on gene pykF
Worinkg on gene rpe
Worinkg on gene rpiA
Unable to grow rpiA! Filling flux data with zeros
Worinkg on gene rpiB
Unable to grow rpiB! Filling flux data with zeros
Worinkg on gene sdhCD
Worinkg on gene sucAB
Worinkg on gene talA
Worinkg on gene tktA
Worinkg on gene tktB
Worinkg on gene tpi
Worinkg on gene zwf


In [17]:
all_data.to_csv("../data/simulation_results/COBRA/core_model/knockouts_all.csv")

## Make calculations for the EColiCore2 model

In [18]:
model = cobra.io.read_sbml_model('../../../DataAnalysis/DataIntegrationProject/models/EColiCore2_compressed_bigg_names.sbml')

Adding exchange reaction EX_Biomass for boundary metabolite: Biomass
Adding exchange reaction EX_4CRSOL_ex for boundary metabolite: 4CRSOL_ex
Adding exchange reaction EX_5DRIB_ex for boundary metabolite: 5DRIB_ex
Adding exchange reaction EX_ac_ex for boundary metabolite: ac_ex
Adding exchange reaction EX_adp_c for boundary metabolite: adp_c
Adding exchange reaction EX_AMOB_ex for boundary metabolite: AMOB_ex
Adding exchange reaction EX_ca2_ex for boundary metabolite: ca2_ex
Adding exchange reaction EX_cl_ex for boundary metabolite: cl_ex
Adding exchange reaction EX_co2_ex for boundary metabolite: co2_ex
Adding exchange reaction EX_coa_c for boundary metabolite: coa_c
Adding exchange reaction EX_cobalt2_ex for boundary metabolite: cobalt2_ex
Adding exchange reaction EX_cu2_ex for boundary metabolite: cu2_ex
Adding exchange reaction EX_etoh_ex for boundary metabolite: etoh_ex
Adding exchange reaction EX_fe2_ex for boundary metabolite: fe2_ex
Adding exchange reaction EX_fe3_ex for boundar

In [19]:
# Minimize glucose consumption from medium - equiv to max exchange. 
model.objective = "EX_glc__D_ex"
model.objective_direction = "max"

# Simulate D 0.2 h-1
model.reactions.Growth.bounds = (0.19,0.21)
# Glucose uptake rate should more or less match Ishii, 2007
# GUR for WT @ aerobic glucose = 2.87 mmol/h/gDW
model.reactions.GLCptspp.bounds = (2.86,2.88)

dilution_result = pfba(model)
all_data = pd.DataFrame()

In [None]:
show_escher(dilution_result)

In [20]:
df = prepare_dataframe(dilution_result, author="ECC2", glucose_flux_id="EX_glc__D_ex")
df.to_csv('../data/simulation_results/COBRA/ECC2/WT_02.csv')

In [21]:
all_data = pd.concat([all_data, df])

In [24]:
for reaction in knockouts:
    with model:        
        ko_gene = reaction["gene"]
        print(f"Working on gene {ko_gene}")
        try:
            # Simulate knockout, it may not grow at all
            model.reactions.get_by_id(reaction["id"]).knock_out()
            lmoma_result = lmoma(model, reference=dilution_result.fluxes)
            df = prepare_dataframe(lmoma_result, author="ECC2", sample=ko_gene, glucose_flux_id="EX_glc__D_ex")
        except:
            print(f"Unable to grow {ko_gene}! Filling flux data with zeros")
            df = prepare_dataframe(dilution_result, sample=ko_gene, author="ECC2", glucose_flux_id="EX_glc__D_ex" )
            df.flux = 0
        
        df.to_csv(f"../data/simulation_results/COBRA/ECC2/delta_{ko_gene}.csv")

        all_data = pd.concat([all_data, df], sort=False)

Working on gene fbaA
Working on gene fbaB
Working on gene fbp
Working on gene gnd
Working on gene pfkA
Working on gene pfkB
Working on gene pgi
Working on gene pgl
Working on gene ppsA
Working on gene pts
Working on gene pykA
Working on gene pykF
Working on gene rpe
Working on gene rpiA
Unable to grow rpiA! Filling flux data with zeros
Working on gene rpiB
Unable to grow rpiB! Filling flux data with zeros
Working on gene sdhCD
Working on gene sucAB
Working on gene talA
Working on gene tktA
Working on gene tktB
Working on gene tpi
Working on gene zwf


In [25]:
all_data.to_csv("../data/simulation_results/COBRA/ECC2/knockouts_all.csv")

## Constrain WT model by experimentally measured fluxes

### Load experimental data

In [26]:
ishii_df = pd.read_csv("../data/datasets/ishii2007_tidy.csv")

In [27]:
df = ishii_df
# this regexp matches deletions starting with d like dpgi
df["sample_id"] = df.Genotype.str.extract(r"d(\w+)")
df.loc[df.Genotype == "WT", "sample_id"] = "WT"

df = df.assign(author="Ishii")
df = df.rename(
    {
        "Measurement_ID": "BiGG_ID",
        "Original_Value": "normalized_flux",
        "Value": "flux",
        "Original_ID": "ID",
    },
    axis=1,
)
df = df[df['Measurement_Type'] == 'flux']
df.loc[df["BiGG_ID"] == "PYKF", "BiGG_ID"] = "PYK"

df = df[["flux", "ID", "BiGG_ID", "author", "sample_id", "normalized_flux"]]
exp_results = df

In [28]:
exp_data = exp_results.query("sample_id == 'WT'")

In [29]:
exp_data.head()

Unnamed: 0,flux,ID,BiGG_ID,author,sample_id,normalized_flux
991,0.0,ACALD,ACALD,Ishii,WT,0.0
992,2.4167,ACONTa,ACONTa,Ishii,WT,84.5
993,1.6874,AKGDH,AKGDH,Ishii,WT,59.0
994,2.4167,CS,CS,Ishii,WT,84.5
995,0.2431,EX_akg(e),EX_akg(e),Ishii,WT,8.5


### Identify which fluxes could be constrainted

In [30]:
common_fluxes = set(dilution_result.fluxes.index).intersection(set(exp_data.BiGG_ID.unique()))
common_fluxes.remove("RPI")

In [31]:
common_fluxes

{'ACALD',
 'ACONTa',
 'AKGDH',
 'CS',
 'EDA',
 'FBA',
 'FUM',
 'G6PDH2r',
 'GAPD',
 'GLCptspp',
 'GND',
 'ICDHyr',
 'ICL',
 'LDH_D',
 'MALS',
 'MDH',
 'ME1',
 'PDH',
 'PGI',
 'PGM',
 'PPC',
 'PTAr',
 'PYK',
 'RPE',
 'SUCDi',
 'TALA',
 'TKT1',
 'TKT2',
 'TPI'}

In [32]:
df = exp_data.query("BiGG_ID in @common_fluxes")
exp_model = model.copy()

for row in df.itertuples():
        print(f"Setting bounds for reaction {row.BiGG_ID}")
        r = exp_model.reactions.get_by_id(row.BiGG_ID)
        r.bounds = (row.flux - 0.1, row.flux + 0.1)

Setting bounds for reaction ACALD
Setting bounds for reaction ACONTa
Setting bounds for reaction AKGDH
Setting bounds for reaction CS
Setting bounds for reaction FBA
Setting bounds for reaction FUM
Setting bounds for reaction G6PDH2r
Setting bounds for reaction GAPD
Setting bounds for reaction GLCptspp
Setting bounds for reaction GND
Setting bounds for reaction ICDHyr
Setting bounds for reaction ICL
Setting bounds for reaction LDH_D
Setting bounds for reaction MALS
Setting bounds for reaction MDH
Setting bounds for reaction ME1
Setting bounds for reaction PDH
Setting bounds for reaction PGI
Setting bounds for reaction PGM
Setting bounds for reaction PPC
Setting bounds for reaction PTAr
Setting bounds for reaction PYK
Setting bounds for reaction RPE
Setting bounds for reaction SUCDi
Setting bounds for reaction TALA
Setting bounds for reaction TKT1
Setting bounds for reaction TKT2
Setting bounds for reaction TPI
Setting bounds for reaction EDA


In [33]:
all_data = pd.DataFrame()
# Calculate WT
exp_dilution_result = pfba(exp_model)
df = prepare_dataframe(exp_dilution_result, sample="WT", author="Exp_ECC2", glucose_flux_id="EX_glc__D_ex" )
df.to_csv("../data/simulation_results/COBRA/Exp_ECC2/WT_02.csv")

with model:
    for reaction in knockouts: 
        ko_gene = reaction["gene"]
        print(f"Working on gene {ko_gene}")
        try:
            # Simulate knockout, it may not grow at all
            model.reactions.get_by_id(reaction["id"]).knock_out()
            lmoma_result = lmoma(model, reference=exp_dilution_result.fluxes)
            df = prepare_dataframe(lmoma_result, sample=ko_gene, author="Exp_ECC2", glucose_flux_id="EX_glc__D_ex" )
        except:
            print(f"Unable to grow {ko_gene}! Filling flux data with zeros")
            df = prepare_dataframe(exp_dilution_result, sample=ko_gene, author="Exp_ECC2", glucose_flux_id="EX_glc__D_ex" )
            df.flux = 0
        df.to_csv(f"../data/simulation_results/COBRA/Exp_ECC2/delta_{ko_gene}.csv")
    
        all_data = pd.concat([all_data, df], sort=False)

Working on gene fbaA
Working on gene fbaB
Working on gene fbp
Working on gene gnd
Working on gene pfkA
Working on gene pfkB
Working on gene pgi
Working on gene pgl
Unable to grow pgl! Filling flux data with zeros
Working on gene ppsA
Unable to grow ppsA! Filling flux data with zeros
Working on gene pts
Working on gene pykA
Working on gene pykF
Working on gene rpe
Unable to grow rpe! Filling flux data with zeros
Working on gene rpiA
Unable to grow rpiA! Filling flux data with zeros
Working on gene rpiB
Unable to grow rpiB! Filling flux data with zeros
Working on gene sdhCD
Unable to grow sdhCD! Filling flux data with zeros
Working on gene sucAB
Unable to grow sucAB! Filling flux data with zeros
Working on gene talA
Unable to grow talA! Filling flux data with zeros
Working on gene tktA
Unable to grow tktA! Filling flux data with zeros
Working on gene tktB
Unable to grow tktB! Filling flux data with zeros
Working on gene tpi
Unable to grow tpi! Filling flux data with zeros
Working on gene