In [1]:
import glob
import numpy
import pandas
import seaborn
import matplotlib.pyplot as plt
from tqdm import tqdm
import multiprocessing as mp
import os
from build import build_model
import coralme
# Keep text in exported SVG figures as text elements rather than converting
# glyphs to paths, so labels stay editable in vector-graphics editors.
plt.rcParams['svg.fonttype'] = 'none'


In [2]:
from IPython.display import display, HTML, Math, Markdown
# Inject a CSS rule that forces elements with class "container" to 95% width.
# NOTE(review): presumably meant to widen the page in the classic Notebook UI;
# confirm the selector still matches in the frontend actually in use.
display(HTML("<style>.container { width:95% !important; }</style>"))

# Load IPython's autoreload extension; mode 2 reloads imported modules before
# each cell executes, so edits to imported source files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
# Taxonomy table (tab-separated, first column used as the index), restricted
# to the rows whose "Kingdom" is "Bacteria".
taxonomy = (
    pandas.read_csv("./taxonomy.txt", index_col=0, sep='\t')
    .loc[lambda table: table["Kingdom"] == "Bacteria"]
)

# Growth-rate table without the "mid_zinc" column, restricted to (and ordered
# like) the rows kept in `taxonomy`. A plain list of labels is used as the
# selector so the resulting index keeps the name it had in the CSV.
growth = (
    pandas.read_csv("./datasets/all_growth_rates.csv", index_col=0)
    .drop(columns="mid_zinc")
    .loc[list(taxonomy.index)]
)

In [61]:
# Values of the "base" column, sorted ascending.
growth.loc[:, "base"].sort_values()

Campylobacter_hominis_ATCC_BAA_381        0.001722
Campylobacter_fetus_subsp_fetus_82_40     0.002236
Helicobacter_winghamensis_ATCC_BAA_430    0.002365
Anaerostipes_sp_3_2_56FAA                 0.002401
Anaerostipes_caccae_DSM_14662             0.002744
                                            ...   
Enterobacter_aerogenes_KCTC_2190          0.579561
Escherichia_albertii_TW07627              0.601763
Escherichia_fergusonii_ATCC_35469         0.635045
Bacillus_sonorensis_L12                   0.655555
Clostridium_beijerinckii_NCIMB_8052       0.679225
Name: base, Length: 495, dtype: float64

In [3]:
from coralme.builder.helper_functions import flux_based_reactions

# Never truncate long cell contents (e.g. full reaction strings) when
# DataFrames are displayed.
pandas.set_option('display.max_colwidth', None)

# Two tables with the same layout, both indexed by their first column.
# NOTE(review): file names suggest one-sided "greater"/"less" test results
# (possibly Mann-Whitney) — confirm against the script that wrote them.
adv, dadv = (
    pandas.read_csv(csv_path, index_col=0)
    for csv_path in (
        "./datasets/2.3.3.MW-greater.csv",
        "./datasets/2.3.3.MW-less.csv",
    )
)

In [19]:
# Single-step label lookup: row "Porphyromonas", column "low_iron".
adv.loc["Porphyromonas", "low_iron"]

0.0158730158730158

In [58]:
# Number of distinct values found in every column of `taxonomy`.
dct = {}
for col in taxonomy.columns:
    dct[col] = len(taxonomy[col].unique())

# One-column ("count") table indexed by taxonomy column name, without the
# identifier-like columns.
df = pandas.DataFrame({"count": dct}).drop(
    ["NCBI Taxonomy ID", "Strain", "Kingdom"]
)

# Display the remaining rows in the same order as the columns of `taxonomy`.
df.loc[[rank for rank in taxonomy.columns if rank in df.index]]

Unnamed: 0,count
Species,426
Genus,169
Family,81
Order,38
Class,20
Phylum,10


In [79]:
# Organism and condition inspected by the cells below; both are substituted
# into the model and flux file paths.
org, cond = "Pseudomonas_aeruginosa_NCGM2_S1", "base"

In [80]:
# Load the pickled model stored under ./me-models/<org>/ for the selected `org`.
# SECURITY: this goes through coralme's pickle loader; unpickling can execute
# arbitrary code, so only load model files from a trusted source.
model = coralme.io.pickle.load_pickle_me_model("./me-models/{}/MEModel-BIO-{}-ME-TS.pkl".format(org,org))

Read LP format model from file /tmp/tmp3d0i0w4b.lp
Reading time = 0.00 seconds
: 0 rows, 0 columns, 0 nonzeros
Read LP format model from file /tmp/tmpof1knx14.lp
Reading time = 0.00 seconds
: 1297 rows, 3258 columns, 14028 nonzeros
Read LP format model from file /tmp/tmp3teu0cl_.lp
Reading time = 0.00 seconds
: 1330 rows, 3256 columns, 13858 nonzeros


In [73]:
# Load the "fluxes" column of the saved flux table for the reference ("base")
# condition and for `cond`, keyed by condition name. The first CSV column is
# used as the index (presumably reaction IDs — confirm against the writer).
#
# Condition names are de-duplicated (order preserved) before reading, so when
# cond == "base" the same file is not parsed twice and the dict is not built
# from a duplicate key; the resulting mapping is the same either way.
fluxes = {
    condition: pandas.read_csv(
        "./cases/fluxes/{}/{}.csv".format(condition, org), index_col=0
    )["fluxes"]
    for condition in dict.fromkeys(("base", cond))
}

In [82]:
# Show the table returned by flux_based_reactions for "bmocogdp_c" in the
# loaded model (columns in the saved output: lb, ub, rxn_flux, met_flux,
# reaction). No flux_dict is passed here.
flux_based_reactions(model,"bmocogdp_c")

Unnamed: 0,lb,ub,rxn_flux,met_flux,reaction
TS_bmocogdp_c,-10.0,1000.0,0.0,0.0,1.0 bmocogdp_c <=>
formation_CPLX_FDH2-0_mod_bmocogdp(1),0.0,1000.0,0.0,0.0,1.0 bmocogdp_c + 1.0 protein_1089456.5.peg.721_Periplasm + 1.0 protein_1089456.5.peg.722_Inner_Membrane + 1.0 protein_1089456.5.peg.723_Inner_Membrane --> 1.0 CPLX_FDH2-0_mod_bmocogdp(1)


In [66]:
# Same table for "zn2_c", passing the "base" flux Series (converted to a plain
# dict) as flux_dict — presumably the source of the rxn_flux/met_flux values.
# NOTE(review): the saved output has an older execution count than the current
# `org`/model cells, so it may belong to a different organism; re-run to refresh.
flux_based_reactions(model,"zn2_c",flux_dict=fluxes["base"].to_dict())

Unnamed: 0,lb,ub,rxn_flux,met_flux,reaction
ZNabc_FWD_CPLX_ZNabc-0,0.0,1000.0,1.749814e-06,1.749814e-06,[1.36948652809302e-6*mu] CPLX_ZNabc-0 + 1.0 atp_c + 1.0 h2o_c + 1.0 zn2_e --> 1.0 adp_c + 1.0 h_c + 1.0 pi_c + 1.0 zn2_c
formation_RNAP-CPLX_mod_zn2(1)_mod_mg2(2),0.0,1000.0,1.327968e-06,-1.327968e-06,2.0 mg2_c + 1.0 protein_469598.5.peg.4072 + 1.0 protein_469598.5.peg.4477 + 1.0 protein_469598.5.peg.4840 + 1.0 protein_469598.5.peg.4841 + 1.0 zn2_c --> 1.0 RNAP-CPLX_mod_zn2(1)_mod_mg2(2) + 0.11401900000000001 prosthetic_group_biomass
formation_469598.5.peg.264-MONOMER_mod_fe2(1)_mod_zn2(1),0.0,1000.0,2.505669e-07,-2.505669e-07,1.0 fe2_c + 2.0 protein_469598.5.peg.264 + 1.0 zn2_c --> 1.0 469598.5.peg.264-MONOMER_mod_fe2(1)_mod_zn2(1) + 0.121254 prosthetic_group_biomass
formation_469598.5.peg.4642-MONOMER_mod_k(1)_mod_zn2(1),0.0,1000.0,1.214559e-07,-1.214559e-07,1.0 k_c + 4.0 protein_469598.5.peg.4642 + 1.0 zn2_c --> 1.0 469598.5.peg.4642-MONOMER_mod_k(1)_mod_zn2(1) + 0.10450730000000001 prosthetic_group_biomass
formation_469598.5.peg.3094-MONOMER_mod_zn2(1)_mod_cobalt2(1),0.0,1000.0,3.847722e-08,-3.847722e-08,1.0 cobalt2_c + 4.0 protein_469598.5.peg.3094 + 1.0 zn2_c --> 1.0 469598.5.peg.3094-MONOMER_mod_zn2(1)_mod_cobalt2(1) + 0.12434220000000001 prosthetic_group_biomass
formation_469598.5.peg.854-MONOMER_mod_zn2(2),0.0,1000.0,4.889922e-09,-9.779843e-09,4.0 protein_469598.5.peg.854_Inner_Membrane + 2.0 zn2_c --> 1.0 469598.5.peg.854-MONOMER_mod_zn2(2) + 0.13081800000000002 prosthetic_group_biomass
formation_469598.5.peg.3794-MONOMER_mod_zn2(4),0.0,1000.0,2.006515e-10,-8.02606e-10,2.0 protein_469598.5.peg.3794_Membrane + 4.0 zn2_c --> 1.0 469598.5.peg.3794-MONOMER_mod_zn2(4) + 0.26163600000000004 prosthetic_group_biomass
formation_469598.5.peg.3294-MONOMER_mod_zn2(1)_mod_cbl1(1),0.0,1000.0,5.893876e-10,-5.893876e-10,1.0 cbl1_c + 1.0 protein_469598.5.peg.3294 + 1.0 zn2_c --> 1.0 469598.5.peg.3294-MONOMER_mod_zn2(1)_mod_cbl1(1) + 1.3947567810000001 prosthetic_group_biomass
formation_469598.5.peg.2086-MONOMER_mod_zn2(2),0.0,1000.0,8.703553e-11,-1.740711e-10,2.0 protein_469598.5.peg.2086 + 2.0 zn2_c --> 1.0 469598.5.peg.2086-MONOMER_mod_zn2(2) + 0.13081800000000002 prosthetic_group_biomass
formation_469598.5.peg.495-MONOMER_mod_zn2(1),0.0,1000.0,-1.316783e-28,1.316783e-28,1.0 protein_469598.5.peg.495_Periplasm + 1.0 zn2_c --> 1.0 469598.5.peg.495-MONOMER_mod_zn2(1) + 0.06540900000000001 prosthetic_group_biomass


In [28]:
# Same "zn2_c" table, this time with the fluxes stored under `cond`.
# NOTE(review): with cond == "base" this repeats the previous cell; the saved
# output has an older execution count than the current `org`/model cells, so it
# may belong to a different organism/condition — re-run to refresh.
flux_based_reactions(model,"zn2_c",flux_dict=fluxes[cond].to_dict())

Unnamed: 0,lb,ub,rxn_flux,met_flux,reaction
ZNabc_FWD_CPLX_ZNabc-1,0.0,1000.0,5.520382e-05,5.520382e-05,[2.67490593970742e-6*mu] CPLX_ZNabc-1 + 1.0 atp_c + 1.0 h2o_c + 1.0 zn2_e --> 1.0 adp_c + 1.0 h_c + 1.0 pi_c + 1.0 zn2_c
formation_g.689.peg.2281-MONOMER_mod_mn2(1)_mod_zn2(1),0.0,1000.0,5.261292e-05,-5.261292e-05,1.0 mn2_c + 1.0 protein_g.689.peg.2281 + 1.0 zn2_c --> 1.0 g.689.peg.2281-MONOMER_mod_mn2(1)_mod_zn2(1) + 0.12034704900000001 prosthetic_group_biomass
formation_RNAP-CPLX_mod_zn2(1)_mod_mg2(2),0.0,1000.0,1.756587e-06,-1.756587e-06,2.0 mg2_c + 1.0 protein_g.689.peg.182 + 1.0 protein_g.689.peg.208 + 1.0 protein_g.689.peg.24 + 1.0 protein_g.689.peg.34 + 1.0 zn2_c --> 1.0 RNAP-CPLX_mod_zn2(1)_mod_mg2(2) + 0.11401900000000001 prosthetic_group_biomass
formation_g.689.peg.4355-MONOMER_mod_zn2(1),0.0,1000.0,8.216384e-07,-8.216384e-07,4.0 protein_g.689.peg.4355 + 1.0 zn2_c --> 1.0 g.689.peg.4355-MONOMER_mod_zn2(1) + 0.06540900000000001 prosthetic_group_biomass
formation_290402.34.peg.2506-MONOMER_mod_zn2(1),0.0,1000.0,1.14841e-08,-1.14841e-08,10.0 protein_290402.34.peg.2506 + 1.0 zn2_c --> 1.0 290402.34.peg.2506-MONOMER_mod_zn2(1) + 0.06540900000000001 prosthetic_group_biomass
formation_g.689.peg.5164-MONOMER_mod_zn2(4),0.0,1000.0,2.290695e-10,-9.16278e-10,2.0 protein_g.689.peg.5164_Membrane + 4.0 zn2_c --> 1.0 g.689.peg.5164-MONOMER_mod_zn2(4) + 0.26163600000000004 prosthetic_group_biomass
formation_g.689.peg.3287-MONOMER_mod_zn2(2),0.0,1000.0,1.394935e-10,-2.78987e-10,2.0 protein_g.689.peg.3287 + 2.0 zn2_c --> 1.0 g.689.peg.3287-MONOMER_mod_zn2(2) + 0.13081800000000002 prosthetic_group_biomass
formation_g.689.peg.2277-MONOMER_mod_zn2(1),0.0,1000.0,3.723668e-31,-3.723668e-31,6.0 protein_g.689.peg.2277 + 1.0 zn2_c --> 1.0 g.689.peg.2277-MONOMER_mod_zn2(1) + 0.06540900000000001 prosthetic_group_biomass
formation_g.689.peg.2281-MONOMER_mod_mg2(1)_mod_zn2(1),0.0,1000.0,5.47749e-39,-5.47749e-39,1.0 mg2_c + 1.0 protein_g.689.peg.2281 + 1.0 zn2_c --> 1.0 g.689.peg.2281-MONOMER_mod_mg2(1)_mod_zn2(1) + 0.08971400000000002 prosthetic_group_biomass
formation_290402.34.peg.3047-MONOMER_mod_zn2(1)_mod_cbl1(1),0.0,1000.0,4.307673e-42,-4.307673e-42,1.0 cbl1_c + 1.0 protein_290402.34.peg.3047 + 1.0 zn2_c --> 1.0 290402.34.peg.3047-MONOMER_mod_zn2(1)_mod_cbl1(1) + 1.3947567810000001 prosthetic_group_biomass


In [33]:
# Distinct genera among the rows whose "Family" is "Bifidobacteriaceae".
taxonomy.loc[taxonomy["Family"] == "Bifidobacteriaceae", "Genus"].unique()

array(['Bifidobacterium', 'Scardovia'], dtype=object)

In [29]:
# Rows of `dadv` for every distinct genus whose "Family" is "Lactobacillaceae"
# (raises KeyError if any of those genera is missing from `dadv`).
dadv.loc[taxonomy.loc[taxonomy["Family"] == "Lactobacillaceae", "Genus"].unique()]

Unnamed: 0,low_iron,low_zinc,high_FA,high_carbs,high_prot,micro_oxygen
Lactobacillus,0.032479,1.0,0.439239,0.958508,0.080264,0.360451
Levilactobacillus,1.0,1.0,0.5,1.0,0.5,0.5
Lentilactobacillus,1.0,1.0,0.5,1.0,0.8,0.8
Lacticaseibacillus,0.590909,1.0,0.349567,0.933983,0.020563,0.468615
Limosilactobacillus,0.689977,1.0,0.5,0.935897,0.1588,0.5
Lactiplantibacillus,0.242857,1.0,0.442857,0.1,0.057143,0.442857
Latilactobacillus,0.5,1.0,0.5,0.5,0.5,0.5
Ligilactobacillus,1.0,1.0,0.5,0.5,0.5,1.0
Leuconostoc,1.0,1.0,0.333333,0.666667,0.166667,0.833333
Pediococcus,0.833333,1.0,0.666667,1.0,0.333333,0.666667


In [29]:
# The 20 smallest "high_prot" values in `dadv`, in ascending order.
dadv["high_prot"].sort_values().iloc[:20]

Listeria               0.000525
Lacticaseibacillus     0.020563
Vibrio                 0.050000
Lactiplantibacillus    0.057143
Lactobacillus          0.080264
Limosilactobacillus    0.158800
Rothia                 0.166667
Butyrivibrio           0.166667
Anaerostipes           0.166667
Leuconostoc            0.166667
Corynebacterium        0.193254
Staphylococcus         0.194845
Citrobacter            0.200000
Capnocytophaga         0.200000
Ruminococcus           0.273810
Yersinia               0.273810
Parabacteroides        0.273810
Brevibacterium         0.333333
Dialister              0.333333
Alkalihalobacillus     0.333333
Name: high_prot, dtype: float64

In [23]:
# Ratio of "high_carbs" to "base" growth for every Listeria strain.
#
# NOTE(review): this cell used to filter a frame called `df`, but the only
# `df` visible in this notebook is the one-column count table indexed by
# taxonomy column names, which has no "Genus", "high_carbs" or "base" column,
# so the cell fails on a fresh Restart & Run All. The selection is rebuilt here
# from `taxonomy` (row label -> "Genus") and `growth` (same row labels).
# Assumes `growth` has "high_carbs" and "base" columns — TODO confirm.
listeria_strains = taxonomy.index[taxonomy["Genus"] == "Listeria"]
tmp = growth.loc[listeria_strains]
tmp["high_carbs"]/tmp["base"]

Listeria_grayi_DSM_20601                       6.459790
Listeria_monocytogenes_4a_L99                  2.773626
Listeria_monocytogenes_4b_F2365                2.733813
Listeria_monocytogenes_Finland_1988            2.735010
Listeria_monocytogenes_FSL_R2_561              2.738206
Listeria_monocytogenes_J0161_FSL_R2_499        2.737260
Listeria_monocytogenes_serotype_7_SLCC_2482    2.729546
Listeria_monocytogenes_SLCC_2378               2.728663
Listeria_monocytogenes_SLCC_2540               2.736059
Listeria_monocytogenes_SLCC_7179               2.738628
dtype: float64