In [1]:
import glob
import numpy
import pandas
import seaborn
import matplotlib.pyplot as plt
from tqdm import tqdm
import multiprocessing as mp
import os
from build import build_model
from diets import load_model
import coralme
# Do not truncate long cell contents when DataFrames are displayed.
pandas.set_option('display.max_colwidth', None)

In [2]:
# Rich-display helpers used to inject HTML into the notebook page.
from IPython.display import display, HTML, Math, Markdown
# Inject CSS that sets elements with class "container" to 95% width.
display(HTML("<style>.container { width:95% !important; }</style>"))

# Load the autoreload extension and set it to mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

### Set biomass

In [3]:
# Names of the biomass tables: every file in ./biomass/ whose name ends in
# ".csv", with that extension removed. endswith/splitext are used instead of a
# substring test and split so that files such as "x.csv.bak" are ignored and
# only the final extension is stripped; each name then maps back to an
# existing "./biomass/<name>.csv".
biomass = {os.path.splitext(i)[0] for i in os.listdir("./biomass/") if i.endswith(".csv")}

In [4]:
# Combine all per-name biomass tables into one frame: one row per biomass
# name (sorted), one column per label found in the first column of the CSVs,
# with 0 where a table has no entry for that label.
# Frames are collected first and concatenated once (instead of growing the
# frame inside the loop), and the names are visited in sorted order so the
# resulting column order does not depend on set iteration order.
frames = []
for b in sorted(biomass):
    df = pandas.read_csv("./biomass/{}.csv".format(b), index_col=0)
    # Each table is expected to have a single value column; label it with the name.
    df.columns = [b]
    frames.append(df)
# pandas.concat rejects an empty list, so fall back to an empty frame.
biodf = pandas.concat(frames, axis=1) if frames else pandas.DataFrame()
biodf = biodf.T.fillna(0).sort_index()

In [5]:
# Persist the combined table; it is read back from this file in the "Get biomass" section.
biodf.to_csv("all_biomass.csv")

### Set organisms

In [6]:
# !bash report.sh
# NOTE(review): presumably report.sh produces the three lists read below - confirm.
def _read_index_set(path):
    """Return the first column of a header-less CSV file as a set."""
    table = pandas.read_csv(path, index_col=0, header=None)
    return set(table.index.to_list())

survivors = _read_index_set("survivors.txt")
done = _read_index_set("biomass_constrained.txt")
zinc_issues = _read_index_set("zinc_issues.txt")

In [7]:
# Default selection: everything listed in survivors.txt.
# NOTE(review): the following cell reassigns run_for from temporary_list.txt,
# so this choice only takes effect if that cell is skipped.
run_for = survivors
len(run_for)

497

In [8]:
# Replace the selection with the first-column entries of temporary_list.txt;
# anything after a "#" in that file is treated as a comment and ignored.
run_for = set(pandas.read_csv("temporary_list.txt",index_col=0,header=None,comment="#").index.to_list())

In [11]:
# pandas.DataFrame(columns=run_for).T.to_csv("rna_polymerase_issues.txt",header=None)

### Get biomass

In [12]:
from biomass import filter_biomass, correct_biomass
# Reload the combined biomass table from the CSV written earlier in the
# notebook, so this section can run without rebuilding it from ./biomass/.
biodf = pandas.read_csv("all_biomass.csv",index_col=0)

### Get diet

In [13]:
from coralme.builder.main import MEBuilder
from build import parse_diet,constrain_diet
# Alternative diet source, kept for reference:
# diet = pandas.read_csv("./diets/high_fiber_diet.txt",index_col=0,sep='\t',comment='#',header=None)
# Read the space-separated diet table indexed by its second column, keep only
# the "WesternDiet" column, flip its sign and expose it under the name "lb".
agora_diets = pandas.read_csv("./diets/AGORADiets.txt", sep=' ', index_col=1)
diet = -agora_diets[["WesternDiet"]]
diet.columns = ["lb"]
# NOTE(review): parse_diet comes from build.py; its output format is not visible here.
diet_dct = parse_diet(diet)

### Run

In [14]:
def close_sink(model,met):
    """Print and remove every reaction matched by the query ``sink_<met>``.

    Parameters
    ----------
    model : object exposing ``reactions.query(str)``
        Model whose matching reactions are removed.
    met : str
        Text appended to ``"sink_"`` to form the query; it is passed through
        unchanged, so whatever pattern syntax ``query`` accepts can be used.
    """
    pattern = "sink_{}".format(met)
    matches = model.reactions.query(pattern)
    for sink_rxn in matches:
        print(sink_rxn)
        sink_rxn.remove_from_model()

In [15]:
# Output folders for the flux tables written by run(): "m" receives the
# fluxes of the optimized gem model, "me" the ME-model solution.
mfluxdir = "./fluxes_reconstruction/m"
mefluxdir = "./fluxes_reconstruction/me"
# Create each folder independently. Checking only mfluxdir would leave
# mefluxdir missing when just "m" exists, and would raise FileExistsError
# when just "me" exists.
for _fluxdir in (mfluxdir, mefluxdir):
    os.makedirs(_fluxdir, exist_ok=True)

def run(org):
    """Constrain one organism's model to the diet and biomass, then export fluxes.

    Steps, in order:
      1. Create an ``MEBuilder`` from ``./me-models/<org>/coralme-config.yaml``
         and attach the pickled model ``MEModel-step2-<org>-ME.pkl``.
      2. Remove the sink reactions matched by ``sink_[a-z]?heme``.
      3. Apply ``diet_dct`` to the ``gem`` model, optimize it and write the
         fluxes to ``<mfluxdir>/<org>.csv``.
      4. Apply the ``biodf`` row for ``org`` via ``correct_biomass``, apply
         ``diet_dct`` to the ME-model, run ``builder.troubleshoot`` and write
         the resulting solution to ``<mefluxdir>/<org>.csv``.

    Parameters
    ----------
    org : str
        Organism identifier; used as the folder name under ``./me-models`` and
        as the row label looked up in ``biodf``.

    Relies on the notebook globals ``biodf``, ``diet_dct``, ``mfluxdir`` and
    ``mefluxdir``. Returns ``None``; all results are written to disk.
    """
    builder = MEBuilder('./me-models/{:s}/coralme-config.yaml'.format(org))
    # NOTE(review): this loads a pickle file - only use model files from a trusted source.
    builder.me_model = coralme.io.pickle.load_pickle_me_model("./me-models/{}/MEModel-step2-{}-ME.pkl".format(org,org))

    # Fix
    close_sink(builder.me_model,"[a-z]?heme")

    # Constrain diet
    constrain_diet(builder.me_model.gem,diet_dct)
    # Write to the shared output folder constants rather than repeating the path.
    builder.me_model.gem.optimize().to_frame().to_csv(os.path.join(mfluxdir, "{}.csv".format(org)))
    # Biomass
    correct_biomass(builder.me_model,biodf.loc[org])
    constrain_diet(builder.me_model,diet_dct)
    out_directory = "./me-models/{}".format(org)
    # Troubleshoot with the growth key fixed to 0.001; the resulting model is
    # saved next to the organism's other model files.
    builder.troubleshoot(growth_key_and_value = { builder.me_model.mu : 0.001 },
                        savefile = '{:s}/MEModel-BIO-{:s}-TS.pkl'.format(out_directory, builder.me_model.id),
                        gapfill_cofactors=False)
    builder.me_model.solution.to_frame().to_csv(os.path.join(mefluxdir, "{}.csv".format(org)))

In [16]:
# run("Bartonella_quintana_Toulouse")

In [None]:
# Run run(org) for every entry of run_for on a pool of at most 4 worker
# processes. maxtasksperchild=1 gives every organism a fresh worker process.
NP = min([4,len(run_for)])
# Organism -> exception raised in its worker. Without an error callback,
# apply_async drops worker exceptions silently and the progress bar never
# reaches its total.
failed_runs = {}

def collect_result(result):
    """Advance the progress bar once a run has finished successfully."""
    pbar.update(1)

def _make_failure_handler(org):
    """Return an error callback that records ``org``'s exception and advances the bar."""
    def _on_error(error):
        failed_runs[org] = error
        pbar.update(1)
    return _on_error

if NP < 1:
    # mp.Pool(0) raises ValueError, so skip the pool entirely when there is no work.
    print("run_for is empty; nothing to run")
else:
    pool = mp.Pool(NP,maxtasksperchild=1)
    pbar = tqdm(total=len(run_for),position=0,leave=True)
    pbar.set_description('Building ({} threads)'.format(NP))
    try:
        for org in run_for:
            pool.apply_async(run, (org,), callback=collect_result,
                             error_callback=_make_failure_handler(org))
        pool.close()
        pool.join()
    finally:
        # Also reached on interruption: stop any remaining workers and release the bar.
        pool.terminate()
        pbar.close()
    for failed_org, error in failed_runs.items():
        print("{} failed: {!r}".format(failed_org, error))

Building (4 threads):   0%|                                                                                                                                                                 | 0/17 [00:00<?, ?it/s]

Set parameter Username
Academic license - for non-commercial use only - expires 2024-08-16
Set parameter Username
Academic license - for non-commercial use only - expires 2024-08-16
Set parameter Username
Academic license - for non-commercial use only - expires 2024-08-16
Set parameter Username
Academic license - for non-commercial use only - expires 2024-08-16
Read LP format model from file /tmp/tmpicu87r1x.lp
Reading time = 0.00 seconds
: 0 rows, 0 columns, 0 nonzeros
Read LP format model from file /tmp/tmpqqywf77d.lp
Reading time = 0.00 seconds
: 0 rows, 0 columns, 0 nonzeros
Read LP format model from file /tmp/tmpv45tz0as.lp
Reading time = 0.00 seconds
: 0 rows, 0 columns, 0 nonzeros
Read LP format model from file /tmp/tmp64pgu8ur.lp
Reading time = 0.00 seconds
: 0 rows, 0 columns, 0 nonzeros
Read LP format model from file /tmp/tmp8j9f9b49.lp
Reading time = 0.01 seconds
: 1297 rows, 3258 columns, 14028 nonzeros
Read LP format model from file /tmp/tmpd91w5f1i.lp
Reading time = 0.01 

  warn(f"The element {e} does not appear in the periodic table")


The following biomass constituents are not in the ME-model or have no formula: glyc45tca_c, ai17tcaglc_c, ai17tcaacgam_c, i17tcaacgam_c, ai17tca1_c, sttca1_c, i17tcaala__D_c, ai17tcaala__D_c, sttcaala__D_c, i17tca1_c, i17tcaglc_c, sttcaacgam_c, sttcaglc_c, tcam_c, glyc45tcaala__D_c, glyc45tcaglc_c.
EX_arab_L(e) not in model
EX_cellb(e) not in model
EX_drib(e) -0.17878295
EX_fru(e) -0.14898579
EX_fuc_L(e) not in model
EX_gal(e) not in model
EX_glc_D(e) -0.14898579
EX_glcn(e) -0.14898579
EX_lcts(e) not in model
EX_malt(e) -0.07449289
EX_man(e) -0.14898579
EX_melib(e) not in model
EX_mnl(e) -0.14898579
EX_oxa(e) -0.44695737
EX_rib_D(e) -0.17878295
EX_rmn(e) not in model
EX_sucr(e) -0.07449289
EX_tre(e) -0.07449289
EX_xyl_D(e) not in model
EX_strch1(e) not in model
EX_amylopect900(e) not in model
EX_amylose300(e) not in model
EX_arabinan101(e) not in model
EX_arabinogal(e) not in model
EX_arabinoxyl(e) not in model
EX_bglc(e) not in model
EX_cellul(e) not in model
EX_dextran40(e) not in mo

  warn(f"The element {e} does not appear in the periodic table")


The following biomass constituents are not in the ME-model or have no formula: glyc45tca_c, colipa_c, tcam_c, glyc45tcaala__D_c, glyc45tcaglc_c.
EX_arab_L(e) not in model
EX_cellb(e) not in model
EX_drib(e) not in model
EX_fru(e) not in model
EX_fuc_L(e) not in model
EX_gal(e) not in model
EX_glc_D(e) not in model
EX_glcn(e) not in model
EX_lcts(e) not in model
EX_malt(e) not in model
EX_man(e) not in model
EX_melib(e) not in model
EX_mnl(e) not in model
EX_oxa(e) not in model
EX_rib_D(e) not in model
EX_rmn(e) not in model
EX_sucr(e) not in model
EX_tre(e) not in model
EX_xyl_D(e) not in model
EX_strch1(e) not in model
EX_amylopect900(e) not in model
EX_amylose300(e) not in model
EX_arabinan101(e) not in model
EX_arabinogal(e) not in model
EX_arabinoxyl(e) not in model
EX_bglc(e) not in model
EX_cellul(e) not in model
EX_dextran40(e) not in model
EX_galmannan(e) not in model
EX_glcmannan(e) not in model
EX_homogal(e) not in model
EX_inulin(e) not in model
EX_kestopt(e) not in model
EX

  warn(f"The element {e} does not appear in the periodic table")


The following biomass constituents are not in the ME-model or have no formula: glyc45tca_c, i17tcaala__D_c, ai17tcaala__D_c, sttcaala__D_c, tcam_c, glyc45tcaala__D_c, glyc45tcaglc_c.
EX_arab_L(e) not in model
EX_cellb(e) not in model
EX_drib(e) not in model
EX_fru(e) -0.14898579
EX_fuc_L(e) not in model
EX_gal(e) -0.14898579
EX_glc_D(e) -0.14898579
EX_glcn(e) not in model
EX_lcts(e) -0.07449289
EX_malt(e) -0.07449289
EX_man(e) -0.14898579
EX_melib(e) not in model
EX_mnl(e) -0.14898579
EX_oxa(e) not in model
EX_rib_D(e) -0.17878295
EX_rmn(e) not in model
EX_sucr(e) -0.07449289
EX_tre(e) -0.07449289
EX_xyl_D(e) not in model
EX_strch1(e) not in model
EX_amylopect900(e) not in model
EX_amylose300(e) not in model
EX_arabinan101(e) not in model
EX_arabinogal(e) not in model
EX_arabinoxyl(e) not in model
EX_bglc(e) not in model
EX_cellul(e) not in model
EX_dextran40(e) not in model
EX_galmannan(e) not in model
EX_glcmannan(e) not in model
EX_homogal(e) not in model
EX_inulin(e) not in model
E