Skip to content

Commit

Permalink
biomass dry weight normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
cdanielmachado committed Oct 10, 2017
1 parent 2dcd6ed commit cd57c56
Show file tree
Hide file tree
Showing 11 changed files with 169 additions and 799,812 deletions.
2 changes: 1 addition & 1 deletion carveme/config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ opt_tol = 1e-9
int_feas_tol = 1e-5

[universe]
default_biomass = bacterial
default_biomass = bacteria

[thermodynamics]
concentration_min = 1e-5
Expand Down
Binary file modified carveme/data/generated/universe_bacteria.xml.gz
Binary file not shown.
498,427 changes: 0 additions & 498,427 deletions carveme/data/generated/universe_draft.xml

This file was deleted.

150,126 changes: 0 additions & 150,126 deletions carveme/data/generated/universe_gramneg.xml

This file was deleted.

Binary file modified carveme/data/generated/universe_gramneg.xml.gz
Binary file not shown.
151,096 changes: 0 additions & 151,096 deletions carveme/data/generated/universe_grampos.xml

This file was deleted.

Binary file modified carveme/data/generated/universe_grampos.xml.gz
Binary file not shown.
232 changes: 97 additions & 135 deletions carveme/data/input/biomass_db.tsv

Large diffs are not rendered by default.

37 changes: 36 additions & 1 deletion carveme/reconstruction/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from collections import OrderedDict
from warnings import warn

import pandas as pd
from framed import Environment
from framed import CBReaction
from framed.experimental.elements import molecular_weight
import numpy as np


def create_exchange_reactions(model, default_lb=0, default_ub=None):
Expand Down Expand Up @@ -89,7 +92,7 @@ def medium_to_constraints(model, compounds, max_uptake=10, inplace=False, verbos
return env.apply(model, inplace=inplace, warning=verbose)


def load_biomass_db(filename, sep='\t'):
def load_biomass_db(filename, sep='\t', normalize_weight=False, model=None):
data = pd.read_csv(filename, sep=sep)
data.dropna(subset=['bigg_id', 'comp'], inplace=True)
data.sort_values(by=['bigg_id', 'comp'], inplace=True)
Expand All @@ -99,9 +102,41 @@ def load_biomass_db(filename, sep='\t'):
if column.startswith('@'):
col_slice = data[['bigg_id', 'comp', column]].dropna().values
biomass_db[column[1:]] = OrderedDict(('M_{}_{}'.format(x, y), z) for x, y, z in col_slice)

if normalize_weight:
if model is None:
raise RuntimeError('To normalize the biomass weight please provide a model with metabolite formulas.')

for biomass_id, coeffs in biomass_db.items():
normalize_coeffs(biomass_id, coeffs, model)

return biomass_db


def biomass_weight(biomass_id, coeffs, model):
    """Compute the weight of a biomass equation from its metabolite formulas.

    Every metabolite contributes ``-coeff * weight``, where ``weight`` is the
    molecular weight derived from the metabolite's 'FORMULA' metadata entry.
    When that entry lists several formulas separated by ';', the mean of
    their molecular weights is used.

    Args:
        biomass_id: identifier of the biomass equation; only used in the
            warning message.
        coeffs: mapping of metabolite id to its coefficient in the equation.
            NOTE(review): the sign flip suggests consumed metabolites carry
            negative coefficients -- confirm against the biomass database.
        model: object exposing ``metabolites[m_id].metadata`` dictionaries.

    Returns:
        The summed weight, or 0 when any metabolite has no formula (a warning
        is issued in that case).
    """
    bio_weight = 0
    for m_id, coeff in coeffs.items():
        metadata = model.metabolites[m_id].metadata
        if 'FORMULA' not in metadata:
            warn('Unable to normalize {} due to missing formula for {}:'.format(biomass_id, m_id))
            # A partial sum is not a meaningful weight: returning it would
            # make the caller rescale the equation by a wrong factor.
            # Report 0 so that normalization is actually skipped.
            return 0

        formulae = metadata['FORMULA'].split(';')
        met_weight = np.mean([molecular_weight(formula) for formula in formulae])
        bio_weight += -coeff * met_weight

    return bio_weight


def normalize_coeffs(biomass_id, coeffs, model):
    """Rescale the coefficients of a biomass equation in place.

    The weight of the equation is obtained from ``biomass_weight``; every
    coefficient is then multiplied by ``1000.0 / weight`` so that the
    rescaled equation has a weight of 1000 (presumably mg, i.e. 1 gram of
    dry weight -- confirm units against the biomass database).

    Args:
        biomass_id: identifier of the biomass equation.
        coeffs: mutable mapping of metabolite id to coefficient; modified
            in place.
        model: model passed through to ``biomass_weight``.

    Returns:
        None. ``coeffs`` is left untouched when the computed weight is not
        strictly positive.
    """
    total = biomass_weight(biomass_id, coeffs, model)

    # Nothing to do unless a strictly positive weight was obtained.
    if not total > 0:
        return

    # Only existing keys are reassigned, so the mapping keeps its order.
    for met_id in list(coeffs):
        coeffs[met_id] = coeffs[met_id] * 1000.0 / total


def add_biomass_equation(model, stoichiometry, label=None):
r_id = 'Growth_' + label if label else 'Growth'
name = 'Biomass reaction'
Expand Down
52 changes: 31 additions & 21 deletions scripts/build_universe
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ from framed.io.sbml import load_cbmodel


def main(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow_blocked=False,
biomass=None, biomass_db_path=None, outputfile=None):
biomass=None, biomass_db_path=None, normalize_biomass=False, cyanobacteria=False, outputfile=None):

if mode == 'draft':

Expand Down Expand Up @@ -41,15 +41,19 @@ def main(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow

compute_bigg_gibbs_energy(universe_draft, equilibrator_compounds, bigg_gibbs)

elif mode in {'bacteria', 'cyanobacteria'}:
elif mode == 'curated':

universe_draft = project_dir + config.get('generated', 'universe_draft')
model_specific_data = project_dir + config.get('generated', 'model_specific_data')

if not biomass:
biomass = config.get('universe', 'default_biomass')

if outputfile:
universe_final = outputfile
else:
universe_final = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), mode)
tag = 'cyanobacteria' if cyanobacteria else biomass
universe_final = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), tag)

bigg_models = project_dir + config.get('input', 'bigg_models')
bigg_models = pd.read_csv(bigg_models, index_col=0)
Expand All @@ -61,26 +65,23 @@ def main(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow
unbalanced = pd.read_csv(unbalanced, header=None)
unbalanced = unbalanced[0].tolist()

try:
model = load_cbmodel(universe_draft, flavor=config.get('sbml', 'default_flavor'))
model_specific_data = pd.read_csv(model_specific_data)
except IOError:
raise IOError('Universe draft not found. Please run --draft first to download BiGG data.')

if biomass_db_path is None:
biomass_db_path = project_dir + config.get('input', 'biomass_library')

biomass_db = load_biomass_db(biomass_db_path)

if not biomass:
biomass = config.get('universe', 'default_biomass')
biomass_db = load_biomass_db(biomass_db_path, normalize_weight=normalize_biomass, model=model)

if biomass not in biomass_db:
valid_ids = ','.join(biomass_db.keys())
raise RuntimeError('Biomass identifier not in database. Currently in database: ' + valid_ids)

biomass_eq = biomass_db[biomass]

try:
model = load_cbmodel(universe_draft, flavor=config.get('sbml', 'default_flavor'))
model_specific_data = pd.read_csv(model_specific_data)
except IOError:
raise IOError('Universe draft not found. Please run --draft first to download BiGG data.')

if nothermo:
thermodynamics_data = None
metabolomics_data = None
Expand All @@ -95,7 +96,7 @@ def main(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow
metabolomics_data = pd.read_csv(metabolomics, index_col=1)

curate_universe(model,
taxa=mode,
taxa='cyanobacteria' if cyanobacteria else 'bacteria',
outputfile=universe_final,
model_specific_data=model_specific_data,
bigg_models=bigg_models,
Expand All @@ -120,10 +121,9 @@ if __name__ == '__main__':
help='Download all data from BiGG and save uncurated (draft) model to SBML.')
mode.add_argument('--thermo', action='store_true',
help='Compute thermodynamics data for BiGG reactions.')
mode.add_argument('--bacteria', action='store_true',
mode.add_argument('--curated', action='store_true',
help='Generate curated universal model of bacterial metabolism.')
mode.add_argument('--cyanobacteria', action='store_true',
help='Generate curated universal model of cyanobacterial metabolism.')

parser.add_argument('-o', '--output', dest='output', help="Output file")

parser.add_argument('--nothermo', action='store_true',
Expand All @@ -135,10 +135,17 @@ if __name__ == '__main__':
parser.add_argument('--blocked', action='store_true',
help="Advanced options: allow blocked reactions")

parser.add_argument('--cyanobacteria', action='store_true',
help='Generate a template for cyanobacteria (includes thylakoid compartment).')

parser.add_argument('--biomass', help="Advanced options: biomass equation identifier")

parser.add_argument('--biomass-db', dest='biomass_db', help="Advanced options: biomass database file")

parser.add_argument('--normalize-biomass', dest='normalize_biomass', action='store_true',
help="Advanced options: Normalize biomass dry weight to 1 gram")


args = parser.parse_args()

if args.nothermo and (args.draft or args.thermo):
Expand All @@ -156,14 +163,15 @@ if __name__ == '__main__':
if args.biomass and (args.draft or args.thermo):
parser.error('--biomass cannot be used with --draft or --thermo')

if args.cyanobacteria and (args.draft or args.thermo):
parser.error('--cyanobacteria cannot be used with --draft or --thermo')

if args.draft:
mode = 'draft'
elif args.thermo:
mode = 'thermo'
elif args.bacteria:
mode = 'bacteria'
elif args.cyanobacteria:
mode = 'cyanobacteria'
elif args.curated:
mode = 'curated'

main(mode=mode,
nothermo=args.nothermo,
Expand All @@ -172,5 +180,7 @@ if __name__ == '__main__':
allow_blocked=args.blocked,
biomass=args.biomass,
biomass_db_path=args.biomass_db,
normalize_biomass=args.normalize_biomass,
cyanobacteria=args.cyanobacteria,
outputfile=args.output)

9 changes: 4 additions & 5 deletions scripts/carve
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import os
import pandas as pd
from framed.io.sbml import save_cbmodel, sanitize_id
from multiprocessing import Pool
from glob import glob


def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
Expand Down Expand Up @@ -88,18 +89,16 @@ def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, un

if not universe_file:
if universe:
universe_file = "{}{}universe_{}.xml".format(project_dir, config.get('generated', 'folder'), universe)
universe_file = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), universe)
else:
universe_file = project_dir + config.get('generated', 'default_universe')

try:
universe_model = load_cbmodel(universe_file, flavor=config.get('sbml', 'default_flavor'))
universe_model.id = model_id
except IOError:
if universe:
raise IOError('Failed to load universe "{0}". Please run build_universe.py --{0}.'.format(universe))
else:
raise IOError('Failed to load universe model:' + universe_file)
available = '\n'.join(glob("{}{}universe_*.xml.gz".format(project_dir, config.get('generated', 'folder'))))
raise IOError('Failed to load universe model: {}\nAvailable universe files:\n{}'.format(universe_file, available))

if gapfill or init:

Expand Down

0 comments on commit cd57c56

Please sign in to comment.