biomass dry weight normalization

cdanielmachado · Oct 10, 2017 · cd57c56 · cd57c56
1 parent 2dcd6ed
commit cd57c56
Show file tree

Hide file tree

Showing 11 changed files with 169 additions and 799,812 deletions.
diff --git a/carveme/config.cfg b/carveme/config.cfg
@@ -27,7 +27,7 @@ opt_tol = 1e-9
 int_feas_tol = 1e-5
 
 [universe]
-default_biomass = bacterial
+default_biomass = bacteria
 
 [thermodynamics]
 concentration_min = 1e-5

diff --git a/carveme/data/generated/universe_bacteria.xml.gz b/carveme/data/generated/universe_bacteria.xml.gz
diff --git a/carveme/data/generated/universe_draft.xml b/carveme/data/generated/universe_draft.xml
diff --git a/carveme/data/generated/universe_gramneg.xml b/carveme/data/generated/universe_gramneg.xml
diff --git a/carveme/data/generated/universe_gramneg.xml.gz b/carveme/data/generated/universe_gramneg.xml.gz
diff --git a/carveme/data/generated/universe_grampos.xml b/carveme/data/generated/universe_grampos.xml
diff --git a/carveme/data/generated/universe_grampos.xml.gz b/carveme/data/generated/universe_grampos.xml.gz
diff --git a/carveme/data/input/biomass_db.tsv b/carveme/data/input/biomass_db.tsv
diff --git a/carveme/reconstruction/utils.py b/carveme/reconstruction/utils.py
@@ -1,8 +1,11 @@
 from collections import OrderedDict
+from warnings import warn
 
 import pandas as pd
 from framed import Environment
 from framed import CBReaction
+from framed.experimental.elements import molecular_weight
+import numpy as np
 
 
 def create_exchange_reactions(model, default_lb=0, default_ub=None):
@@ -89,7 +92,7 @@ def medium_to_constraints(model, compounds, max_uptake=10, inplace=False, verbos
     return env.apply(model, inplace=inplace, warning=verbose)
 
 
-def load_biomass_db(filename, sep='\t'):
+def load_biomass_db(filename, sep='\t', normalize_weight=False, model=None):
     data = pd.read_csv(filename, sep=sep)
     data.dropna(subset=['bigg_id', 'comp'], inplace=True)
     data.sort_values(by=['bigg_id', 'comp'], inplace=True)
@@ -99,9 +102,41 @@ def load_biomass_db(filename, sep='\t'):
         if column.startswith('@'):
             col_slice = data[['bigg_id', 'comp', column]].dropna().values
             biomass_db[column[1:]] = OrderedDict(('M_{}_{}'.format(x, y), z) for x, y, z in col_slice)
+
+    if normalize_weight:
+        if model is None:
+            raise RuntimeError('To normalize the biomass weight please provide a model with metabolite formulas.')
+
+        for biomass_id, coeffs in biomass_db.items():
+            normalize_coeffs(biomass_id, coeffs, model)
+
     return biomass_db
 
 
+def biomass_weight(biomass_id, coeffs, model):
+    """Compute the total molecular weight of a biomass equation.
+
+    Each metabolite contributes ``-coeff * weight``, where ``weight`` is the
+    mean of ``molecular_weight`` over the ';'-separated formulas stored in
+    the metabolite's 'FORMULA' metadata entry.
+
+    Args:
+        biomass_id: identifier of the biomass equation; only used in the
+            warning message.
+        coeffs: mapping of metabolite id to stoichiometric coefficient.
+        model: object whose ``metabolites`` can be indexed by metabolite id
+            and whose entries expose a ``metadata`` mapping.
+
+    Returns:
+        The summed weight, or 0 when any metabolite lacks a formula (a
+        warning is issued in that case).
+    """
+    bio_weight = 0
+    for m_id, coeff in coeffs.items():
+        metabolite = model.metabolites[m_id]
+        if 'FORMULA' not in metabolite.metadata:
+            warn('Unable to normalize {} due to missing formula for {}:'.format(biomass_id, m_id))
+            # A partial sum is not the weight of the equation: returning it
+            # would let normalize_coeffs rescale by a wrong, too small value.
+            # Report "no weight" instead so that normalization is skipped,
+            # which is what the warning above announces.
+            return 0
+
+        formulae = metabolite.metadata['FORMULA'].split(';')
+        met_weight = np.mean([molecular_weight(formula) for formula in formulae])
+        bio_weight += -coeff * met_weight
+
+    return bio_weight
+
+
+def normalize_coeffs(biomass_id, coeffs, model):
+    """Rescale the coefficients of a biomass equation in place.
+
+    Every coefficient is multiplied by 1000 and divided by the weight that
+    ``biomass_weight`` reports for the equation. The mapping is left
+    untouched when that weight is not strictly positive.
+
+    Args:
+        biomass_id: identifier of the biomass equation, forwarded to
+            ``biomass_weight``.
+        coeffs: mutable mapping of metabolite id to coefficient; updated
+            in place.
+        model: model forwarded to ``biomass_weight`` for formula lookup.
+    """
+    total_weight = biomass_weight(biomass_id, coeffs, model)
+
+    # Guard clause: without a strictly positive weight there is nothing
+    # meaningful to divide by.
+    if not total_weight > 0:
+        return
+
+    # Only existing keys are reassigned, so the mapping keeps its size and
+    # its key order.
+    for met_id in list(coeffs):
+        coeffs[met_id] = coeffs[met_id] * 1000.0 / total_weight
+
+
 def add_biomass_equation(model, stoichiometry, label=None):
     r_id = 'Growth_' + label if label else 'Growth'
     name = 'Biomass reaction'

diff --git a/scripts/build_universe b/scripts/build_universe
@@ -13,7 +13,7 @@ from framed.io.sbml import load_cbmodel
 
 
 def main(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow_blocked=False,
-         biomass=None, biomass_db_path=None, outputfile=None):
+         biomass=None, biomass_db_path=None, normalize_biomass=False, cyanobacteria=False, outputfile=None):
 
     if mode == 'draft':
 
@@ -41,15 +41,19 @@ def main(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow
 
         compute_bigg_gibbs_energy(universe_draft, equilibrator_compounds, bigg_gibbs)
 
-    elif mode in {'bacteria', 'cyanobacteria'}:
+    elif mode == 'curated':
 
         universe_draft = project_dir + config.get('generated', 'universe_draft')
         model_specific_data = project_dir + config.get('generated', 'model_specific_data')
 
+        if not biomass:
+            biomass = config.get('universe', 'default_biomass')
+
         if outputfile:
             universe_final = outputfile
         else:
-            universe_final = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), mode)
+            tag = 'cyanobacteria' if cyanobacteria else biomass
+            universe_final = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), tag)
 
         bigg_models = project_dir + config.get('input', 'bigg_models')
         bigg_models = pd.read_csv(bigg_models, index_col=0)
@@ -61,26 +65,23 @@ def main(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow
         unbalanced = pd.read_csv(unbalanced, header=None)
         unbalanced = unbalanced[0].tolist()
 
+        try:
+            model = load_cbmodel(universe_draft, flavor=config.get('sbml', 'default_flavor'))
+            model_specific_data = pd.read_csv(model_specific_data)
+        except IOError:
+            raise IOError('Universe draft not found. Please run --draft first to download BiGG data.')
+
         if biomass_db_path is None:
             biomass_db_path = project_dir + config.get('input', 'biomass_library')
 
-        biomass_db = load_biomass_db(biomass_db_path)
-
-        if not biomass:
-            biomass = config.get('universe', 'default_biomass')
+        biomass_db = load_biomass_db(biomass_db_path, normalize_weight=normalize_biomass, model=model)
 
         if biomass not in biomass_db:
             valid_ids = ','.join(biomass_db.keys())
             raise RuntimeError('Biomass identifier not in database. Currently in database: ' + valid_ids)
 
         biomass_eq = biomass_db[biomass]
 
-        try:
-            model = load_cbmodel(universe_draft, flavor=config.get('sbml', 'default_flavor'))
-            model_specific_data = pd.read_csv(model_specific_data)
-        except IOError:
-            raise IOError('Universe draft not found. Please run --draft first to download BiGG data.')
-
         if nothermo:
             thermodynamics_data = None
             metabolomics_data = None
@@ -95,7 +96,7 @@ def main(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow
             metabolomics_data = pd.read_csv(metabolomics, index_col=1)
 
         curate_universe(model,
-                        taxa=mode,
+                        taxa='cyanobacteria' if cyanobacteria else 'bacteria',
                         outputfile=universe_final,
                         model_specific_data=model_specific_data,
                         bigg_models=bigg_models,
@@ -120,10 +121,9 @@ if __name__ == '__main__':
                       help='Download all data from BiGG and save uncurated (draft) model to SBML.')
     mode.add_argument('--thermo', action='store_true',
                       help='Compute thermodynamics data for BiGG reactions.')
-    mode.add_argument('--bacteria', action='store_true',
+    mode.add_argument('--curated', action='store_true',
                       help='Generate curated universal model of bacterial metabolism.')
-    mode.add_argument('--cyanobacteria', action='store_true',
-                      help='Generate curated universal model of cyanobacterial metabolism.')
+
     parser.add_argument('-o', '--output', dest='output', help="Output file")
 
     parser.add_argument('--nothermo', action='store_true',
@@ -135,10 +135,17 @@ if __name__ == '__main__':
     parser.add_argument('--blocked', action='store_true',
                         help="Advanced options: allow blocked reactions")
 
+    parser.add_argument('--cyanobacteria', action='store_true',
+                      help='Generate a template for cyanobacteria (includes thylakoid compartment).')
+
     parser.add_argument('--biomass', help="Advanced options: biomass equation identifier")
 
     parser.add_argument('--biomass-db', dest='biomass_db', help="Advanced options: biomass database file")
 
+    parser.add_argument('--normalize-biomass', dest='normalize_biomass', action='store_true',
+                        help="Advanced options: Normalize biomass dry weight to 1 gram")
+
+
     args = parser.parse_args()
 
     if args.nothermo and (args.draft or args.thermo):
@@ -156,14 +163,15 @@ if __name__ == '__main__':
     if args.biomass and (args.draft or args.thermo):
         parser.error('--biomass cannot be used with --draft or --thermo')
 
+    if args.cyanobacteria and (args.draft or args.thermo):
+        parser.error('--cyanobacteria cannot be used with --draft or --thermo')
+
     if args.draft:
         mode = 'draft'
     elif args.thermo:
         mode = 'thermo'
-    elif args.bacteria:
-        mode = 'bacteria'
-    elif args.cyanobacteria:
-        mode = 'cyanobacteria'
+    elif args.curated:
+        mode = 'curated'
 
     main(mode=mode,
          nothermo=args.nothermo,
@@ -172,5 +180,7 @@ if __name__ == '__main__':
          allow_blocked=args.blocked,
          biomass=args.biomass,
          biomass_db_path=args.biomass_db,
+         normalize_biomass=args.normalize_biomass,
+         cyanobacteria=args.cyanobacteria,
          outputfile=args.output)
 
diff --git a/scripts/carve b/scripts/carve
@@ -14,6 +14,7 @@ import os
 import pandas as pd
 from framed.io.sbml import save_cbmodel, sanitize_id
 from multiprocessing import Pool
+from glob import glob
 
 
 def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
@@ -88,18 +89,16 @@ def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, un
 
     if not universe_file:
         if universe:
-            universe_file = "{}{}universe_{}.xml".format(project_dir, config.get('generated', 'folder'), universe)
+            universe_file = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), universe)
         else:
             universe_file = project_dir + config.get('generated', 'default_universe')
 
     try:
         universe_model = load_cbmodel(universe_file, flavor=config.get('sbml', 'default_flavor'))
         universe_model.id = model_id
     except IOError:
-        if universe:
-            raise IOError('Failed to load universe "{0}". Please run build_universe.py --{0}.'.format(universe))
-        else:
-            raise IOError('Failed to load universe model:' + universe_file)
+        available = '\n'.join(glob("{}{}universe_*.xml.gz".format(project_dir, config.get('generated', 'folder'))))
+        raise IOError('Failed to load universe model: {}\nAvailable universe files:\n{}'.format(universe_file, available))
 
     if gapfill or init: