Skip to content

Commit

Permalink
Merge pull request #7 from cdanielmachado/debug_features
Browse files Browse the repository at this point in the history
Debug features
  • Loading branch information
cdanielmachado committed Dec 7, 2017
2 parents 9f57d8f + 46cfa70 commit 4d47671
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 22 deletions.
7 changes: 6 additions & 1 deletion carveme/reconstruction/carving.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pandas as pd

from framed.cobra.ensemble import EnsembleModel, save_ensemble
from framed.io.sbml import parse_gpr_rule, save_cbmodel
Expand Down Expand Up @@ -138,7 +139,7 @@ def minmax_reduction(model, scores, min_growth=0.1, min_atpm=0.1, eps=1e-3, bigM


def carve_model(model, reaction_scores, outputfile=None, flavor=None, inplace=True,
default_score=-1.0, uptake_score=0.0, init_env=None):
default_score=-1.0, uptake_score=0.0, init_env=None, debug_output=None):
""" Reconstruct a metabolic model using the CarveMe approach.
Args:
Expand Down Expand Up @@ -174,6 +175,10 @@ def carve_model(model, reaction_scores, outputfile=None, flavor=None, inplace=Tr
print "MILP solver failed: {}".format(sol.message)
return

if debug_output:
pd.DataFrame.from_dict(sol.values, orient='index').to_csv(debug_output + '_milp_solution.tsv',
sep='\t', header=False)

model.remove_reactions(inactive)

del_metabolites = disconnected_metabolites(model)
Expand Down
7 changes: 6 additions & 1 deletion carveme/reconstruction/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def merge_protein_scores(scores):
return scores.max(skipna=True)


def reaction_scoring(annotation, gprs, spontaneous_score=0.0):
def reaction_scoring(annotation, gprs, spontaneous_score=0.0, debug_output=None):
""" Calculate reaction scores using new eggnog output.
Args:
Expand Down Expand Up @@ -110,6 +110,11 @@ def reaction_scoring(annotation, gprs, spontaneous_score=0.0):

avg_score = reaction_scores['score'].median()

if debug_output:
gene_scores.to_csv(debug_output + '_gene_scores.tsv', sep='\t', index=False)
protein_scores.to_csv(debug_output + '_protein_scores.tsv', sep='\t', index=False)
reaction_scores.to_csv(debug_output + '_reaction_scores.tsv', sep='\t', index=False)

if avg_score != 0:
reaction_scores['normalized_score'] = reaction_scores['score'] / avg_score
return reaction_scores
Expand Down
24 changes: 13 additions & 11 deletions carveme/universe/curation.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,13 +198,13 @@ def add_bounds_from_extracted_data(model, data, trusted_models=None):
model.reactions[r_id].reversible = bool(lb < 0)


def filter_reactions_by_kingdom(model, kingdom, kingdom_map, exclusive=False, inplace=False):
""" Filter reactions in model by Kingdom.
def filter_reactions_by_kingdoms(model, kingdoms, kingdom_map, exclusive=False, inplace=False):
""" Filter reactions in model by Kingdoms.
A reaction is considered valid for a given Kingdom if it is present in at least one model from that Kingdom.
Args:
model (CBModel): model
kingdom (str): Kingdom name
kingdoms (set): Kingdom names
kingdom_map (dict): mapping between model ids and kingdoms
exclusive (bool): only accept reactions *exclusive* to given Kingdom (default: False)
inplace (bool): automatically remove invalid reactions from model (default: False)
Expand All @@ -219,11 +219,11 @@ def filter_reactions_by_kingdom(model, kingdom, kingdom_map, exclusive=False, in

for r_id, rxn in model.reactions.items():
model_ids = rxn.metadata['BiGG models'].split(';')
kingdoms = set([kingdom_map[model_id] for model_id in model_ids])
model_kingdoms = set([kingdom_map[model_id] for model_id in model_ids])

if exclusive and kingdoms == {kingdom}:
if exclusive and model_kingdoms == kingdoms:
valid.append(r_id)
elif not exclusive and kingdom in kingdoms:
elif not exclusive and len(model_kingdoms & kingdoms) > 0:
valid.append(r_id)
else:
invalid.append(r_id)
Expand Down Expand Up @@ -324,22 +324,24 @@ def curate_universe(model, model_specific_data, bigg_models, biomass_eq, taxa=No
kingdom_map = bigg_models['domain'].to_dict()

if taxa in {'cyanobacteria', 'bacteria'}:
kingdom = 'Bacteria'
kingdoms = {'Bacteria'}
elif taxa == 'archaea':
kingdoms = {'Archaea', 'Bacteria'}
else:
raise ValueError('Unsupported taxa:' + taxa)

filter_reactions_by_kingdom(model, kingdom, kingdom_map, inplace=True)
filter_reactions_by_kingdoms(model, kingdoms, kingdom_map, inplace=True)

print '(size: {} x {})\n'.format(len(model.metabolites), len(model.reactions))

if taxa == 'bacteria':
if taxa in {'bacteria', 'archaea'}:
valid_compartments = {'C_c', 'C_p', 'C_e'}
elif taxa == 'cyanobacteria':
valid_compartments = {'C_c', 'C_p', 'C_e', 'C_u'}

other_compartments = set(model.compartments.keys()) - valid_compartments
model.remove_compartments(other_compartments, delete_metabolites=True, delete_reactions=True)

print '(size: {} x {})\n'.format(len(model.metabolites), len(model.reactions))

if thermodynamics_data is not None:
print 'Computing thermodynamics...',

Expand Down
25 changes: 20 additions & 5 deletions scripts/build_universe
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ from framed.io.sbml import load_cbmodel


def main(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow_blocked=False,
biomass=None, biomass_db_path=None, normalize_biomass=False, cyanobacteria=False, outputfile=None):
biomass=None, biomass_db_path=None, normalize_biomass=False, taxa=None, outputfile=None):

if mode == 'draft':

Expand Down Expand Up @@ -52,7 +52,7 @@ def main(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow
if outputfile:
universe_final = outputfile
else:
tag = 'cyanobacteria' if cyanobacteria else biomass
tag = taxa if taxa != 'bacteria' else biomass
universe_final = "{}{}universe_{}.xml.gz".format(project_dir, config.get('generated', 'folder'), tag)

bigg_models = project_dir + config.get('input', 'bigg_models')
Expand Down Expand Up @@ -96,7 +96,7 @@ def main(mode, noheuristics=False, nothermo=False, allow_unbalanced=False, allow
metabolomics_data = pd.read_csv(metabolomics, index_col=1)

curate_universe(model,
taxa='cyanobacteria' if cyanobacteria else 'bacteria',
taxa=taxa,
outputfile=universe_final,
model_specific_data=model_specific_data,
bigg_models=bigg_models,
Expand Down Expand Up @@ -135,9 +135,14 @@ if __name__ == '__main__':
parser.add_argument('--blocked', action='store_true',
help="Advanced options: allow blocked reactions")

parser.add_argument('--cyanobacteria', action='store_true',
taxa = parser.add_mutually_exclusive_group(required=False)

taxa.add_argument('--cyanobacteria', action='store_true',
help='Generate a template for cyanobacteria (includes thylakoid compartment).')

taxa.add_argument('--archaea', action='store_true',
help='Generate a template for archaea (includes methanogenic reactions).')

parser.add_argument('--biomass', help="Advanced options: biomass equation identifier")

parser.add_argument('--biomass-db', dest='biomass_db', help="Advanced options: biomass database file")
Expand Down Expand Up @@ -166,13 +171,23 @@ if __name__ == '__main__':
if args.cyanobacteria and (args.draft or args.thermo):
parser.error('--cyanobacteria cannot be used with --draft or --thermo')

if args.archaea and (args.draft or args.thermo):
parser.error('--archaea cannot be used with --draft or --thermo')

if args.draft:
mode = 'draft'
elif args.thermo:
mode = 'thermo'
elif args.curated:
mode = 'curated'

if args.cyanobacteria:
taxa = 'cyanobacteria'
elif args.archaea:
taxa = 'archaea'
else:
taxa = 'bacteria'

main(mode=mode,
nothermo=args.nothermo,
noheuristics=args.noheuristics,
Expand All @@ -181,6 +196,6 @@ if __name__ == '__main__':
biomass=args.biomass,
biomass_db_path=args.biomass_db,
normalize_biomass=args.normalize_biomass,
cyanobacteria=args.cyanobacteria,
taxa=taxa,
outputfile=args.output)

14 changes: 10 additions & 4 deletions scripts/carve
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ from glob import glob


def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
ensemble_size=None, verbose=False, flavor=None, gapfill=None, init=None, mediadb=None,
ensemble_size=None, verbose=False, debug=False, flavor=None, gapfill=None, init=None, mediadb=None,
default_score=None, uptake_score=None, recursive_mode=False):

if recursive_mode:
Expand Down Expand Up @@ -120,7 +120,8 @@ def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, un
gprs = pd.read_csv(bigg_gprs)
gprs = gprs[gprs.reaction.isin(universe_model.reactions)]

scores = reaction_scoring(annotations, gprs)
debug_output = model_id if debug else None
scores = reaction_scoring(annotations, gprs, debug_output=debug_output)

if scores is None:
print 'The input genome did not match sufficient genes/reactions in the database.'
Expand All @@ -146,13 +147,15 @@ def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, un
flavor=flavor,
default_score=default_score,
uptake_score=uptake_score,
init_env=init_env)
init_env=init_env,
debug_output=debug_output)
else:
model = carve_model(universe_model, scores,
inplace=False,
default_score=default_score,
uptake_score=uptake_score,
init_env=init_env)
init_env=init_env,
debug_output=debug_output)
else:
build_ensemble(universe_model, scores, ensemble_size, outputfile, flavor, init_env=init_env)

Expand Down Expand Up @@ -222,6 +225,8 @@ if __name__ == '__main__':
parser.add_argument('--mediadb', help="Media database file")

parser.add_argument('-v', '--verbose', action='store_true', dest='verbose', help="Switch to verbose mode")
parser.add_argument('-d', '--debug', action='store_true', dest='debug',
help="Debug mode: writes intermediate results into output files")

parser.add_argument('--default-score', type=float, help=argparse.SUPPRESS)
parser.add_argument('--uptake-score', type=float, help=argparse.SUPPRESS)
Expand Down Expand Up @@ -267,6 +272,7 @@ if __name__ == '__main__':
universe_file=args.universe_file,
ensemble_size=args.ensemble,
verbose=args.verbose,
debug=args.debug,
flavor=flavor,
gapfill=args.gapfill,
init=args.init,
Expand Down

0 comments on commit 4d47671

Please sign in to comment.