Skip to content

Commit

Permalink
Merge pull request #51 from cdanielmachado/develop
Browse files Browse the repository at this point in the history
integrate development branch
  • Loading branch information
cdanielmachado committed Mar 6, 2019
2 parents 8f4314b + 9213968 commit 7ed7557
Show file tree
Hide file tree
Showing 11 changed files with 77 additions and 40 deletions.
4 changes: 2 additions & 2 deletions carveme/config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ unbalanced_metabolites = data/input/unbalanced_metabolites.csv
manually_curated = data/input/manually_curated.csv

[ncbi]
refseq = data/input/refseq_release_84.tsv.gz
genbank = data/input/genbank_release_221.tsv.gz
refseq = data/input/refseq_release_92.tsv.gz
genbank = data/input/genbank_release_230.tsv.gz

[generated]
folder = data/generated/
Expand Down
Binary file removed carveme/data/input/genbank_release_221.tsv.gz
Binary file not shown.
Binary file added carveme/data/input/genbank_release_230.tsv.gz
Binary file not shown.
Binary file removed carveme/data/input/refseq_release_84.tsv.gz
Binary file not shown.
Binary file added carveme/data/input/refseq_release_92.tsv.gz
Binary file not shown.
53 changes: 31 additions & 22 deletions carveme/reconstruction/carving.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ def inactive_reactions(model, solution):
return inactive + inactive_ext


def minmax_reduction(model, scores, min_growth=0.1, min_atpm=0.1, eps=1e-3, bigM=1e3, default_score=-1, uptake_score=0,
soft_score=1, soft_constraints=None, hard_constraints=None, solver=None, debug_output=None):
def minmax_reduction(model, scores, min_growth=0.1, min_atpm=0.1, eps=1e-3, bigM=1e3, default_score=-1.0,
uptake_score=0.0, soft_score=1.0, soft_constraints=None, hard_constraints=None,
ref_reactions=None, ref_score=0.0, solver=None, debug_output=None):
""" Apply minmax reduction algorithm (MILP).
Computes a binary reaction vector that optimizes the agreement with reaction scores (maximizes positive scores,
Expand Down Expand Up @@ -71,21 +72,29 @@ def minmax_reduction(model, scores, min_growth=0.1, min_atpm=0.1, eps=1e-3, bigM
scores = scores.copy()
reactions = list(scores.keys())

if not soft_constraints:
if soft_constraints:
reactions += [r_id for r_id in soft_constraints if r_id not in reactions]
else:
soft_constraints = {}

reactions += [r_id for r_id in soft_constraints if r_id not in reactions]
if not ref_reactions:
ref_reactions = {}

if hard_constraints:
solver.set_bounds(hard_constraints)

# Add default score
if default_score != 0:
for r_id in model.reactions:
if r_id not in reactions and not r_id.startswith('R_EX') and r_id != 'R_ATPM':
if r_id not in reactions and r_id not in ref_reactions and not r_id.startswith('R_EX') and r_id != 'R_ATPM':
scores[r_id] = default_score
reactions.append(r_id)

if ref_score != 0:
for r_id in ref_reactions:
if r_id not in reactions and r_id != 'R_ATPM':
scores[r_id] = ref_score
reactions.append(r_id)

if not hasattr(solver, '_carveme_flag'):
solver._carveme_flag = True

Expand Down Expand Up @@ -147,16 +156,20 @@ def minmax_reduction(model, scores, min_growth=0.1, min_atpm=0.1, eps=1e-3, bigM
w_f, w_r = -soft_score, -soft_score

if y_f in solver.pos_vars:
if r_id in scores:
objective[y_f] = scores[r_id]
if r_id in soft_constraints:
objective[y_f] = w_f
objective[y_f] = w_f
elif r_id in ref_reactions:
objective[y_f] = 2 * scores[r_id] + ref_score
else:
objective[y_f] = scores[r_id]

if y_r in solver.neg_vars:
if r_id in scores:
objective[y_r] = scores[r_id]
if r_id in soft_constraints:
objective[y_r] = w_r
elif r_id in ref_reactions:
objective[y_r] = 2 * scores[r_id] + ref_score
else:
objective[y_r] = scores[r_id]

if uptake_score != 0:
for r_id in model.reactions:
Expand All @@ -175,7 +188,7 @@ def minmax_reduction(model, scores, min_growth=0.1, min_atpm=0.1, eps=1e-3, bigM

def carve_model(model, reaction_scores, outputfile=None, flavor=None, inplace=True,
default_score=-1.0, uptake_score=0.0, soft_score=1.0,
soft_constraints=None, hard_constraints=None,
soft_constraints=None, hard_constraints=None, ref_model=None, ref_score=0.0,
init_env=None, debug_output=None):
""" Reconstruct a metabolic model using the CarveMe approach.
Expand All @@ -201,15 +214,6 @@ def carve_model(model, reaction_scores, outputfile=None, flavor=None, inplace=Tr

scores = dict(reaction_scores[['reaction', 'normalized_score']].values)

if default_score is None:
default_score = -1.0

if uptake_score is None:
uptake_score = 0.0

if soft_score is None:
soft_score = 1.0

if soft_constraints:
not_in_model = set(soft_constraints) - set(model.reactions)
if not_in_model:
Expand All @@ -222,9 +226,14 @@ def carve_model(model, reaction_scores, outputfile=None, flavor=None, inplace=Tr
hard_constraints = {r_id: (lb, ub) for r_id, (lb, ub) in hard_constraints.items() if r_id in model.reactions}
warnings.warn("Hard constraints contain reactions not in the model:\n" + "\n".join(not_in_model))

if ref_model:
ref_reactions = set(model.reactions) & set(ref_model.reactions)
else:
ref_reactions = None

sol = minmax_reduction(model, scores, default_score=default_score, uptake_score=uptake_score, soft_score=soft_score,
soft_constraints=soft_constraints, hard_constraints=hard_constraints,
debug_output=debug_output)
ref_reactions=ref_reactions, ref_score=ref_score, debug_output=debug_output)

if sol.status == Status.OPTIMAL:
inactive = inactive_reactions(model, sol)
Expand Down
4 changes: 3 additions & 1 deletion carveme/reconstruction/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,15 @@ def reaction_scoring(annotation, gprs, spontaneous_score=0.0, debug_output=None)

avg_score = reaction_scores['score'].median()

if avg_score != 0:
reaction_scores['normalized_score'] = reaction_scores['score'] / avg_score

if debug_output:
gene_scores.to_csv(debug_output + '_gene_scores.tsv', sep='\t', index=False)
protein_scores.to_csv(debug_output + '_protein_scores.tsv', sep='\t', index=False)
reaction_scores.to_csv(debug_output + '_reaction_scores.tsv', sep='\t', index=False)

if avg_score != 0:
reaction_scores['normalized_score'] = reaction_scores['score'] / avg_score
return reaction_scores
else:
return None
13 changes: 8 additions & 5 deletions carveme/reconstruction/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,14 @@ def biomass_weight(biomass_id, coeffs, model):
for m_id, coeff in coeffs.items():
metabolite = model.metabolites[m_id]
if 'FORMULA' in metabolite.metadata:
formulae = metabolite.metadata['FORMULA'].split(';')
met_weight = np.mean([molecular_weight(formula) for formula in formulae])
contribution = -coeff * met_weight
bio_weight += contribution
# print '\t'.join([biomass_id, m_id, str(met_weight), str(coeff), str(contribution)])
try:
formulae = metabolite.metadata['FORMULA'].split(';')
met_weight = np.mean([molecular_weight(formula) for formula in formulae])
contribution = -coeff * met_weight
bio_weight += contribution
except:
warn('Unable to normalize {} due to invalid formula for {}:'.format(biomass_id, m_id))
break
else:
warn('Unable to normalize {} due to missing formula for {}:'.format(biomass_id, m_id))
break
Expand Down
2 changes: 1 addition & 1 deletion docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ If you already have a model, and you just want to gap-fill it, you can do it wit

.. code-block:: console
$ gapfill model.xml -g M9 -o new_model.xml
$ gapfill model.xml -m M9 -o new_model.xml
Please note that the result is not the same if you gap-fill during reconstruction. When you gap-fill during
reconstruction, the gene annotation scores are used to prioritize the reactions selected for gap-filling based on
Expand Down
37 changes: 30 additions & 7 deletions scripts/carve
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ def build_model_id(name):


def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, universe=None, universe_file=None,
ensemble_size=None, verbose=False, debug=False, flavor=None, gapfill=None, blind_gapfill=False, init=None, mediadb=None,
default_score=None, uptake_score=None, soft_score=None, soft=None, hard=None, recursive_mode=False):
ensemble_size=None, verbose=False, debug=False, flavor=None, gapfill=None, blind_gapfill=False, init=None,
mediadb=None, default_score=None, uptake_score=None, soft_score=None, soft=None, hard=None, reference=None,
ref_score=None, recursive_mode=False):

if recursive_mode:
model_id = os.path.splitext(os.path.basename(inputfile))[0]
Expand Down Expand Up @@ -127,6 +128,17 @@ def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, un
available = '\n'.join(glob("{}{}universe_*.xml.gz".format(project_dir, config.get('generated', 'folder'))))
raise IOError('Failed to load universe model: {}\nAvailable universe files:\n{}'.format(universe_file, available))

if reference:
if verbose:
print('Loading reference model...')

try:
ref_model = load_cbmodel(reference)
except:
raise IOError('Failed to load reference model.')
else:
ref_model = None

if gapfill or init:

if verbose:
Expand Down Expand Up @@ -170,6 +182,7 @@ def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, un
if ensemble_size is None or ensemble_size <= 1:
if verbose:
print('Reconstructing a single model')

if not gapfill:
carve_model(universe_model, scores,
outputfile=outputfile,
Expand All @@ -179,6 +192,8 @@ def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, un
soft_score=soft_score,
soft_constraints=soft_constraints,
hard_constraints=hard_constraints,
ref_model=ref_model,
ref_score=ref_score,
init_env=init_env,
debug_output=debug_output)
else:
Expand All @@ -189,6 +204,8 @@ def main(inputfile, input_type='protein', outputfile=None, diamond_args=None, un
soft_score=soft_score,
soft_constraints=soft_constraints,
hard_constraints=hard_constraints,
ref_model=ref_model,
ref_score=ref_score,
init_env=init_env,
debug_output=debug_output)
else:
Expand Down Expand Up @@ -276,12 +293,14 @@ if __name__ == '__main__':
parser.add_argument('--soft', help="Soft constraints file")
parser.add_argument('--hard', help="Hard constraints file")

parser.add_argument('--default-score', type=float, help=argparse.SUPPRESS)
parser.add_argument('--uptake-score', type=float, help=argparse.SUPPRESS)
parser.add_argument('--soft-score', type=float, help=argparse.SUPPRESS)
parser.add_argument('--reference', help="Manually curated model of a close reference species.")

parser.add_argument('--blind-gapfill', action='store_true', help=argparse.SUPPRESS)
parser.add_argument('--default-score', type=float, default=-1.0, help=argparse.SUPPRESS)
parser.add_argument('--uptake-score', type=float, default=0.0, help=argparse.SUPPRESS)
parser.add_argument('--soft-score', type=float, default=1.0, help=argparse.SUPPRESS)
parser.add_argument('--reference-score', type=float, default=0.0, help=argparse.SUPPRESS)

parser.add_argument('--blind-gapfill', action='store_true', help=argparse.SUPPRESS)

args = parser.parse_args()

Expand Down Expand Up @@ -340,7 +359,9 @@ if __name__ == '__main__':
uptake_score=args.uptake_score,
soft_score=args.soft_score,
soft=args.soft,
hard=args.hard
hard=args.hard,
reference=args.reference,
ref_score=args.reference_score
)

else:
Expand All @@ -365,6 +386,8 @@ if __name__ == '__main__':
soft_score=args.soft_score,
soft=args.soft,
hard=args.hard,
reference=args.reference,
ref_score=args.reference_score,
recursive_mode=True
)

Expand Down
4 changes: 2 additions & 2 deletions scripts/carveme_init
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ if __name__ == '__main__':
downloads = [
'data/input/bigg_proteins.faa',
'data/input/equilibrator_compounds.tsv.gz',
'data/input/refseq_release_84.tsv.gz',
'data/input/genbank_release_221.tsv.gz',
'data/input/refseq_release_92.tsv.gz',
'data/input/genbank_release_230.tsv.gz',
'data/generated/bigg_gprs.csv.gz',
'data/generated/model_specific_data.csv.gz',
'data/generated/universe_draft.xml.gz',
Expand Down

0 comments on commit 7ed7557

Please sign in to comment.