Skip to content

Commit

Permalink
Merge pull request #54 from danielparton/master
Browse files Browse the repository at this point in the history
align function: substitution matrix and other parameters now accessib…
  • Loading branch information
danielparton committed Aug 31, 2015
2 parents 66ec9f1 + 5be1ba7 commit 2e6a120
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 21 deletions.
4 changes: 4 additions & 0 deletions ensembler/cli_commands/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
"""\
--targets <target> Define one or more target IDs to work on (comma-separated), e.g.
"--targets ABL1_HUMAN_D0,SRC_HUMAN_D0" (default: all targets)""",

"""\
--substitution_matrix <matrix> Specify an amino acid substitution matrix available from
Bio.SubsMat.MatrixInfo (default: 'Gonnet')""",
]

helpstring_nonunique_options = [
Expand Down
3 changes: 2 additions & 1 deletion ensembler/cli_commands/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
ensembler loopmodel [-h | --help] [--templates <templates>] [--templatesfile <templatesfile>]
[--overwrite_structures] [-v | --verbose]
ensembler align [-h | --help] [--targets <targets>] [--targetsfile <targetsfile>]
[--templates <templates>] [--templatesfile <templatesfile>] [-v | --verbose]
[--templates <templates>] [--templatesfile <templatesfile>] [--substitution_matrix <matrix>]
[-v | --verbose]
ensembler build_models [-h | --help] [--targets <target>] [--targetsfile <targetsfile>]
[--templates <template>] [--templatesfile <templatesfile>] [--template_seqid_cutoff <cutoff>]
[--write_modeller_restraints_file] [-v | --verbose]
Expand Down
14 changes: 7 additions & 7 deletions ensembler/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,16 +178,16 @@ def strf_timedelta(delta_t):


def check_project_toplevel_dir(raise_exception=True):
import os
for dirtype in project_dirtypes:
if dirtype == 'packaged_models':
cwd = os.getcwd()
for project_dirtype in project_dirtypes:
if project_dirtype == 'packaged_models':
continue
dirpath = getattr(default_project_dirnames, dirtype)
if not os.path.exists(dirpath):
project_dirpath = getattr(default_project_dirnames, project_dirtype)
if not os.path.exists(project_dirpath):
if raise_exception:
raise Exception('Directory {0} is not the top-level directory of an Ensembler project'.format(dirpath))
raise Exception('Directory "{}" is not the top-level directory of an Ensembler project'.format(cwd))
else:
logger.debug('Directory %s not found' % dirpath)
logger.debug('Directory "{}" not found'.format(project_dirpath))
return False
return True

Expand Down
52 changes: 39 additions & 13 deletions ensembler/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,16 +337,24 @@ def check_loopmodel_complete_and_successful(template):


@ensembler.utils.notify_when_done
def align_targets_and_templates(process_only_these_targets=None, process_only_these_templates=None, loglevel=None):
def align_targets_and_templates(process_only_these_targets=None,
process_only_these_templates=None,
substitution_matrix='gonnet',
gap_open=-10,
gap_extend=-0.5,
loglevel=None
):
"""
Conducts pairwise alignments of target sequences against template sequences.
Stores Modeller-compatible 'alignment.pir' files in each model directory,
and also outputs a table of model IDs, sorted by sequence identity.
:param process_only_these_targets:
:param process_only_these_templates:
:param loglevel:
:return:
Parameters
----------
process_only_these_targets:
process_only_these_templates:
substitution_matrix: str
Specify an amino acid substitution matrix available from Bio.SubsMat.MatrixInfo
"""
ensembler.utils.set_loglevel(loglevel)
targets, templates_resolved_seq = ensembler.core.get_targets_and_templates()
Expand Down Expand Up @@ -375,7 +383,13 @@ def align_targets_and_templates(process_only_these_targets=None, process_only_th

model_dir = os.path.abspath(os.path.join(ensembler.core.default_project_dirnames.models, target.id, template_id))
ensembler.utils.create_dir(model_dir)
aln = align_target_template(target, template)
aln = align_target_template(
target,
template,
substitution_matrix=substitution_matrix,
gap_open=gap_open,
gap_extend=gap_extend
)
aln_filepath = os.path.join(model_dir, 'alignment.pir')
write_modeller_pir_aln_file(aln, target, template, pir_aln_filepath=aln_filepath)
seq_identity_data_sublist.append({
Expand All @@ -397,15 +411,27 @@ def align_targets_and_templates(process_only_these_targets=None, process_only_th
write_sorted_seq_identities(target, seq_identity_data)


def align_target_template(target, template, gap_open=-10, gap_extend=-0.5):
def align_target_template(target,
template,
substitution_matrix='gonnet',
gap_open=-10,
gap_extend=-0.5
):
"""
:param target: BioPython SeqRecord
:param template: BioPython SeqRecord
:param gap_open: float or int
:param gap_extend: float or int
:return: alignment
Parameters
----------
target: BioPython SeqRecord
template: BioPython SeqRecord
substitution_matrix: str
Specify an amino acid substitution matrix available from Bio.SubsMat.MatrixInfo
gap_open: float or int
gap_extend: float or int
Returns
-------
alignment: list
"""
matrix = Bio.SubsMat.MatrixInfo.gonnet
matrix = getattr(Bio.SubsMat.MatrixInfo, substitution_matrix)
aln = Bio.pairwise2.align.globalds(str(target.seq), str(template.seq), matrix, gap_open, gap_extend)
return aln

Expand Down
14 changes: 14 additions & 0 deletions ensembler/tests/test_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,20 @@ def test_build_model():
assert os.path.getsize(model_filepath) > 0


@attr('unit')
def test_align_target_template():
    """Check that align_target_template honours the substitution_matrix argument.

    The same 18-residue target/template pair is aligned once per matrix. The
    expected alignment is ungapped in both cases, so only the score should
    differ between the two runs.
    """
    target = Mock()
    template = Mock()
    target.id = 'mock_target'
    target.seq = 'YILGDTLGVGGKVKVGKH'
    template.id = 'mock_template'
    template.seq = 'YQNLSPVGSGGSVCAAFD'

    # Alignment scores are sums of floating-point matrix entries, so they are
    # compared with a tolerance instead of bit-for-bit equality.
    expected_scores = (('gonnet', 18.1), ('blosum62', 10.0))
    for matrix_name, expected_score in expected_scores:
        aln = ensembler.modeling.align_target_template(
            target, template, substitution_matrix=matrix_name
        )
        # Exactly one optimal alignment is expected for this pair.
        assert len(aln) == 1
        aligned_target, aligned_template, score, begin, end = aln[0]
        assert aligned_target == 'YILGDTLGVGGKVKVGKH'
        assert aligned_template == 'YQNLSPVGSGGSVCAAFD'
        assert abs(score - expected_score) < 1e-6
        assert (begin, end) == (0, 18)


@attr('unit')
def test_align_command():
ref_resources_dirpath = get_installed_resource_filename(os.path.join('tests', 'example_project'))
Expand Down

0 comments on commit 2e6a120

Please sign in to comment.