Skip to content

Commit

Permalink
add first pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
gregcaporaso committed Apr 29, 2024
1 parent 93a3098 commit 1e601c4
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 22 deletions.
13 changes: 13 additions & 0 deletions q2_dwq2/_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,16 @@ def nw_align_example_1(use):
use.UsageInputs(seq1=seq1, seq2=seq2),
use.UsageOutputNames(aligned_sequences='msa'),
)


def align_and_summarize_example_1(use):
    """Usage example for the ``align_and_summarize`` action of ``dwq2``.

    Two input artifacts are initialized from ``seq1_factory`` and
    ``seq2_factory``, and the action is then invoked through the usage
    driver ``use`` with its outputs named ``msa`` and ``msa_summary``.
    """
    first_sequence = use.init_artifact('seq1', seq1_factory)
    second_sequence = use.init_artifact('seq2', seq2_factory)

    action = use.UsageAction(plugin_id='dwq2',
                             action_id='align_and_summarize')
    inputs = use.UsageInputs(seq1=first_sequence, seq2=second_sequence)
    output_names = use.UsageOutputNames(aligned_sequences='msa',
                                        msa_summary='msa_summary')

    # Unpacking into two names requires the action to yield exactly two
    # results.
    msa, msa_summary = use.action(action, inputs, output_names)
21 changes: 15 additions & 6 deletions q2_dwq2/_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,22 @@
from skbio.alignment import global_pairwise_align_nucleotide, TabularMSA
from skbio import DNA

# Default alignment scoring values, kept in a single mapping so that every
# signature that needs them reads the same numbers.
_nw_align_defaults = {
    'gap_open_penalty': 5,
    'gap_extend_penalty': 2,
    'match_score': 1,
    'mismatch_score': -2,
}

def nw_align(seq1: DNA,
seq2: DNA,
gap_open_penalty: float = 5,
gap_extend_penalty: float = 2,
match_score: float = 1,
mismatch_score: float = -2) -> TabularMSA:

def nw_align(
seq1: DNA,
seq2: DNA,
gap_open_penalty: float = _nw_align_defaults['gap_open_penalty'],
gap_extend_penalty: float = _nw_align_defaults['gap_extend_penalty'],
match_score: float = _nw_align_defaults['match_score'],
mismatch_score: float = _nw_align_defaults['mismatch_score']) \
-> TabularMSA:
msa, _, _ = global_pairwise_align_nucleotide(
seq1=seq1, seq2=seq2, gap_open_penalty=gap_open_penalty,
gap_extend_penalty=gap_extend_penalty, match_score=match_score,
Expand Down
27 changes: 27 additions & 0 deletions q2_dwq2/_pipelines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2024, Greg Caporaso.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

from ._methods import _nw_align_defaults


def align_and_summarize(
        ctx, seq1, seq2,
        gap_open_penalty=_nw_align_defaults['gap_open_penalty'],
        gap_extend_penalty=_nw_align_defaults['gap_extend_penalty'],
        match_score=_nw_align_defaults['match_score'],
        mismatch_score=_nw_align_defaults['mismatch_score']):
    """Align two sequences, then summarize the resulting alignment.

    Both steps are looked up on ``ctx`` as actions of the ``dwq2`` plugin:
    ``nw_align`` receives the two sequences plus the four scoring
    parameters, and ``summarize_alignment`` receives the alignment that
    ``nw_align`` produced.

    Returns a 2-tuple ``(msa, msa_summary)``.
    """
    align = ctx.get_action('dwq2', 'nw_align')
    summarize = ctx.get_action('dwq2', 'summarize_alignment')

    scoring = {
        'gap_open_penalty': gap_open_penalty,
        'gap_extend_penalty': gap_extend_penalty,
        'match_score': match_score,
        'mismatch_score': mismatch_score,
    }

    # Each action call is unpacked as a one-element result.
    (msa,) = align(seq1, seq2, **scoring)
    (msa_summary,) = summarize(msa)

    return msa, msa_summary
75 changes: 59 additions & 16 deletions q2_dwq2/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@

import importlib

from qiime2.plugin import Citations, Plugin, Float, Range
from qiime2.plugin import Citations, Plugin, Float, Range, Visualization
from q2_types.feature_data import FeatureData, AlignedSequence
from q2_dwq2 import __version__
from q2_dwq2._methods import nw_align
from q2_dwq2._visualizers import summarize_alignment
from q2_dwq2._examples import nw_align_example_1
from q2_dwq2._pipelines import align_and_summarize
from q2_dwq2._examples import nw_align_example_1, align_and_summarize_example_1
from q2_dwq2 import (
SingleDNASequence, SingleRecordDNAFASTAFormat,
SingleRecordDNAFASTADirectoryFormat)
Expand Down Expand Up @@ -42,20 +43,19 @@
SingleRecordDNAFASTADirectoryFormat,
description="A single DNA sequence.")

# Register actions
plugin.methods.register_function(
function=nw_align,
inputs={'seq1': SingleDNASequence,
'seq2': SingleDNASequence},
parameters={

# Register methods
_nw_align_inputs = {'seq1': SingleDNASequence,
'seq2': SingleDNASequence}
_nw_align_parameters = {
'gap_open_penalty': Float % Range(0, None, inclusive_start=False),
'gap_extend_penalty': Float % Range(0, None, inclusive_start=False),
'match_score': Float % Range(0, None, inclusive_start=False),
'mismatch_score': Float % Range(None, 0, inclusive_end=True)},
outputs={'aligned_sequences': FeatureData[AlignedSequence]},
input_descriptions={'seq1': 'The first sequence to align.',
'seq2': 'The second sequence to align.'},
parameter_descriptions={
'mismatch_score': Float % Range(None, 0, inclusive_end=True)}
_nw_align_outputs = {'aligned_sequences': FeatureData[AlignedSequence]}
_nw_align_input_descriptions = {'seq1': 'The first sequence to align.',
'seq2': 'The second sequence to align.'}
_nw_align_parameter_descriptions = {
'gap_open_penalty': ('The penalty incurred for opening a new gap. By '
'convention this is a positive number.'),
'gap_extend_penalty': ('The penalty incurred for extending an existing '
Expand All @@ -64,10 +64,19 @@
'position. By convention, this is a positive number.'),
'mismatch_score': ('The score for mismatching characters at an '
'alignment position. By convention, this is a '
'negative number.')},
output_descriptions={
'negative number.')}
_nw_align_output_descriptions = {
'aligned_sequences': 'The pairwise aligned sequences.'
},
}

plugin.methods.register_function(
function=nw_align,
inputs=_nw_align_inputs,
parameters=_nw_align_parameters,
outputs=_nw_align_outputs,
input_descriptions=_nw_align_input_descriptions,
parameter_descriptions=_nw_align_parameter_descriptions,
output_descriptions=_nw_align_output_descriptions,
name='Pairwise global sequence alignment.',
description=("Align two DNA sequences using Needleman-Wunsch (NW). "
"This is a Python implementation of NW, so it is very slow! "
Expand All @@ -76,6 +85,7 @@
examples={'Align two DNA sequences.': nw_align_example_1}
)

# Register visualizers
plugin.visualizers.register_function(
function=summarize_alignment,
inputs={'msa': FeatureData[AlignedSequence]},
Expand All @@ -84,6 +94,39 @@
parameter_descriptions={},
name='Summarize an alignment.',
description='Summarize a multiple sequence alignment.',
citations=[],
)

# Register pipelines
# The outputs mapping is positional in effect: its entries are matched, in
# order, against the values returned by the registered function. (Inputs and
# parameters have no such ordering requirement.) The nw_align outputs
# therefore come first, followed by the visualization.
_align_and_summarize_outputs = {
    **_nw_align_outputs,
    'msa_summary': Visualization,
}

_align_and_summarize_output_descriptions = {
    **_nw_align_output_descriptions,
    'msa_summary': "Visual summary of the pairwise alignment.",
}

plugin.pipelines.register_function(
    function=align_and_summarize,
    inputs=_nw_align_inputs,
    parameters=_nw_align_parameters,
    outputs=_align_and_summarize_outputs,
    input_descriptions=_nw_align_input_descriptions,
    parameter_descriptions=_nw_align_parameter_descriptions,
    output_descriptions=_align_and_summarize_output_descriptions,
    name="Pairwise global alignment and summarization.",
    description=("Perform global pairwise sequence alignment using a slow "
                 "Needleman-Wunsch (NW) implementation, and generate a "
                 "visual summary of the alignment."),
    # Citations for the Actions called from the Pipeline are automatically
    # included, so only citations new to this Pipeline belong here.
    citations=[],
    examples={'Align two sequences and summarize the alignment.':
              align_and_summarize_example_1}
)

importlib.import_module('q2_dwq2._transformers')
51 changes: 51 additions & 0 deletions q2_dwq2/tests/test_pipelines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# ----------------------------------------------------------------------------
# Copyright (c) 2024, Greg Caporaso.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------

import skbio

import qiime2
from qiime2.plugin.testing import TestPluginBase


class AlignAndSummarizeTests(TestPluginBase):
    """Tests for the ``align_and_summarize`` pipeline."""

    package = 'q2_dwq2.tests'

    def test_simple1(self):
        """Run the pipeline on two short sequences and check both outputs."""

        def make_dna(residues, seq_id):
            # Every sequence gets its own freshly built metadata dict.
            return skbio.DNA(residues,
                             metadata={'id': seq_id, 'description': ''})

        def as_artifact(sequence):
            return qiime2.Artifact.import_data(
                "SingleDNASequence", sequence, view_type=skbio.DNA)

        # Fetch the pipeline from the plugin, as QIIME 2 sees it, so that
        # the `ctx` variable is assigned correctly.
        pipeline = self.plugin.pipelines['align_and_summarize']

        input1 = as_artifact(make_dna('AAAAAAAAGGTGGCCTTTTTTTT', 's1'))
        input2 = as_artifact(make_dna('AAAAAAAAGGGGCCTTTTTTTT', 's2'))
        observed_msa, observed_viz = pipeline(input1, input2)

        expected_rows = [
            make_dna('AAAAAAAAGGTGGCCTTTTTTTT', 's1'),
            make_dna('AAAAAAAAGG-GGCCTTTTTTTT', 's2'),
        ]
        expected_msa = skbio.TabularMSA(expected_rows)

        # The alignment output is a qiime2.Artifact; view it as a
        # skbio.TabularMSA before comparing it with the expectation.
        self.assertEqual(observed_msa.view(skbio.TabularMSA), expected_msa)

        # The second output is a qiime2.Visualization; its index.html is
        # read and checked for each aligned sequence.
        index_fp = observed_viz.get_index_paths(relative=False)['html']
        with open(index_fp, 'r') as index_file:
            observed_index = index_file.read()
        for aligned_sequence in expected_rows:
            self.assertIn(str(aligned_sequence), observed_index)

0 comments on commit 1e601c4

Please sign in to comment.