add first pipeline

caporaso-lab · Apr 29, 2024 · 1e601c4 · 1e601c4
1 parent 93a3098
commit 1e601c4
Show file tree

Hide file tree

Showing 5 changed files with 165 additions and 22 deletions.
diff --git a/q2_dwq2/_examples.py b/q2_dwq2/_examples.py
@@ -33,3 +33,16 @@ def nw_align_example_1(use):
         use.UsageInputs(seq1=seq1, seq2=seq2),
         use.UsageOutputNames(aligned_sequences='msa'),
     )
+
+
+def align_and_summarize_example_1(use):
+    seq1 = use.init_artifact('seq1', seq1_factory)
+    seq2 = use.init_artifact('seq2', seq2_factory)
+
+    msa, msa_summary, = use.action(
+        use.UsageAction(plugin_id='dwq2',
+                        action_id='align_and_summarize'),
+        use.UsageInputs(seq1=seq1, seq2=seq2),
+        use.UsageOutputNames(aligned_sequences='msa',
+                             msa_summary='msa_summary'),
+    )
diff --git a/q2_dwq2/_methods.py b/q2_dwq2/_methods.py
@@ -9,13 +9,22 @@
 from skbio.alignment import global_pairwise_align_nucleotide, TabularMSA
 from skbio import DNA
 
+_nw_align_defaults = {
+    'gap_open_penalty': 5,
+    'gap_extend_penalty': 2,
+    'match_score': 1,
+    'mismatch_score': -2
+}
 
-def nw_align(seq1: DNA,
-             seq2: DNA,
-             gap_open_penalty: float = 5,
-             gap_extend_penalty: float = 2,
-             match_score: float = 1,
-             mismatch_score: float = -2) -> TabularMSA:
+
+def nw_align(
+        seq1: DNA,
+        seq2: DNA,
+        gap_open_penalty: float = _nw_align_defaults['gap_open_penalty'],
+        gap_extend_penalty: float = _nw_align_defaults['gap_extend_penalty'],
+        match_score: float = _nw_align_defaults['match_score'],
+        mismatch_score: float = _nw_align_defaults['mismatch_score']) \
+        -> TabularMSA:
     msa, _, _ = global_pairwise_align_nucleotide(
         seq1=seq1, seq2=seq2, gap_open_penalty=gap_open_penalty,
         gap_extend_penalty=gap_extend_penalty, match_score=match_score,

diff --git a/q2_dwq2/_pipelines.py b/q2_dwq2/_pipelines.py
@@ -0,0 +1,27 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2024, Greg Caporaso.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from ._methods import _nw_align_defaults
+
+
+def align_and_summarize(
+        ctx, seq1, seq2,
+        gap_open_penalty=_nw_align_defaults['gap_open_penalty'],
+        gap_extend_penalty=_nw_align_defaults['gap_extend_penalty'],
+        match_score=_nw_align_defaults['match_score'],
+        mismatch_score=_nw_align_defaults['mismatch_score']):
+    nw_align_action = ctx.get_action('dwq2', 'nw_align')
+    summarize_alignment_action = ctx.get_action('dwq2', 'summarize_alignment')
+
+    msa, = nw_align_action(
+                    seq1, seq2, gap_open_penalty=gap_open_penalty,
+                    gap_extend_penalty=gap_extend_penalty,
+                    match_score=match_score, mismatch_score=mismatch_score)
+    msa_summary, = summarize_alignment_action(msa)
+
+    return (msa, msa_summary)
diff --git a/q2_dwq2/plugin_setup.py b/q2_dwq2/plugin_setup.py
@@ -8,12 +8,13 @@
 
 import importlib
 
-from qiime2.plugin import Citations, Plugin, Float, Range
+from qiime2.plugin import Citations, Plugin, Float, Range, Visualization
 from q2_types.feature_data import FeatureData, AlignedSequence
 from q2_dwq2 import __version__
 from q2_dwq2._methods import nw_align
 from q2_dwq2._visualizers import summarize_alignment
-from q2_dwq2._examples import nw_align_example_1
+from q2_dwq2._pipelines import align_and_summarize
+from q2_dwq2._examples import nw_align_example_1, align_and_summarize_example_1
 from q2_dwq2 import (
     SingleDNASequence, SingleRecordDNAFASTAFormat,
     SingleRecordDNAFASTADirectoryFormat)
@@ -42,20 +43,19 @@
                                SingleRecordDNAFASTADirectoryFormat,
                                description="A single DNA sequence.")
 
-# Register actions
-plugin.methods.register_function(
-    function=nw_align,
-    inputs={'seq1': SingleDNASequence,
-            'seq2': SingleDNASequence},
-    parameters={
+
+# Register methods
+_nw_align_inputs = {'seq1': SingleDNASequence,
+                    'seq2': SingleDNASequence}
+_nw_align_parameters = {
         'gap_open_penalty': Float % Range(0, None, inclusive_start=False),
         'gap_extend_penalty': Float % Range(0, None, inclusive_start=False),
         'match_score': Float % Range(0, None, inclusive_start=False),
-        'mismatch_score': Float % Range(None, 0, inclusive_end=True)},
-    outputs={'aligned_sequences': FeatureData[AlignedSequence]},
-    input_descriptions={'seq1': 'The first sequence to align.',
-                        'seq2': 'The second sequence to align.'},
-    parameter_descriptions={
+        'mismatch_score': Float % Range(None, 0, inclusive_end=True)}
+_nw_align_outputs = {'aligned_sequences': FeatureData[AlignedSequence]}
+_nw_align_input_descriptions = {'seq1': 'The first sequence to align.',
+                                'seq2': 'The second sequence to align.'}
+_nw_align_parameter_descriptions = {
         'gap_open_penalty': ('The penalty incurred for opening a new gap. By '
                              'convention this is a positive number.'),
         'gap_extend_penalty': ('The penalty incurred for extending an existing '
@@ -64,10 +64,19 @@
                         'position. By convention, this is a positive number.'),
         'mismatch_score': ('The score for mismatching characters at an '
                            'alignment position. By convention, this is a '
-                           'negative number.')},
-    output_descriptions={
+                           'negative number.')}
+_nw_align_output_descriptions = {
         'aligned_sequences': 'The pairwise aligned sequences.'
-    },
+    }
+
+plugin.methods.register_function(
+    function=nw_align,
+    inputs=_nw_align_inputs,
+    parameters=_nw_align_parameters,
+    outputs=_nw_align_outputs,
+    input_descriptions=_nw_align_input_descriptions,
+    parameter_descriptions=_nw_align_parameter_descriptions,
+    output_descriptions=_nw_align_output_descriptions,
     name='Pairwise global sequence alignment.',
     description=("Align two DNA sequences using Needleman-Wunsch (NW). "
                  "This is a Python implementation of NW, so it is very slow! "
@@ -76,6 +85,7 @@
     examples={'Align two DNA sequences.': nw_align_example_1}
 )
 
+# Register visualizers
 plugin.visualizers.register_function(
     function=summarize_alignment,
     inputs={'msa': FeatureData[AlignedSequence]},
@@ -84,6 +94,39 @@
     parameter_descriptions={},
     name='Summarize an alignment.',
     description='Summarize a multiple sequence alignment.',
+    citations=[],
+)
+
+# Register pipelines
+# Unlike for inputs and parameters, order is important in the outputs dict,
+# because these outputs are mapped, in order, onto the return values of the
+# registered function.
+_align_and_summarize_outputs = {}
+_align_and_summarize_outputs.update(_nw_align_outputs)
+_align_and_summarize_outputs['msa_summary'] = Visualization
+
+_align_and_summarize_output_descriptions = {}
+_align_and_summarize_output_descriptions.update(_nw_align_output_descriptions)
+_align_and_summarize_output_descriptions['msa_summary'] = \
+    "Visual summary of the pairwise alignment."
+
+plugin.pipelines.register_function(
+    function=align_and_summarize,
+    inputs=_nw_align_inputs,
+    parameters=_nw_align_parameters,
+    outputs=_align_and_summarize_outputs,
+    input_descriptions=_nw_align_input_descriptions,
+    parameter_descriptions=_nw_align_parameter_descriptions,
+    output_descriptions=_align_and_summarize_output_descriptions,
+    name="Pairwise global alignment and summarization.",
+    description=("Perform global pairwise sequence alignment using a slow "
+                 "Needleman-Wunsch (NW) implementation, and generate a "
+                 "visual summary of the alignment."),
+    # Only citations new to this Pipeline need to be defined. Citations for
+    # the Actions called from the Pipeline are automatically included.
+    citations=[],
+    examples={'Align two sequences and summarize the alignment.':
+              align_and_summarize_example_1}
 )
 
 importlib.import_module('q2_dwq2._transformers')
diff --git a/q2_dwq2/tests/test_pipelines.py b/q2_dwq2/tests/test_pipelines.py
@@ -0,0 +1,51 @@
+# ----------------------------------------------------------------------------
+# Copyright (c) 2024, Greg Caporaso.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file LICENSE, distributed with this software.
+# ----------------------------------------------------------------------------
+
+import skbio
+
+import qiime2
+from qiime2.plugin.testing import TestPluginBase
+
+
+class AlignAndSummarizeTests(TestPluginBase):
+    package = 'q2_dwq2.tests'
+
+    def test_simple1(self):
+        # Access the pipeline as QIIME 2 sees it,
+        # so that the `ctx` variable is assigned correctly.
+        align_and_summarize_pipeline = \
+            self.plugin.pipelines['align_and_summarize']
+
+        sequence1 = skbio.DNA('AAAAAAAAGGTGGCCTTTTTTTT',
+                              metadata={'id': 's1', 'description': ''})
+        sequence2 = skbio.DNA('AAAAAAAAGGGGCCTTTTTTTT',
+                              metadata={'id': 's2', 'description': ''})
+        sequence1_art = qiime2.Artifact.import_data(
+            "SingleDNASequence", sequence1, view_type=skbio.DNA)
+        sequence2_art = qiime2.Artifact.import_data(
+            "SingleDNASequence", sequence2, view_type=skbio.DNA)
+        observed_msa, observed_viz = align_and_summarize_pipeline(
+            sequence1_art, sequence2_art)
+
+        aligned_sequence1 = skbio.DNA('AAAAAAAAGGTGGCCTTTTTTTT',
+                                      metadata={'id': 's1', 'description': ''})
+        aligned_sequence2 = skbio.DNA('AAAAAAAAGG-GGCCTTTTTTTT',
+                                      metadata={'id': 's2', 'description': ''})
+        expected_msa = skbio.TabularMSA([aligned_sequence1, aligned_sequence2])
+
+        # observed_msa output is a qiime2.Artifact, so view it as a
+        # skbio.TabularMSA for comparison to expected_msa.
+        self.assertEqual(observed_msa.view(skbio.TabularMSA), expected_msa)
+
+        # observed_viz is a qiime2.Visualization.
+        # Access its index.html file for testing.
+        index_fp = observed_viz.get_index_paths(relative=False)['html']
+        with open(index_fp, 'r') as fh:
+            observed_index = fh.read()
+            self.assertIn(str(aligned_sequence1), observed_index)
+            self.assertIn(str(aligned_sequence2), observed_index)