diff --git a/CHANGES.rst b/CHANGES.rst index 04f6790..f0afd9e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,12 @@ CHANGES ======= +Version 1.4.0 +------------- +* nofilter option added to micca.api.msa.nast() (do not remove positions which + are gaps in every sequences) and to the msa command (--nast-nofilter option); +* Documentation improved. + Version 1.3.0 ------------- * Swarm clustering algorithm added to micca otu; diff --git a/doc/source/index.rst b/doc/source/index.rst index cf5f179..7b15aef 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -6,19 +6,18 @@ .. include:: ../../README.rst - .. toctree:: :maxdepth: 1 :caption: Getting Started - + install run databases - + .. toctree:: :maxdepth: 1 :caption: Tutorials - + singleend pairedend phyloseq @@ -43,8 +42,7 @@ .. Indices and tables .. ================== -.. +.. .. * :ref:`genindex` .. * :ref:`modindex` .. * :ref:`search` - diff --git a/doc/source/otu.rst b/doc/source/otu.rst index 98363fc..1d63dd1 100644 --- a/doc/source/otu.rst +++ b/doc/source/otu.rst @@ -27,7 +27,7 @@ The :doc:`commands/otu` command returns in a single directory 5 files: otus.fasta FASTA containing the representative sequences (OTUs):: - >DENOVO1 + >DENOVO1 GACGAACGCTGGCGGCGTGCCTAACACATGCAAGTCGAACGGGG... >DENOVO2 GATGAACGCTAGCTACAGGCTTAACACATGCAAGTCGAGGGGCA... @@ -40,8 +40,8 @@ The :doc:`commands/otu` command returns in a single directory 5 files: ids:: DENOVO1 IS0AYJS04JQKIS;sample=Mw_01 - DENOVO2 IS0AYJS04JL6RS;sample=Mw_01 - DENOVO3 IS0AYJS04H4XNN;sample=Mw_01 + DENOVO2 IS0AYJS04JL6RS;sample=Mw_01 + DENOVO3 IS0AYJS04H4XNN;sample=Mw_01 ...
hits.txt @@ -50,12 +50,12 @@ The :doc:`commands/otu` command returns in a single directory 5 files: identity (if available), see :ref:`otu-definition_identity`:: IS0AYJS04JE658;sample=Mw_01; IS0AYJS04I4XYN;sample=Mw_01 99.4 - IS0AYJS04JPH34;sample=Mw_01; IS0AYJS04JVUBC;sample=Mw_01 98.0 - IS0AYJS04I67XN;sample=Mw_01; IS0AYJS04JVUBC;sample=Mw_01 99.7 + IS0AYJS04JPH34;sample=Mw_01; IS0AYJS04JVUBC;sample=Mw_01 98.0 + IS0AYJS04I67XN;sample=Mw_01; IS0AYJS04JVUBC;sample=Mw_01 99.7 ... otuschim.fasta - (only for 'denovo_greedy' and 'open_ref' mathods, when + (only for 'denovo_greedy', 'denovo_swarm' and 'open_ref' methods, when ``-c/--rmchim`` is specified) FASTA file containing the chimeric otus. diff --git a/docker/README.md b/docker/README.md index 356687b..14b3dc3 100644 --- a/docker/README.md +++ b/docker/README.md @@ -11,6 +11,7 @@ The RDP Classifier is included. ## Available Tags/Versions - latest: GitHub snapshot (master) +- 1.4.0: micca 1.4.0 (RDP Classifier release 2.11 included) - 1.3.0: micca 1.3.0 (RDP Classifier release 2.11 included) - 1.2.2: micca 1.2.2 (RDP Classifier release 2.11 included) diff --git a/micca/__init__.py b/micca/__init__.py index bb5ffb8..e4288a0 100644 --- a/micca/__init__.py +++ b/micca/__init__.py @@ -1,5 +1,5 @@ import os -__version__ = "1.3.0" +__version__ = "1.4.0" THIRDPARTY_BIN_PATH = os.path.join(os.path.dirname(__file__), "thirdparty_bin") diff --git a/micca/api/msa.py b/micca/api/msa.py index 61a7021..dc04480 100644 --- a/micca/api/msa.py +++ b/micca/api/msa.py @@ -166,7 +166,7 @@ def _aln_to_seqs(aln, query, target): def nast(input_fn, template_fn, output_fn, notaligned_fn=None, hits_fn=None, - ident=0.75, threads=1, mincov=0.75, strand="both"): + ident=0.75, threads=1, mincov=0.75, strand="both", nofilter=False): output_dir = os.path.dirname(output_fn) @@ -269,24 +269,30 @@ def nast(input_fn, template_fn, output_fn, notaligned_fn=None, hits_fn=None, hits_temp_handle.close() output_temp_handle.close() hits_out_handle.close() +
os.remove(hits_temp_fn) + os.remove(template_wg_temp_fn) # remove columns which are gaps in every sequence - output_temp_handle = open(output_temp_fn, "rb") - output_handle = open(output_fn, "wb") - for title, seq in SimpleFastaParser(output_temp_handle): - seqout = "".join(np.array(list(seq))[msa_cov > 0]) - output_handle.write(">{}\n{}\n".format(title, seqout)) - output_temp_handle.close() - output_handle.close() + if nofilter: + os.rename(output_temp_fn, output_fn) + else: + output_temp_handle = open(output_temp_fn, "rb") + output_handle = open(output_fn, "wb") + for title, seq in SimpleFastaParser(output_temp_handle): + seqout = "".join(np.array(list(seq))[msa_cov > 0]) + output_handle.write(">{}\n{}\n".format(title, seqout)) + output_temp_handle.close() + output_handle.close() + os.remove(output_temp_fn) if hits_fn is None: os.remove(hits_out_fn) else: os.rename(hits_out_fn, hits_fn) - os.remove(template_wg_temp_fn) - os.remove(hits_temp_fn) - os.remove(output_temp_fn) + + + def muscle(input_fn, output_fn, maxiters=16): diff --git a/micca/cmds/msa.py b/micca/cmds/msa.py index f751e67..8ad6f5e 100644 --- a/micca/cmds/msa.py +++ b/micca/cmds/msa.py @@ -111,6 +111,10 @@ def main(argv): help="write hits on a TAB delimited file with the " "query sequence id, the template sequence id and " "the identity.") + group_nast.add_argument('--nast-nofilter', default=False, action="store_true", + help="do not remove positions which are gaps in " + "every sequenceces (useful if you want to apply " + "a Lane mask filter before the tree inference).") args = parser.parse_args(argv) @@ -129,7 +133,8 @@ def main(argv): ident=args.nast_id, threads=args.nast_threads, mincov=args.nast_mincov, - strand=args.nast_strand) + strand=args.nast_strand, + nofilter=args.nast_nofilter) else: micca.api.msa.muscle( input_fn=args.input, diff --git a/micca/cmds/otu.py b/micca/cmds/otu.py index f6c613a..de91751 100644 --- a/micca/cmds/otu.py +++ b/micca/cmds/otu.py @@ -133,7 +133,8 @@ def 
main(argv): "open_ref"], help="clustering method (default %(default)s)") group.add_argument('-d', '--id', default=0.97, type=float, - help="sequence identity threshold (0.0 to 1.0, " + help="sequence identity threshold (for 'denovo_greedy', " + "'closed_ref' and 'open_ref', 0.0 to 1.0, " "default %(default)s).") group.add_argument('-n', '--mincov', default=0.75, type=float, help="reject sequence if the fraction of alignment "