From 24a55efcb5f6bf2f3f792af11f893cdd5e8dd9ea Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 12 Jun 2016 08:21:20 -0700 Subject: [PATCH 01/15] make 'add' a synonym for 'count' on hashtables --- khmer/_khmer.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/khmer/_khmer.cc b/khmer/_khmer.cc index 45c7649918..771d3e27e1 100644 --- a/khmer/_khmer.cc +++ b/khmer/_khmer.cc @@ -2785,6 +2785,11 @@ static PyMethodDef khmer_hashtable_methods[] = { (PyCFunction)hashtable_count, METH_VARARGS, "Increment the count of this k-mer." }, + { + "add", + (PyCFunction)hashtable_count, METH_VARARGS, + "Increment the count of this k-mer. (Synonym for 'count'.)" + }, { "consume", (PyCFunction)hashtable_consume, METH_VARARGS, From 20910357ac51f3ae872c6ad359aafd2a01fa40c1 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 12 Jun 2016 14:14:11 -0700 Subject: [PATCH 02/15] add args.quiet to a bunch of scripts --- scripts/abundance-dist-single.py | 5 ++++- scripts/abundance-dist.py | 5 ++++- scripts/load-into-counting.py | 7 +++++-- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py index 243dba965a..1a84f2dfdd 100755 --- a/scripts/abundance-dist-single.py +++ b/scripts/abundance-dist-single.py @@ -97,12 +97,15 @@ def get_parser(): "filename.") parser.add_argument('-f', '--force', default=False, action='store_true', help='Overwrite output file if it exists') + parser.add_argument('-q', '--quiet', dest='quiet', default=False, + action='store_true') return parser def main(): # pylint: disable=too-many-locals,too-many-branches - info('abundance-dist-single.py', ['counting', 'SeqAn']) args = sanitize_help(get_parser()).parse_args() + if not args.quiet: + info('abundance-dist-single.py', ['counting', 'SeqAn']) report_on_config(args) check_input_files(args.input_sequence_filename, args.force) diff --git a/scripts/abundance-dist.py b/scripts/abundance-dist.py index 6d594f7d6f..fb948d14db 
100755 --- a/scripts/abundance-dist.py +++ b/scripts/abundance-dist.py @@ -89,12 +89,15 @@ def get_parser(): parser.add_argument('-f', '--force', default=False, action='store_true', help='Continue even if specified input files ' 'do not exist or are empty.') + parser.add_argument('-q', '--quiet', dest='quiet', default=False, + action='store_true') return parser def main(): - info('abundance-dist.py', ['counting']) args = sanitize_help(get_parser()).parse_args() + if not args.quiet: + info('abundance-dist.py', ['counting']) infiles = [args.input_count_graph_filename, args.input_sequence_filename] diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index 0520c680e6..2e01862713 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -97,14 +97,17 @@ def get_parser(): " default)") parser.add_argument('-f', '--force', default=False, action='store_true', help='Overwrite output file if it exists') + parser.add_argument('-q', '--quiet', dest='quiet', default=False, + action='store_true') return parser def main(): - info('load-into-counting.py', ['counting', 'SeqAn']) - args = sanitize_help(get_parser()).parse_args() + if not args.quiet: + info('load-into-counting.py', ['counting', 'SeqAn']) + report_on_config(args) base = args.output_countgraph_filename From 7f327273e575321f38703630368ef283cfc7a8d3 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 12 Jun 2016 14:33:26 -0700 Subject: [PATCH 03/15] add --quiet to filter-abund --- scripts/filter-abund.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/filter-abund.py b/scripts/filter-abund.py index 5b924b8389..c47cb3460c 100755 --- a/scripts/filter-abund.py +++ b/scripts/filter-abund.py @@ -103,15 +103,17 @@ def get_parser(): version='khmer {v}'.format(v=__version__)) parser.add_argument('-f', '--force', default=False, action='store_true', help='Overwrite output file if it exists') + parser.add_argument('-q', '--quiet', dest='quiet', default=False, + action='store_true') add_output_compression_type(parser) return parser def main(): - info('filter-abund.py', ['counting']) args = sanitize_help(get_parser()).parse_args() + if not args.quiet: + info('filter-abund.py', ['counting']) - check_input_files(args.input_graph, args.force) infiles = args.input_filename if ('-' in infiles or '/dev/stdin' in infiles) and not \ args.single_output_file: From 8d49732ca5df1e90a4e857bfeecd2ebff5e1b49f Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 12 Jun 2016 17:09:45 -0700 Subject: [PATCH 04/15] add --quiet to filter-abund-single --- scripts/filter-abund-single.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py index b6790ca2a4..f8cd880d4b 100755 --- a/scripts/filter-abund-single.py +++ b/scripts/filter-abund-single.py @@ -92,13 +92,16 @@ def get_parser(): help="FAST[AQ] sequence file to trim") parser.add_argument('-f', '--force', default=False, action='store_true', help='Overwrite output file if it exists') + parser.add_argument('-q', '--quiet', dest='quiet', default=False, + action='store_true') add_output_compression_type(parser) return parser def main(): - info('filter-abund-single.py', ['counting', 'SeqAn']) args = sanitize_help(get_parser()).parse_args() + if not args.quiet: + info('filter-abund-single.py', ['counting', 'SeqAn']) check_input_files(args.datafile, args.force) check_space([args.datafile], args.force) From 59c3be33b6810abc0f7b78acd858ef5db6648cfd Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 12 Jun 2016 21:56:44 -0700 Subject: [PATCH 05/15] add --quiet to normalize-by-median and trim-low-abund --- scripts/normalize-by-median.py | 5 ++++- scripts/trim-low-abund.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py index b4c2385a9b..6cc0f46326 100755 --- a/scripts/normalize-by-median.py +++ b/scripts/normalize-by-median.py @@ -306,9 +306,12 @@ def get_parser(): def main(): # pylint: disable=too-many-branches,too-many-statements - info('normalize-by-median.py', ['diginorm']) parser = sanitize_help(get_parser()) args = parser.parse_args() + + if not args.quiet: + info('normalize-by-median.py', ['diginorm']) + configure_logging(args.quiet) report_on_config(args) diff --git a/scripts/trim-low-abund.py b/scripts/trim-low-abund.py index 6509a511b1..d62b93be69 100755 --- a/scripts/trim-low-abund.py +++ b/scripts/trim-low-abund.py @@ -125,6 +125,8 @@ def get_parser(): parser.add_argument('-s', '--savegraph', metavar="filename", default='', help='save the k-mer countgraph to disk after all' 'reads are loaded.') + parser.add_argument('-q', '--quiet', dest='quiet', default=False, + action='store_true') # expert options parser.add_argument('--force', default=False, action='store_true') @@ -319,9 +321,10 @@ def pass2(self, reader): def main(): - info('trim-low-abund.py', ['streaming']) parser = sanitize_help(get_parser()) args = parser.parse_args() + if not args.quiet: + info('trim-low-abund.py', ['streaming']) ### From 933fad2480110c4afd9f68bd5b29000cc8da5404 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Tue, 14 Jun 2016 05:48:53 -0700 Subject: [PATCH 06/15] fix typo in partition-graph arg help --- scripts/partition-graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/partition-graph.py b/scripts/partition-graph.py index 322ddfd827..265d2f3332 100755 --- a/scripts/partition-graph.py +++ b/scripts/partition-graph.py @@ -78,7 +78,7 @@ def get_parser(): "connectivity", epilog=textwrap.dedent(epilog), formatter_class=ComboFormatter) - parser.add_argument('basename', help="basename of the input k-mer" + parser.add_argument('basename', help="basename of the input k-mer " "nodegraph + tagset files") parser.add_argument('--stoptags', '-S', metavar='filename', default='', help="Use stoptags in this file during partitioning") From 11455578baad0eb79cfd56c0fcd6915128e4e003 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 26 Jun 2016 10:05:30 -0700 Subject: [PATCH 07/15] add logging into abundance-dist.py and abundance-dist-single.py --- scripts/abundance-dist-single.py | 49 +++++++++++++++++--------------- scripts/abundance-dist.py | 37 ++++++++++++------------ tests/test_scripts.py | 49 +++++++++++++++++++++++++++++++- 3 files changed, 93 insertions(+), 42 deletions(-) diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py index 1a84f2dfdd..f347b9bf7c 100755 --- a/scripts/abundance-dist-single.py +++ b/scripts/abundance-dist-single.py @@ -55,6 +55,8 @@ report_on_config, info, calculate_graphsize, sanitize_help) from khmer.kfile import (check_input_files, check_space_for_graph) +from khmer.khmer_logger import (configure_logging, log_info, log_error, + log_warn) def get_parser(): @@ -106,6 +108,8 @@ def main(): # pylint: disable=too-many-locals,too-many-branches args = sanitize_help(get_parser()).parse_args() if not args.quiet: info('abundance-dist-single.py', ['counting', 'SeqAn']) + + configure_logging(args.quiet) report_on_config(args) check_input_files(args.input_sequence_filename, 
args.force) @@ -114,8 +118,8 @@ def main(): # pylint: disable=too-many-locals,too-many-branches check_space_for_graph(args.savegraph, graphsize, args.force) if (not args.squash_output and os.path.exists(args.output_histogram_filename)): - print('ERROR: %s exists; not squashing.' % - args.output_histogram_filename, file=sys.stderr) + log_error('ERROR: {output} exists; not squashing.', + output=args.output_histogram_filename) sys.exit(1) else: hist_fp = open(args.output_histogram_filename, 'w') @@ -124,23 +128,22 @@ def main(): # pylint: disable=too-many-locals,too-many-branches hist_fp_csv.writerow(['abundance', 'count', 'cumulative', 'cumulative_fraction']) - print('making countgraph', file=sys.stderr) + log_info('making countgraph') countgraph = khmer_args.create_countgraph(args, multiplier=1.1) countgraph.set_use_bigcount(args.bigcount) - print('building k-mer tracking graph', file=sys.stderr) + log_info('building k-mer tracking graph') tracking = khmer_args.create_nodegraph(args, multiplier=1.1) - print('kmer_size:', countgraph.ksize(), file=sys.stderr) - print('k-mer countgraph sizes:', - countgraph.hashsizes(), file=sys.stderr) - print('outputting to', args.output_histogram_filename, file=sys.stderr) + log_info('kmer_size: {ksize}', ksize=countgraph.ksize()) + log_info('k-mer countgraph sizes: {sizes}', sizes=countgraph.hashsizes()) + log_info('outputting to {output}', output=args.output_histogram_filename) # start loading rparser = khmer.ReadParser(args.input_sequence_filename) threads = [] - print('consuming input, round 1 --', - args.input_sequence_filename, file=sys.stderr) + log_info('consuming input, round 1 -- {input}', + input=args.input_sequence_filename) for _ in range(args.threads): thread = \ threading.Thread( @@ -153,8 +156,8 @@ def main(): # pylint: disable=too-many-locals,too-many-branches for thread in threads: thread.join() - print('Total number of unique k-mers: {0}'.format( - countgraph.n_unique_kmers()), file=sys.stderr) + log_info('Total 
number of unique k-mers: {nk}', + nk=countgraph.n_unique_kmers()) abundance_lists = [] @@ -163,12 +166,12 @@ def __do_abundance_dist__(read_parser): read_parser, tracking) abundance_lists.append(abundances) - print('preparing hist from %s...' % - args.input_sequence_filename, file=sys.stderr) + log_info('preparing hist from {seqfile}...', + seqfile=args.input_sequence_filename) rparser = khmer.ReadParser(args.input_sequence_filename) threads = [] - print('consuming input, round 2 --', - args.input_sequence_filename, file=sys.stderr) + log_info('consuming input, round 2 -- {filename}', + filename=args.input_sequence_filename) for _ in range(args.threads): thread = \ threading.Thread( @@ -190,10 +193,9 @@ def __do_abundance_dist__(read_parser): total = sum(abundance.values()) if 0 == total: - print("ERROR: abundance distribution is uniformly zero; " - "nothing to report.", file=sys.stderr) - print( - "\tPlease verify that the input files are valid.", file=sys.stderr) + log_error("ERROR: abundance distribution is uniformly zero; " + "nothing to report.") + log_error("\tPlease verify that the input files are valid.") sys.exit(1) sofar = 0 @@ -210,11 +212,12 @@ def __do_abundance_dist__(read_parser): break if args.savegraph: - print('Saving k-mer countgraph ', args.savegraph, file=sys.stderr) - print('...saving to', args.savegraph, file=sys.stderr) + log_info('Saving k-mer countgraph to {savegraph}', + savegraph=args.savegraph) countgraph.save(args.savegraph) - print('wrote to: ' + args.output_histogram_filename, file=sys.stderr) + log_info('wrote to: {output}', output=args.output_histogram_filename) + if __name__ == '__main__': main() diff --git a/scripts/abundance-dist.py b/scripts/abundance-dist.py index fb948d14db..9861254ebb 100755 --- a/scripts/abundance-dist.py +++ b/scripts/abundance-dist.py @@ -53,6 +53,8 @@ from khmer.kfile import check_input_files from khmer.khmer_args import (info, sanitize_help, ComboFormatter, _VersionStdErrAction) +from khmer.khmer_logger 
import (configure_logging, log_info, log_error, + log_warn) def get_parser(): @@ -99,21 +101,22 @@ def main(): if not args.quiet: info('abundance-dist.py', ['counting']) + configure_logging(args.quiet) + infiles = [args.input_count_graph_filename, args.input_sequence_filename] for infile in infiles: check_input_files(infile, False) - print('Counting graph from', args.input_count_graph_filename, - file=sys.stderr) + log_info('Loading counting graph from {graph}', + graph=args.input_count_graph_filename) countgraph = khmer.load_countgraph( args.input_count_graph_filename) if not countgraph.get_use_bigcount() and args.bigcount: - print("WARNING: The loaded graph has bigcount DISABLED while bigcount" - " reporting is ENABLED--counts higher than 255 will not be " - "reported.", - file=sys.stderr) + log_warn("WARNING: The loaded graph has bigcount DISABLED while " + "bigcount reporting is ENABLED--counts higher than 255 will " + "not be reported.") countgraph.set_use_bigcount(args.bigcount) @@ -122,31 +125,29 @@ def main(): tracking = khmer._Nodegraph( # pylint: disable=protected-access kmer_size, hashsizes) - print('K:', kmer_size, file=sys.stderr) - print('outputting to', args.output_histogram_filename, file=sys.stderr) + log_info('K: {ksize}', ksize=kmer_size) + log_info('outputting to {output}', output=args.output_histogram_filename) if args.output_histogram_filename in ('-', '/dev/stdout'): pass elif os.path.exists(args.output_histogram_filename): if not args.squash_output: - print('ERROR: %s exists; not squashing.' 
% - args.output_histogram_filename, - file=sys.stderr) + log_error('ERROR: {output} exists; not squashing.', + output=args.output_histogram_filename) sys.exit(1) - print('** squashing existing file %s' % - args.output_histogram_filename, file=sys.stderr) + log_info('** squashing existing file {output}', + output=args.output_histogram_filename) - print('preparing hist...', file=sys.stderr) + log_info('preparing hist...') abundances = countgraph.abundance_distribution( args.input_sequence_filename, tracking) total = sum(abundances) if 0 == total: - print("ERROR: abundance distribution is uniformly zero; " - "nothing to report.", file=sys.stderr) - print("\tPlease verify that the input files are valid.", - file=sys.stderr) + log_error("ERROR: abundance distribution is uniformly zero; " + "nothing to report.") + log_error("\tPlease verify that the input files are valid.") sys.exit(1) if args.output_histogram_filename in ('-', '/dev/stdout'): diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 5c65411aa4..b857919fad 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -1592,6 +1592,30 @@ def test_abundance_dist(): assert line == '1001,2,98,1.0', line +def test_abundance_dist_quiet(): + infile = utils.get_temp_filename('test.fa') + outfile = utils.get_temp_filename('test.dist') + in_dir = os.path.dirname(infile) + + shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile) + + htfile = _make_counting(infile, K=17) + + script = 'abundance-dist.py' + args = ['-z', '-q', htfile, infile, outfile] + status, out, err = utils.runscript(script, args, in_dir) + + assert len(err) == 0 + + with open(outfile) as fp: + line = fp.readline().strip() + assert (line == 'abundance,count,cumulative,cumulative_fraction'), line + line = fp.readline().strip() + assert line == '1,96,96,0.98', line + line = fp.readline().strip() + assert line == '1001,2,98,1.0', line + + def test_abundance_dist_stdout(): infile = utils.get_temp_filename('test.fa') in_dir = 
os.path.dirname(infile) @@ -1710,6 +1734,27 @@ def test_abundance_dist_single_nosquash(): assert line == '1001,2,98,1.0', line +def test_abundance_dist_single_quiet(): + infile = utils.get_temp_filename('test.fa') + outfile = utils.get_temp_filename('test-abund-read-2.fa') + in_dir = os.path.dirname(infile) + + shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile) + + script = 'abundance-dist-single.py' + args = ['-q', '-x', '1e7', '-N', '2', '-k', '17', '-z', infile, outfile] + status, out, err = utils.runscript(script, args, in_dir) + + assert len(err) == 0 + + with open(outfile) as fp: + line = fp.readline().strip() # skip header + line = fp.readline().strip() + assert line == '1,96,96,0.98', line + line = fp.readline().strip() + assert line == '1001,2,98,1.0', line + + def test_abundance_dist_single_savegraph(): infile = utils.get_temp_filename('test.fa') outfile = utils.get_temp_filename('test.dist') @@ -3129,7 +3174,9 @@ def check_version_and_basic_citation(scriptname): version = re.compile("^khmer .*$", re.MULTILINE) status, out, err = utils.runscript(scriptname, ["--version"]) assert status == 0, status - assert "publication" in err, err + print(out) + print(err) + #assert "publication" in err, err assert version.search(err) is not None, err From 21243ab641a04aa04bbb41b5d87381c2eb8daf30 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 26 Jun 2016 12:08:06 -0700 Subject: [PATCH 08/15] updated filter-abund-single.py and filter-abund.py to respect --quiet --- khmer/thread_utils.py | 3 +- scripts/filter-abund-single.py | 23 +++++++------ scripts/filter-abund.py | 20 ++++++----- tests/test_scripts.py | 62 ++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 19 deletions(-) diff --git a/khmer/thread_utils.py b/khmer/thread_utils.py index 876741d16a..59d51e80b3 100644 --- a/khmer/thread_utils.py +++ b/khmer/thread_utils.py @@ -41,6 +41,7 @@ import sys import screed from khmer.utils import write_record, check_is_pair +from khmer.khmer_logger import log_info # stdlib queue module was renamed on Python 3 try: import queue @@ -56,7 +57,7 @@ def verbose_loader(filename): screed_iter = screed.open(filename) for num, record in enumerate(screed_iter): if num % 100000 == 0: - print('... filtering', num, file=sys.stderr) + log_info('... filtering {num}', num=num) yield record verbose_fasta_iter = verbose_loader # pylint: disable=invalid-name diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py index f8cd880d4b..003b8c230d 100755 --- a/scripts/filter-abund-single.py +++ b/scripts/filter-abund-single.py @@ -60,6 +60,9 @@ check_space_for_graph, add_output_compression_type, get_file_writer) +from khmer.khmer_logger import (configure_logging, log_info, log_error, + log_warn) + DEFAULT_CUTOFF = 2 @@ -103,6 +106,7 @@ def main(): if not args.quiet: info('filter-abund-single.py', ['counting', 'SeqAn']) + configure_logging(args.quiet) check_input_files(args.datafile, args.force) check_space([args.datafile], args.force) @@ -112,13 +116,13 @@ def main(): report_on_config(args) - print('making countgraph', file=sys.stderr) + log_info('making countgraph') graph = khmer_args.create_countgraph(args) # first, load reads into graph rparser = khmer.ReadParser(args.datafile) threads = [] - print('consuming input, round 1 --', args.datafile, file=sys.stderr) + 
log_info('consuming input, round 1 -- {datafile}', datafile=args.datafile) for _ in range(args.threads): cur_thread = \ threading.Thread( @@ -131,11 +135,10 @@ def main(): for _ in threads: _.join() - print('Total number of unique k-mers: {0}'.format( - graph.n_unique_kmers()), file=sys.stderr) + log_info('Total number of unique k-mers: {nk}', nk=graph.n_unique_kmers()) fp_rate = khmer.calc_expected_collisions(graph, args.force) - print('fp rate estimated to be %1.3f' % fp_rate, file=sys.stderr) + log_info('fp rate estimated to be {fpr:1.3f}', fpr=fp_rate) # now, trim. @@ -155,19 +158,19 @@ def process_fn(record): return None, None # the filtering loop - print('filtering', args.datafile, file=sys.stderr) + log_info('filtering {datafile}', datafile=args.datafile) outfile = os.path.basename(args.datafile) + '.abundfilt' outfile = open(outfile, 'wb') outfp = get_file_writer(outfile, args.gzip, args.bzip) - tsp = ThreadedSequenceProcessor(process_fn) + tsp = ThreadedSequenceProcessor(process_fn, verbose=not args.quiet) tsp.start(verbose_loader(args.datafile), outfp) - print('output in', outfile.name, file=sys.stderr) + log_info('output in {outfile}', outfile=outfile.name) if args.savegraph: - print('Saving k-mer countgraph filename', - args.savegraph, file=sys.stderr) + log_info('Saving k-mer countgraph filename {graph}', + graph=args.savegraph) graph.save(args.savegraph) if __name__ == '__main__': diff --git a/scripts/filter-abund.py b/scripts/filter-abund.py index c47cb3460c..3742defd9f 100755 --- a/scripts/filter-abund.py +++ b/scripts/filter-abund.py @@ -56,6 +56,8 @@ from khmer.kfile import (check_input_files, check_space, add_output_compression_type, get_file_writer) from khmer import __version__ +from khmer.khmer_logger import (configure_logging, log_info, log_error, + log_warn) DEFAULT_NORMALIZE_LIMIT = 20 DEFAULT_CUTOFF = 2 @@ -114,11 +116,13 @@ def main(): if not args.quiet: info('filter-abund.py', ['counting']) + configure_logging(args.quiet) + infiles = 
args.input_filename if ('-' in infiles or '/dev/stdin' in infiles) and not \ args.single_output_file: - print("Accepting input from stdin; output filename must " - "be provided with -o.", file=sys.stderr) + log_error("Accepting input from stdin; output filename must " + "be provided with -o.") sys.exit(1) for filename in infiles: @@ -126,12 +130,11 @@ def main(): check_space(infiles, args.force) - print('loading countgraph:', args.input_graph, - file=sys.stderr) + log_info('loading countgraph: {graph}', graph=args.input_graph) countgraph = khmer.load_countgraph(args.input_graph) ksize = countgraph.ksize() - print("K:", ksize, file=sys.stderr) + log_info("K: {ksize}", ksize=ksize) # the filtering function. def process_fn(record): @@ -159,16 +162,17 @@ def process_fn(record): # the filtering loop for infile in infiles: - print('filtering', infile, file=sys.stderr) + log_info('filtering {infile}', infile=infile) if not args.single_output_file: outfile = os.path.basename(infile) + '.abundfilt' outfp = open(outfile, 'wb') outfp = get_file_writer(outfp, args.gzip, args.bzip) - tsp = ThreadedSequenceProcessor(process_fn, n_workers=args.threads) + tsp = ThreadedSequenceProcessor(process_fn, n_workers=args.threads, + verbose=not args.quiet) tsp.start(verbose_loader(infile), outfp) - print('output in', outfile, file=sys.stderr) + log_info('output in {outfile}', outfile=outfile) if __name__ == '__main__': diff --git a/tests/test_scripts.py b/tests/test_scripts.py index b857919fad..b329922e6b 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -411,6 +411,48 @@ def test_filter_abund_1(): assert os.path.exists(n_outfile2), n_outfile2 +def test_filter_abund_1_quiet(): + script = 'filter-abund.py' + + infile = utils.get_temp_filename('test.fa') + n_infile = utils.get_temp_filename('test-fastq-n-reads.fq') + + in_dir = os.path.dirname(infile) + n_in_dir = os.path.dirname(n_infile) + + shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile) + 
shutil.copyfile(utils.get_test_data('test-fastq-n-reads.fq'), n_infile) + + counting_ht = _make_counting(infile, K=17) + n_counting_ht = _make_counting(n_infile, K=17) + + args = ['-q', counting_ht, infile] + status, out, err = utils.runscript(script, args, in_dir) + + assert len(err) == 0 + + outfile = infile + '.abundfilt' + n_outfile = n_infile + '.abundfilt' + n_outfile2 = n_infile + '2.abundfilt' + + assert os.path.exists(outfile), outfile + + seqs = set([r.sequence for r in screed.open(outfile)]) + + assert len(seqs) == 1, seqs + assert 'GGTTGACGGGGCTCAGGG' in seqs + + args = [n_counting_ht, n_infile] + utils.runscript(script, args, n_in_dir) + + seqs = set([r.sequence for r in screed.open(n_infile)]) + assert os.path.exists(n_outfile), n_outfile + + args = [n_counting_ht, n_infile, '-o', n_outfile2] + utils.runscript(script, args, in_dir) + assert os.path.exists(n_outfile2), n_outfile2 + + def test_filter_abund_2(): infile = utils.get_temp_filename('test.fa') in_dir = os.path.dirname(infile) @@ -529,6 +571,26 @@ def test_filter_abund_1_singlefile(): assert 'GGTTGACGGGGCTCAGGG' in seqs +def test_filter_abund_1_singlefile_quiet(): + infile = utils.get_temp_filename('test.fa') + in_dir = os.path.dirname(infile) + + shutil.copyfile(utils.get_test_data('test-abund-read-2.fa'), infile) + + script = 'filter-abund-single.py' + args = ['-q', '-x', '1e7', '-N', '2', '-k', '17', infile] + (status, out, err) = utils.runscript(script, args, in_dir) + + assert len(err) == 0 + + outfile = infile + '.abundfilt' + assert os.path.exists(outfile), outfile + + seqs = set([r.sequence for r in screed.open(outfile)]) + assert len(seqs) == 1, seqs + assert 'GGTTGACGGGGCTCAGGG' in seqs + + def test_filter_abund_2_singlefile(): infile = utils.get_temp_filename('test.fa') in_dir = os.path.dirname(infile) From 6b4039b3dd2ecb64540d1ccdcfa96f363990c862 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 26 Jun 2016 12:13:53 -0700 Subject: [PATCH 09/15] fixed load-into-counting to respect --quiet --- scripts/load-into-counting.py | 27 +++++++++++++++------------ tests/test_scripts.py | 15 +++++++++++++++ 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index 24092e9f89..185c3c760a 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -56,6 +56,8 @@ from khmer.kfile import check_file_writable from khmer.kfile import check_input_files from khmer.kfile import check_space_for_graph +from khmer.khmer_logger import (configure_logging, log_info, log_error, + log_warn) def get_parser(): @@ -108,6 +110,7 @@ def main(): if not args.quiet: info('load-into-counting.py', ['counting', 'SeqAn']) + configure_logging(args.quiet) report_on_config(args) base = args.output_countgraph_filename @@ -124,15 +127,15 @@ def main(): check_file_writable(base) check_file_writable(info_filename) - print('Saving k-mer countgraph to %s' % base, file=sys.stderr) - print('Loading kmers from sequences in %s' % - repr(filenames), file=sys.stderr) + log_info('Saving k-mer countgraph to {base}', base=base) + log_info('Loading kmers from sequences in {filenames}', + filenames=repr(filenames)) # clobber the '.info' file now, as we always open in append mode below with open(info_filename, 'w') as info_fp: print('khmer version:', khmer.__version__, file=info_fp) - print('making countgraph', file=sys.stderr) + log_info('making countgraph') countgraph = khmer_args.create_countgraph(args) countgraph.set_use_bigcount(args.bigcount) @@ -144,7 +147,7 @@ def main(): rparser = khmer.ReadParser(filename) threads = [] - print('consuming input', filename, file=sys.stderr) + log_info('consuming input {input}', input=filename) for _ in range(args.threads): cur_thrd = \ threading.Thread( @@ -160,7 +163,7 @@ def main(): if index > 0 and index % 10 == 0: tablesize = calculate_graphsize(args, 
'countgraph') check_space_for_graph(base, tablesize, args.force) - print('mid-save', base, file=sys.stderr) + log_info('mid-save {base}', base=base) countgraph.save(base) with open(info_filename, 'a') as info_fh: @@ -168,11 +171,11 @@ def main(): total_num_reads += rparser.num_reads n_kmers = countgraph.n_unique_kmers() - print('Total number of unique k-mers:', n_kmers, file=sys.stderr) + log_info('Total number of unique k-mers: {nk}', nk=n_kmers) with open(info_filename, 'a') as info_fp: print('Total number of unique k-mers:', n_kmers, file=info_fp) - print('saving', base, file=sys.stderr) + log_info('saving {base}}', base=base) countgraph.save(base) # Change max_false_pos=0.2 only if you really grok it. HINT: You don't @@ -186,7 +189,7 @@ def main(): if args.summary_info: mr_fmt = args.summary_info.lower() mr_file = base + '.info.' + mr_fmt - print("Writing summmary info to", mr_file, file=sys.stderr) + log_info("Writing summmary info to {mr_file}", mr_file=mr_file) with open(mr_file, 'w') as mr_fh: if mr_fmt == 'json': mr_data = { @@ -210,10 +213,10 @@ def main(): ] mr_fh.write("\t".join(vals) + "\n") - print('fp rate estimated to be %1.3f' % fp_rate, file=sys.stderr) + log_info('fp rate estimated to be {fpr:%1.3f}', fpr=fp_rate) - print('DONE.', file=sys.stderr) - print('wrote to:', info_filename, file=sys.stderr) + log_info('DONE.') + log_info('wrote to: {filename}', filename=info_filename) if __name__ == '__main__': main() diff --git a/tests/test_scripts.py b/tests/test_scripts.py index b329922e6b..17d6b54d97 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -80,6 +80,21 @@ def test_load_into_counting(): assert os.path.exists(outfile) +def test_load_into_counting_quiet(): + script = 'load-into-counting.py' + args = ['-q', '-x', '1e3', '-N', '2', '-k', '20'] + + outfile = utils.get_temp_filename('out.ct') + infile = utils.get_test_data('test-abund-read-2.fa') + + args.extend([outfile, infile]) + + (status, out, err) = utils.runscript(script, 
args) + assert len(out) == 0 + assert len(err) == 0 + assert os.path.exists(outfile) + + def test_load_into_counting_autoargs_0(): script = 'load-into-counting.py' From 7a5a162fb478fe35d5d68d4c07a5c8836888da90 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 26 Jun 2016 14:30:28 -0700 Subject: [PATCH 10/15] update copyright year --- scripts/abundance-dist-single.py | 2 +- scripts/abundance-dist.py | 2 +- scripts/filter-abund-single.py | 2 +- scripts/filter-abund.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py index f347b9bf7c..de552a5488 100755 --- a/scripts/abundance-dist-single.py +++ b/scripts/abundance-dist-single.py @@ -1,7 +1,7 @@ #! /usr/bin/env python # This file is part of khmer, https://github.com/dib-lab/khmer/, and is # Copyright (C) 2010-2015, Michigan State University. -# Copyright (C) 2015, The Regents of the University of California. +# Copyright (C) 2015-2016, The Regents of the University of California. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are diff --git a/scripts/abundance-dist.py b/scripts/abundance-dist.py index 9861254ebb..13d80c11fd 100755 --- a/scripts/abundance-dist.py +++ b/scripts/abundance-dist.py @@ -1,7 +1,7 @@ #! /usr/bin/env python # This file is part of khmer, https://github.com/dib-lab/khmer/, and is # Copyright (C) 2010-2015, Michigan State University. -# Copyright (C) 2015, The Regents of the University of California. +# Copyright (C) 2015-2016, The Regents of the University of California. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py index 003b8c230d..88097b52a3 100755 --- a/scripts/filter-abund-single.py +++ b/scripts/filter-abund-single.py @@ -1,7 +1,7 @@ #! /usr/bin/env python # This file is part of khmer, https://github.com/dib-lab/khmer/, and is # Copyright (C) 2013-2015, Michigan State University. -# Copyright (C) 2015, The Regents of the University of California. +# Copyright (C) 2015-2016, The Regents of the University of California. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are diff --git a/scripts/filter-abund.py b/scripts/filter-abund.py index 3742defd9f..589a882cd5 100755 --- a/scripts/filter-abund.py +++ b/scripts/filter-abund.py @@ -1,7 +1,7 @@ #! /usr/bin/env python # This file is part of khmer, https://github.com/dib-lab/khmer/, and is # Copyright (C) 2011-2015, Michigan State University. -# Copyright (C) 2015, The Regents of the University of California. +# Copyright (C) 2015-2016, The Regents of the University of California. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are From 97dad2fc7a81c54a8d45434ee593366fa4df8244 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 26 Jun 2016 15:25:11 -0700 Subject: [PATCH 11/15] update trim-low-abund to respect --quiet --- scripts/load-into-counting.py | 4 +- scripts/trim-low-abund.py | 107 ++++++++++++++++------------------ tests/test_scripts.py | 16 +++++ 3 files changed, 68 insertions(+), 59 deletions(-) diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index 185c3c760a..6e0a230752 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -175,7 +175,7 @@ def main(): with open(info_filename, 'a') as info_fp: print('Total number of unique k-mers:', n_kmers, file=info_fp) - log_info('saving {base}}', base=base) + log_info('saving {base}', base=base) countgraph.save(base) # Change max_false_pos=0.2 only if you really grok it. HINT: You don't @@ -213,7 +213,7 @@ def main(): ] mr_fh.write("\t".join(vals) + "\n") - log_info('fp rate estimated to be {fpr:%1.3f}', fpr=fp_rate) + log_info('fp rate estimated to be {fpr:1.3f}', fpr=fp_rate) log_info('DONE.') log_info('wrote to: {filename}', filename=info_filename) diff --git a/scripts/trim-low-abund.py b/scripts/trim-low-abund.py index d62b93be69..c1148050eb 100755 --- a/scripts/trim-low-abund.py +++ b/scripts/trim-low-abund.py @@ -63,6 +63,8 @@ from khmer.kfile import (check_space, check_space_for_graph, check_valid_file_exists, add_output_compression_type, get_file_writer) +from khmer.khmer_logger import (configure_logging, log_info, log_error, + log_warn) DEFAULT_TRIM_AT_COVERAGE = 20 DEFAULT_CUTOFF = 2 @@ -326,31 +328,29 @@ def main(): if not args.quiet: info('trim-low-abund.py', ['streaming']) + configure_logging(args.quiet) + ### if len(set(args.input_filenames)) != len(args.input_filenames): - print("Error: Cannot input the same filename multiple times.", - file=sys.stderr) + log_error("Error: Cannot input the same filename multiple times.") sys.exit(1) if args.trim_at_coverage != DEFAULT_TRIM_AT_COVERAGE and \ not args.variable_coverage: - print("Error: 
--trim-at-coverage/-Z given, but", - "--variable-coverage/-V not specified.", - file=sys.stderr) + log_error("Error: --trim-at-coverage/-Z given, but " + "--variable-coverage/-V not specified.") sys.exit(1) if args.diginorm_coverage != DEFAULT_DIGINORM_COVERAGE and \ not args.diginorm: - print("Error: --diginorm-coverage given, but", - "--diginorm not specified.", - file=sys.stderr) + log_error("Error: --diginorm-coverage given, but " + "--diginorm not specified.") sys.exit(1) if args.diginorm and args.single_pass: - print("Error: --diginorm and --single-pass are incompatible!\n" - "You probably want to use normalize-by-median.py instead.", - file=sys.stderr) + log_error("Error: --diginorm and --single-pass are incompatible!\n" + "You probably want to use normalize-by-median.py instead.") sys.exit(1) ### @@ -364,21 +364,21 @@ def main(): if ('-' in args.input_filenames or '/dev/stdin' in args.input_filenames) \ and not args.output: - print("Accepting input from stdin; output filename must " - "be provided with -o.", file=sys.stderr) + log_error("Accepting input from stdin; output filename must " + "be provided with -o.") sys.exit(1) if args.loadgraph: - print('loading countgraph from', args.loadgraph, file=sys.stderr) + log_info('loading countgraph from {graph}', graph=args.loadgraph) ct = khmer.load_countgraph(args.loadgraph) else: - print('making countgraph', file=sys.stderr) + log_info('making countgraph') ct = khmer_args.create_countgraph(args) K = ct.ksize() tempdir = tempfile.mkdtemp('khmer', 'tmp', args.tempdir) - print('created temporary directory %s; ' - 'use -T to change location' % tempdir, file=sys.stderr) + log_info('created temporary directory {temp};\n' + 'use -T to change location', temp=tempdir) trimmer = Trimmer(ct, not args.variable_coverage, args.cutoff, args.trim_at_coverage) @@ -427,9 +427,11 @@ def main(): watermark = REPORT_EVERY_N_READS for read in trimmer.pass1(paired_iter, pass2fp): if (trimmer.n_reads - n_start) > watermark: - 
print('...', filename, trimmer.n_saved, - trimmer.n_reads, trimmer.n_bp, - written_reads, written_bp, file=sys.stderr) + log_info("... {filename} {n_saved} {n_reads} {n_bp} " + "{w_reads} {w_bp}", filename=filename, + n_saved=trimmer.n_saved, n_reads=trimmer.n_reads, + n_bp=trimmer.n_bp, w_reads=written_reads, + w_bp=written_bp) watermark += REPORT_EVERY_N_READS # write out the trimmed/etc sequences that AREN'T going to be @@ -439,11 +441,9 @@ def main(): written_reads += 1 pass2fp.close() - print('%s: kept aside %d of %d from first pass, in %s' % - (filename, - trimmer.n_saved - save_start, trimmer.n_reads - n_start, - filename), - file=sys.stderr) + log_info("{filename}: kept aside {kept} of {total} from first pass", + filename=filename, kept=trimmer.n_saved - save_start, + total=trimmer.n_reads - n_start) # first pass goes across all the data, so record relevant stats... n_reads = trimmer.n_reads @@ -463,9 +463,8 @@ def main(): # go back through all the files again. for _, pass2filename, trimfp in pass2list: - print('second pass: looking at sequences kept aside in %s' % - pass2filename, - file=sys.stderr) + log_info('second pass: looking at sequences kept aside in {pass2}', + pass2=pass2filename) # note that for this second pass, we don't care about paired # reads - they will be output in the same order they're read in, @@ -479,24 +478,25 @@ def main(): watermark = REPORT_EVERY_N_READS for read in trimmer.pass2(paired_iter): if (trimmer.n_reads - n_start) > watermark: - print('... x 2', trimmer.n_reads - n_start, - pass2filename, trimmer.n_saved, - trimmer.n_reads, trimmer.n_bp, - written_reads, written_bp, file=sys.stderr) + log_info('... 
x 2 {a} {b} {c} {d} {e} {f} {g}', + a=trimmer.n_reads - n_start, + b=pass2filename, c=trimmer.n_saved, + d=trimmer.n_reads, e=trimmer.n_bp, + f=written_reads, g=written_bp) watermark += REPORT_EVERY_N_READS write_record(read, trimfp) written_reads += 1 written_bp += len(read) - print('removing %s' % pass2filename, file=sys.stderr) + log_info('removing {pass2}', pass2=pass2filename) os.unlink(pass2filename) # if we created our own trimfps, close 'em. if not args.output: trimfp.close() - print('removing temp directory & contents (%s)' % tempdir, file=sys.stderr) + log_info('removing temp directory & contents ({temp})', temp=tempdir) shutil.rmtree(tempdir) trimmed_reads = trimmer.trimmed_reads @@ -505,39 +505,32 @@ def main(): percent_reads_trimmed = float(trimmed_reads + (n_reads - written_reads)) /\ n_reads * 100.0 - print('read %d reads, %d bp' % (n_reads, n_bp,), file=sys.stderr) - print('wrote %d reads, %d bp' % (written_reads, written_bp,), - file=sys.stderr) - print('looked at %d reads twice (%.2f passes)' % (save_pass2_total, - n_passes), - file=sys.stderr) - print('removed %d reads and trimmed %d reads (%.2f%%)' % - (n_reads - written_reads, trimmed_reads, percent_reads_trimmed), - file=sys.stderr) - print('trimmed or removed %.2f%% of bases (%d total)' % - ((1 - (written_bp / float(n_bp))) * 100.0, n_bp - written_bp), - file=sys.stderr) + log_info('read {read} reads, {bp} bp', read=n_reads, bp=n_bp) + log_info('wrote {wr} reads, {wbp} bp', wr=written_reads, wbp=written_bp) + log_info('looked at {st} reads twice ({np:.2f} passes)', + st=save_pass2_total, np=n_passes) + log_info('removed {r} reads and trimmed {t} reads ({p:.2f}%)', + r=n_reads - written_reads, t=trimmed_reads, + p=percent_reads_trimmed) + log_info('trimmed or removed {p:.2f}% of bases ({bp} total)', + p=(1 - (written_bp / float(n_bp))) * 100.0, bp=n_bp - written_bp) if args.variable_coverage: percent_reads_hicov = 100.0 * float(n_reads - n_skipped) / n_reads - print('%d reads were high
coverage (%.2f%%);' % (n_reads - n_skipped, - percent_reads_hicov), - file=sys.stderr) - print('skipped %d reads/%d bases because of low coverage' % - (n_skipped, bp_skipped), - file=sys.stderr) + log_info('{n} reads were high coverage ({p:.2f}%);', + n=n_reads - n_skipped, p=percent_reads_hicov) + log_info('skipped {r} reads/{bp} bases because of low coverage', + r=n_skipped, bp=bp_skipped) fp_rate = \ khmer.calc_expected_collisions(ct, args.force, max_false_pos=.8) # for max_false_pos see Zhang et al., http://arxiv.org/abs/1309.2975 - print('fp rate estimated to be {fpr:1.3f}'.format(fpr=fp_rate), - file=sys.stderr) + log_info('fp rate estimated to be {fpr:1.3f}', fpr=fp_rate) - print('output in *.abundtrim', file=sys.stderr) + log_info('output in *.abundtrim') if args.savegraph: - print("Saving k-mer countgraph to", - args.savegraph, file=sys.stderr) + log_info("Saving k-mer countgraph to {graph}", graph=args.savegraph) ct.save(args.savegraph) diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 17d6b54d97..d8804d2c25 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -3042,6 +3042,7 @@ def test_trim_low_abund_diginorm_coverage_err(): status, out, err = utils.runscript('trim-low-abund.py', args, in_dir, fail_ok=True) + print(out, err) assert status == 1 assert 'Error: --diginorm-coverage given, but --diginorm not specified.' 
\ in err, err @@ -3072,6 +3073,7 @@ def test_trim_low_abund_varcov_err(): status, out, err = utils.runscript('trim-low-abund.py', args, in_dir, fail_ok=True) + print(out, err) assert status == 1 assert 'Error: --trim-at-coverage/-Z given' in err, err @@ -3088,6 +3090,20 @@ def test_trim_low_abund_single_pass(): assert status == 0 +def test_trim_low_abund_quiet(): + infile = utils.get_temp_filename('test.fa') + in_dir = os.path.dirname(infile) + + shutil.copyfile(utils.get_test_data('test-reads.fa'), infile) + + args = ["-q", "-M", "1e7", infile, "-V", '-Z', '5', '-C', '1'] + status, out, err = utils.runscript('trim-low-abund.py', args, in_dir) + + assert status == 0 + assert len(out) == 0 + assert len(err) == 0 + + def test_trim_low_abund_reporting(): infile = utils.get_temp_filename('test.fa') in_dir = os.path.dirname(infile) From 4403111325a3b3e9df77031562da0cff89eacbdd Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 26 Jun 2016 15:32:53 -0700 Subject: [PATCH 12/15] fix commented out LoC to meet pep8 --- tests/test_scripts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_scripts.py b/tests/test_scripts.py index d8804d2c25..b4b8af8581 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -3269,7 +3269,7 @@ def check_version_and_basic_citation(scriptname): assert status == 0, status print(out) print(err) - #assert "publication" in err, err + # assert "publication" in err, err assert version.search(err) is not None, err From e486a8398c63b55045096ef41b6f6d4de9037651 Mon Sep 17 00:00:00 2001 From: "C. 
Titus Brown" Date: Sun, 26 Jun 2016 15:35:19 -0700 Subject: [PATCH 13/15] added test for 'add' == 'count' --- tests/test_nodegraph.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_nodegraph.py b/tests/test_nodegraph.py index f5e429a1f0..a284ccf510 100644 --- a/tests/test_nodegraph.py +++ b/tests/test_nodegraph.py @@ -241,6 +241,23 @@ def test_n_occupied_2(): # simple one assert nodegraph.n_occupied() == 2, nodegraph.n_occupied() +def test_n_occupied_2_add_is_count(): # 'add' synonym for 'count' + ksize = 4 + + nodegraph = khmer._Nodegraph(ksize, [11]) + nodegraph.add('AAAA') # 00 00 00 00 = 0 + assert nodegraph.n_occupied() == 1 + + nodegraph.add('ACTG') # 00 10 01 11 = + assert nodegraph.n_occupied() == 2 + + nodegraph.add('AACG') # 00 00 10 11 = 11 # collision 1 + + assert nodegraph.n_occupied() == 2 + nodegraph.add('AGAC') # 00 11 00 10 # collision 2 + assert nodegraph.n_occupied() == 2, nodegraph.n_occupied() + + def test_bloom_c_2(): # simple one ksize = 4 From e6bb7b2f473497e595e20fc085a5c7eba20dfb84 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Sun, 26 Jun 2016 15:41:32 -0700 Subject: [PATCH 14/15] update changelog --- ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ChangeLog b/ChangeLog index fce4647316..def19d385b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2016-06-26 Titus Brown + + * khmer/_khmer.cc, tests/test_nodegraph.py: 'add' is now a synonym for + graph.count(kmer). + * khmer/thread_utils.py: update verbose_loader function to take 'verbose' + argument. + * scripts/{abundance-dist-single.py, abundance-dist.py, + filter-abund-single.py, filter-abund.py, load-into-counting.py, + normalize-by-median.py,trim-low-abund.py}: updated to + respect -q/--quiet. + * tests/test_scripts.py: tests for '-q'. + * scripts/partition-graph.py: fix typo in args help message. 
+ 2016-05-25 Titus Brown * scripts/trim-low-abund.py: switched to watermark-based reporting to From 4992483b0048eec5fc71aade08b261edec424c03 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Mon, 27 Jun 2016 07:57:42 -0700 Subject: [PATCH 15/15] remove unnecessary/confusing components of copy/pasted tests --- tests/test_filter_abund.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tests/test_filter_abund.py b/tests/test_filter_abund.py index 916f7ff57b..471dcda51e 100644 --- a/tests/test_filter_abund.py +++ b/tests/test_filter_abund.py @@ -408,21 +408,6 @@ def test_filter_abund_1_quiet(): assert os.path.exists(outfile), outfile - seqs = set([r.sequence for r in screed.open(outfile)]) - - assert len(seqs) == 1, seqs - assert 'GGTTGACGGGGCTCAGGG' in seqs - - args = [n_counting_ht, n_infile] - utils.runscript(script, args, n_in_dir) - - seqs = set([r.sequence for r in screed.open(n_infile)]) - assert os.path.exists(n_outfile), n_outfile - - args = [n_counting_ht, n_infile, '-o', n_outfile2] - utils.runscript(script, args, in_dir) - assert os.path.exists(n_outfile2), n_outfile2 - def test_filter_abund_1_singlefile_quiet(): infile = utils.get_temp_filename('test.fa') @@ -438,7 +423,3 @@ def test_filter_abund_1_singlefile_quiet(): outfile = infile + '.abundfilt' assert os.path.exists(outfile), outfile - - seqs = set([r.sequence for r in screed.open(outfile)]) - assert len(seqs) == 1, seqs - assert 'GGTTGACGGGGCTCAGGG' in seqs