Merge pull request #626 from b-wyss/fix/scriptstderr
Changed stdout output in three scripts to stderr
mr-c committed Nov 14, 2014
2 parents d44e6d1 + 8945ab5 commit 482da12
Showing 20 changed files with 229 additions and 179 deletions.
10 changes: 10 additions & 0 deletions ChangeLog
@@ -3,6 +3,16 @@
     * do-partition.py: Add type=int to n_threads arg and assert to check
     number of active threads
 
+2014-10-10 Brian Wyss <wyssbria@msu.edu>
+
+    * khmer/scripts/{abundance-dist, abundance-dist-single,
+    annotate-partitions, count-median, count-overlap, do-partition,
+    extract-paired-reads, extract-partitions, filter-abund, filter-abund-single,
+    filter-stoptags, find-knots, load-graph, load-into-counting,
+    make-initial-stoptags, merge-partitions, normalize-by-median,
+    partition-graph, sample-reads-randomly}.py:
+    changed stdout output in scripts to go to stderr.
+
 2014-10-06 Michael R. Crusoe <mcrusoe@msu.edu>
 
     * Doxyfile.in: add links to the stdc++ docs
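The change the ChangeLog describes follows one consistent idiom: progress and diagnostic chatter moves to stderr, leaving stdout free for data a downstream tool might consume. A minimal sketch of the idiom (the status() helper is hypothetical, not part of khmer; written with print_function so it runs under Python 2 and 3):

    from __future__ import print_function
    import sys

    def status(*args):
        # Hypothetical helper: send progress messages to stderr so that
        # `script.py input > results.txt` captures only real output.
        print(*args, file=sys.stderr)

    status('loading k-mer counting table')  # diagnostic -> stderr
    print('kmer\tcount')                    # data -> stdout, safe to redirect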
24 changes: 14 additions & 10 deletions scripts/abundance-dist-single.py
@@ -81,26 +81,28 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
     else:
         hist_fp = open(args.output_histogram_filename, 'w')
 
-    print 'making k-mer counting table'
+    print >>sys.stderr, 'making k-mer counting table'
     counting_hash = khmer.new_counting_hash(args.ksize, args.min_tablesize,
                                             args.n_tables,
                                             args.threads)
     counting_hash.set_use_bigcount(args.bigcount)
 
-    print 'building k-mer tracking table'
+    print >> sys.stderr, 'building k-mer tracking table'
     tracking = khmer.new_hashbits(counting_hash.ksize(), args.min_tablesize,
                                   args.n_tables)
 
-    print 'kmer_size:', counting_hash.ksize()
-    print 'k-mer counting table sizes:', counting_hash.hashsizes()
-    print 'outputting to', args.output_histogram_filename
+    print >>sys.stderr, 'kmer_size:', counting_hash.ksize()
+    print >>sys.stderr, 'k-mer counting table sizes:', \
+        counting_hash.hashsizes()
+    print >>sys.stderr, 'outputting to', args.output_histogram_filename
 
     khmer.get_config().set_reads_input_buffer_size(args.threads * 64 * 1024)
 
     # start loading
     rparser = khmer.ReadParser(args.input_sequence_filename, args.threads)
     threads = []
-    print 'consuming input, round 1 --', args.input_sequence_filename
+    print >>sys.stderr, 'consuming input, round 1 --', \
+        args.input_sequence_filename
     for _ in xrange(args.threads):
         thread = \
             threading.Thread(
@@ -124,10 +126,12 @@ def __do_abundance_dist__(read_parser):
             read_parser, tracking)
         abundance_lists.append(abundances)
 
-    print 'preparing hist from %s...' % args.input_sequence_filename
+    print >>sys.stderr, 'preparing hist from %s...' % \
+        args.input_sequence_filename
     rparser = khmer.ReadParser(args.input_sequence_filename, args.threads)
     threads = []
-    print 'consuming input, round 2 --', args.input_sequence_filename
+    print >>sys.stderr, 'consuming input, round 2 --', \
+        args.input_sequence_filename
     for _ in xrange(args.threads):
         thread = \
             threading.Thread(
@@ -169,8 +173,8 @@ def __do_abundance_dist__(read_parser):
             break
 
     if args.savetable:
-        print 'Saving k-mer counting table ', args.savetable
-        print '...saving to', args.savetable
+        print >>sys.stderr, 'Saving k-mer counting table ', args.savetable
+        print >>sys.stderr, '...saving to', args.savetable
         counting_hash.save(args.savetable)
 
     print >> sys.stderr, 'wrote to: ' + args.output_histogram_filename
14 changes: 8 additions & 6 deletions scripts/abundance-dist.py
@@ -55,7 +55,8 @@ def main():
     for infile in infiles:
         check_file_status(infile)
 
-    print('hashtable from', args.input_counting_table_filename)
+    print('hashtable from', args.input_counting_table_filename,
+          file=sys.stderr)
     counting_hash = khmer.load_counting_hash(
         args.input_counting_table_filename)
 
@@ -64,9 +65,9 @@ def main():
     tracking = khmer._new_hashbits(  # pylint: disable=protected-access
         kmer_size, hashsizes)
 
-    print('K:', kmer_size)
-    print('HT sizes:', hashsizes)
-    print('outputting to', args.output_histogram_filename)
+    print('K:', kmer_size, file=sys.stderr)
+    print('HT sizes:', hashsizes, file=sys.stderr)
+    print('outputting to', args.output_histogram_filename, file=sys.stderr)
 
     if os.path.exists(args.output_histogram_filename):
         if not args.squash_output:
@@ -75,9 +76,10 @@ def main():
                   file=sys.stderr)
             sys.exit(1)
 
-    print('** squashing existing file %s' % args.output_histogram_filename)
+    print('** squashing existing file %s' %
+          args.output_histogram_filename, file=sys.stderr)
 
-    print('preparing hist...')
+    print('preparing hist...', file=sys.stderr)
     abundances = counting_hash.abundance_distribution(
         args.input_sequence_filename, tracking)
     total = sum(abundances)
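Unlike the other scripts in this commit, abundance-dist.py uses function-style print with a file= keyword rather than the Python 2 `print >>sys.stderr` statement. Under Python 2 that syntax only works if the module enables the print function, presumably via a __future__ import outside the hunks shown; a sketch of the assumed setup:

    # Assumed (not shown above) near the top of abundance-dist.py:
    from __future__ import print_function
    import sys

    print('preparing hist...', file=sys.stderr)  # keyword form, Python 2 and 3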
10 changes: 6 additions & 4 deletions scripts/annotate-partitions.py
@@ -20,6 +20,7 @@
 import argparse
 import textwrap
 import khmer
+import sys
 from khmer.file import check_file_status, check_space
 from khmer.khmer_args import info
 
@@ -73,15 +74,16 @@ def main():
 
     check_space(filenames)
 
-    print 'loading partition map from:', partitionmap_file
+    print >>sys.stderr, 'loading partition map from:', partitionmap_file
     htable.load_partitionmap(partitionmap_file)
 
     for infile in filenames:
-        print 'outputting partitions for', infile
+        print >>sys.stderr, 'outputting partitions for', infile
         outfile = os.path.basename(infile) + '.part'
         part_count = htable.output_partitions(infile, outfile)
-        print 'output %d partitions for %s' % (part_count, infile)
-        print 'partitions are in', outfile
+        print >>sys.stderr, 'output %d partitions for %s' % (
+            part_count, infile)
+        print >>sys.stderr, 'partitions are in', outfile
 
 if __name__ == '__main__':
     main()
5 changes: 3 additions & 2 deletions scripts/count-median.py
@@ -22,6 +22,7 @@
 import screed
 import argparse
 import khmer
+import sys
 from khmer.file import check_file_status, check_space
 from khmer.khmer_args import info
 import textwrap
@@ -67,11 +68,11 @@ def main():
 
     check_space(infiles)
 
-    print 'loading k-mer counting table from', htfile
+    print >>sys.stderr, 'loading k-mer counting table from', htfile
     htable = khmer.load_counting_hash(htfile)
     ksize = htable.ksize()
 
-    print 'writing to', output_filename
+    print >>sys.stderr, 'writing to', output_filename
     output = open(output_filename, 'w')
 
     for record in screed.open(input_filename):
2 changes: 1 addition & 1 deletion scripts/count-overlap.py
@@ -58,7 +58,7 @@ def main():
 
     check_space([args.ptfile, args.fafile])
 
-    print 'loading k-mer presence table from', args.ptfile
+    print >>sys.stderr, 'loading k-mer presence table from', args.ptfile
     ht1 = khmer.load_hashbits(args.ptfile)
     kmer_size = ht1.ksize()
 
64 changes: 34 additions & 30 deletions scripts/do-partition.py
@@ -35,7 +35,7 @@
 import platform
 if "Linux" == platform.system():
     def __debug_vm_usage(msg):
-        print "===> DEBUG: " + msg
+        print >>sys.stderr, "===> DEBUG: " + msg
         for vmstat in re.findall(r".*Vm.*", file("/proc/self/status").read()):
             print vmstat
 else:
@@ -48,22 +48,22 @@ def worker(queue, basename, stop_big_traversals):
         try:
             (htable, index, start, stop) = queue.get(False)
         except Queue.Empty:
-            print 'exiting'
+            print >>sys.stderr, 'exiting'
             return
 
         outfile = basename + '.subset.%d.pmap' % (index,)
         if os.path.exists(outfile):
-            print 'SKIPPING', outfile, ' -- already exists'
+            print >>sys.stderr, 'SKIPPING', outfile, ' -- already exists'
             continue
 
-        print 'starting:', basename, index
+        print >>sys.stderr, 'starting:', basename, index
 
         # pay attention to stoptags when partitioning; take command line
         # direction on whether or not to exhaustively traverse.
         subset = htable.do_subset_partition(start, stop, True,
                                             stop_big_traversals)
 
-        print 'saving:', basename, index
+        print >>sys.stderr, 'saving:', basename, index
         htable.save_subset_partitionmap(subset, outfile)
         del subset
         gc.collect()
@@ -113,25 +113,25 @@ def main():  # pylint: disable=too-many-locals,too-many-statements
 
     check_space(args.input_filenames)
 
-    print 'Saving k-mer presence table to %s' % args.graphbase
-    print 'Loading kmers from sequences in %s' % repr(args.input_filenames)
-
-    print '--'
-    print 'SUBSET SIZE', args.subset_size
-    print 'N THREADS', args.n_threads
-    print '--'
+    print >>sys.stderr, 'Saving k-mer presence table to %s' % args.graphbase
+    print >>sys.stderr, 'Loading kmers from sequences in %s' % \
+        repr(args.input_filenames)
+    print >>sys.stderr, '--'
+    print >>sys.stderr, 'SUBSET SIZE', args.subset_size
+    print >>sys.stderr, 'N THREADS', args.n_threads
+    print >>sys.stderr, '--'
 
     # load-graph
 
-    print 'making k-mer presence table'
+    print >>sys.stderr, 'making k-mer presence table'
     htable = khmer.new_hashbits(args.ksize, args.min_tablesize, args.n_tables)
 
     for _, filename in enumerate(args.input_filenames):
-        print 'consuming input', filename
+        print >>sys.stderr, 'consuming input', filename
         htable.consume_fasta_and_tag(filename)
 
     fp_rate = khmer.calc_expected_collisions(htable)
-    print 'fp rate estimated to be %1.3f' % fp_rate
+    print >>sys.stderr, 'fp rate estimated to be %1.3f' % fp_rate
     if fp_rate > 0.15:  # 0.18 is ACTUAL MAX. Do not change.
         print >> sys.stderr, "**"
         print >> sys.stderr, ("** ERROR: the graph structure is too small for"
@@ -145,9 +145,11 @@ def main():  # pylint: disable=too-many-locals,too-many-statements
     # do we want to exhaustively traverse the graph?
     stop_big_traversals = args.no_big_traverse
     if stop_big_traversals:
-        print '** This script brakes for lumps: stop_big_traversals is true.'
+        print >>sys.stderr, '** This script brakes for lumps: ', \
+            'stop_big_traversals is true.'
     else:
-        print '** Traverse all the things: stop_big_traversals is false.'
+        print >>sys.stderr, '** Traverse all the things:', \
+            ' stop_big_traversals is false.'
 
     #
     # now, partition!
@@ -167,16 +169,16 @@ def main():  # pylint: disable=too-many-locals,too-many-statements
         end = divvy[_ + 1]
         worker_q.put((htable, _, start, end))
 
-    print 'enqueued %d subset tasks' % n_subsets
+    print >>sys.stderr, 'enqueued %d subset tasks' % n_subsets
     open('%s.info' % args.graphbase, 'w').write('%d subsets total\n'
                                                 % (n_subsets))
 
     if n_subsets < args.n_threads:
         args.n_threads = n_subsets
 
     # start threads!
-    print 'starting %d threads' % args.n_threads
-    print '---'
+    print >>sys.stderr, 'starting %d threads' % args.n_threads
+    print >>sys.stderr, '---'
 
     threads = []
     for _ in range(args.n_threads):
@@ -188,41 +190,43 @@ def main():  # pylint: disable=too-many-locals,too-many-statements
 
     assert threading.active_count() == args.n_threads + 1
 
-    print 'done starting threads'
+    print >>sys.stderr, 'done starting threads'
 
     # wait for threads
     for _ in threads:
         _.join()
 
-    print '---'
-    print 'done making subsets! see %s.subset.*.pmap' % (args.graphbase,)
+    print >>sys.stderr, '---'
+    print >>sys.stderr, 'done making subsets! see %s.subset.*.pmap' % \
+        (args.graphbase,)
 
     # merge-partitions
 
     pmap_files = glob.glob(args.graphbase + '.subset.*.pmap')
 
-    print 'loading %d pmap files (first one: %s)' % (len(pmap_files),
-                                                     pmap_files[0])
+    print >>sys.stderr, 'loading %d pmap files (first one: %s)' % \
+        (len(pmap_files), pmap_files[0])
 
     htable = khmer.new_hashbits(args.ksize, 1, 1)
 
     for pmap_file in pmap_files:
-        print 'merging', pmap_file
+        print >>sys.stderr, 'merging', pmap_file
         htable.merge_subset_from_disk(pmap_file)
 
     if args.remove_subsets:
-        print 'removing pmap files'
+        print >>sys.stderr, 'removing pmap files'
         for pmap_file in pmap_files:
             os.unlink(pmap_file)
 
     # annotate-partitions
 
     for infile in args.input_filenames:
-        print 'outputting partitions for', infile
+        print >>sys.stderr, 'outputting partitions for', infile
         outfile = os.path.basename(infile) + '.part'
         part_count = htable.output_partitions(infile, outfile)
-        print 'output %d partitions for %s' % (part_count, infile)
-        print 'partitions are in', outfile
+        print >>sys.stderr, 'output %d partitions for %s' % (
+            part_count, infile)
+        print >>sys.stderr, 'partitions are in', outfile
 
 if __name__ == '__main__':
     main()
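For orientation, the worker() function patched above is a standard queue-drained thread pool: main() pre-loads a Queue with (htable, index, start, stop) tasks, and each thread pulls tasks until Queue.Empty, then returns. A stripped-down, runnable sketch of that structure (Python 2, matching the script; the actual partitioning call is elided):

    import sys
    import threading
    import Queue  # Python 2 stdlib module; renamed 'queue' in Python 3

    def worker(task_q):
        while 1:
            try:
                index, start, stop = task_q.get(False)  # non-blocking get
            except Queue.Empty:
                print >>sys.stderr, 'exiting'  # queue drained; thread is done
                return
            # ... htable.do_subset_partition(start, stop, ...) goes here ...
            print >>sys.stderr, 'finished subset', index

    task_q = Queue.Queue()
    for i in range(4):                           # four toy tasks
        task_q.put((i, i * 100, (i + 1) * 100))

    threads = [threading.Thread(target=worker, args=(task_q,))
               for _ in range(2)]                # two workers drain the queue
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()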
11 changes: 6 additions & 5 deletions scripts/extract-paired-reads.py
@@ -92,9 +92,9 @@ def main():
     single_fp = open(outfile + '.se', 'w')
     paired_fp = open(outfile + '.pe', 'w')
 
-    print 'reading file "%s"' % args.infile
-    print 'outputting interleaved pairs to "%s.pe"' % outfile
-    print 'outputting orphans to "%s.se"' % outfile
+    print >>sys.stderr, 'reading file "%s"' % args.infile
+    print >>sys.stderr, 'outputting interleaved pairs to "%s.pe"' % outfile
+    print >>sys.stderr, 'outputting orphans to "%s.se"' % outfile
 
     last_record = None
     last_name = None
@@ -141,8 +141,9 @@ def main():
     if n_pe == 0:
         raise Exception("no paired reads!? check file formats...")
 
-    print 'DONE; read %d sequences, %d pairs and %d singletons' % \
-        (index + 1, n_pe, n_se)
+    print >>sys.stderr, 'DONE; read %d sequences,' \
+        ' %d pairs and %d singletons' % \
+        (index + 1, n_pe, n_se)
 
     print >> sys.stderr, 'wrote to: ' + outfile \
         + '.se' + ' and ' + outfile + '.pe'
