Merge pull request #626 from b-wyss/fix/scriptstderr
Changed stdout output in three scripts to stderr
mr-c committed Nov 14, 2014
2 parents d44e6d1 + 8945ab5 commit 482da12
Showing 20 changed files with 229 additions and 179 deletions.
10 changes: 10 additions & 0 deletions ChangeLog
@@ -3,6 +3,16 @@
     * do-partition.py: Add type=int to n_threads arg and assert to check
     number of active threads
 
+2014-10-10 Brian Wyss <wyssbria@msu.edu>
+
+    * khmer/scripts/{abundance-dist, abundance-dist-single,
+    annotate-partitions, count-median, count-overlap, do-partition,
+    extract-paired-reads, extract-partitions, filter-abund, filter-abund-single,
+    filter-stoptags, find-knots, load-graph, load-into-counting,
+    make-initial-stoptags, merge-partitions, normalize-by-median,
+    partition-graph, sample-reads-randomly}.py:
+    changed stdout output in scripts to go to stderr.
+
 2014-10-06 Michael R. Crusoe <mcrusoe@msu.edu>
 
     * Doxyfile.in: add links to the stdc++ docs
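The change the ChangeLog describes follows one consistent idiom: progress and diagnostic chatter moves to stderr, leaving stdout free for data a downstream tool might consume. A minimal sketch of the idiom (the status() helper is hypothetical, not part of khmer; written with print_function so it runs under Python 2 and 3):

    from __future__ import print_function
    import sys

    def status(*args):
        # Hypothetical helper: send progress messages to stderr so that
        # `script.py input > results.txt` captures only real output.
        print(*args, file=sys.stderr)

    status('loading k-mer counting table')  # diagnostic -> stderr
    print('kmer\tcount')                    # data -> stdout, safe to redirect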
24 changes: 14 additions & 10 deletions scripts/abundance-dist-single.py
@@ -81,26 +81,28 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
     else:
         hist_fp = open(args.output_histogram_filename, 'w')
 
-    print 'making k-mer counting table'
+    print >>sys.stderr, 'making k-mer counting table'
     counting_hash = khmer.new_counting_hash(args.ksize, args.min_tablesize,
                                             args.n_tables,
                                             args.threads)
     counting_hash.set_use_bigcount(args.bigcount)
 
-    print 'building k-mer tracking table'
+    print >> sys.stderr, 'building k-mer tracking table'
     tracking = khmer.new_hashbits(counting_hash.ksize(), args.min_tablesize,
                                   args.n_tables)
 
-    print 'kmer_size:', counting_hash.ksize()
-    print 'k-mer counting table sizes:', counting_hash.hashsizes()
-    print 'outputting to', args.output_histogram_filename
+    print >>sys.stderr, 'kmer_size:', counting_hash.ksize()
+    print >>sys.stderr, 'k-mer counting table sizes:', \
+        counting_hash.hashsizes()
+    print >>sys.stderr, 'outputting to', args.output_histogram_filename
 
     khmer.get_config().set_reads_input_buffer_size(args.threads * 64 * 1024)
 
     # start loading
     rparser = khmer.ReadParser(args.input_sequence_filename, args.threads)
     threads = []
-    print 'consuming input, round 1 --', args.input_sequence_filename
+    print >>sys.stderr, 'consuming input, round 1 --', \
+        args.input_sequence_filename
     for _ in xrange(args.threads):
         thread = \
             threading.Thread(
@@ -124,10 +126,12 @@ def __do_abundance_dist__(read_parser):
             read_parser, tracking)
         abundance_lists.append(abundances)
 
-    print 'preparing hist from %s...' % args.input_sequence_filename
+    print >>sys.stderr, 'preparing hist from %s...' % \
+        args.input_sequence_filename
     rparser = khmer.ReadParser(args.input_sequence_filename, args.threads)
     threads = []
-    print 'consuming input, round 2 --', args.input_sequence_filename
+    print >>sys.stderr, 'consuming input, round 2 --', \
+        args.input_sequence_filename
     for _ in xrange(args.threads):
         thread = \
             threading.Thread(
@@ -169,8 +173,8 @@ def __do_abundance_dist__(read_parser):
             break
 
     if args.savetable:
-        print 'Saving k-mer counting table ', args.savetable
-        print '...saving to', args.savetable
+        print >>sys.stderr, 'Saving k-mer counting table ', args.savetable
+        print >>sys.stderr, '...saving to', args.savetable
         counting_hash.save(args.savetable)
 
     print >> sys.stderr, 'wrote to: ' + args.output_histogram_filename
14 changes: 8 additions & 6 deletions scripts/abundance-dist.py
@@ -55,7 +55,8 @@ def main():
     for infile in infiles:
         check_file_status(infile)
 
-    print('hashtable from', args.input_counting_table_filename)
+    print('hashtable from', args.input_counting_table_filename,
+          file=sys.stderr)
     counting_hash = khmer.load_counting_hash(
         args.input_counting_table_filename)
 
@@ -64,9 +65,9 @@ def main():
     tracking = khmer._new_hashbits(  # pylint: disable=protected-access
         kmer_size, hashsizes)
 
-    print('K:', kmer_size)
-    print('HT sizes:', hashsizes)
-    print('outputting to', args.output_histogram_filename)
+    print('K:', kmer_size, file=sys.stderr)
+    print('HT sizes:', hashsizes, file=sys.stderr)
+    print('outputting to', args.output_histogram_filename, file=sys.stderr)
 
     if os.path.exists(args.output_histogram_filename):
         if not args.squash_output:
@@ -75,9 +76,10 @@ def main():
                   file=sys.stderr)
             sys.exit(1)
 
-    print('** squashing existing file %s' % args.output_histogram_filename)
+    print('** squashing existing file %s' %
+          args.output_histogram_filename, file=sys.stderr)
 
-    print('preparing hist...')
+    print('preparing hist...', file=sys.stderr)
     abundances = counting_hash.abundance_distribution(
         args.input_sequence_filename, tracking)
     total = sum(abundances)
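Unlike the other scripts in this commit, abundance-dist.py uses function-style print with a file= keyword rather than the Python 2 `print >>sys.stderr` statement. Under Python 2 that syntax only works if the module enables the print function, presumably via a __future__ import outside the hunks shown; a sketch of the assumed setup:

    # Assumed (not shown above) near the top of abundance-dist.py:
    from __future__ import print_function
    import sys

    print('preparing hist...', file=sys.stderr)  # keyword form, Python 2 and 3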
10 changes: 6 additions & 4 deletions scripts/annotate-partitions.py
@@ -20,6 +20,7 @@
 import argparse
 import textwrap
 import khmer
+import sys
 from khmer.file import check_file_status, check_space
 from khmer.khmer_args import info
 
@@ -73,15 +74,16 @@ def main():
 
     check_space(filenames)
 
-    print 'loading partition map from:', partitionmap_file
+    print >>sys.stderr, 'loading partition map from:', partitionmap_file
     htable.load_partitionmap(partitionmap_file)
 
     for infile in filenames:
-        print 'outputting partitions for', infile
+        print >>sys.stderr, 'outputting partitions for', infile
         outfile = os.path.basename(infile) + '.part'
         part_count = htable.output_partitions(infile, outfile)
-        print 'output %d partitions for %s' % (part_count, infile)
-        print 'partitions are in', outfile
+        print >>sys.stderr, 'output %d partitions for %s' % (
+            part_count, infile)
+        print >>sys.stderr, 'partitions are in', outfile
 
 if __name__ == '__main__':
     main()
5 changes: 3 additions & 2 deletions scripts/count-median.py
@@ -22,6 +22,7 @@
 import screed
 import argparse
 import khmer
+import sys
 from khmer.file import check_file_status, check_space
 from khmer.khmer_args import info
 import textwrap
@@ -67,11 +68,11 @@ def main():
 
     check_space(infiles)
 
-    print 'loading k-mer counting table from', htfile
+    print >>sys.stderr, 'loading k-mer counting table from', htfile
     htable = khmer.load_counting_hash(htfile)
     ksize = htable.ksize()
 
-    print 'writing to', output_filename
+    print >>sys.stderr, 'writing to', output_filename
     output = open(output_filename, 'w')
 
     for record in screed.open(input_filename):
2 changes: 1 addition & 1 deletion scripts/count-overlap.py
@@ -58,7 +58,7 @@ def main():
 
     check_space([args.ptfile, args.fafile])
 
-    print 'loading k-mer presence table from', args.ptfile
+    print >>sys.stderr, 'loading k-mer presence table from', args.ptfile
     ht1 = khmer.load_hashbits(args.ptfile)
     kmer_size = ht1.ksize()
 
64 changes: 34 additions & 30 deletions scripts/do-partition.py
@@ -35,7 +35,7 @@
 import platform
 if "Linux" == platform.system():
     def __debug_vm_usage(msg):
-        print "===> DEBUG: " + msg
+        print >>sys.stderr, "===> DEBUG: " + msg
         for vmstat in re.findall(r".*Vm.*", file("/proc/self/status").read()):
             print vmstat
 else:
@@ -48,22 +48,22 @@ def worker(queue, basename, stop_big_traversals):
         try:
             (htable, index, start, stop) = queue.get(False)
         except Queue.Empty:
-            print 'exiting'
+            print >>sys.stderr, 'exiting'
             return
 
         outfile = basename + '.subset.%d.pmap' % (index,)
         if os.path.exists(outfile):
-            print 'SKIPPING', outfile, ' -- already exists'
+            print >>sys.stderr, 'SKIPPING', outfile, ' -- already exists'
             continue
 
-        print 'starting:', basename, index
+        print >>sys.stderr, 'starting:', basename, index
 
         # pay attention to stoptags when partitioning; take command line
         # direction on whether or not to exhaustively traverse.
         subset = htable.do_subset_partition(start, stop, True,
                                             stop_big_traversals)
 
-        print 'saving:', basename, index
+        print >>sys.stderr, 'saving:', basename, index
         htable.save_subset_partitionmap(subset, outfile)
         del subset
         gc.collect()
@@ -113,25 +113,25 @@ def main():  # pylint: disable=too-many-locals,too-many-statements
 
     check_space(args.input_filenames)
 
-    print 'Saving k-mer presence table to %s' % args.graphbase
-    print 'Loading kmers from sequences in %s' % repr(args.input_filenames)
-
-    print '--'
-    print 'SUBSET SIZE', args.subset_size
-    print 'N THREADS', args.n_threads
-    print '--'
+    print >>sys.stderr, 'Saving k-mer presence table to %s' % args.graphbase
+    print >>sys.stderr, 'Loading kmers from sequences in %s' % \
+        repr(args.input_filenames)
+    print >>sys.stderr, '--'
+    print >>sys.stderr, 'SUBSET SIZE', args.subset_size
+    print >>sys.stderr, 'N THREADS', args.n_threads
+    print >>sys.stderr, '--'
 
     # load-graph
 
-    print 'making k-mer presence table'
+    print >>sys.stderr, 'making k-mer presence table'
     htable = khmer.new_hashbits(args.ksize, args.min_tablesize, args.n_tables)
 
     for _, filename in enumerate(args.input_filenames):
-        print 'consuming input', filename
+        print >>sys.stderr, 'consuming input', filename
         htable.consume_fasta_and_tag(filename)
 
     fp_rate = khmer.calc_expected_collisions(htable)
-    print 'fp rate estimated to be %1.3f' % fp_rate
+    print >>sys.stderr, 'fp rate estimated to be %1.3f' % fp_rate
     if fp_rate > 0.15:  # 0.18 is ACTUAL MAX. Do not change.
         print >> sys.stderr, "**"
         print >> sys.stderr, ("** ERROR: the graph structure is too small for"
@@ -145,9 +145,11 @@ def main():  # pylint: disable=too-many-locals,too-many-statements
     # do we want to exhaustively traverse the graph?
     stop_big_traversals = args.no_big_traverse
     if stop_big_traversals:
-        print '** This script brakes for lumps: stop_big_traversals is true.'
+        print >>sys.stderr, '** This script brakes for lumps: ', \
+            'stop_big_traversals is true.'
     else:
-        print '** Traverse all the things: stop_big_traversals is false.'
+        print >>sys.stderr, '** Traverse all the things:', \
+            ' stop_big_traversals is false.'
 
     #
     # now, partition!
@@ -167,16 +169,16 @@ def main():  # pylint: disable=too-many-locals,too-many-statements
         end = divvy[_ + 1]
         worker_q.put((htable, _, start, end))
 
-    print 'enqueued %d subset tasks' % n_subsets
+    print >>sys.stderr, 'enqueued %d subset tasks' % n_subsets
     open('%s.info' % args.graphbase, 'w').write('%d subsets total\n'
                                                 % (n_subsets))
 
     if n_subsets < args.n_threads:
         args.n_threads = n_subsets
 
     # start threads!
-    print 'starting %d threads' % args.n_threads
-    print '---'
+    print >>sys.stderr, 'starting %d threads' % args.n_threads
+    print >>sys.stderr, '---'
 
     threads = []
     for _ in range(args.n_threads):
@@ -188,41 +190,43 @@ def main():  # pylint: disable=too-many-locals,too-many-statements
 
     assert threading.active_count() == args.n_threads + 1
 
-    print 'done starting threads'
+    print >>sys.stderr, 'done starting threads'
 
     # wait for threads
     for _ in threads:
         _.join()
 
-    print '---'
-    print 'done making subsets! see %s.subset.*.pmap' % (args.graphbase,)
+    print >>sys.stderr, '---'
+    print >>sys.stderr, 'done making subsets! see %s.subset.*.pmap' % \
+        (args.graphbase,)
 
     # merge-partitions
 
     pmap_files = glob.glob(args.graphbase + '.subset.*.pmap')
 
-    print 'loading %d pmap files (first one: %s)' % (len(pmap_files),
-                                                     pmap_files[0])
+    print >>sys.stderr, 'loading %d pmap files (first one: %s)' % \
+        (len(pmap_files), pmap_files[0])
 
     htable = khmer.new_hashbits(args.ksize, 1, 1)
 
     for pmap_file in pmap_files:
-        print 'merging', pmap_file
+        print >>sys.stderr, 'merging', pmap_file
         htable.merge_subset_from_disk(pmap_file)
 
     if args.remove_subsets:
-        print 'removing pmap files'
+        print >>sys.stderr, 'removing pmap files'
         for pmap_file in pmap_files:
             os.unlink(pmap_file)
 
     # annotate-partitions
 
     for infile in args.input_filenames:
-        print 'outputting partitions for', infile
+        print >>sys.stderr, 'outputting partitions for', infile
         outfile = os.path.basename(infile) + '.part'
         part_count = htable.output_partitions(infile, outfile)
-        print 'output %d partitions for %s' % (part_count, infile)
-        print 'partitions are in', outfile
+        print >>sys.stderr, 'output %d partitions for %s' % (
+            part_count, infile)
+        print >>sys.stderr, 'partitions are in', outfile
 
 if __name__ == '__main__':
     main()
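For orientation, the worker() function patched above is a standard queue-drained thread pool: main() pre-loads a Queue with (htable, index, start, stop) tasks, and each thread pulls tasks until Queue.Empty, then returns. A stripped-down, runnable sketch of that structure (Python 2, matching the script; the actual partitioning call is elided):

    import sys
    import threading
    import Queue  # Python 2 stdlib module; renamed 'queue' in Python 3

    def worker(task_q):
        while 1:
            try:
                index, start, stop = task_q.get(False)  # non-blocking get
            except Queue.Empty:
                print >>sys.stderr, 'exiting'  # queue drained; thread is done
                return
            # ... htable.do_subset_partition(start, stop, ...) goes here ...
            print >>sys.stderr, 'finished subset', index

    task_q = Queue.Queue()
    for i in range(4):                           # four toy tasks
        task_q.put((i, i * 100, (i + 1) * 100))

    threads = [threading.Thread(target=worker, args=(task_q,))
               for _ in range(2)]                # two workers drain the queue
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()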
11 changes: 6 additions & 5 deletions scripts/extract-paired-reads.py
@@ -92,9 +92,9 @@ def main():
     single_fp = open(outfile + '.se', 'w')
     paired_fp = open(outfile + '.pe', 'w')
 
-    print 'reading file "%s"' % args.infile
-    print 'outputting interleaved pairs to "%s.pe"' % outfile
-    print 'outputting orphans to "%s.se"' % outfile
+    print >>sys.stderr, 'reading file "%s"' % args.infile
+    print >>sys.stderr, 'outputting interleaved pairs to "%s.pe"' % outfile
+    print >>sys.stderr, 'outputting orphans to "%s.se"' % outfile
 
     last_record = None
     last_name = None
@@ -141,8 +141,9 @@ def main():
     if n_pe == 0:
         raise Exception("no paired reads!? check file formats...")
 
-    print 'DONE; read %d sequences, %d pairs and %d singletons' % \
-        (index + 1, n_pe, n_se)
+    print >>sys.stderr, 'DONE; read %d sequences,' \
+        ' %d pairs and %d singletons' % \
+        (index + 1, n_pe, n_se)
 
     print >> sys.stderr, 'wrote to: ' + outfile \
         + '.se' + ' and ' + outfile + '.pe'
