Skip to content

Commit

Permalink
Merge pull request #87 from dib-lab/partition/determ
Browse files (browse the repository at this point in the history)
Make "kevlar partition" order deterministic
  • Loading branch information
standage committed Jul 5, 2017
2 parents e295406 + 6fef5fd commit 87db6b2
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 3 deletions.
6 changes: 5 additions & 1 deletion kevlar/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,12 @@ def main(args):
debugout = args.logfile

reads, kmers = load_reads_and_kmers(kevlar.open(args.augfastq, 'r'),
- debugout)
+ args.logfile)
+ inputreads = list(reads)
+ message = 'loaded {:d} reads'.format(len(inputreads))
+ message += ' and {:d} interesting k-mers'.format(len(kmers))
+ print('[kevlar::assemble]', message, file=args.logfile)

graph = kevlar.overlap.graph_init_strict(reads, kmers, args.min_abund,
args.max_abund, debugout)
if args.gml:
Expand Down
5 changes: 4 additions & 1 deletion kevlar/overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,10 @@ def write_partitions(read_graph, reads, ccprefix, logstream):
n = 0
reads_in_ccs = 0
cclog = open(ccprefix + '.cc.log', 'w')
- for n, cc in enumerate(networkx.connected_components(read_graph)):
+ ccs = sorted(networkx.connected_components(read_graph), reverse=True,
+ # Sort first by number of reads, then by read names
+ key=lambda c: (len(c), sorted(c)))
+ for n, cc in enumerate(ccs):
print('CC', n, len(cc), cc, sep='\t', file=cclog)
reads_in_ccs += len(cc)
outfilename = '{:s}.cc{:d}.augfastq.gz'.format(ccprefix, n)
Expand Down
2 changes: 1 addition & 1 deletion kevlar/seqio.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def load_reads_and_kmers(instream, logstream=None):
kmers = defaultdict(set)
for n, record in enumerate(kevlar.parse_augmented_fastx(instream), 1):
if logstream and n % 10000 == 0: # pragma: no cover
- print('[kevlar::assemble] loaded {:d} reads'.format(n),
+ print('[kevlar::seqio] loaded {:d} reads'.format(n),
file=logstream)
reads[record.name] = record
for kmer in record.ikmers:
Expand Down

0 comments on commit 87db6b2

Please sign in to comment.