Skip to content

Commit

Permalink
Merge pull request #73 from dib-lab/fix/open
Browse files Browse the repository at this point in the history
Replace argparse.FileType with kevlar.open package-wide
  • Loading branch information
standage committed May 11, 2017
2 parents 4c3e153 + eb3e581 commit 7ee0e72
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 34 deletions.
4 changes: 4 additions & 0 deletions kevlar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
except: # pragma: no cover
import builtins
from collections import namedtuple
import sys
from kevlar import seqio
from kevlar.seqio import parse_augmented_fastq, print_augmented_fastq
from kevlar import dump
Expand All @@ -35,6 +36,9 @@
def open(filename, mode):
if mode not in ['r', 'w']:
raise ValueError('invalid mode "{}"'.format(mode))
if filename in ['-', None]:
filehandle = sys.stdin if mode == 'r' else sys.stdout
return filehandle
openfunc = builtins.open
if filename.endswith('.gz'):
openfunc = gzopen
Expand Down
12 changes: 6 additions & 6 deletions kevlar/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,15 @@ def subparser(subparsers):
subparser = subparsers.add_parser('assemble')
subparser.add_argument('-d', '--debug', action='store_true',
help='print debugging output')
subparser.add_argument('-o', '--out', metavar='FILE', default=sys.stdout,
type=argparse.FileType('w'),
subparser.add_argument('-o', '--out', metavar='FILE',
help='output file; default is terminal (stdout)')
subparser.add_argument('--gml', metavar='FILE',
help='write graph to .gml file')
subparser.add_argument('-x', '--max-abund', type=int, metavar='X',
default=500, help='discard interesting k-mers that '
'occur more than X times')
subparser.add_argument('augfastq', type=argparse.FileType('r'),
help='annotated reads in augmented Fastq format')
subparser.add_argument('augfastq', help='annotated reads in augmented '
'Fastq format')


def print_read_pair(read1, pos1, read2, pos2, ksize, offset, overlap,
Expand Down Expand Up @@ -244,7 +243,7 @@ def main(args):
if args.debug:
debugout = args.logfile

reads, kmers = load_reads(args.augfastq, debugout)
reads, kmers = load_reads(kevlar.open(args.augfastq, 'r'), debugout)
inputreads = list(reads.keys())
graph = graph_init(reads, kmers, args.max_abund, debugout)
if args.gml:
Expand Down Expand Up @@ -318,13 +317,14 @@ def main(args):

contigcount = 0
unassembledcount = 0
outstream = kevlar.open(args.out, 'w')
for seqname in graph.nodes():
if seqname in inputreads:
unassembledcount += 1
continue
contigcount += 1
contigrecord = reads[seqname]
kevlar.print_augmented_fastq(contigrecord, args.out)
kevlar.print_augmented_fastq(contigrecord, outstream)

assembledcount = len(inputreads) - unassembledcount
message = '[kevlar::assemble] assembled'
Expand Down
8 changes: 4 additions & 4 deletions kevlar/dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,8 @@ def subparser(subparsers):
' is 2G')
subparser.add_argument('--mask-k', metavar='K', default=31, type=int,
help='k size for genome mask')
subparser.add_argument('--out', metavar='FILE',
type=argparse.FileType('w'),
help='output file; default is terminal (stdout)')
subparser.add_argument('--out', metavar='FILE', help='output file; default'
' is terminal (stdout)')
subparser.add_argument('refr', help='reference sequence in Fasta format')
subparser.add_argument('reads', help='read alignments in BAM format')

Expand All @@ -66,6 +65,7 @@ def main(args):
genomemask.consume_seqfile(args.genomemask)

bam = pysam.AlignmentFile(args.reads, 'rb')
fastq = kevlar.open(args.out, 'w')
for i, record in enumerate(bam):
if i > 0 and i % 50000 == 0:
print('...processed', i, 'records', file=args.logfile)
Expand Down Expand Up @@ -116,4 +116,4 @@ def main(args):
name += suffix

print('@', name, '\n', record.seq, '\n+\n', record.qual, sep='',
file=args.out)
file=fastq)
10 changes: 5 additions & 5 deletions kevlar/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,9 @@ def subparser(subparsers):
help='show this help message and exit')
misc_args.add_argument('-k', '--ksize', type=int, default=31, metavar='K',
help='k-mer size; default is 31')
misc_args.add_argument('-o', '--out', type=argparse.FileType('w'),
metavar='FILE', default=sys.stdout,
misc_args.add_argument('-o', '--out', metavar='FILE',
help='output file; default is terminal (stdout)')
misc_args.add_argument('--aug-out', type=argparse.FileType('w'),
metavar='FILE',
misc_args.add_argument('--aug-out', metavar='FILE',
help='optional augmented Fastq output')
misc_args.add_argument('--cc-prefix', metavar='PREFIX',
help='group reads by novel k-mers, and use the '
Expand Down Expand Up @@ -259,8 +257,10 @@ def main(args):
timer.start('validate')
print('[kevlar::filter] Validate k-mers and print reads',
file=args.logfile)
outstream = kevlar.open(args.out, 'w')
augstream = kevlar.open(args.aug_out, 'w') if args.aug_out else None
validate_and_print(readset, countgraph, refr, contam, args.min_abund,
args.skip2, args.out, args.aug_out, args.logfile)
args.skip2, outstream, augstream, args.logfile)
elapsed = timer.stop('validate')
print('[kevlar::filter] k-mers validated and reads printed',
'in {:.2f} sec'.format(elapsed), file=args.logfile)
Expand Down
6 changes: 3 additions & 3 deletions kevlar/find.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,7 @@ def subparser(subparsers):
help='show this help message and exit')
misc_args.add_argument('-k', '--ksize', type=int, default=31, metavar='K',
help='k-mer size; default is 31')
misc_args.add_argument('-o', '--out', type=argparse.FileType('w'),
metavar='FILE',
misc_args.add_argument('-o', '--out', metavar='FILE',
help='output file; default is terminal (stdout)')
misc_args.add_argument('--upint', type=float, default=1e6, metavar='INT',
help='update interval for log messages; default is '
Expand Down Expand Up @@ -167,6 +166,7 @@ def main(args):
nkmers = 0
nreads = 0
unique_kmers = set()
outstream = kevlar.open(args.out, 'w')
for n, record in enumerate(iter_screed(args.cases)):
if n > 0 and n % args.upint == 0:
elapsed = timer.probe('iter')
Expand Down Expand Up @@ -197,7 +197,7 @@ def main(args):
if read_kmers > 0:
nreads += 1
nkmers += read_kmers
kevlar.print_augmented_fastq(record, args.out)
kevlar.print_augmented_fastq(record, outstream)

elapsed = timer.stop('iter')
message = 'Iterated over {} reads in {:.2f} seconds'.format(n, elapsed)
Expand Down
10 changes: 4 additions & 6 deletions kevlar/mutate.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,9 @@

def subparser(subparsers):
subparser = subparsers.add_parser('mutate')
subparser.add_argument('-o', '--out', metavar='FILE', default=sys.stdout,
type=argparse.FileType('w'),
subparser.add_argument('-o', '--out', metavar='FILE',
help='output file; default is terminal (stdout)')
subparser.add_argument('mutations', type=argparse.FileType('r'),
help='mutations file')
subparser.add_argument('mutations', help='mutations file')
subparser.add_argument('genome', help='genome to mutate')


Expand Down Expand Up @@ -112,8 +110,8 @@ def mutate_genome(infile, mutations):

def main(args):
    """Apply the mutations in ``args.mutations`` to ``args.genome``.

    Progress messages are printed to ``args.logfile``. The mutations file
    and the output destination are both resolved through ``kevlar.open``;
    every record yielded by ``mutate_genome`` is passed to ``write_record``
    together with the output stream.
    """
    print('[kevlar::mutate] loading mutations', file=args.logfile)
    mutations = load_mutations(kevlar.open(args.mutations, 'r'), args.logfile)

    print('[kevlar::mutate] mutating genome', file=args.logfile)
    # Open the output stream exactly once, before iterating. Calling
    # kevlar.open(args.out, 'w') per record would reopen a named output file
    # in write mode on every iteration, clobbering the records written so
    # far and leaking one file handle per record.
    # The stream is deliberately not closed here: kevlar.open hands back
    # sys.stdout when no output file is given, and that must stay open.
    outstream = kevlar.open(args.out, 'w')
    for record in mutate_genome(args.genome, mutations):
        write_record(record, outstream)
12 changes: 6 additions & 6 deletions kevlar/reaugment.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,20 @@

def subparser(subparsers):
subparser = subparsers.add_parser('reaugment')
subparser.add_argument('-o', '--out', metavar='FILE', default=sys.stdout,
type=argparse.FileType('w'),
subparser.add_argument('-o', '--out', metavar='FILE',
help='output file; default is terminal (stdout)')
subparser.add_argument('augfastq', type=argparse.FileType('r'),
help='original augmented Fastq file')
subparser.add_argument('augfastq', help='original augmented Fastq file')
subparser.add_argument('fastq', help='processed Fastq file to re-annotate')


def main(args):
reads = dict()
for record in kevlar.parse_augmented_fastq(args.augfastq):
instream = kevlar.open(args.augfastq, 'r')
for record in kevlar.parse_augmented_fastq(instream):
reads[record.name] = record

reader = khmer.ReadParser(args.fastq)
outstream = kevlar.open(args.out, 'w')
for read in reader:
augrecord = reads[read.name]
if len(read.sequence) < len(augrecord.sequence):
Expand All @@ -45,4 +45,4 @@ def main(args):
if len(ikmers) == 0:
continue
augrecord.ikmers = ikmers
kevlar.print_augmented_fastq(augrecord, args.out)
kevlar.print_augmented_fastq(augrecord, outstream)
6 changes: 2 additions & 4 deletions kevlar/tests/test_find.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,9 @@ def test_assumptions(kmer):

@pytest.mark.parametrize('case,ctrl,mem', [
('trio1/case1.fq', 'trio1/ctrl[1,2].fq', '500K'),
('trio1/case1.fq', 'trio1/ctrl[1,2].fq', '1M'),
('trio1/case2.fq', 'trio1/ctrl[1,2].fq', '1M'),
('trio1/case3.fq', 'trio1/ctrl[1,2].fq', '1M'),
('trio1/case4.fq', 'trio1/ctrl[1,2].fq', '500K'),
('trio1/case4.fq', 'trio1/ctrl[1,2].fq', '1M'),
('trio1/case5.fq', 'trio1/ctrl[3,4].fq', '1M'),
('trio1/case6.fq', 'trio1/ctrl[5,6].fq', '1M'),
])
Expand All @@ -75,7 +73,7 @@ def test_find_single_mutation(case, ctrl, mem, capsys):
arglist = ['find', '--ksize', '13', '--case_min', '8', '--ctrl_max', '0',
'--memory', mem, '--cases', casestr, '--controls'] + ctrls
args = kevlar.cli.parser().parse_args(arglist)
args.out = stdout
args.out = None
args.err = stderr
kevlar.find.main(args)
out, err = capsys.readouterr()
Expand All @@ -101,7 +99,7 @@ def test_find_two_cases(capsys):
'--case_min', '7']
arglist += ['--cases'] + cases + ['--controls'] + ctrls
args = kevlar.cli.parser().parse_args(arglist)
args.out = stdout
args.out = None
args.err = stderr
kevlar.find.main(args)
out, err = capsys.readouterr()
Expand Down

0 comments on commit 7ee0e72

Please sign in to comment.