Skip to content

Commit

Permalink
Merge pull request #231 from dib-lab/deprecate/jca
Browse files Browse the repository at this point in the history
Remove junction count assembler
  • Loading branch information
standage committed Mar 27, 2018
2 parents 81e2277 + 402c9cc commit e02e177
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 115 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ This project adheres to [Semantic Versioning](http://semver.org/).
- Bug with `kevlar count` when reading from multiple input files (see #202).
- Can now call SNVs near INDELs (see #229).

### Removed
- The JCA assembly mode is no longer supported (see #231).


## [0.3.0] - 2017-11-03

This release includes many new features, some refactoring of the core codebase, and the first end-to-end analysis workflow implemented in a single command.
Expand Down
71 changes: 1 addition & 70 deletions kevlar/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,73 +38,6 @@ def main_fml_asm(args):
kevlar.print_augmented_fastx(contig, outstream)


# =============================================================================
# Junction count assembly mode
# =============================================================================

def assemble_jca(readstream, memory, maxfpr=0.01, collapse=True,
kmers_to_ignore=set(), logstream=sys.stderr):
print('[kevlar::assemble::jca] loading reads', file=logstream)
countgraph = None
variants = kevlar.VariantSet()
for record in readstream:
for kmer in record.ikmers:
variants.add_kmer(kmer.sequence, record.name)
if countgraph is None:
ksize = len(kmer.sequence)
countgraph = khmer.Countgraph(ksize, memory / 4, 4)
countgraph.consume(record.sequence)
fpr = kevlar.sketch.estimate_fpr(countgraph)
msg = '[kevlar::assemble::jca] done loading reads'
msg += ', {:d} distinct k-mers stored'.format(countgraph.n_unique_kmers())
msg += '; estimated false positive rate is {:1.3f}'.format(fpr)
if fpr > maxfpr:
msg += ' (FPR too high, bailing out!!!)'
raise kevlar.sketch.KevlarUnsuitableFPRError(msg)
print(msg, file=logstream)

asm = khmer.JunctionCountAssembler(countgraph)
for kmer in variants.kmers:
if kmer in kmers_to_ignore:
continue
contigs = asm.assemble(kmer)
for contig in contigs:
if hasattr(contig, 'decode'):
contig = contig.decode()
if contig == '':
print(' WARNING: no assembly found for k-mer', kmer,
file=args.logfile)
continue
variants.add_contig(contig, kmer)

print(' {:d} linear paths'.format(variants.ncontigs), file=logstream)

if collapse:
print('[kevlar::assemble::jca] Collapsing contigs', file=logstream)
variants.collapse()
print(' {:d} collapsed contigs'.format(variants.ncontigs),
file=logstream)

for n, contigdata in enumerate(variants, 1):
contig, contigrc, kmers, reads = contigdata
contigname = 'contig{:d}:length={:d}:nkmers={:d}:nreads={:d}'.format(
n, len(contig), len(kmers), len(reads)
)
contig = screed.Record(name=contigname, sequence=contig)
yield contig


def main_jca(args):
reads = kevlar.parse_augmented_fastx(kevlar.open(args.augfastq, 'r'))
outstream = kevlar.open(args.out, 'w')
ignore = set(args.ignore) if args.ignore else set()
contigstream = assemble_jca(reads, args.memory, args.max_fpr,
collapse=args.collapse, kmers_to_ignore=ignore,
logstream=args.logfile)
for contig in contigstream:
khmer.utils.write_record(contig, outstream)


# =============================================================================
# Greedy assembly mode
# =============================================================================
Expand Down Expand Up @@ -357,8 +290,6 @@ def main_greedy(args):

def main(args):
mainfunc = main_fml_asm
if args.jca:
mainfunc = main_jca
elif args.greedy:
if args.greedy:
mainfunc = main_greedy
mainfunc(args)
32 changes: 6 additions & 26 deletions kevlar/cli/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
def subparser(subparsers):
"""Define the `kevlar assemble` command-line interface."""

desc = ('Assemble reads into contigs representing putative variants. By '
'default `fermi-lite` is used to compute the assembly, but two '
'alternative assembly algorithms are available.')

desc = (
'Assemble reads into contigs representing putative variants. By '
'default `fermi-lite` is used to compute the assembly, but an '
'alternative assembly algorithm is available.'
)
subparser = subparsers.add_parser('assemble', description=desc,
add_help=False)

Expand All @@ -41,32 +42,11 @@ def subparser(subparsers):
default=500, help='discard interesting k-mers '
'that occur more than X times')

jca_args = subparser.add_argument_group(
'Junction count assembly mode',
'Alternatively, `kevlar assemble` can uses a junction count assembler '
'available from the khmer library.'
)
jca_args.add_argument('--jca', action='store_true', help="use khmer's "
"junction count assembler instead of kevlar's "
"greedy assembler")
jca_args.add_argument('--ignore', metavar='KM', nargs='+',
help='ignore the specified k-mer(s)')
jca_args.add_argument('--collapse', action='store_true', help='collapse '
'contigs that are contained within other contigs')
jca_args.add_argument('-M', '--memory', default='1e6', metavar='MEM',
type=khmer.khmer_args.memory_setting,
help='memory to allocate for assembly graph; '
'default is 1M')
jca_args.add_argument('--max-fpr', type=float, metavar='FPR',
default=0.001, help='terminate if the expected '
'false positive rate of the assembly graph is '
'higher than the specified FPR; default is 0.001')

misc_args = subparser.add_argument_group('Miscellaneous settings')
misc_args.add_argument('-h', '--help', action='help',
help='show this help message and exit')
misc_args.add_argument('-o', '--out', metavar='FILE',
help='output file; default is terminal (stdout)')

subparser.add_argument('augfastq', help='annotated reads in augmented '
'Fastq format')
'Fastq/Fasta format')
20 changes: 1 addition & 19 deletions kevlar/tests/test_assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,14 +517,9 @@ def test_assembly_contigs():
'TGGCTAACACG')


@pytest.mark.parametrize('jcamode', [
(True),
(False),
])
def test_assemble_main(jcamode, capsys):
def test_assemble_main(capsys):
cliargs = ['assemble', data_file('var1.reads.augfastq')]
args = kevlar.cli.parser().parse_args(cliargs)
args.jca = jcamode
kevlar.assemble.main(args)
out, err = capsys.readouterr()
contig = ('GTCCTTGAGTCCATTAGAGACGGCTTCCGCCGTAGGCCCACTTCCTTAAAGTCGAGACTTCTA'
Expand All @@ -534,19 +529,6 @@ def test_assemble_main(jcamode, capsys):
assert contig in out or kevlar.revcom(contig) in out


def test_assemble_jca_collapse(capsys):
infile = data_file('var1.reads.augfastq')
cliargs = ['assemble', '--jca', '--collapse', infile]
args = kevlar.cli.parser().parse_args(cliargs)
kevlar.__main__.main(args)
out, err = capsys.readouterr()
contig = ('GTCCTTGAGTCCATTAGAGACGGCTTCCGCCGTAGGCCCACTTCCTTAAAGTCGAGACTTCTA'
'AAAACCGGGGTGTAACTCTTTTATTACAAAGCGACTATCCACCTGTAAGGACAGTGATA')
print('DEBUG', contig)
print('DEBUG', out)
assert contig in out or kevlar.revcom(contig) in out


def test_assemble_no_edges(capsys):
cliargs = ['assemble', data_file('asmbl-no-edges.augfastq.gz')]
args = kevlar.cli.parser().parse_args(cliargs)
Expand Down

0 comments on commit e02e177

Please sign in to comment.