Skip to content

Commit

Permalink
make refine_assembly robust to empty inputs (addresses #498)
Browse files Browse the repository at this point in the history
refine_assembly now creates an empty output fasta file if the input
fasta or input bam (or both) are empty, and only indexes the output
fasta if it has non-zero size.
  • Loading branch information
tomkinsc committed Sep 21, 2016
1 parent c215cc8 commit 845ebe1
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 2 deletions.
14 changes: 12 additions & 2 deletions assembly.py
Expand Up @@ -687,6 +687,11 @@ def refine_assembly(
'''
chr_names = chr_names or []

# if the input fasta is empty, create an empty output fasta and return
if (os.path.getsize(inFasta) == 0):
util.file.touch(outFasta)
return 0

# Get tools
picard_index = tools.picard.CreateSequenceDictionaryTool()
picard_mkdup = tools.picard.MarkDuplicatesTool()
Expand Down Expand Up @@ -751,8 +756,13 @@ def refine_assembly(

# Index final output FASTA for Picard/GATK, Samtools, and Novoalign
picard_index.execute(outFasta, overwrite=True)
samtools.faidx(outFasta, overwrite=True)
novoalign.index_fasta(outFasta)
# if the input bam is empty, an empty fasta will be created, however
# faidx cannot index an empty fasta file, so only index if the fasta
# has a non-zero size
if (os.path.getsize(outFasta) > 0):
samtools.faidx(outFasta, overwrite=True)
novoalign.index_fasta(outFasta)

return 0


Expand Down
76 changes: 76 additions & 0 deletions test/unit/test_assembly.py
Expand Up @@ -35,6 +35,82 @@ def test_help_parser_for_each_command(self):
helpstring = parser.format_help()


class TestAssemble(TestCaseWithTmp):
''' Test edge cases of the de novo assembly pipeline '''

def test_empty_input_bam_assembly(self):
novoalign = tools.novoalign.NovoalignTool()
novoalign.install()

# prep inputs
orig_ref = os.path.join(util.file.get_test_input_path(), 'ebov-makona.fasta')
inFasta = util.file.mkstempfname('.ref.fasta')
shutil.copyfile(orig_ref, inFasta)
novoalign.index_fasta(inFasta)

inBam = os.path.join(util.file.get_test_input_path(), 'empty.bam')

outFasta = util.file.mkstempfname('.refined.fasta')

# run refine_assembly
args = [inFasta, inBam, outFasta, "--chr_names", 'G5012.3', "--min_coverage", '3', "--novo_params",
"-r Random -l 30 -g 40 -x 20 -t 502"]
args = assembly.parser_refine_assembly(argparse.ArgumentParser()).parse_args(args)
args.func_main(args)

# the expected output is an empty fasta file
self.assertTrue(os.path.isfile(outFasta))
self.assertTrue(os.path.getsize(outFasta) == 0)


def test_empty_input_fasta_assembly(self):
novoalign = tools.novoalign.NovoalignTool()
novoalign.install()

# make the input fasta empty
inFasta = util.file.mkstempfname('.input.fasta')
util.file.touch(inFasta)
novoalign.index_fasta(inFasta)

inBam = os.path.join(util.file.get_test_input_path(), 'G5012.3.testreads.bam')

outFasta = util.file.mkstempfname('.refined.fasta')

# run refine_assembly
args = [inFasta, inBam, outFasta, "--chr_names", 'G5012.3', "--min_coverage", '3', "--novo_params",
"-r Random -l 30 -g 40 -x 20 -t 502"]
args = assembly.parser_refine_assembly(argparse.ArgumentParser()).parse_args(args)
args.func_main(args)

# the expected output is an empty fasta file
self.assertTrue(os.path.isfile(outFasta))
self.assertTrue(os.path.getsize(outFasta) == 0)


def test_empty_input_succeed(self):
novoalign = tools.novoalign.NovoalignTool()
novoalign.install()

# make the input fasta empty
inFasta = util.file.mkstempfname('.input.fasta')
util.file.touch(inFasta)
novoalign.index_fasta(inFasta)

inBam = os.path.join(util.file.get_test_input_path(), 'empty.bam')

outFasta = util.file.mkstempfname('.refined.fasta')

# run refine_assembly
args = [inFasta, inBam, outFasta, "--chr_names", 'G5012.3', "--min_coverage", '3', "--novo_params",
"-r Random -l 30 -g 40 -x 20 -t 502"]
args = assembly.parser_refine_assembly(argparse.ArgumentParser()).parse_args(args)
print(args)
args.func_main(args)

# the expected output is an empty fasta file
self.assertTrue(os.path.isfile(outFasta))
self.assertTrue(os.path.getsize(outFasta) == 0)


class TestAssembleTrinity(TestCaseWithTmp):
''' Test the assemble_trinity command (no validation of output) '''
Expand Down

0 comments on commit 845ebe1

Please sign in to comment.