Skip to content

Commit

Permalink
Merge branch 'master' into ct-build-fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
tomkinsc committed Oct 12, 2017
2 parents c713e15 + 60c6199 commit a94a774
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 39 deletions.
9 changes: 4 additions & 5 deletions pipes/rules/hs_deplete.rules
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,17 @@ rule filter_to_taxon:
'''
input:
input_bam = config["data_dir"]+'/'+config["subdirs"]["depletion"]+'/{sample}.cleaned.bam',
lastalDb = objectify_remote(expand('{lastal_ref_db_dir}/lastal.{ext}', lastal_ref_db_dir=config["lastal_ref_db_dir"], ext=['bck','des','prj','sds','ssp','suf','tis'])),
lastalDb = objectify_remote(expand('{lastal_ref_db_dir}/lastal.fasta', lastal_ref_db_dir=config["lastal_ref_db_dir"])),
output:
config["data_dir"]+'/'+config["subdirs"]["depletion"]+'/{sample}.taxfilt.bam'
resources:
mem = 8
mem = 7
params:
LSF = config.get('LSF_queues', {}).get('short', '-W 4:00'),
UGER = config.get('UGER_queues', {}).get('long', '-l h_rt 36:00:00'),
UGER = config.get('UGER_queues', {}).get('short', '-l h_rt 2:00:00'),
logid = "{sample}"
run:
lastal_db_path_prefix=os.path.splitext(input.lastalDb[0])[0]
shell("{config[bin_dir]}/taxon_filter.py filter_lastal_bam {input.input_bam} "+lastal_db_path_prefix+" {output}")
shell("{config[bin_dir]}/taxon_filter.py filter_lastal_bam {input.input_bam} {input.lastalDb} {output}")


class MergeInputException(Exception):
Expand Down
12 changes: 0 additions & 12 deletions pipes/rules/ncbi.rules
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,6 @@ rule download_lastal_sources:
shell("{config[bin_dir]}/ncbi.py fetch_fastas {email_address} "+fasta_path_prefix+" {accessions_list} --combinedFilePrefix "+fasta_file_prefix+" --removeSeparateFiles --forceOverwrite --chunkSize 300")


# Snakemake rule: runs the `lastal_build_db` command of taxon_filter.py on the
# combined `lastal.fasta` found under config["lastal_ref_db_dir"].
rule build_lastal_db:
input:
# Single FASTA named `lastal.fasta` inside the configured reference DB directory.
fasta = objectify_remote(expand('{lastal_ref_db_dir}/'+'lastal.fasta', lastal_ref_db_dir=config["lastal_ref_db_dir"]))
output:
# Seven files `lastal.<ext>` in the same directory, one per listed extension.
lastalDb = objectify_remote(expand('{lastal_ref_db_dir}/lastal.{ext}', lastal_ref_db_dir=config["lastal_ref_db_dir"], ext=["bck","des","prj","sds","ssp","suf","tis"]))
params:
# Falls back to '-l h_rt 04:00:00' when config has no UGER_queues -> short entry.
UGER = config.get('UGER_queues', {}).get('short', '-l h_rt 04:00:00'),
# Read from config here; not referenced by the run section below.
emailAddress = config["email_point_of_contact_for_ncbi"]
run:
# NOTE(review): despite the name, os.path.dirname() yields the directory that
# contains the first output file, not a `<dir>/lastal` prefix -- confirm this is
# what `lastal_build_db` expects as its second argument.
lastal_db_path_prefix=os.path.dirname(output.lastalDb[0])
shell("{config[bin_dir]}/taxon_filter.py lastal_build_db {input.fasta} "+lastal_db_path_prefix)

rule annot_transfer:
input:
multiple_alignment_fastas = expand("{data_dir}/{subdir}/aligned_{chrom}.fasta",
Expand Down
11 changes: 5 additions & 6 deletions taxon_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,14 +170,13 @@ def filter_lastal_bam(
reference database using LASTAL.
'''

with util.file.tmp_dir('-lastal_db') as tmp_db_dir:
# auto build db if needed
if not all(os.path.exists(db + x)
for x in ('.bck', '.des', '.prj', '.sds', '.ssp', '.suf', '.tis')):
db = tools.last.Lastdb().build_database(db, os.path.join(os.path.abspath(tmp_db_dir), 'lastal_db'))
with util.file.tmp_dir('-lastdb') as tmp_db_dir:
# index db if necessary
lastdb = tools.last.Lastdb()
if not lastdb.is_indexed(db):
db = lastdb.build_database(db, os.path.join(tmp_db_dir, 'lastdb'))

with util.file.tempfname('.read_ids.txt') as hitList:

# look for lastal hits in BAM and write to temp file
with open(hitList, 'wt') as outf:
for read_id in tools.last.Lastal().get_hits(
Expand Down
33 changes: 17 additions & 16 deletions tools/last.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,37 +79,38 @@ class Lastdb(LastTools):
subtool_name = 'lastdb'
subtool_name_on_broad = 'lastdb'

def build_database(self, fasta_files, database_prefix_path): # pylint: disable=W0221
output_file_prefix = os.path.basename(database_prefix_path)
output_directory = os.path.dirname(database_prefix_path)


input_fasta = ""
def is_indexed(self, db_prefix):
    """Return True when every expected index file exists for ``db_prefix``.

    The check appends each of the seven index suffixes to ``db_prefix`` and
    tests the resulting path with ``os.path.exists``; the first missing
    path makes the result False.
    """
    index_suffixes = ('.bck', '.des', '.prj', '.sds', '.ssp', '.suf', '.tis')
    for suffix in index_suffixes:
        if not os.path.exists(db_prefix + suffix):
            return False
    return True

def build_database(self, fasta_files, database_prefix_path=None): # pylint: disable=W0221
if database_prefix_path is None:
output_file_prefix = "lastdb"
output_directory = tempfile.mkdtemp()
else:
output_file_prefix = os.path.basename(os.path.abspath(database_prefix_path))
output_directory = os.path.dirname(os.path.abspath(database_prefix_path))

# we can pass in a string containing a fasta file path
# or a list of strings
if 'basestring' not in globals():
basestring = str
if isinstance(fasta_files, basestring):
if isinstance(fasta_files, str):
fasta_files = [fasta_files]
elif isinstance(fasta_files, list):
elif isinstance(fasta_files, list) and fasta_files:
pass
else:
raise TypeError("fasta_files was not a single fasta file, nor a list of fasta files") # or something along that line

# if more than one fasta file is specified, join them
# otherwise if only one is specified, just use it
if len(fasta_files) > 1:
input_fasta = util.file.mkstempfname("fasta")
util.file.cat(input_fasta, fasta_files)
elif len(fasta_files) == 1:
if len(fasta_files) == 1 and not fasta_files[0].endswith('.gz'):
input_fasta = fasta_files[0]
else:
raise IOError("No fasta file provided")
input_fasta = util.file.mkstempfname(".fasta")
util.file.cat(input_fasta, fasta_files) # automatically decompresses gz inputs

self.execute(input_fasta, output_directory, output_file_prefix)

return database_prefix_path
return os.path.join(output_directory, output_file_prefix)


def execute(self, inputFasta, outputDirectory, outputFilePrefix): # pylint: disable=W0221
Expand Down

0 comments on commit a94a774

Please sign in to comment.