Merge branch 'master' into ct-ag100-exploration
tomkinsc committed Feb 16, 2018
2 parents 725a4aa + 6f5e93b commit a1e88c7
Showing 9 changed files with 52 additions and 49 deletions.
8 changes: 6 additions & 2 deletions metagenomics.py
@@ -753,13 +753,14 @@ def parser_krona(parser=argparse.ArgumentParser()):
parser.add_argument('outHtml', help='Output html report.')
parser.add_argument('--queryColumn', help='Column of query id. (default %(default)s)', type=int, default=2)
parser.add_argument('--taxidColumn', help='Column of taxonomy id. (default %(default)s)', type=int, default=3)
parser.add_argument('--scoreColumn', help='Column of score. (default %(default)s)', type=int)
parser.add_argument('--scoreColumn', help='Column of score. (default %(default)s)', type=int, default=None)
parser.add_argument('--magnitudeColumn', help='Column of magnitude. (default %(default)s)', type=int, default=None)
parser.add_argument('--noHits', help='Include wedge for no hits.', action='store_true')
parser.add_argument('--noRank', help='Include no rank assignments.', action='store_true')
util.cmd.common_args(parser, (('loglevel', None), ('version', None)))
util.cmd.attach_main(parser, krona, split_args=True)
return parser
def krona(inTsv, db, outHtml, queryColumn=None, taxidColumn=None, scoreColumn=None, noHits=None, noRank=None):
def krona(inTsv, db, outHtml, queryColumn=None, taxidColumn=None, scoreColumn=None, magnitudeColumn=None, noHits=None, noRank=None):
'''
Create an interactive HTML report from a tabular metagenomic report
'''
@@ -773,6 +774,7 @@ def krona(inTsv, db, outHtml, queryColumn=None, taxidColumn=None, scoreColumn=No
to_import = [tmp_tsv]
else:
to_import = [inTsv]
root_name = os.path.basename(inTsv)

krona_tool.import_taxonomy(
db,
@@ -781,6 +783,8 @@ def krona(inTsv, db, outHtml, queryColumn=None, taxidColumn=None, scoreColumn=No
query_column=queryColumn,
taxid_column=taxidColumn,
score_column=scoreColumn,
magnitude_column=magnitudeColumn,
root_name=root_name,
no_hits=noHits,
no_rank=noRank
)
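For illustration, a minimal sketch of how the updated krona() entry point might be called directly from Python with the newly added magnitudeColumn argument; the file names and column indices below are hypothetical, not part of this change:

    import metagenomics

    # Build a Krona HTML report, treating column 4 of the TSV as the magnitude
    # (e.g. read count) for each query; columns 2 and 3 hold query id and taxid.
    metagenomics.krona(
        'classified_reads.tsv', 'krona_taxonomy_db', 'report.html',
        queryColumn=2, taxidColumn=3, magnitudeColumn=4,
        noHits=True, noRank=True)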
58 changes: 25 additions & 33 deletions pipes/WDL/workflows/tasks/assembly.wdl
@@ -10,7 +10,8 @@ task assemble {
String? assembler="trinity" # trinity, spades, or trinity-spades
String cleaned_assembler = select_first([assembler, ""]) # workaround for https://gatkforums.broadinstitute.org/wdl/discussion/10462/string-type-in-output-section

String sample_name = basename(reads_unmapped_bam, ".bam")
# do this in two steps in case the input doesn't actually have "taxfilt" in the name
String sample_name = basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt")

command {
set -ex -o pipefail
@@ -95,7 +96,8 @@ task scaffold {
Int? nucmer_min_cluster
Int? scaffold_min_pct_contig_aligned

String sample_name = basename(contigs_fasta, ".fasta")
# do this in multiple steps in case the input doesn't actually have "assembly1-x" in the name
String sample_name = basename(basename(basename(contigs_fasta, ".fasta"), ".assembly1-trinity"), ".assembly1-spades")

command {
set -ex -o pipefail
@@ -169,7 +171,7 @@ task refine {
Float? major_cutoff=0.5
Int? min_coverage=1

String assembly_basename=basename(assembly_fasta, ".fasta")
String assembly_basename=basename(basename(assembly_fasta, ".fasta"), ".scaffold")

command {
set -ex -o pipefail
@@ -241,8 +243,8 @@ task refine_2x_and_plot {

String? plot_coverage_novoalign_options="-r Random -l 40 -g 40 -x 20 -t 100 -k"

String assembly_basename = basename(assembly_fasta, ".fasta")
String sample_name = basename(reads_unmapped_bam, ".bam")
# do this in two steps in case the input doesn't actually have "cleaned" in the name
String sample_name = basename(basename(reads_unmapped_bam, ".bam"), ".cleaned")

command {
set -ex -o pipefail
@@ -265,8 +267,8 @@ task refine_2x_and_plot {
assembly.py refine_assembly \
assembly.fasta \
${reads_unmapped_bam} \
${assembly_basename}.refine1.fasta \
--outVcf ${assembly_basename}.refine1.pre_fasta.vcf.gz \
${sample_name}.refine1.fasta \
--outVcf ${sample_name}.refine1.pre_fasta.vcf.gz \
--min_coverage ${refine1_min_coverage} \
--major_cutoff ${refine1_major_cutoff} \
--GATK_PATH gatk/ \
@@ -276,10 +278,10 @@ task refine_2x_and_plot {

# refine 2
assembly.py refine_assembly \
${assembly_basename}.refine1.fasta \
${sample_name}.refine1.fasta \
${reads_unmapped_bam} \
${assembly_basename}.refine2.fasta \
--outVcf ${assembly_basename}.refine2.pre_fasta.vcf.gz \
${sample_name}.fasta \
--outVcf ${sample_name}.refine2.pre_fasta.vcf.gz \
--min_coverage ${refine2_min_coverage} \
--major_cutoff ${refine2_major_cutoff} \
--GATK_PATH gatk/ \
Expand All @@ -290,30 +292,20 @@ task refine_2x_and_plot {
# final alignment
read_utils.py align_and_fix \
${reads_unmapped_bam} \
${assembly_basename}.refine2.fasta \
--outBamAll ${sample_name}.bam \
${sample_name}.fasta \
--outBamAll ${sample_name}.all.bam \
--outBamFiltered ${sample_name}.mapped.bam \
--GATK_PATH gatk/ \
--aligner_options "${plot_coverage_novoalign_options}" \
--JVMmemory "$mem_in_mb"m \
--loglevel=DEBUG

# plot_coverage
reports.py plot_coverage \
${sample_name}.mapped.bam \
${sample_name}.coverage_plot.pdf \
--plotFormat pdf \
--plotWidth 1100 \
--plotHeight 850 \
--plotDPI 100 \
--loglevel=DEBUG

# collect figures of merit
grep -v '^>' assembly.fasta | tr -d '\n' | wc -c | tee assembly_length
grep -v '^>' assembly.fasta | tr -d '\nNn' | wc -c | tee assembly_length_unambiguous
grep -v '^>' ${sample_name}.fasta | tr -d '\n' | wc -c | tee assembly_length
grep -v '^>' ${sample_name}.fasta | tr -d '\nNn' | wc -c | tee assembly_length_unambiguous
samtools view -c ${sample_name}.mapped.bam | tee reads_aligned
samtools flagstat ${sample_name}.bam | tee ${sample_name}.bam.flagstat.txt
grep properly ${sample_name}.bam.flagstat.txt | cut -f 1 -d ' ' | tee read_pairs_aligned
samtools flagstat ${sample_name}.all.bam | tee ${sample_name}.all.bam.flagstat.txt
grep properly ${sample_name}.all.bam.flagstat.txt | cut -f 1 -d ' ' | tee read_pairs_aligned
samtools view ${sample_name}.mapped.bam | cut -f10 | tr -d '\n' | wc -c | tee bases_aligned
echo $(( $(cat bases_aligned) / $(cat assembly_length) )) | tee mean_coverage

@@ -331,17 +323,17 @@ task refine_2x_and_plot {
--plotDPI 100 \
--loglevel=DEBUG
else
touch ${sample_name}.coverage_plot.pdf ${sample_name}.mapped_fastqc.html
touch ${sample_name}.coverage_plot.pdf
fi
}

output {
File refine1_sites_vcf_gz = "${assembly_basename}.refine1.pre_fasta.vcf.gz"
File refine1_assembly_fasta = "${assembly_basename}.refine1.fasta"
File refine2_sites_vcf_gz = "${assembly_basename}.refine2.pre_fasta.vcf.gz"
File refine2_assembly_fasta = "${assembly_basename}.refine2.fasta"
File aligned_bam = "${sample_name}.bam"
File aligned_bam_flagstat = "${sample_name}.bam.flagstat.txt"
File refine1_sites_vcf_gz = "${sample_name}.refine1.pre_fasta.vcf.gz"
File refine1_assembly_fasta = "${sample_name}.refine1.fasta"
File refine2_sites_vcf_gz = "${sample_name}.refine2.pre_fasta.vcf.gz"
File final_assembly_fasta = "${sample_name}.fasta"
File aligned_bam = "${sample_name}.all.bam"
File aligned_bam_flagstat = "${sample_name}.all.bam.flagstat.txt"
File aligned_only_reads_bam = "${sample_name}.mapped.bam"
File aligned_only_reads_fastqc = "${sample_name}.mapped_fastqc.html"
File coverage_plot = "${sample_name}.coverage_plot.pdf"
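As a side note, a rough Python equivalent of the figures-of-merit shell pipeline in this task (the grep | tr | wc chain above), assuming a hypothetical single-sample FASTA file name; this is only a sketch of what those two counts measure:

    # Concatenate sequence lines (skipping headers), then count total bases and
    # unambiguous bases (everything that is not N/n).
    with open('sample.fasta') as f:
        seq = ''.join(line.strip() for line in f if not line.startswith('>'))
    assembly_length = len(seq)
    assembly_length_unambiguous = len(seq.replace('N', '').replace('n', ''))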
6 changes: 3 additions & 3 deletions pipes/rules/ncbi.rules
@@ -79,7 +79,7 @@ rule annot_transfer:
LSF = config.get('LSF_queues', {}).get('short', '-W 4:00'),
UGER = config.get('UGER_queues', {}).get('short', '-l h_rt=04:00:00')
run:
output_path_prefix = os.path.dirname(output.feature_tables)
output_path_prefix = os.path.dirname(output.feature_tables[0])
for alignmentFile in expand("{data_dir}/{subdir}/aligned_{chrom}.fasta",
data_dir=config["data_dir"],
subdir=config["subdirs"]["multialign_ref"],
@@ -94,9 +94,9 @@ rule prepare_genbank:
subdir=config["subdirs"]["annot"],
samp=read_samples_file(config["samples_assembly"]),
chrom=range(1, len(config["accessions_for_ref_genome_build"])+1)),
fasta_files=" ".join(expand("{dir}/{subdir}/{sample}.fasta",
fasta_files=expand("{dir}/{subdir}/{sample}.fasta",
dir=[config["data_dir"]], subdir=[config["subdirs"]["assembly"]],
sample=list(read_samples_file(config["samples_assembly"])))),
sample=list(read_samples_file(config["samples_assembly"]))),
output:
config["data_dir"]+'/'+config["subdirs"]["annot"]+'/errorsummary.val'
params:
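A small illustrative sketch of why these two fixes are needed, using made-up paths: Snakemake's expand() yields a Python list, so the directory prefix must be taken from its first element, and passing the list itself (rather than a pre-joined string) lets Snakemake track each FASTA as a separate input:

    import os

    # What an expand(...) call like the ones above might return:
    feature_tables = ['data/annot/sample1-1.tbl', 'data/annot/sample1-2.tbl']
    output_path_prefix = os.path.dirname(feature_tables[0])  # 'data/annot'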
2 changes: 1 addition & 1 deletion requirements-conda.txt
@@ -24,7 +24,7 @@ prinseq=0.20.4
samtools=1.6
snpeff=4.1l
spades=3.11.1
tbl2asn=25.3
tbl2asn=25.6
trimmomatic=0.36
trinity=date.2011_11_26
unzip=6.0
2 changes: 1 addition & 1 deletion test/unit/test_metagenomics.py
@@ -106,7 +106,7 @@ def test_krona_import_taxonomy(self):
noHits=True, noRank=True)
self.mock_krona().import_taxonomy.assert_called_once_with(
self.db, [self.inTsv], out_html, query_column=3, taxid_column=5, score_column=7,
no_hits=True, no_rank=True)
no_hits=True, no_rank=True, magnitude_column=None, root_name=os.path.basename(self.inTsv))


@pytest.fixture
6 changes: 6 additions & 0 deletions tools/krona.py
@@ -35,6 +35,8 @@ def import_taxonomy(self,
query_column=None,
taxid_column=None,
score_column=None,
magnitude_column=None,
root_name=None,
no_hits=None,
no_rank=None):
self.install()
@@ -48,6 +50,10 @@ def import_taxonomy(self,
cmd.extend(['-t', str(taxid_column)])
if score_column is not None:
cmd.extend(['-s', str(score_column)])
if magnitude_column is not None:
cmd.extend(['-m', str(magnitude_column)])
if root_name is not None:
cmd.extend(['-n', root_name])
if no_hits is not None:
cmd.append('-i')
if no_rank is not None:
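For context, a hedged sketch of the argument list the wrapper would now assemble for ktImportTaxonomy when the new options are supplied; column numbers and the root name are illustrative, and only the flags visible in this diff are shown:

    # '-m' carries the magnitude column and '-n' the root node name;
    # '-i' is appended when no_hits is requested.
    cmd = ['ktImportTaxonomy',
           '-q', '2',                     # query id column
           '-t', '3',                     # taxonomy id column
           '-m', '4',                     # magnitude column (new)
           '-n', 'classified_reads.tsv',  # root name (new)
           '-i']                          # include a "no hits" wedge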
4 changes: 2 additions & 2 deletions tools/tbl2asn.py
@@ -74,10 +74,10 @@ def execute(
# See: https://www.ncbi.nlm.nih.gov/IEB/ToolBox/C_DOC/lxr/source/demo/tbl2asn.c#L9674
# We can try to work around this by examining the output for the upgrade message
try:
subprocess.check_output(tool_cmd)
subprocess.check_output(tool_cmd, stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as e:
old_version_expected_output = "This copy of tbl2asn is more than a year old. Please download the current version."
if old_version_expected_output in e.output:
if old_version_expected_output in e.output.decode('UTF-8'):
pass
else:
raise
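A self-contained sketch of the pattern this change adopts, with the tool command left as a placeholder: stderr is redirected into check_output's captured output so the outdated-binary notice is actually visible, and the bytes are decoded before the substring test so it works under Python 3:

    import subprocess

    def run_tolerating_version_warning(tool_cmd):
        try:
            # Capture stderr together with stdout so the warning text lands in e.output.
            subprocess.check_output(tool_cmd, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            # e.output is bytes under Python 3; decode before searching it.
            if "more than a year old" in e.output.decode('UTF-8'):
                pass  # benign: the tool exits nonzero only to nag about its age
            else:
                raise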
13 changes: 7 additions & 6 deletions travis/install-wdl.sh
@@ -3,22 +3,23 @@ set -e -o pipefail

cached_fetch_jar_from_github () {
_github_org=$1
_tool_name=$2
_jar_version=$3
_repo_name=$2
_tool_name=$3
_jar_version=$4
_jar_fname="$_tool_name-$_jar_version.jar"
if [ ! -f $CACHE_DIR/$_jar_fname ]; then
echo "Fetching $_jar_fname"
wget --quiet https://github.com/$_github_org/$_tool_name/releases/download/$_jar_version/$_jar_fname
wget --quiet https://github.com/$_github_org/$_repo_name/releases/download/$_jar_version/$_jar_fname
mv $_jar_fname $CACHE_DIR
else
echo "Using cached $_jar_fname"
fi
ln -s $CACHE_DIR/$_jar_fname $_tool_name.jar
}

cached_fetch_jar_from_github broadinstitute wdltool 0.14
cached_fetch_jar_from_github broadinstitute cromwell 30.2
cached_fetch_jar_from_github dnanexus dxWDL 0.58.1
cached_fetch_jar_from_github broadinstitute cromwell womtool 30.2
cached_fetch_jar_from_github broadinstitute cromwell cromwell 30.2
cached_fetch_jar_from_github dnanexus dxWDL dxWDL 0.59

TGZ=dx-toolkit-v0.240.1-ubuntu-14.04-amd64.tar.gz
if [ ! -f $CACHE_DIR/$TGZ ]; then
2 changes: 1 addition & 1 deletion travis/validate-wdl.sh
@@ -4,6 +4,6 @@ set -e -o pipefail
ln -s pipes/WDL/workflows/tasks .
for workflow in pipes/WDL/workflows/*.wdl; do
echo "validating $workflow"
java -jar wdltool.jar validate $workflow
java -jar womtool.jar validate $workflow
done
rm tasks
