Skip to content

Commit

Permalink
expose intrahost optional params in config.yaml
Browse files Browse the repository at this point in the history
This addresses #412 and exposes the following parameters in the config
file:
intrahost.py vphaser_one_sample --minReadsEach and --maxBias
intrahost.py merge_to_vcf --naive_filter
  • Loading branch information
tomkinsc committed Jul 26, 2016
1 parent 1dd5271 commit 49faf14
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 5 deletions.
2 changes: 1 addition & 1 deletion intrahost.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ def parser_vphaser_one_sample(parser=argparse.ArgumentParser()):
parser.add_argument("--removeDoublyMappedReads",
default=False,
action="store_true",
help="""When calling V-Phaser, keep reads mapping to more than one contig.""")
help="""When calling V-Phaser, remove reads mapping to more than one contig. Default is to keep the reads.""")
util.cmd.common_args(parser, (('loglevel', None), ('version', None)))
util.cmd.attach_main(parser, vphaser_one_sample, split_args=True)
return parser
Expand Down
13 changes: 13 additions & 0 deletions pipes/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,19 @@ mafft_maxiters: 1000
# See: https://github.com/trinityrnaseq/trinityrnaseq/wiki/Trinity-FAQ#ques_why_so_many_transcripts
trinity_n_reads: 250000

# Minimum number of reads required on each strand
# (passed to intrahost.py vphaser_one_sample as --minReadsEach).
vphaser_min_reads_each: 5

# Maximum allowable ratio between the numbers of reads on the two
# strands (passed to intrahost.py vphaser_one_sample as --maxBias).
# Ignored if vphaser_max_bins=0.
vphaser_max_bins: 10

# A simple filter for the VCF merge step; corresponds to the
# --naive_filter option of intrahost.py merge_to_vcf.
# If set to true, keep only the alleles that are supported by at least
# two independent libraries and have an allele frequency > 0.005.
# If false, no filtering is performed.
vcf_merge_naive_filter: false

# |----------------------- Data storage locations ---------------------------

# The parent directory containing data sub-directories.
Expand Down
12 changes: 8 additions & 4 deletions pipes/rules/intrahost.rules
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ rule isnvs_per_sample:
params: LSF=config.get('LSF_queues', {}).get('short', '-W 4:00'),
UGER=config.get('UGER_queues', {}).get('short', '-q short'),
logid="{sample}",
numThreads=str(config.get("number_of_threads", 1))
numThreads=str(config.get("number_of_threads", 1)),
minReadsPerStrand="--minReadsEach {}".format(config['vphaser_min_reads_each']) if 'vphaser_min_reads_each' in config else "",
maxBias="--maxBias {}".format(config['vphaser_max_bins']) if 'vphaser_max_bins' in config else ""
run:
makedirs(config["data_dir"]+'/'+config["subdirs"]["intrahost"])
shell("{config[bin_dir]}/intrahost.py vphaser_one_sample {input} {output} --vphaserNumThreads {params.numThreads} --removeDoublyMappedReads")
shell("{config[bin_dir]}/intrahost.py vphaser_one_sample {input} {output} --vphaserNumThreads {params.numThreads} --removeDoublyMappedReads {params.minReadsPerStrand} {params.maxBias}")

rule isnvs_vcf:
input:
Expand Down Expand Up @@ -87,14 +89,16 @@ rule isnvs_vcf_filtered:
refGenome=os.path.join(config["ref_genome_dir"],"reference"+".fasta"),
snpEff_ref=" ".join(config["accessions_for_ref_genome_build"]),
samples=list(read_samples_file(config["samples_assembly"])),
emailAddress=config["email_point_of_contact_for_ncbi"]
emailAddress=config["email_point_of_contact_for_ncbi"],
naiveFilter="--naive_filter" if config["email_point_of_contact_for_ncbi"] else ""
run:
shell("{config[bin_dir]}/intrahost.py merge_to_vcf {params.refGenome} {output[0]}"
+ " --samples " + " ".join(params.samples)
+ " --isnvs " + " ".join(["{config[data_dir]}/{config[subdirs][intrahost]}/vphaser2."+s+".txt.gz" for s in params.samples])
+ " --alignments " + " ".join(["{config[data_dir]}/{config[subdirs][multialign_ref]}/aligned_" + str(n) + ".fasta" for n in range(1, len(config["accessions_for_ref_genome_build"])+1)])
+ " --strip_chr_version"
+ "--parse_accession" # the vcf chr column must match a chr known to snpEff; we have an option to parse out only the accession
+ " {params.naiveFilter}"
+ " --parse_accession" # the vcf chr column must match a chr known to snpEff; we have an option to parse out only the accession
)
shell("{config[bin_dir]}/interhost.py snpEff {output[0]} {params.snpEff_ref} {output[1]} {params.emailAddress}")
shell("{config[bin_dir]}/intrahost.py iSNV_table {output[1]} {output[2]}")
Expand Down

0 comments on commit 49faf14

Please sign in to comment.