-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added GRIDSS 2.2.3 backport for adjusting the SC field to exclude local assembly anchor PapenfussLab/gridss#213
- Loading branch information
Daniel Cameron
committed
May 15, 2019
1 parent
81bbcb0
commit cbfafd0
Showing
1 changed file
with
81 additions
and
0 deletions.
There are no files selected for viewing
81 changes: 81 additions & 0 deletions
81
hmftools/gridss/do_run_gridss_2_2_3_postpurple_backport_pilot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
#!/bin/bash
#
# Adjusts the breakpoint position to match GRIDSS 2.1.0 calling positions
#
# Usage: do_run_gridss_2_2_3_postpurple_backport_pilot <input>.sv.ann.vcf.gz
#   (the input is the first positional parameter still present after
#   do_run_gridss_lib has been sourced)
#
# Steps:
#   1. Split the input VCF into the lines without "purple_" and a second VCF
#      holding the header plus the lines containing "purple_".
#   2. Run gridss.AllocateEvidence (ALLOCATE_READS=false) on the first file,
#      using the BAMs found in the breakpoint_position_backport directory.
#   3. Merge both files back together with Picard SortVcf.
#   4. Run gridss_adjust_call_positions.R and rename its .bgz/.bgz.tbi output
#      to <input>.sv.ann2.vcf.gz / .gz.tbi.
#
# The following variables are read but never assigned here, so they are
# presumably provided by do_run_gridss_lib (verify against that file):
#   base_path run_dir normal_bam tumor_bam assembly_bam gridss_jar ref_genome
#   gridss_config_file threads
#
source do_run_gridss_lib

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Fail early with a clear message instead of building paths such as
# "/tools/..." or "I=<out_dir>/" from empty values.
for required_var in base_path run_dir normal_bam tumor_bam assembly_bam gridss_jar ref_genome; do
  [[ -n "${!required_var}" ]] \
    || die "Variable ${required_var} is empty; it must be set before running the backport."
done

export PATH="$base_path/tools/bedops_v2.4.30/:$PATH"

PICARD_JAR="$base_path/tools/picard-tools_v1.135/picard.jar"
libgridss=/data/common/repos/scripts/gridss/

# The -name pattern is quoted so the shell cannot expand it against the
# current directory before find sees it. The directory glob stays unquoted
# on purpose.
original_vcf=$(find -L "${run_dir}"/structuralVariants/gridss/*/ -type f -name '*gridss.vcf.gz')
# -f is false both when nothing was found and when several paths came back
# (a multi-line string is not a file name).
[[ -f "$original_vcf" ]] \
  || die "Expected exactly one *gridss.vcf.gz under ${run_dir}/structuralVariants/gridss/*/ but found: '${original_vcf}'"
out_dir=$(dirname "$original_vcf")/breakpoint_position_backport

if [[ ! -d "$out_dir" ]]; then
  die "Missing backport directory $out_dir. This assembly from this directory is required."
fi

# From here on the BAM variables point at the copies inside the backport
# directory rather than at the paths supplied by the library.
normal_bam=$out_dir/$(basename "$normal_bam")
tumor_bam=$out_dir/$(basename "$tumor_bam")
assembly_bam=$out_dir/$(basename "$assembly_bam")

input_vcf=$1
[[ -n "$input_vcf" ]] || die "Usage: ${0##*/} <input>.sv.ann.vcf.gz"
[[ -f "$input_vcf" ]] || die "Input VCF $input_vcf does not exist."
# Without this substring the substitutions below would leave the names equal
# to the input path, and the final cleanup would delete the input itself.
[[ "$input_vcf" == *.sv.ann.vcf.gz* ]] \
  || die "Input VCF $input_vcf must be named *.sv.ann.vcf.gz"

output_vcf=${input_vcf/.sv.ann.vcf.gz/.sv.ann2.vcf}
unadjusted_vcf=${input_vcf/.sv.ann.vcf.gz/.sv.unadjusted.vcf}
fixedSC_vcf=${input_vcf/.sv.ann.vcf.gz/.sv.fixedSC.vcf}
input_gridss_decompressed=${input_vcf/.vcf.gz/.gridss.vcf}
input_purple_decompressed=${input_vcf/.vcf.gz/.purple.vcf}

# Everything without "purple_" goes to AllocateEvidence; the "purple_" lines
# are kept aside (with a copy of the header) and merged back in afterwards.
gunzip -c "$input_vcf" | grep -v purple_ > "$input_gridss_decompressed"
gunzip -c "$input_vcf" | grep -E "^#" > "$input_purple_decompressed"
gunzip -c "$input_vcf" | grep purple_ >> "$input_purple_decompressed"

# Arrays keep each argument intact even if a path contains whitespace.
gridss_jvm_args=(
  -ea
  -Dsamjdk.create_index=true
  -Dsamjdk.use_async_io_read_samtools=true
  -Dsamjdk.use_async_io_write_samtools=true
  -Dsamjdk.use_async_io_write_tribble=true
  -Dsamjdk.buffer_size=2097152
  -cp "$gridss_jar"
)
gridss_common_args=(
  "TMP_DIR=$out_dir/tmp"
  "WORKING_DIR=$out_dir"
  "REFERENCE_SEQUENCE=$ref_genome"
)

# "||" (not "|"): abort when AllocateEvidence fails. A single pipe would run
# "exit 1" in a subshell, discard the Java output and let the script go on.
java -Xmx4G "${gridss_jvm_args[@]}" gridss.AllocateEvidence \
  ALLOCATE_READS=false \
  "I=$normal_bam" \
  "I=$tumor_bam" \
  "ASSEMBLY=$assembly_bam" \
  "INPUT_VCF=$input_gridss_decompressed" \
  "OUTPUT_VCF=$fixedSC_vcf" \
  "${gridss_common_args[@]}" \
  "CONFIGURATION_FILE=$gridss_config_file" \
  "WORKER_THREADS=$threads" || die "gridss.AllocateEvidence failed."

java -jar "$PICARD_JAR" SortVcf \
  "I=$fixedSC_vcf" \
  "I=$input_purple_decompressed" \
  "O=$unadjusted_vcf" || die "Picard SortVcf failed."

# Need to feed back to R to adjust nominal homology call position back to match gridss_somatic_filter
Rscript "${libgridss}/gridss_adjust_call_positions.R" -i "${unadjusted_vcf}" -o "${output_vcf}" -s "${libgridss}" \
  || die "gridss_adjust_call_positions.R failed."
mv -- "${output_vcf}.bgz" "${output_vcf}.gz" || die "Expected output ${output_vcf}.bgz is missing."
mv -- "${output_vcf}.bgz.tbi" "${output_vcf}.gz.tbi" || die "Expected index ${output_vcf}.bgz.tbi is missing."

# Intermediate files are only removed after every step succeeded, so a failed
# run leaves them in place for inspection.
rm -- "$fixedSC_vcf" "$unadjusted_vcf" "$input_purple_decompressed" "$input_gridss_decompressed"
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|