Skip to content

Commit

Permalink
Added GRIDSS 2.2.3 backport for adjusting the SC field to exclude loc…
Browse files Browse the repository at this point in the history
…al assembly anchor

PapenfussLab/gridss#213
  • Loading branch information
Daniel Cameron committed May 15, 2019
1 parent 81bbcb0 commit cbfafd0
Showing 1 changed file with 81 additions and 0 deletions.
81 changes: 81 additions & 0 deletions hmftools/gridss/do_run_gridss_2_2_3_postpurple_backport_pilot
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/bin/bash
#
# GRIDSS 2.2.3 backport, run on an annotated SV VCF (pilot variant).
#
# Steps performed by this script:
#   1. split the input VCF into lines with and without the string "purple_"
#   2. re-run gridss.AllocateEvidence (ALLOCATE_READS=false) on the lines
#      without "purple_"
#   3. merge both parts again with Picard SortVcf
#   4. run gridss_adjust_call_positions.R on the merged VCF
#
# Argument: $1 - input VCF; the derived file names assume it ends in
#                .sv.ann.vcf.gz
#
# NOTE(review): the original header read "Adjusts the breakpoint position to
# match GRIDSS 2.1.0 calling positions" - confirm which description is current.
#
# NOTE(review): run_dir, base_path, tumor_sample, normal_bam, tumor_bam and
# assembly_bam are not set here; presumably do_run_gridss_lib provides them.
source do_run_gridss_lib

export PATH=$base_path/tools/bedops_v2.4.30/:$PATH

PICARD_JAR=$base_path/tools/picard-tools_v1.135/picard.jar
libgridss=/data/common/repos/scripts/gridss/
gridss_pon=/data/common/dbs/gridss/pon3792v1/

# The -name pattern is quoted so that find receives the glob itself rather
# than whatever it happens to match in the current working directory.
original_vcf=$(find -L "${run_dir}"/structuralVariants/gridss/*/ -type f -name '*gridss.vcf.gz')
raw_somatic_vcf=$(dirname "${original_vcf}")/${tumor_sample}.gridss.somatic.vcf.gz
full_vcf=$(dirname "${original_vcf}")/${tumor_sample}.gridss.somatic.full.vcf.gz
# Derived from the assembly_bam value as it is BEFORE the reassignment below.
assembly_sv_bam=$assembly_bam.gridss.working/$(basename "$assembly_bam").sv.bam
out_dir=$(dirname "$full_vcf")/breakpoint_position_backport

# Everything below reads from out_dir, so stop here when it does not exist.
if [[ ! -d "$out_dir" ]]; then
  echo "Missing backport directory $out_dir. The assembly from this directory is required." >&2
  exit 1
fi

# From here on the BAM variables point at the same-named files inside out_dir.
normal_bam=$out_dir/$(basename "$normal_bam")
tumor_bam=$out_dir/$(basename "$tumor_bam")
assembly_bam=$out_dir/$(basename "$assembly_bam")
somatic_vcf=$out_dir/$(basename "$raw_somatic_vcf" .gz)


# Input VCF to process (first positional argument).
input_vcf=$1

# All intermediate/output names are derived from the .sv.ann.vcf.gz suffix.
# Without that suffix the derived names would be identical to the input path,
# and the input would be overwritten and then deleted by the cleanup step.
if [[ -z "$input_vcf" || "$input_vcf" != *.sv.ann.vcf.gz ]]; then
  echo "Usage: $(basename "$0") <input>.sv.ann.vcf.gz" >&2
  exit 1
fi

input_prefix=${input_vcf%.sv.ann.vcf.gz}
output_vcf=${input_prefix}.sv.ann2.vcf
unadjusted_vcf=${input_prefix}.sv.unadjusted.vcf
fixedSC_vcf=${input_prefix}.sv.fixedSC.vcf
input_gridss_decompressed=${input_prefix}.sv.ann.gridss.vcf
input_purple_decompressed=${input_prefix}.sv.ann.purple.vcf

# Split the input on the literal string "purple_":
#   - every line without it goes to the GRIDSS file
#   - the purple file gets all '#' header lines followed by every line
#     containing "purple_" (grep matches whole lines, header lines included)
gunzip -c "$input_vcf" | grep -v purple_ > "$input_gridss_decompressed"
gunzip -c "$input_vcf" | grep -E "^#" > "$input_purple_decompressed"
gunzip -c "$input_vcf" | grep purple_ >> "$input_purple_decompressed"

# JVM options and classpath for the GRIDSS invocation. Arrays keep every
# option a separate word without relying on unquoted word splitting.
# NOTE(review): gridss_jar, ref_genome, gridss_config_file and threads are not
# set in this script; presumably they come from do_run_gridss_lib.
gridss_jvm_args=(
  -ea
  -Dsamjdk.create_index=true
  -Dsamjdk.use_async_io_read_samtools=true
  -Dsamjdk.use_async_io_write_samtools=true
  -Dsamjdk.use_async_io_write_tribble=true
  -Dsamjdk.buffer_size=2097152
  -cp "$gridss_jar"
)
gridss_common_args=(
  TMP_DIR="$out_dir/tmp"
  WORKING_DIR="$out_dir"
  REFERENCE_SEQUENCE="$ref_genome"
)

# Re-run AllocateEvidence on the GRIDSS part of the input, writing
# $fixedSC_vcf. Abort the script when it fails: the original used
# "| exit 1", which piped java's stdout into a subshell and never stopped the
# script; "||" is the operator that reacts to a non-zero exit status.
java -Xmx4G "${gridss_jvm_args[@]}" gridss.AllocateEvidence \
  ALLOCATE_READS=false \
  I="$normal_bam" \
  I="$tumor_bam" \
  ASSEMBLY="$assembly_bam" \
  INPUT_VCF="$input_gridss_decompressed" \
  OUTPUT_VCF="$fixedSC_vcf" \
  "${gridss_common_args[@]}" \
  CONFIGURATION_FILE="$gridss_config_file" \
  WORKER_THREADS="$threads" || exit 1

# Merge the AllocateEvidence output with the purple records into one sorted
# VCF. Each step below stops the script on failure so that the intermediate
# files are still on disk for inspection instead of being removed.
java -jar "$PICARD_JAR" SortVcf \
  I="$fixedSC_vcf" \
  I="$input_purple_decompressed" \
  O="$unadjusted_vcf" || exit 1

# Need to feed back to R to adjust nominal homology call position back to match gridss_somatic_filter
Rscript "${libgridss}/gridss_adjust_call_positions.R" -i "${unadjusted_vcf}" -o "${output_vcf}" -s "${libgridss}" || exit 1

# Rename the <output>.bgz / <output>.bgz.tbi pair to .gz / .gz.tbi.
mv -- "${output_vcf}.bgz" "${output_vcf}.gz" || exit 1
mv -- "${output_vcf}.bgz.tbi" "${output_vcf}.gz.tbi" || exit 1

# Remove the intermediate files created by this script.
rm -- "$fixedSC_vcf" "$unadjusted_vcf" "$input_purple_decompressed" "$input_gridss_decompressed"








0 comments on commit cbfafd0

Please sign in to comment.