From ee30143a05c260269b54169cf4221068146a1d56 Mon Sep 17 00:00:00 2001 From: Colby Chiang Date: Mon, 9 Feb 2015 16:48:15 -0600 Subject: [PATCH] lumpyexpress seems to work... --- .gitmodules | 3 +++ scripts/bamkit | 1 + scripts/lumpyexpress | 39 ++++++++++++++----------------------- scripts/lumpyexpress.config | 9 +++++---- 4 files changed, 24 insertions(+), 28 deletions(-) create mode 100644 .gitmodules create mode 160000 scripts/bamkit diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..c87d778 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "scripts/bamkit"] + path = scripts/bamkit + url = https://github.com/cc2qe/bamkit.git diff --git a/scripts/bamkit b/scripts/bamkit new file mode 160000 index 0000000..3468eb9 --- /dev/null +++ b/scripts/bamkit @@ -0,0 +1 @@ +Subproject commit 3468eb9b659b63b21f63b965620659414cbe7cf6 diff --git a/scripts/lumpyexpress b/scripts/lumpyexpress index 135e967..248184c 100755 --- a/scripts/lumpyexpress +++ b/scripts/lumpyexpress @@ -416,13 +416,13 @@ then # generate split-read string for LUMPY LUMPY_SPL_STRING="$LUMPY_SPL_STRING -sr bam_file:${TEMP_DIR}/$OUTBASE.sample$(($i+1)).splitters.bam,back_distance:10,min_mapping_threshold:20,weight:1,id:$(($i+1))$(($j+1))1,min_clip:20,${RG_STRING}" - # generate LUMPY sample config file - DISC_BAM=$TEMP_DIR/$OUTBASE.sample$(($i+1)).lib$(($j+1)).discordants.bam - SPL_BAM=${TEMP_DIR}/$OUTBASE.sample$(($i+1)).lib$(($j+1)).splitters.bam - DISC_SAMPLE=`$SAMBAMBA view -H $DISC_BAM | grep -m 1 "^@RG" | awk -v i=$i '{ for (j=1;j<=NF;++j) {if ($j~"^SM:") { gsub("^SM:","",$j); print $j } } }'` - SPL_SAMPLE=`$SAMBAMBA view -H $SPL_BAM | grep -m 1 "^@RG" | awk -v i=$i '{ for (j=1;j<=NF;++j) {if ($j~"^SM:") { gsub("^SM:","",$j); print $j } } }'` - echo -e "$DISC_SAMPLE\t$(($i+1))$(($j+1))0\tPE\t$DISC_BAM" >> $TEMP_DIR/$OUTBASE.sample.config - echo -e "$SPL_SAMPLE\t$(($i+1))$(($j+1))1\tSR\t$SPL_BAM" >> $TEMP_DIR/$OUTBASE.sample.config + # # generate LUMPY sample config file + # DISC_BAM=$TEMP_DIR/$OUTBASE.sample$(($i+1)).lib$(($j+1)).discordants.bam + # SPL_BAM=${TEMP_DIR}/$OUTBASE.sample$(($i+1)).lib$(($j+1)).splitters.bam + # DISC_SAMPLE=`$SAMBAMBA view -H $DISC_BAM | grep -m 1 "^@RG" | awk -v i=$i '{ for (j=1;j<=NF;++j) {if ($j~"^SM:") { gsub("^SM:","",$j); print $j } } }'` + # SPL_SAMPLE=`$SAMBAMBA view -H $SPL_BAM | grep -m 1 "^@RG" | awk -v i=$i '{ for (j=1;j<=NF;++j) {if ($j~"^SM:") { gsub("^SM:","",$j); print $j } } }'` + # echo -e "$DISC_SAMPLE\t$(($i+1))$(($j+1))0\tPE\t$DISC_BAM" >> $TEMP_DIR/$OUTBASE.sample.config + # echo -e "$SPL_SAMPLE\t$(($i+1))$(($j+1))1\tSR\t$SPL_BAM" >> $TEMP_DIR/$OUTBASE.sample.config done # merge the splitters and discordants files @@ -464,8 +464,8 @@ then # else (user provided a splitter and discordants file) else - # initialize LUMPY sample config file for generating the VCF - > $TEMP_DIR/$OUTBASE.sample.config + # # initialize LUMPY sample config file for generating the VCF + # > $TEMP_DIR/$OUTBASE.sample.config # parse the libraries in the BAM header to extract readgroups from the same library for i in $( seq 0 $(( ${#FULL_BAM_LIST[@]}-1 )) ) @@ -484,7 +484,7 @@ else do # calculate read length if not provided LIB_READ_LENGTH_LIST+=(`$SAMBAMBA view ${FULL_BAM_LIST[$i]} | head -n 10000 | awk 'BEGIN { MAX_LEN=0 } { LEN=length($10); if (LEN>MAX_LEN) MAX_LEN=LEN } END { print MAX_LEN }'`) - echo "Library read groups:${LIB_RG_LIST[$j]}" + echo "Library read groups: ${LIB_RG_LIST[$j]}" echo "Library read length: ${LIB_READ_LENGTH_LIST[$j]}" $SAMBAMBA view -h -F 'paired and mate_is_reverse_strand and not (unmapped or mate_is_unmapped or reverse_strand or secondary_alignment or duplicate)' ${FULL_BAM_LIST[$i]} \ | $PYTHON $BAMFILTERRG -S -n 4000000 --readgroup ${LIB_RG_LIST[$j]} \ @@ -498,8 +498,8 @@ else # construct LUMPY_SPL_STRING SPL_SAMPLE=`$SAMBAMBA view -H $SPL_BAM | grep -m 1 "^@RG" | awk -v i=$i '{ for (j=1;j<=NF;++j) {if ($j~"^SM:") { gsub("^SM:","",$j); print $j } } }'` LUMPY_SPL_STRING="$LUMPY_SPL_STRING -sr bam_file:${SPL_BAM},back_distance:10,min_mapping_threshold:20,weight:1,id:${SPL_SAMPLE},min_clip:20" - # append to the sample config file - echo -e "$SPL_SAMPLE\t$(($i+1))01\tSR\t$SPL_BAM" >> $TEMP_DIR/$OUTBASE.sample.config + # # append to the sample config file + # echo -e "$SPL_SAMPLE\t$(($i+1))01\tSR\t$SPL_BAM" >> $TEMP_DIR/$OUTBASE.sample.config # construct LUMPY_DISC_STRING for j in $( seq 0 $(( ${#LIB_RG_LIST[@]}-1 )) ) @@ -513,8 +513,8 @@ else LUMPY_DISC_STRING="$LUMPY_DISC_STRING -pe bam_file:${DISC_BAM},histo_file:${TEMP_DIR}/$OUTBASE.sample$(($i+1)).lib$(($j+1)).x4.histo,mean:${MEAN},stdev:${STDEV},read_length:${LIB_READ_LENGTH_LIST[$j]},min_non_overlap:${LIB_READ_LENGTH_LIST[$j]},discordant_z:5,back_distance:10,weight:1,id:${DISC_SAMPLE},min_mapping_threshold:20,${RG_STRING}" - # append to the sample config file - echo -e "$DISC_SAMPLE\t$(($i+1))$(($j+1))0\tPE\t$DISC_BAM" >> $TEMP_DIR/$OUTBASE.sample.config + # # append to the sample config file + # echo -e "$DISC_SAMPLE\t$(($i+1))$(($j+1))0\tPE\t$DISC_BAM" >> $TEMP_DIR/$OUTBASE.sample.config done done fi @@ -537,16 +537,7 @@ $LUMPY $PROB_CURVE -t ${TEMP_DIR}/${OUTBASE} -mw $MIN_WEIGHT -tt $TRIM_THRES \ $EXCLUDE_BED_FMT \ $LUMPY_DISC_STRING \ $LUMPY_SPL_STRING \ - > $TEMP_DIR/$OUTBASE.sv.vcf - -# write output vcf file -cat $TEMP_DIR/$OUTBASE.sv.vcf \ - | grep -v "^#" \ - | sort -k1,1 -k2,2n \ - | cat $TEMP_DIR/header.txt - \ - | $BGZIP -c \ - > $OUTPUT.sv.vcf.gz -$TABIX -p vcf $OUTPUT.sv.vcf.gz + > $OUTPUT.sv.vcf # clean up if [[ "$KEEP" -eq 0 ]] diff --git a/scripts/lumpyexpress.config b/scripts/lumpyexpress.config index 99f9d46..fdc22ab 100644 --- a/scripts/lumpyexpress.config +++ b/scripts/lumpyexpress.config @@ -3,7 +3,8 @@ # general LUMPY_HOME=~/code/lumpy-sv-dev -LUMPY=`which lumpy || true` +# LUMPY=`which lumpy || true` +LUMPY=$LUMPY_HOME/bin/lumpy SAMBAMBA=`which samtools || true` SAMBLASTER=`which samblaster || true` @@ -13,9 +14,9 @@ PYTHON=`which python || true` # python scripts PAIREND_DISTRO=$LUMPY_HOME/scripts/pairend_distro.py -BAMGROUPREADS=$LUMPY_HOME/scripts/bamgroupreads.py -BAMFILTERRG=$LUMPY_HOME/scripts/bamfilterrg.py -BAMLIBS=$LUMPY_HOME/scripts/bamlibs.py +BAMGROUPREADS=$LUMPY_HOME/scripts/bamkit/bamgroupreads.py +BAMFILTERRG=$LUMPY_HOME/scripts/bamkit/bamfilterrg.py +BAMLIBS=$LUMPY_HOME/scripts/bamkit/bamlibs.py # SAMBAMBA=$SCRIPTS_DIR/sambamba