From e7e83821e4e0e9c1c3d6905a5e0bf25084554559 Mon Sep 17 00:00:00 2001 From: Christina Bergey Date: Fri, 19 Feb 2016 00:42:15 -0500 Subject: [PATCH] Switched from bwa aln to mem. Not fully switched for single end reads. --- full_analysis.mk | 37 +++++++------------------------------ indiv_analysis | 2 +- scripts/align.sh | 23 ++++------------------- test.sh | 1 + 4 files changed, 13 insertions(+), 50 deletions(-) create mode 100755 test.sh diff --git a/full_analysis.mk b/full_analysis.mk index 4f5847e..bc6108f 100644 --- a/full_analysis.mk +++ b/full_analysis.mk @@ -27,11 +27,9 @@ else ifeq ($(READ_TYPE),PE) endif # --- alignment_steps ifeq ($(READ_TYPE),SE) - align : results/${IND_ID}.readSE.bwa.${GENOME_NAME}.sai - sampe_or_samse : results/${IND_ID}.SE.bwa.${GENOME_NAME}.sam + align : results/${IND_ID}.SE.bwa.${GENOME_NAME}.sam else ifeq ($(READ_TYPE),PE) - align : results/${IND_ID}.read1.bwa.${GENOME_NAME}.sai - sampe_or_samse : results/${IND_ID}.PE.bwa.${GENOME_NAME}.sam + align : results/${IND_ID}.PE.bwa.${GENOME_NAME}.sam endif sam2bam : results/${IND_ID_W_PE_SE}.bwa.${GENOME_NAME}.sam.bam sort_and_index_bam : results/${IND_ID_W_PE_SE}.bwa.${GENOME_NAME}.sam.bam.sorted.bam.bai @@ -56,7 +54,7 @@ compress_and_upload : results/${IND_ID_W_PE_SE}.bwa.${GENOME_NAME}.tar.gz # --- Individual steps preliminary_steps : index_genome make_dict pre_aln_analysis_steps : fastqc -alignment_steps : align sampe_or_samse sam2bam sort_and_index_bam get_alignment_stats +alignment_steps : align sam2bam sort_and_index_bam get_alignment_stats post_alignment_filtering_steps : fix_mate_pairs filter_unmapped add_read_groups filter_bad_qual snp_calling_steps : local_realign_targets local_realign call_snps filter_snps get_snp_stats call_consensus archive_steps : compress_and_upload @@ -148,38 +146,17 @@ reports/${IND_ID}.readSE.stats.zip : ${READ_SE} ${FASTQC}/* #scripts/run_fastqc. # --- Align reads to genome with BWA # -------------------------------------------------------------------------------------- # -# Alignment output (*.sai) depends on bwa, the reads FASTAs, the genome (index), and align.sh -# Using the first read as a stand in for the both -results/${IND_ID}.read1.bwa.${GENOME_NAME}.sai : ${BWA}/* ${READ1} ${READ2} ${GENOME_FA}i #scripts/align.sh +# Alignment output (*.sam) depends on bwa, the reads FASTAs, the genome (index), and align.sh +results/${IND_ID}.PE.bwa.${GENOME_NAME}.sam : ${BWA}/* ${READ1} ${READ2} ${GENOME_FA}i #scripts/align.sh @echo "# === Aligning reads to genome ================================================ #"; ./scripts/align.sh ${GENOME_FA} ${GENOME_NAME}; -# Read 2 depends on read 1 -ifeq ($(READ_TYPE),PE) - results/${IND_ID}.read2.bwa.${GENOME_NAME}.sai : results/${IND_ID}.read1.bwa.${GENOME_NAME}.sai -endif - # Align SE reads -# Alignment output (*.sai) depends on bwa, the reads FASTAs, the genome (index), and alignSE.sh -results/${IND_ID}.readSE.bwa.${GENOME_NAME}.sai : ${BWA}/* ${READ_SE} ${GENOME_FA}i #scripts/align.sh +# Alignment output (*.sam) depends on bwa, the reads FASTAs, the genome (index), and alignSE.sh +results/${IND_ID}.SE.bwa.${GENOME_NAME}.sam : ${BWA}/* ${READ_SE} ${GENOME_FA}i #scripts/align.sh @echo "# === Aligning SE reads to genome ============================================= #"; ./scripts/alignSE.sh ${GENOME_FA} ${GENOME_NAME}; -# -------------------------------------------------------------------------------------- # -# --- Run sampe and samse to generate SAM files -# -------------------------------------------------------------------------------------- # - -# sampe output (*.sam) depends on *.sai files and sampe.sh -# Using the first read as a stand in for the both -results/${IND_ID}.PE.bwa.${GENOME_NAME}.sam : results/${IND_ID}.read1.bwa.${GENOME_NAME}.sai #scripts/sampe.sh - @echo "# === Making SAM file from PE reads =========================================== #"; - ./scripts/sampe.sh ${GENOME_FA} ${GENOME_NAME}; - -# samse output (*.sam) depends on *.sai file and sampe.sh -results/${IND_ID}.SE.bwa.${GENOME_NAME}.sam : results/${IND_ID}.readSE.bwa.${GENOME_NAME}.sai #scripts/sampe.sh - @echo "# === Making SAM file from SE reads =========================================== #"; - ./scripts/samse.sh ${GENOME_FA} ${GENOME_NAME}; - # -------------------------------------------------------------------------------------- # # --- Convert SAM file to BAM file # -------------------------------------------------------------------------------------- # diff --git a/indiv_analysis b/indiv_analysis index b6a5cd5..186b1b1 100755 --- a/indiv_analysis +++ b/indiv_analysis @@ -4,4 +4,4 @@ # This compares a single individual # Input data must be specified by editing config.mk -make -s -f full_analysis.mk indiv \ No newline at end of file +make -s -f full_analysis.mk indiv diff --git a/scripts/align.sh b/scripts/align.sh index f06e63b..0481ca8 100755 --- a/scripts/align.sh +++ b/scripts/align.sh @@ -1,7 +1,7 @@ #!/bin/sh # ------------------------------------------------------------------------------ -# --- Align reads to genome with BWA +# --- Align reads to genome with BWA mem # ------------------------------------------------------------------------------ # Check that genome FASTA, and genome code were passed as parameters @@ -15,24 +15,9 @@ fi GENOME_PATH=$(echo $1 | sed 's/.[^.]*$//g') GENOME_CODE=$2 -echo "CMD: $BWA/bwa aln \ +$BWA/bwa mem \ $BWA_ALN_PARAM \ $GENOME_PATH \ - $READ1 > results/${IND_ID}.read1.bwa.${GENOME_CODE}.sai"; + $READ1 $READ2 > results/${IND_ID}.PE.bwa.${GENOME_CODE}.sam -$BWA/bwa aln \ - $BWA_ALN_PARAM \ - $GENOME_PATH \ - $READ1 > results/${IND_ID}.read1.bwa.${GENOME_CODE}.sai - -echo "CMD: $BWA/bwa aln \ - $BWA_ALN_PARAM \ - $GENOME_PATH \ - $READ2 > results/${IND_ID}.read2.bwa.${GENOME_CODE}.sai"; - -$BWA/bwa aln \ - $BWA_ALN_PARAM \ - $GENOME_PATH \ - $READ2 > results/${IND_ID}.read2.bwa.${GENOME_CODE}.sai - -exit; \ No newline at end of file +exit; diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..1529fa2 --- /dev/null +++ b/test.sh @@ -0,0 +1 @@ +make --just-print -s -f full_analysis.mk indiv IND_ID=ssese1