diff --git a/src/bis_seq/run_bismark.sh b/src/bis_seq/run_bismark.sh new file mode 100755 index 0000000..8ec0408 --- /dev/null +++ b/src/bis_seq/run_bismark.sh @@ -0,0 +1,291 @@ +#!/usr/bin/env bash + +set -eu +set -o pipefail + + +##### Global parameters ##### +suffix=.fastq.gz +thisdir="$(cd "$(dirname $0)"; pwd)" +srcdir="$(pwd)/src" +refdir="$(pwd)/sequences" +trimmomatic_jar="$(pwd)/lib/Trimmomatic-0.32/trimmomatic-0.32.jar" +trimmomatic="java -Xmx2048m -jar ${trimmomatic_jar}" +bismarkdir="$(pwd)/lib/bismark_v0.12.3" +bowtiedir="$(pwd)/lib/bowtie-1.1.1" +adapters="${refdir}/adapters.fa" + +# DISPATCH: method for running SGE-compatible subscripts +DISPATCH="${DISPATCH:-qsub}" + +##### Parse command-line arguments and ensure dependencies exist ##### +function usage { + cat <_L001_R1_001${suffix}". + +- ${refdir}/bismark/phiX must already exist and be prepared with bismark. +- All reads from the same strand will be merged across lanes and file parts. +- The script creates SGE-compatible bash scripts within the sample directory. +- Depending on the value of the DISPATCH environment variable, scripts will + be run over SGE, run on the local machine, or not run: + 'qsub': scripts will be automatically dispatched + 'bash': scripts will be in serial on the local machine + 'echo': scripts will not be run (their paths will be echoed +EOF + exit 1 +} + +function require_files { + while [[ $# -gt 0 ]]; do + file="$1" + shift + if [[ ! -e "${file}" ]]; then + echo "Unable to find required file: ${file}" >&2 + exit 1 + fi + done +} + +function require_dirs { + while [[ $# -gt 0 ]]; do + dir="$1" + shift + if [[ ! -d "${dir}" ]]; then + echo "Unable to find required directory: ${dir}" >&2 + exit 1 + fi + done +} + +require_files "${trimmomatic_jar}" "${adapters}" +require_dirs "${srcdir}" "${refdir}" "${bismarkdir}" "${bowtiedir}" + +if [[ DISPATCH = "qsub" ]]; then + which qsub > /dev/null +elif [[ "${DISPATCH}" = "echo" ]]; then + require_dir "${TMPDIR}" +fi + +if [[ $# -eq 0 ]]; then + usage +elif [[ $# -ne 2 ]]; then + echo -e "Error: unexpected number of arguments\n" >&2 + usage +fi + + +export PATH="${bismarkdir}:${bowtiedir}:${PATH}" + + +##### Define functions ##### + +function trim_sample { + local lanes=( $(cd "${dir}" && ls *"${suffix}" | perl -pe 's/.*(L\d+).*/\1/' | sort -u) ) + + if [[ "${#lanes[@]}" -lt 1 ]]; then + echo "Unable to find lanes for ${name}" >&2 + return 1 + fi + + if [[ ! -s "${dir}/${name}.R1P.fq.gz" ]]; then + local script="${dir}/pipeline-trim.sh" + cat > "${script}" <> "${name}.R1P.fq.gz" + cat "${name}.\${lane}.R2P.fq.gz" >> "${name}.R2P.fq.gz" + cat "${name}.\${lane}.R1U.fq.gz" >> "${name}.R1U.fq.gz" + cat "${name}.\${lane}.R2U.fq.gz" >> "${name}.R2U.fq.gz" +done + +rm -f "${name}".*.*.fq.gz + +mv *.fq.gz "${dir}/" +EOF + + ${DISPATCH} "${script}" + fi +} + +function unspike { + local assembly=phiX + local ref="${refdir}/bismark/$assembly" + + if [[ ! -e "${dir}/${name}.${assembly}.paired_pe.sam" || ! -e "${dir}/${name}.${assembly}.unpaired_r1.sam" || ! -e "${dir}/${name}.${assembly}.unpaired_r2.sam" ]]; then + local script="${dir}/pipeline-align-${assembly}.sh" + cat > "${script}" < "${script}" < "${script}" < "${name}.${assembly}.merged.cov" + +mv -v "${name}.${assembly}.merged.cov" "${dir}/" + +EOF + ${DISPATCH} "${script}" + fi +} + +function process_dir { + # Set some globals that most functions will need to use + assembly="$1" + dir="$2" + + name="$(basename "${dir}")" + name="${name#Sample_}" + + logdir="${dir}/log" + mkdir -pv "${logdir}" + + ref="${refdir}/bismark/${assembly}" + + # Step 1a: trim + trim_sample + + # Step 1b: remove phiX + unspike + + # Step 2: align with bismark + align + + # Step 3: measure methylation coverage + coverage +} + + +##### Run program ##### +process_dir "$1" "$2" \ No newline at end of file