## Align reads to genome using `HISAT2`

In [None]:
# Settings shared by the cells below. Re-run this cell first when restarting
# the analysis from this point.

# Reference directory, the HISAT2 index prefix (passed to hisat2 -x) and the
# gene annotation (passed to stringtie -G)
REF_DIR="/home/ilya/data1/ref/Caenorhabditis_elegans"
INDEX="$REF_DIR/wbcel235/genome"
GENES="$REF_DIR/Ensembl/WBcel235/Annotation/Genes/genes.gtf"

# BASE_DIR and BASE_DIR1 are the data locations of the runs we want to combine
BASE_DIR="../data"
BASE_DIR1="../data/2018-07-25"

# Sample naming pattern (e.g. cm01-cm08)
PATTERN="cm"

# Paths to the hisat2 and stringtie executables; point these at the
# appropriate installed versions.
hisat_exe="$HOME/bin/hisat2-2.1.0/hisat2"
stringtie_exe="$HOME/bin/stringtie-1.3.5.Linux_x86_64/stringtie"
# Alternative stringtie build, kept for reference:
#stringtie_exe="$HOME/bin/stringtie-1.3.3b.Linux_x86_64/stringtie"

In [None]:
# Align the paired-end reads of every sample directory in $BASE_DIR whose name
# matches $PATTERN, writing ../results/<sample>/<sample>.bam.
#
# The gzipped FASTQ files (R1.fastq.gz / R2.fastq.gz) are decompressed on the
# fly through two named pipes so that no uncompressed copy is stored on disk.
# The BAM is written in the order HISAT2 emits the alignments; no sorting is
# done in this cell.
for sample_dir in "$BASE_DIR"/*
do
    sample="${sample_dir##*/}"

    # Same name filter as `ls $BASE_DIR | grep $PATTERN`, but without parsing
    # the output of ls (which breaks on names containing whitespace).
    printf '%s\n' "$sample" | grep -q -e "$PATTERN" || continue

    # Only directories can hold a sample; this also skips the literal,
    # unexpanded glob when $BASE_DIR is empty.
    [ -d "$sample_dir" ] || continue

    fastq1="$sample_dir/R1.fastq.gz"
    fastq2="$sample_dir/R2.fastq.gz"
    if [ ! -r "$fastq1" ] || [ ! -r "$fastq2" ]; then
        echo "Skipping $sample: cannot read $fastq1 and/or $fastq2" >&2
        continue
    fi

    result_dir="../results/$sample"
    if [ ! -d "$result_dir" ]; then
        echo "Creating $result_dir ..."
        mkdir -p "$result_dir"
    fi

    echo "Unzipping reads for $sample ..."
    read1="$sample_dir/read1.fifo"
    read2="$sample_dir/read2.fifo"
    # mkfifo fails if the pipe already exists, e.g. after an interrupted run.
    rm -f "$read1" "$read2"
    mkfifo "$read1" "$read2"

    # Each zcat blocks until hisat2 opens the matching pipe for reading.
    zcat "$fastq1" > "$read1" &
    zcat_pid1=$!
    zcat "$fastq2" > "$read2" &
    zcat_pid2=$!

    "$hisat_exe" -p 30 --dta --quiet \
        -x "$INDEX" \
        -1 "$read1" -2 "$read2" \
    | samtools view -bhS - > "$result_dir/$sample.bam"

    # If hisat2 exited without opening the pipes, the writers are still
    # blocked; terminate them so they do not linger, then reap both. After a
    # normal run they have already finished and kill is a harmless no-op.
    kill "$zcat_pid1" "$zcat_pid2" 2>/dev/null
    wait "$zcat_pid1" "$zcat_pid2" 2>/dev/null

    # Remove only the pipes created above.
    rm -f "$read1" "$read2"

done

## Assemble transcripts and estimate abundances

In [None]:
# Merge the transcript GTF files listed in ../ref/mergelist_combined.txt into
# ../ref/merged_combined.gtf, using $GENES as the reference annotation (-G)
# and 30 threads (-p).
# Expansions are quoted so that paths containing whitespace are passed intact.
"$stringtie_exe" --merge -p 30 -G "$GENES" \
    -o "../ref/merged_combined.gtf" "../ref/mergelist_combined.txt"

In [None]:
# Estimate abundances against the merged annotation for samples 01, 03 and 05
# (seq 1 2 5, zero-padded to two digits) under ../results.
#   -e  restrict estimation to the transcripts given with -G
#   -B  also write Ballgown input tables next to the output GTF
# Input is "$result_dir/$sample.sorted.bam"; that file is not created by any
# step visible in this section, so it must already exist.
for i in $(seq -f "%02g" 1 2 5)
do
    sample="$PATTERN$i"
    result_dir="../results/$sample"
    # Quote the executable path so a $HOME containing whitespace still works.
    "$stringtie_exe" -e -B -p 30 -G "../ref/merged_combined.gtf" \
        -o "$result_dir/${sample}_combined.gtf" \
        "$result_dir/$sample.sorted.bam"
done

In [None]:
# Estimate abundances against the merged annotation for samples 02, 04 and 06
# (seq 2 2 6, zero-padded to two digits) under ../results.
#   -e  restrict estimation to the transcripts given with -G
#   -B  also write Ballgown input tables next to the output GTF
# Unlike the odd-numbered samples, the output here is named "$sample.gtf"
# (no "_combined" suffix).
for i in $(seq -f "%02g" 2 2 6)
do
    sample="$PATTERN$i"
    result_dir="../results/$sample"
    # Quote the executable path so a $HOME containing whitespace still works.
    "$stringtie_exe" -e -B -p 30 -G "../ref/merged_combined.gtf" \
        -o "$result_dir/$sample.gtf" \
        "$result_dir/$sample.sorted.bam"
done

In [None]:
# Estimate abundances against the merged annotation for samples 07, 09 and 11
# (seq 7 2 11, zero-padded to two digits). These samples are read from and
# written to $BASE_DIR1/results rather than ../results.
#   -e  restrict estimation to the transcripts given with -G
#   -B  also write Ballgown input tables next to the output GTF
for i in $(seq -f "%02g" 7 2 11)
do
    sample="$PATTERN$i"
    result_dir="$BASE_DIR1/results/$sample"
    # Quote the executable path so a $HOME containing whitespace still works.
    "$stringtie_exe" -e -B -p 30 -G "../ref/merged_combined.gtf" \
        -o "$result_dir/${sample}_combined.gtf" \
        "$result_dir/$sample.sorted.bam"
done

# Estimate abundances against the merged annotation for samples 08, 10 and 12
# (seq 8 2 12, zero-padded to two digits) under $BASE_DIR1/results.
#   -e  restrict estimation to the transcripts given with -G
#   -B  also write Ballgown input tables next to the output GTF
# The output here is named "$sample.gtf" (no "_combined" suffix).
for i in $(seq -f "%02g" 8 2 12)
do
    sample="$PATTERN$i"
    result_dir="$BASE_DIR1/results/$sample"
    # Quote the executable path so a $HOME containing whitespace still works.
    "$stringtie_exe" -e -B -p 30 -G "../ref/merged_combined.gtf" \
        -o "$result_dir/$sample.gtf" \
        "$result_dir/$sample.sorted.bam"
done