## Load Variables and Make Directories

In [None]:
# Treat any reference to an unset variable as an error, so a typo in a
# configuration name stops the cell instead of expanding to nothing.
set -u
# Load the shared configuration; it is expected to define every path
# variable used in this notebook (CUROUT, STAR_OUT, GENOME_DIR, ...).
source bioinf_intro_config.sh
# clean up
# ${CUROUT:?...} refuses to run the recursive delete when CUROUT is empty or
# unset, and the quotes keep a path containing spaces as a single argument.
rm -rf -- "${CUROUT:?CUROUT must be set and non-empty}"
# Recreate the output directories used by the cells below.
mkdir -p -- "$STAR_OUT" "$GENOME_DIR" "$MYINFO" "$TRIMMED" "$QC_RAW" "$QC_TRIM"

## Make Adapter File

In [None]:
# Write the adapter FASTA to $ADAPTERS: the two adapter sequences followed by
# their reverse complements (the "_rc" records). The heredoc delimiter is
# quoted so the body is written verbatim.
cat > $ADAPTERS <<'ADAPTER_FASTA'
>Adapter
AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
>AdapterRead2
AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
>Adapter_rc
TGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
>AdapterRead2_rc
ACACTCTTTCCCTACACGACGCTCTTCCGATCT
ADAPTER_FASTA

## Download Genome and Annotation

In [None]:
# Fetch the genome FASTA and the GTF annotation, one wget call per URL,
# saving each download into $GENOME_DIR (-P is --directory-prefix).
for CUR in $FA_URL $GTF_URL; do wget -P ${GENOME_DIR} ${CUR}; done

In [None]:
# Decompress the downloaded annotation and genome in place; -f (--force)
# overwrites any uncompressed copy left over from a previous run.
gunzip -f ${GENOME_DIR}/${GTF}.gz ${GENOME_DIR}/${FA}.gz

## Index Genome

In [None]:
# Build the STAR genome index in $GENOME_DIR from the decompressed FASTA,
# using the GTF (exon features, linked by transcript_id / gene_id) to add
# annotated splice junctions. Log files are prefixed ${STAR_OUT}/genome_.
# NOTE(review): --genomeSAindexNbases 6 presumably reflects a very small
# reference; confirm against the genome size if the reference changes.
STAR \
    --runMode genomeGenerate \
    --runThreadN $THREADS \
    --genomeDir $GENOME_DIR \
    --genomeFastaFiles ${GENOME_DIR}/${FA} \
    --genomeSAindexNbases 6 \
    --sjdbGTFfile ${GENOME_DIR}/${GTF} \
    --sjdbGTFfeatureExon exon \
    --sjdbGTFtagExonParentTranscript transcript_id \
    --sjdbGTFtagExonParentGene gene_id \
    --outFileNamePrefix ${STAR_OUT}/genome_

## Read Quality Control

In [None]:
# Run FastQC on the raw lane-1 and lane-2 R1 FASTQs; reports go to $QC_RAW.
fastqc \
    --outdir $QC_RAW \
    --threads $THREADS \
    --quiet \
    $RAW_FASTQS/21_2019_P_M1_S21_L00[1-2]_R1_001.fastq.gz

With globs and `basename` in our toolbox, we are ready to **conquer the world** — or at least to run multiple FASTQs through our pipeline without breaking a sweat!

## Trim and Map Reads

In [None]:
# For each raw R1 FASTQ matched by the glob: trim it with fastq-mcf, then
# align the trimmed reads with STAR. STAR output files are prefixed with
# ${STAR_OUT}/<fastq base name>_.
for FASTQ in "$RAW_FASTQS"/21_2019_P_M1_S21_L00[1-2]_R1_001.fastq.gz
    do
        # When the glob matches nothing, bash passes the pattern through
        # literally; skip it instead of handing a non-existent file to the tools.
        if [ ! -e "$FASTQ" ]; then
            echo "No FASTQ found, skipping: $FASTQ" >&2
            continue
        fi

        # Strip the directory and the '_001.fastq.gz' suffix to get a short name.
        FASTQ_BASE="$(basename "${FASTQ}" '_001.fastq.gz')"
        TRIMMED_FASTQ="$TRIMMED/${FASTQ_BASE}_001.trim.fastq.gz"

        echo "---------------- TRIMMING: $FASTQ_BASE ----------------"
        # Use the adapter file written to $ADAPTERS earlier in this notebook,
        # rather than a second hard-coded path that could drift out of sync.
        # Do not map when trimming failed: the trimmed file would be missing
        # or incomplete.
        if ! fastq-mcf \
            "$ADAPTERS" \
            "$FASTQ" \
            -q 20 -x 0.5 \
            -o "$TRIMMED_FASTQ"
        then
            echo "fastq-mcf failed for $FASTQ_BASE, skipping mapping" >&2
            continue
        fi

        echo "---------------- MAPPING: $FASTQ_BASE ----------------"
        # --readFilesCommand lets STAR read the gzip-compressed input directly;
        # --quantMode GeneCounts additionally writes per-gene read counts.
        STAR \
            --runMode alignReads \
            --twopassMode None \
            --genomeDir "$GENOME_DIR" \
            --readFilesIn "$TRIMMED_FASTQ" \
            --readFilesCommand gunzip -c \
            --outFileNamePrefix "${STAR_OUT}/${FASTQ_BASE}_" \
            --quantMode GeneCounts \
            --outSAMtype BAM SortedByCoordinate \
            --runThreadN "$THREADS" \
            --alignIntronMax 5000 \
            --outSJfilterIntronMaxVsReadN 500 1000 2000
    done

In [None]:
# Run FastQC on the trimmed lane-1 and lane-2 R1 FASTQs; reports go to $QC_TRIM.
fastqc \
    --outdir $QC_TRIM \
    --threads $THREADS \
    --quiet \
    $TRIMMED/21_2019_P_M1_S21_L00[1-2]_R1_001.trim.fastq.gz

### And let's check the result

In [None]:
# List everything written to the STAR output directory so far.
ls ${STAR_OUT}

In [None]:
# Show the first lines of each per-lane ReadsPerGene.out.tab file; the '?'
# glob matches any single lane digit, and head prints a filename header
# before each file when given more than one.
head ${STAR_OUT}/21_2019_P_M1_S21_L00?_R1_ReadsPerGene.out.tab

In [None]:
# Build three MultiQC summaries, all written into ${STAR_OUT}; --force
# overwrites a report left over from a previous run.
# 1) the STAR output directory
multiqc ${STAR_OUT} \
    --force \
    --filename multiqc_report_counts.html \
    --outdir ${STAR_OUT}
# 2) FastQC results for the raw reads
multiqc $QC_RAW \
    --force \
    --filename multiqc_report_rawfastq.html \
    --outdir ${STAR_OUT}
# 3) FastQC results for the trimmed reads
multiqc $QC_TRIM \
    --force \
    --filename multiqc_report_trimfastq.html \
    --outdir ${STAR_OUT}