**Transfer data from ImStor to scratch space on the O2 server**

```bash
# Copy the sequencing run from ImStor into the current directory.
# -a (archive) already implies -r; -P shows progress and keeps partial files.
# The remote path must stay on a single line, otherwise the shell treats the
# remainder as a separate command.
rsync -aP ajn16@transfer.rc.hms.harvard.edu:/n/files/ImStor/sorger/data/rnaseq/ajit_johnson/melanoma_rarecyte .
```
***

**INITIAL QUALITY CONTROL**
**FASTQC ANALYSIS**

```bash
# Create the directory that will receive the FastQC reports and enter it
mkdir fastqc
cd fastqc
# Create the SLURM job script (its contents are shown in the next block)
vim fastqc.run
```
***

```bash
#!/bin/bash
# SLURM job: run FastQC on every raw FASTQ file and collect the reports in
# ../fastqc (relative to the raw_files directory).
#SBATCH -p short                # partition name
#SBATCH -t 0-2:00               # days-hours:minutes runlimit after which job will be killed
#SBATCH -c 16                   # number of cores requested -- this needs to be greater than or equal to the number of cores you plan to use to run your job
#SBATCH --job-name fastqc       # Job name
#SBATCH --mem=70G
#SBATCH -o fastqc_%j.out        # File to which standard out will be written (%j = job ID)
#SBATCH -e fastqc_%j.err        # File to which standard err will be written (%j = job ID)
#SBATCH --mail-type=END         # Type of email notification- BEGIN,END,FAIL,ALL
#SBATCH --mail-user=ajitj_nirmal@dfci.harvard.edu   # Email to which notifications will be sent


## Changing directories to where the fastq files are located
## (abort if this fails so FastQC never runs in the wrong directory)
cd /n/scratch2/ajit/melanoma_rarecyte/raw_files || exit 1

## Loading modules required for script commands
module load fastqc/0.11.5 || exit 1

## Running FASTQC
## Use every core granted by SLURM (falls back to 8 outside a job) and write
## the reports straight into the output directory instead of moving them later
mkdir -p ../fastqc || exit 1
fastqc -t "${SLURM_CPUS_PER_TASK:-8}" -o ../fastqc *.fastq
```
***

**Run the job**

```bash
sbatch fastqc.run
```

**MultiQC analysis for aggregating results across many samples into a single report**

```bash
# NOTE(review): round1/ is not created anywhere in this document -- confirm it
# is the directory that holds the FastQC reports
cd round1/
# Load MultiQC together with the compiler and Python modules listed with it
module load gcc/6.2.0  python/2.7.12 multiqc/1.5
# Run MultiQC on the current directory
multiqc .
```

***

**Trimming the reads using trimmomatic**

```bash

# Create the trimming work directory
mkdir trimmomatic
# Copy the raw files into the trimming work directory
cp -r /n/scratch2/ajit/melanoma_rarecyte/raw_files/* /n/scratch2/ajit/melanoma_rarecyte/trimmomatic

# Create the trimming script; everything below this line is its content
vim auto_trim.sh

# Load Trimmomatic 0.36; the script calls $TRIMMOMATIC/trimmomatic-0.36.jar below
module load trimmomatic/0.36

# Execution
# Case(1) run on paired-end (PE) files with extension *.fastq
# $ sh auto_trim.sh *.fastq

# Terminal colour escape sequences used in the messages below. tput fails when
# there is no usable terminal (for example inside a batch job); in that case
# the variables are simply left empty and the messages are printed uncoloured.
red=$(tput setaf 1 2>/dev/null || true)
green=$(tput setaf 2 2>/dev/null || true)
yellow=$(tput setaf 3 2>/dev/null || true)
reset=$(tput sgr0 2>/dev/null || true)
# Argument counter, starts at zero
count=0

# Print the command-line help on stdout and terminate the script.
# Globals:   green, yellow, reset (read) -- colour escape sequences, may be empty
# Arguments: none
# Exits:     always, with status 0
usage() {
  # The script is referred to by its real file name, auto_trim.sh, throughout.
  printf '%sUsage: sh auto_trim.sh [*.extension]\n' "${green}"
  printf '       extension: <fq> or <fastq> or <fq.gz> or <fastq.gz>\n'
  printf '       example: sh auto_trim.sh *.fq.gz\n %s\n' "${reset}"
  printf '%sHelp:  sh auto_trim.sh -h or --help%s\n' "${yellow}" "${reset}"
  exit 0
}

# Report a missing input file and abort the script.
# Globals:   red, green, reset (read) -- colour escape sequences, may be empty
# Arguments: the name(s) to mention in the message
# Outputs:   diagnostic on stderr
# Exits:     always, with status 1 so callers and schedulers see the failure
file_not_found() {
  printf '\n%sFileNotFoundError: No such file with extension %s found!%s\n' \
    "${red}" "$*" "${reset}" >&2
  printf '%sSupported extensions are: <.fq> or <.fastq> or <.fq.gz> or <.fastq.gz>%s\n\n' \
    "${green}" "${reset}" >&2
  exit 1
}

# Report a file whose name lacks the _R1/_R2 read marker and abort the script.
# Globals:   red, green, reset (read) -- colour escape sequences, may be empty
# Arguments: none
# Outputs:   diagnostic on stderr
# Exits:     always, with status 1 so callers and schedulers see the failure
file_name_error() {
  printf '\n%sFilename Error: Paired end file names should contain _R1 _R2%s\n' \
    "${red}" "${reset}" >&2
  printf '%sExample: test_R1.fq.gz, test_R2.fq.gz%s\n\n' "${green}" "${reset}" >&2
  exit 1
}

# Report an unsupported file extension and abort the script.
# Globals:   red, green, reset (read) -- colour escape sequences, may be empty
# Arguments: none
# Outputs:   diagnostic on stderr
# Exits:     always, with status 1 so callers and schedulers see the failure
file_extension_error() {
  printf '\n%sFileExtensionError: Invalid extension%s\n' "${red}" "${reset}" >&2
  printf '%sSupported extensions are: <.fq> or <.fastq> or <.fq.gz> or <.fastq.gz>%s\n\n' \
    "${green}" "${reset}" >&2
  exit 1
}

# Main entry point: show the help, reject an empty command line, or trim every
# paired-end sample named on the command line.
#
# Every argument must be an existing file. Shell scripts (*.sh, *.sh~) picked
# up by a broad glob are skipped. Any other file must end in .fq, .fastq,
# .fq.gz or .fastq.gz and carry _R1 or _R2 in its name. Trimmomatic runs once
# per pair and is triggered by the _R1 file itself, so the result does not
# depend on the order or the number of the arguments.
if [[ $1 == '-h' || $1 == '--help' ]]; then
  usage
elif [[ $# -eq 0 ]]; then
  echo "${red}Error: No parameter(s) provided${reset}" >&2
  # usage terminates the shell it runs in, so call it in a subshell to be able
  # to finish with a failure status afterwards.
  (usage) >&2
  exit 1
else
  for i in "$@"; do
    # An unmatched glob such as *.fastq arrives here literally and is reported.
    [[ -f $i ]] || file_not_found "$i"

    # Match on the file name suffix so that extra dots in a sample name do not
    # confuse the extension check.
    case $i in
      *.sh | *.sh~)
        continue
        ;;
      *.fastq.gz | *.fq.gz | *.fastq | *.fq) ;;
      *)
        echo -ne "${red}Check:$i${reset}" >&2
        file_extension_error
        ;;
    esac

    # Check the read marker of the current file (not of the first argument).
    case $i in
      *_R1*) ;;
      *_R2*)
        # Processed together with its _R1 mate.
        continue
        ;;
      *)
        file_name_error
        ;;
    esac

    # Split "<sample>_R1<extension>" at the first _R1.
    sample_name=${i%%_R1*}
    extension=${i#*_R1}
    R1=${sample_name}_R1${extension}
    R2=${sample_name}_R2${extension}
    R1_pair=${sample_name}_R1_pair${extension}
    R1_unpair=${sample_name}_R1_unpair${extension}
    R2_pair=${sample_name}_R2_pair${extension}
    R2_unpair=${sample_name}_R2_unpair${extension}

    # Paired-end trimming needs both mates.
    [[ -f $R2 ]] || file_not_found "$R2"

    echo -e "\n${yellow}[Running trimmomatic for sample] ${sample_name} at $(whoami)${reset}\n"
    date
    # NOTE(review): TRIMMOMATIC is presumably exported by the trimmomatic
    # module loaded at the top of the script -- confirm.
    if time java -jar "${TRIMMOMATIC}/trimmomatic-0.36.jar" \
      PE \
      "$R1" "$R2" "$R1_pair" "$R1_unpair" "$R2_pair" "$R2_unpair" \
      HEADCROP:7 \
      LEADING:20 \
      TRAILING:20 \
      SLIDINGWINDOW:20:20 \
      MINLEN:50; then
      # Discard the unpaired reads of this sample only, rather than every
      # file in the directory whose name happens to contain "unpair".
      rm -f -- "$R1_unpair" "$R2_unpair"
    else
      echo "${red}Error: trimmomatic failed for sample ${sample_name}${reset}" >&2
      exit 1
    fi
  done
fi
```

***

**Run Trimmomatic**

```bash
# auto_trim.sh uses bash-only syntax ([[ ... ]]), so run it with bash; a plain
# POSIX sh (e.g. dash) would reject it
bash auto_trim.sh *.fastq
```
***

```bash
# Keep only the trimmed "pair" files. The && matters: if the cd fails, find
# must not run and delete files in whatever directory we happen to be in.
cd trimmomatic && find . -type f ! -name "*pair*" -delete
```
```bash
# Change the file names so that the old alignment.csv can be used:
# strip every "_pair" from the name of each FASTQ file in the current directory
for filename in *.fastq; do
    # Skip the literal pattern when the glob matched nothing
    [ -f "$filename" ] || continue
    renamed=${filename//_pair/}
    # Nothing to strip: skip, otherwise mv fails because source and target are the same file
    [ "$renamed" != "$filename" ] || continue
    # -- protects against file names that start with a dash
    mv -- "$filename" "$renamed"
done
```

**Run Alignment**

```bash
# Download the Ensembl release 96 transcriptome (cDNA) and annotation (GTF)
# into reference/, the directory the YAML configuration points to
mkdir -p reference
wget -P reference ftp://ftp.ensembl.org/pub/release-96/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz
wget -P reference ftp://ftp.ensembl.org/pub/release-96/gtf/homo_sapiens/Homo_sapiens.GRCh38.96.gtf.gz
# Decompress only the downloaded archives, not every file in the current directory
gunzip reference/*.gz
```

***
**Prepare the configuration/yaml file**

```bash
wget https://labsyspharm.github.io/rnaseq/example_settings/rna_seq/O2.yaml
vim O2.yaml

details:
  - analysis: RNA-seq
    genome_build: hg38
    algorithm:
      transcriptome_fasta: /n/scratch2/ajit/melanoma_rarecyte/reference/Homo_sapiens.GRCh38.cdna.all.fa
      transcriptome_gtf: /n/scratch2/ajit/melanoma_rarecyte/reference/Homo_sapiens.GRCh38.96.gtf
      aligner: hisat2
      strandedness: unstranded
upload:
  dir: ../final
```

***
**Create a sample description file**

```bash
# Run from the project root: the glob below uses the relative path
# trimmomatic/, so it matches nothing from inside the trimmomatic directory
cd /n/scratch2/ajit/melanoma_rarecyte
# One row per FASTQ file: absolute path, then the S<number> token taken from the file name
(echo 'samplename,description'; for f in trimmomatic/*fastq*; do readlink -f "$f" | perl -pe 's/(.*?_(S[0-9]+)_.*)/$1,$2/'; done) > alignment.csv
```
***

**Initiate bcBio**

```bash
module load bcbio/latest
# NOTE(review): presumably cleared so that bcbio uses its own Python packages -- confirm
unset PYTHONPATH
# Run bcbio in template mode with the O2.yaml template, the alignment.csv
# sample sheet and the trimmomatic/ directory as inputs
bcbio_nextgen.py -w template O2.yaml alignment.csv trimmomatic/
```
***

**Run bcBio**
```bash
vim submit_bcbio.sh
```

```bash
#!/bin/sh
# SLURM controller job for the bcbio run: it starts bcbio_nextgen.py, which
# is asked (-t ipython -s slurm) to schedule its own jobs on the medium queue.
#SBATCH -p medium
#SBATCH -J bcbio_O2
#SBATCH -o run.o
#SBATCH -e run.e
#SBATCH -t 1-00:00
# -c and --cpus-per-task are the same option; the conflicting "-c 16" was
# dropped and a single core request is kept.
#SBATCH --cpus-per-task=3
#SBATCH --mem=64G
#SBATCH --mail-type=END         # Type of email notification- BEGIN,END,FAIL,ALL
#SBATCH --mail-user=ajitj_nirmal@dfci.harvard.edu   # Email to which notifications will be sent

export PATH=/n/app/bcbio/tools/bin:$PATH
bcbio_nextgen.py ../config/alignment.yaml \
    -n 24 -t ipython -s slurm -q medium -r t=1-00:00 --timeout 2000

# Submit this script from the shell with the command below. It must not be
# part of the script itself, otherwise every job would resubmit itself:
#   sbatch submit_bcbio.sh
```