**Rename files by removing the first 22 characters (`cut -c23-` keeps everything from character 23 onward)**

```
# Strip a fixed-width prefix from every name in the current directory.
# `cut -c23-` keeps character 23 onward, i.e. it drops the first 22 characters.
for file in *; do
  new_name=$(printf '%s\n' "$file" | cut -c23-)
  # Names of 22 characters or fewer would become empty; leave those untouched
  # instead of calling mv with a missing/empty destination.
  [ -n "$new_name" ] || continue
  # Quotes and `--` keep names with spaces or a leading dash intact.
  mv -- "$file" "$new_name"
done
```

**Unzip all the files**
```
# Decompress every .bz2 archive in the current directory in place
# (each file.bz2 is replaced by file).
bzip2 -d *.bz2
```

**Create the to_merge.csv file**

```
# Build to_merge.csv: a header line followed by one row per FASTQ file in raw/.
# Each row is "<absolute path>,<S-number>", where the S-number is the first
# "_S<digits>_" token found in the path. Paths without such a token are
# written unchanged (no second column).
{
  echo 'samplename,description'
  for f in raw/*fastq; do
    # If the glob matched nothing it stays literal; skip it rather than
    # feeding a bogus path to readlink.
    [ -e "$f" ] || continue
    readlink -f -- "$f" | perl -pe 's/(.*?_(S[0-9]+)_.*)/$1,$2/'
  done
} > to_merge.csv
```



**Merge multiple files into a single file**

```
#!/bin/sh
# merge.sh - Slurm batch script that runs bcbio_prepare_samples.py on the
# samples listed in to_merge.csv and writes the merged files to raw/merged.
# Submit it from the directory that contains raw/ and to_merge.csv.
#SBATCH -p priority
#SBATCH -J merge
#SBATCH -o run.o
#SBATCH -e run.e
#SBATCH -t 0-12:00
#SBATCH --cpus-per-task=20
#SBATCH --mail-type=END         # Type of email notification- BEGIN,END,FAIL,ALL
#SBATCH --mail-user=ajitj_nirmal@dfci.harvard.edu   # Email to which notifications will be sent

module load bcbio/latest
# Stop if raw/ is missing instead of merging in the wrong directory.
cd raw || exit 1
# The sample sheet is created as to_merge.csv one level above raw/.
bcbio_prepare_samples.py --memory-per-job 2G --out merged --csv ../to_merge.csv

# NOTE: the line below is the submission command, run from the login shell.
# Do not save it inside merge.sh, or the job would try to resubmit itself.
sbatch merge.sh
```


**Create the alignment.csv file**

```
# Build alignment.csv: a header line followed by one row per merged FASTQ in
# merged/. Each row is "<absolute path>,<S-number>", where the S-number is the
# first "_S<digits>_" token found in the path. Paths without such a token are
# written unchanged (no second column).
{
  echo 'samplename,description'
  for f in merged/*fastq.gz; do
    # If the glob matched nothing it stays literal; skip it.
    [ -e "$f" ] || continue
    readlink -f -- "$f" | perl -pe 's/(.*?_(S[0-9]+)_.*)/$1,$2/'
  done
} > alignment.csv
```

**Prepare the O2.yaml file**

```
# Move up one directory and open O2.yaml in vim.
cd ..
vim O2.yaml

# Contents to enter into O2.yaml (YAML, not shell commands). This file is the
# template later passed to `bcbio_nextgen.py -w template`.
details:
  - analysis: RNA-seq
    genome_build: mmul10
    algorithm:
      aligner: hisat2
      strandedness: unstranded
upload:
  dir: ../final
```

**Initiate bcbio**

```
# Load the bcbio module and clear PYTHONPATH before calling bcbio.
module load bcbio/latest
unset PYTHONPATH
# Template mode: YAML template, sample CSV, then the directory of input files.
bcbio_nextgen.py \
  -w template \
  O2.yaml \
  alignment.csv \
  merged/
```

**Create the submission script for O2**

```
# Open submit_bcbio.sh in vim and save the script below (from the #!/bin/sh line down).
vim submit_bcbio.sh

#!/bin/sh
# submit_bcbio.sh - Slurm batch script that runs bcbio on ../config/alignment.yaml
# using the cores of a single node (-t local).
#SBATCH -p priority
#SBATCH -J pickseq
#SBATCH -o run.o
#SBATCH -e run.e
#SBATCH -t 10-00:00
#SBATCH --cpus-per-task=20
#SBATCH --mem=150G
#SBATCH --mail-type=END         # Type of email notification- BEGIN,END,FAIL,ALL
#SBATCH --mail-user=ajitj_nirmal@dfci.harvard.edu   # Email to which notifications will be sent

export PATH="/n/app/bcbio/tools/bin:$PATH"
# Use the number of cores Slurm actually allocated (falls back to 20, the
# --cpus-per-task value above) instead of a hard-coded 24, which would
# oversubscribe the 20-CPU allocation.
bcbio_nextgen.py ../config/alignment.yaml \
    -n "${SLURM_CPUS_PER_TASK:-20}" -t local
```


**Submit job to O2 for processing**

```
# Copy the submission script into the bcbio work directory and submit it from
# there; the script refers to ../config/alignment.yaml relative to that
# directory. The steps are chained with && so the job is never submitted from
# the wrong directory if the copy or cd fails. The trailing slash makes cp fail
# if alignment/work is not an existing directory.
cp submit_bcbio.sh alignment/work/ &&
  cd alignment/work &&
  sbatch submit_bcbio.sh
```


## Align viral genome: Pre-processing

**Extract the unaligned reads from the BAM files**

```
# Collect every *-ready.bam file found under alignment/final/S* into one
# scratch directory, one cp call per file (-p keeps mode and timestamps,
# -v prints each copy).
find alignment/final/S* \
  -name '*-ready.bam' \
  -exec cp -prv {} /n/scratch3/users/a/ajn16/covid/bamfiles/ \;

```

**Loop through all the BAM files to extract the unmapped reads**


```
module load sambamba/0.7.1

# -p: do not fail when the directory is left over from a previous run.
mkdir -p unmapped

# For every BAM in the current directory, keep only reads that are unmapped or
# whose mate is unmapped, and write them to unmapped/<same file name>.
for f in *.bam; do
  # If no .bam files exist the glob stays literal; skip it.
  [ -e "$f" ] || continue
  echo "Processing $f"
  sambamba view -f bam "$f" -F "(unmapped or mate_is_unmapped)" > "unmapped/$f"
done


```

**Convert the bam files to FASTQ**

**step:1 Sort all the bam files**
```
# Sort all the bam files by read name (samtools sort -n) into sorted/.
# Output name: the text before the first '-' in the input name + "_sorted.bam".

mkdir -p sorted

suffix=_sorted.bam
for f in *.bam; do
  # If no .bam files exist the glob stays literal; skip it.
  [ -e "$f" ] || continue
  sample=${f%%-*}   # same result as `echo "$f" | cut -d - -f 1`
  samtools sort -n "$f" > "sorted/${sample}${suffix}"
done

```

**step:2 Convert sorted bamfile to fastq files**

```
# Convert each sorted BAM in the current directory to paired FASTQ files
# unmapped_fastq/<sample>_R1.fq and unmapped_fastq/<sample>_R2.fq, where
# <sample> is the text before the first '_' in the BAM file name.
module load gcc/6.2.0 bedtools/2.27.1

# bedtools does not create the output directory; make sure it exists.
mkdir -p unmapped_fastq

r1_suffix=_R1.fq
r2_suffix=_R2.fq
for f in *.bam; do
  # If no .bam files exist the glob stays literal; skip it.
  [ -e "$f" ] || continue
  sample=${f%%_*}   # same result as `echo "$f" | cut -d _ -f 1`
  echo "Processing $sample"
  bedtools bamtofastq -i "$f" \
    -fq "unmapped_fastq/${sample}${r1_suffix}" \
    -fq2 "unmapped_fastq/${sample}${r2_suffix}"
done

```


## Align the leftover reads to the viral genome

**Create the alignment.csv file**

```
# Build alignment.csv: a header line followed by one row per .fq file in raw/.
# Each row is "<absolute path>,<S-number>", where the S-number is the first
# "_S<digits>_" token found in the path.
# NOTE(review): paths without a "_S<digits>_" token are written unchanged,
# with no description column - confirm the .fq file names contain one.
{
  echo 'samplename,description'
  for f in raw/*fq; do
    # If the glob matched nothing it stays literal; skip it.
    [ -e "$f" ] || continue
    readlink -f -- "$f" | perl -pe 's/(.*?_(S[0-9]+)_.*)/$1,$2/'
  done
} > alignment.csv
```

**Prepare the O2.yaml file**

```
# Move up one directory and open O2.yaml in vim.
cd ..
vim O2.yaml

# Contents to enter into O2.yaml (YAML, not shell commands). This file is the
# template later passed to `bcbio_nextgen.py -w template`; this version uses
# genome build MN908947.3 and the STAR aligner.
details:
  - analysis: RNA-seq
    genome_build: MN908947.3
    algorithm:
      aligner: star
      strandedness: unstranded
upload:
  dir: ../final
```

**Initiate bcbio**

```
# Load the bcbio module and clear PYTHONPATH before calling bcbio.
module load bcbio/latest
unset PYTHONPATH
# Template mode: YAML template, sample CSV, then the directory of input files.
bcbio_nextgen.py \
  -w template \
  O2.yaml \
  alignment.csv \
  raw/
```

**Create the submission script for O2**

```
# Open submit_bcbio.sh in vim and save the script below (from the #!/bin/sh line down).
vim submit_bcbio.sh

#!/bin/sh
# submit_bcbio.sh - Slurm batch script that runs bcbio on ../config/alignment.yaml
# using the cores of a single node (-t local).
#SBATCH -p priority
#SBATCH -J pickseq
#SBATCH -o run.o
#SBATCH -e run.e
#SBATCH -t 10-00:00
#SBATCH --cpus-per-task=20
#SBATCH --mem=150G
#SBATCH --mail-type=END         # Type of email notification- BEGIN,END,FAIL,ALL
#SBATCH --mail-user=ajitj_nirmal@dfci.harvard.edu   # Email to which notifications will be sent

export PATH="/n/app/bcbio/tools/bin:$PATH"
# Use the number of cores Slurm actually allocated (falls back to 20, the
# --cpus-per-task value above) instead of a hard-coded 24, which would
# oversubscribe the 20-CPU allocation.
bcbio_nextgen.py ../config/alignment.yaml \
    -n "${SLURM_CPUS_PER_TASK:-20}" -t local
```


**Submit job to O2 for processing**

```
# Copy the submission script into the bcbio work directory and submit it from
# there; the script refers to ../config/alignment.yaml relative to that
# directory. The steps are chained with && so the job is never submitted from
# the wrong directory if the copy or cd fails. The trailing slash makes cp fail
# if alignment/work is not an existing directory.
cp submit_bcbio.sh alignment/work/ &&
  cd alignment/work &&
  sbatch submit_bcbio.sh
```