**UNZIP data**
```
# NOTE(review): `soar` is not defined anywhere in these notes — presumably a personal
# alias that opens an interactive session; confirm before relying on it.
soar
cd /n/scratch2/ajit/duv_pdx/rawdata
# Decompress every .gz file below the current directory, in place.
# Only file paths are handed to gunzip: also passing the parent directory makes gunzip
# warn "is a directory -- ignored" for every file and exit non-zero.
find . -type f -name '*.gz' -exec gunzip -- {} +
```
**Create a sample description file**
```
alignment.csv
```

**Download Reference genome**

```
# Fetch the Ensembl release-96 transcriptome (cDNA FASTA + GTF) into reference/.
# Run from the project root so the files end up where the YAML template expects them
# (.../duv_pdx/reference/). -P makes wget save into reference/ without changing directory.
mkdir -p reference
wget -P reference ftp://ftp.ensembl.org/pub/release-96/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz
wget -P reference ftp://ftp.ensembl.org/pub/release-96/gtf/homo_sapiens/Homo_sapiens.GRCh38.96.gtf.gz
# Decompress only the two downloads; a bare `gunzip *` would also be applied to every
# other entry in the working directory, including the reference/ directory itself.
gunzip -- reference/Homo_sapiens.GRCh38.cdna.all.fa.gz reference/Homo_sapiens.GRCh38.96.gtf.gz
```

**Create YAML File**

```
vim o2.yaml
# Contents of o2.yaml: the template handed to `bcbio_nextgen.py -w template`.
details:
  - analysis: RNA-seq
    genome_build: hg38
    algorithm:
      # Ensembl release-96 cDNA FASTA and GTF from the reference download step, decompressed.
      transcriptome_fasta: /n/scratch2/ajit/duv_pdx/reference/Homo_sapiens.GRCh38.cdna.all.fa
      transcriptome_gtf: /n/scratch2/ajit/duv_pdx/reference/Homo_sapiens.GRCh38.96.gtf
      aligner: hisat2
      strandedness: unstranded
      tools_on: [bcbiornaseq]
      bcbiornaseq:
          organism: homo sapiens
          # NOTE(review): presumably must match a column name in alignment.csv — confirm.
          interesting_groups: conditions
upload:
  # Relative path for the final output directory.
  dir: ../final
```

**Initiate bcBio**

```bash
module load bcbio/latest
# NOTE(review): presumably cleared so a user-level PYTHONPATH cannot shadow the module's Python packages — confirm.
unset PYTHONPATH
# Template mode: arguments are the YAML template, the sample sheet and the input directory.
# The following step works in alignment/work and reads ../config/alignment.yaml, so the
# project directory is presumably named after alignment.csv — confirm.
bcbio_nextgen.py -w template o2.yaml alignment.csv rawdata/ --separator '_'
```

**Run bcBio**

```bash
cd alignment/work
vim submit_bcbio.sh

# Paste everything from the #!/bin/sh line down to the bcbio_nextgen.py command into submit_bcbio.sh.
#!/bin/sh
#SBATCH -p medium
#SBATCH -J bcbio_O2              
#SBATCH -o run.o
#SBATCH -e run.e
#SBATCH -t 1-00:00
#SBATCH --cpus-per-task=3
#SBATCH --mem=100G
#SBATCH --mail-type=END         # Type of email notification- BEGIN,END,FAIL,ALL
#SBATCH --mail-user=ajitj_nirmal@dfci.harvard.edu   # Email to which notifications will be sent

export PATH=/n/app/bcbio/tools/bin:$PATH
# Runs the generated project config with -t ipython -s slurm on the "medium" queue;
# -n 24 is the core count requested from bcbio and -r t=1-00:00 matches the #SBATCH time limit above.
bcbio_nextgen.py ../config/alignment.yaml \
    -n 24 -t ipython -s slurm -q medium -r t=1-00:00 --timeout 2000

# Back in the shell (not part of the script): hand the script to Slurm.
sbatch submit_bcbio.sh
```

---

**Processing EXOME sequencing data**

```
# Start an interactive Slurm session (70G memory, 6 hours) for the decompression.
srun --pty -p interactive --mem 70G -t 0-06:00 /bin/bash
# Decompress every .gz file below the current directory, in place.
# Only file paths are handed to gunzip: also passing the parent directory makes gunzip
# warn "is a directory -- ignored" for every file and exit non-zero.
find . -type f -name '*.gz' -exec gunzip -- {} +
```

**Move all the files from inside the subfolders into the current directory**

```
# Flatten the tree: move every .fq file that sits in a subdirectory into the current directory.
# -mindepth 2 skips files already here, which mv would reject as "are the same file".
find . -mindepth 2 -type f -name '*.fq' -exec mv -- {} . \;
# Remove the directories left empty by the move, deepest first.
# -mindepth 1 stops find from attempting to rmdir '.' itself.
find . -mindepth 1 -depth -type d -empty -exec rmdir -- {} \;
```

**Move extra smaller files into a directory**

```
# Print a human-readable size summary for each entry in the current directory.
# NOTE(review): this only reports sizes; the actual move of the small files is not recorded in these notes.
du -bsh *
```

**Make a copy of the vehicle samples and rename them**

```
# Duplicate the vehicle samples (files starting with "V") into two working copies.
mkdir vechile_1
cp V* vechile_1
mkdir vechile_2
cp V* vechile_2/

# First copy: replace each of the three sample IDs in the file names with "comb".
cd vechile_1
rename 'USD16092608' comb *
rename 'USD16092609' comb *
rename 'USD16092610' comb *

# Second copy: replace the same IDs with "romi".
# vechile_2 is a sibling of vechile_1, so step back up before entering it.
cd ../vechile_2
rename 'USD16092608' romi *
rename 'USD16092609' romi *
rename 'USD16092610' romi *
```

**Rename the files**

```
# Strip fixed fragments from file names in the current directory.
# Each glob selects only names that contain the fragment, so mv is never asked to move a
# file onto itself; the -e test covers the case where nothing matches and the pattern stays literal.
for file in *_HMGWWDSXX_L4*; do
  [ -e "${file}" ] || continue
  mv -- "${file}" "${file/_HMGWWDSXX_L4/}" # Remove defined string from name
done
for file in *s47880_*; do
  [ -e "${file}" ] || continue
  mv -- "${file}" "${file/s47880_/}" # Remove defined string from name
done
```

**Create a sample description file**


```
(echo 'samplename,description'; for f in raw_files/*fq*; do readlink -f $f | perl -pe 's/(.*?_(S[0-9]+)_.*)/\1,\2/'; done) > alignment.csv
```

Edit alignment.csv remotely to include phenotype, batch and drug details.

**Create the YAML file**

```
# Template for the exome run; the template command in these notes refers to it as O2.yaml.
details:
- analysis: variant2
  genome_build: hg38
  algorithm:
    aligner: bwa
    # NOTE(review): presumably separates human (hg38) reads from mouse (mm10) reads in the PDX samples — confirm.
    disambiguate: [mm10]
    tools_on: [gemini]
    remove_lcr: true
    variantcaller: [mutect2]
    # Capture-region BED file saved in the input/ folder (see the download step in these notes).
    variant_regions: ../input/S07604514_Regions.bed
    svcaller: [cnvkit, lumpy, delly]
upload:
  # Relative path for the final output directory.
  dir: ../final
```

**Download the variant region file**

Download the BED files from https://earray.chem.agilent.com/suredesign/index.htm and save them inside a folder named `input`.

**Create the submission script**

```
#!/bin/sh
#SBATCH -p priority
#SBATCH -J exome
#SBATCH -o run.o
#SBATCH -e run.e
#SBATCH -t 7-00:00
#SBATCH --cpus-per-task=20
#SBATCH --mem=80G
#SBATCH --mail-type=END         # Type of email notification- BEGIN,END,FAIL,ALL
#SBATCH --mail-user=ajitj_nirmal@dfci.harvard.edu   # Email to which notifications will be sent

export PATH=/n/app/bcbio/tools/bin:$PATH
# Run bcbio on this node only (-t local), using exactly the cores Slurm granted.
# A fixed -n 24 would exceed the 20 CPUs requested with --cpus-per-task above;
# the fallback of 20 applies when the script is run outside Slurm.
bcbio_nextgen.py ../config/alignment.yaml \
    -n "${SLURM_CPUS_PER_TASK:-20}" -t local
```

**Run bcbio**

```bash
module load bcbio/latest
# NOTE(review): presumably cleared so a user-level PYTHONPATH cannot shadow the module's Python packages — confirm.
unset PYTHONPATH
# Template mode: arguments are the YAML template (O2.yaml), the sample sheet and the input directory.
# The submit step in these notes then works inside alignment/work.
bcbio_nextgen.py -w template O2.yaml alignment.csv raw_files/ --separator '_'
```

**Submit Job**

```
# Copy the submission script into the project's work directory and submit it from there,
# since the script refers to the config by the relative path ../config/alignment.yaml.
cp submit_bcbio.sh alignment/work/
cd alignment/work
sbatch submit_bcbio.sh
```

## Re-running Exome sequencing data

**Merge files: multiple lanes and all vehicle samples into a single entity**

**Create tomerge file**

```
# Build tomerge.csv: a header, then one row per file in raw_files/ holding the absolute
# path and the S<number> token taken from the file name.
# Uses the same pattern as the alignment.csv command in these notes; without the `*` and
# `_` characters the comma is inserted in the middle of the path instead of at its end.
# Assumes file names contain `_S<digits>_` — lines that do not match are left without a second column.
(echo 'samplename,description'; for f in raw_files/*; do readlink -f "$f" | perl -pe 's/(.*?_(S[0-9]+)_.*)/\1,\2/'; done) > tomerge.csv
```

**Merge multiple files into a single file**

```
#!/bin/sh
#SBATCH -p priority
#SBATCH -J merge
#SBATCH -o run.o
#SBATCH -e run.e
#SBATCH -t 0-12:00
#SBATCH --cpus-per-task=20
#SBATCH --mail-type=END         # Type of email notification- BEGIN,END,FAIL,ALL
#SBATCH --mail-user=ajitj_nirmal@dfci.harvard.edu   # Email to which notifications will be sent

# Slurm batch job that runs bcbio_prepare_samples.py on the files listed in tomerge.csv.
module load bcbio/latest
cd raw_files
# NOTE(review): the tomerge.csv command in these notes writes the file next to raw_files/,
# but it is looked up inside raw_files/ here — confirm it has been moved or copied there first.
# --out names the output directory (merged), relative to raw_files/.
bcbio_prepare_samples.py --memory-per-job 2G --out merged --csv tomerge.csv
```

**Create a sample description file**

```
(echo 'samplename,description'; for f in raw_files/*; do readlink -f $f | perl -pe 's/(.*?_(S[0-9]+)_.*)/\1,\2/'; done) > alignment.csv
```

Edit alignment.csv remotely to include phenotype, batch and drug details.

**Create the YAML file**

```
# Template for the exome re-run; the template command in these notes refers to it as O2.yaml.
details:
- analysis: variant2
  genome_build: hg38
  algorithm:
    aligner: bwa
    # NOTE(review): presumably separates human (hg38) reads from mouse (mm10) reads in the PDX samples — confirm.
    disambiguate: [mm10]
    tools_on: [gemini]
    remove_lcr: true
    variantcaller: [mutect2]
    # Capture-region BED file saved in the input/ folder (see the download step in these notes).
    variant_regions: ../input/S07604514_Regions.bed
    svcaller: [cnvkit, lumpy, delly]
upload:
  # Relative path for the final output directory.
  dir: ../final
```

**Download the variant region file**

Download the BED files from https://earray.chem.agilent.com/suredesign/index.htm and save them inside a folder named `input`.

**Create the submission script**

```
vim submit_bcbio.sh

# Paste everything from the #!/bin/sh line down to the bcbio_nextgen.py command into submit_bcbio.sh.
#!/bin/sh
#SBATCH -p priority
#SBATCH -J exome
#SBATCH -o run.o
#SBATCH -e run.e
#SBATCH -t 7-00:00
#SBATCH --cpus-per-task=20
#SBATCH --mem=80G
#SBATCH --mail-type=END         # Type of email notification- BEGIN,END,FAIL,ALL
#SBATCH --mail-user=ajitj_nirmal@dfci.harvard.edu   # Email to which notifications will be sent

export PATH=/n/app/bcbio/tools/bin:$PATH
# Run bcbio on this node only (-t local), using exactly the cores Slurm granted.
# A fixed -n 24 would exceed the 20 CPUs requested with --cpus-per-task above;
# the fallback of 20 applies when the script is run outside Slurm.
bcbio_nextgen.py ../config/alignment.yaml \
    -n "${SLURM_CPUS_PER_TASK:-20}" -t local
```

**Run bcbio**

```bash
module load bcbio/latest
# NOTE(review): presumably cleared so a user-level PYTHONPATH cannot shadow the module's Python packages — confirm.
unset PYTHONPATH
# Template mode: arguments are the YAML template (O2.yaml), the sample sheet and the input directory.
# The submit step in these notes then works inside alignment/work.
bcbio_nextgen.py -w template O2.yaml alignment.csv raw_files/ --separator '_'
```

**Submit Job**

```
# Copy the submission script into the project's work directory and submit it from there,
# since the script refers to the config by the relative path ../config/alignment.yaml.
cp submit_bcbio.sh alignment/work/
cd alignment/work
sbatch submit_bcbio.sh
```


#### Working with the VCF files

```
# Merge common entries
module load gcc/6.2.0 bcftools/1.9

# For each pair, `bcftools isec -p DIR` writes its result files INTO DIR:
#   DIR/0002.vcf.gz = records from the first file shared by both,
#   DIR/0003.vcf.gz = records from the second file shared by both.
# The merge therefore has to read them from DIR, and each pair gets its own DIR so that
# a later intersection does not overwrite an earlier one.

# D1 and D2
bcftools isec -p isec_D12 -Oz 4-mutect2-annotated.vcf.gz 5-mutect2-annotated.vcf.gz
bcftools merge --merge all isec_D12/0002.vcf.gz isec_D12/0003.vcf.gz > D12merged.vcf

# D2 and D3
bcftools isec -p isec_D23 -Oz 5-mutect2-annotated.vcf.gz 6-mutect2-annotated.vcf.gz
bcftools merge --merge all isec_D23/0002.vcf.gz isec_D23/0003.vcf.gz > D23merged.vcf

# D1 and D3
bcftools isec -p isec_D13 -Oz 4-mutect2-annotated.vcf.gz 6-mutect2-annotated.vcf.gz
bcftools merge --merge all isec_D13/0002.vcf.gz isec_D13/0003.vcf.gz > D13merged.vcf

# D1, D2 and D3 (variants present in all three files; output in D123/)
bcftools isec -n +3 4-mutect2-annotated.vcf.gz 5-mutect2-annotated.vcf.gz 6-mutect2-annotated.vcf.gz -p D123

# Variants in two or more VCF files (output in Dtwo/)
bcftools isec -n +2 4-mutect2-annotated.vcf.gz 5-mutect2-annotated.vcf.gz 6-mutect2-annotated.vcf.gz -p Dtwo


```