#### Exome analysis of Patient 16042-023 treated with Duv

##### Move all the files from inside a folder to outside

```
# Flatten the directory tree: pull every FASTQ and text file up into the
# current directory, stage the FASTQs in to_merge/, then remove the
# sub-directories that are left empty.
mkdir -p to_merge
# -mindepth 2 skips files that already sit at the top level, which mv would
# otherwise reject with "are the same file".
find . -mindepth 2 -name '*.fq.gz' -exec mv {} . \;
mv -- *.fq.gz to_merge/
find . -mindepth 2 -name '*.txt' -exec mv {} . \;
# -mindepth 1 keeps find from trying to rmdir "." itself.
find . -mindepth 1 -depth -type d -empty -exec rmdir {} \;
```

##### Block to merge multiple lanes into one
```
# Unzip the staged FASTQs (they were moved into to_merge/, so a bare
# `gunzip *` in the current directory would not touch them)
gunzip to_merge/*.fq.gz

# Create to_merge csv file: one row per FASTQ, with its absolute path as
# samplename and the S<number> token of the file name as description.
(
  echo 'samplename,description'
  for f in to_merge/*fq; do
    [ -e "$f" ] || continue   # an unmatched glob stays literal; skip it
    readlink -f -- "$f" | perl -pe 's/(.*?_(S[0-9]+)_.*)/$1,$2/'
  done
) > to_merge.csv

# Save the job script below as merge.sh, then submit it with sbatch.
vim merge.sh

#!/bin/sh
#SBATCH -p priority
#SBATCH -J merge
#SBATCH -o run.o
#SBATCH -e run.e
#SBATCH -t 0-12:00
#SBATCH --cpus-per-task=20
#SBATCH --mail-type=END         # Type of email notification- BEGIN,END,FAIL,ALL
#SBATCH --mail-user=ajitj_nirmal@dfci.harvard.edu   # Email to which notifications will be sent

module load bcbio/latest
# Stop if the directory is missing so nothing is merged in the wrong place.
cd to_merge || exit 1
# to_merge.csv is written next to to_merge/, i.e. one level above the
# directory the job has just changed into.
bcbio_prepare_samples.py --out merged --csv ../to_merge.csv

sbatch merge.sh


```

##### Make a copy of the vehicle samples and rename them

```
# Three comparisons
# 70- pre, 76- post, 77- germline
# 1. Post-treatment vs Pre-treatment (76 vs 70)
# 2. Post-treatment vs germline (76 vs 77)
# 3. Pre-treatment vs germline (70 vs 77)

# One working directory per comparison, each with a raw/ sub-directory.
# Only c1-c3 are touched (any other directory present is left alone), and
# -p makes the step safe to re-run.
for comparison in c1 c2 c3; do
  mkdir -p -- "$comparison/raw"
done
```

##### Create Alignment file

```
# Build the sample sheet: the absolute path of every gzipped file in raw/ as
# samplename, and the first "S<number>" that is followed by "_" in that path
# as description.
(
  echo 'samplename,description'
  for f in raw/*.gz; do
    [ -e "$f" ] || continue   # an unmatched glob stays literal; skip it
    readlink -f -- "$f" | perl -pe 's/(.*?(S[0-9]+)_.*)/$1,$2/'
  done
) > alignment.csv
```

##### Yaml file

```
# Template that is later passed to `bcbio_nextgen.py -w template` together
# with alignment.csv. Save everything below the vim line as O2.yaml.
vim O2.yaml

details:
- analysis: variant2
  genome_build: hg38
  algorithm:
    aligner: bwa
    tools_on: [gemini]
    remove_lcr: true
    variantcaller: [mutect2]
    # BED file of regions (presumably the exome capture targets - confirm).
    # NOTE(review): absolute scratch path; check it still exists before running.
    variant_regions: /n/scratch3/users/a/ajn16/exome/run/input/S07604514_Regions.bed
    svcaller: [cnvkit, lumpy, delly]
upload:
  # Relative output location - presumably resolved from the work directory; verify.
  dir: ../final
  
```

##### Sbatch script

```
#!/bin/sh
# SLURM job script that runs the bcbio pipeline. It is meant to be submitted
# from the bcbio work directory so that ../config/alignment.yaml resolves.
#SBATCH -p priority
#SBATCH -J exome_1
#SBATCH -o run.o
#SBATCH -e run.e
#SBATCH -t 2-00:00
#SBATCH --cpus-per-task=20
#SBATCH --mem=80G
#SBATCH --mail-type=END         # Type of email notification- BEGIN,END,FAIL,ALL
#SBATCH --mail-user=ajitj_nirmal@dfci.harvard.edu   # Email to which notifications will be sent

export PATH=/n/app/bcbio/tools/bin:$PATH
# Use exactly the cores SLURM granted via --cpus-per-task instead of a
# hard-coded 24, which oversubscribed the 20-CPU allocation. The fallback of
# 20 only applies when the script is run outside SLURM.
bcbio_nextgen.py ../config/alignment.yaml \
    -n "${SLURM_CPUS_PER_TASK:-20}" -t local
```

##### Run bcbio

```
# Set up the bcbio project from the template and the sample sheet. The later
# steps expect this to have produced alignment/config and alignment/work.
module load bcbio/latest
# Cleared after loading the module - presumably so a user-level PYTHONPATH
# does not shadow bcbio's own Python packages; confirm.
unset PYTHONPATH
# Template mode: O2.yaml is the template, alignment.csv the sample sheet and
# raw/ the location of the input files.
bcbio_nextgen.py -w template O2.yaml alignment.csv raw/ --separator '_'
```

##### Submit Job

```
# Submit from the work directory. The steps are chained with && so that
# nothing is submitted from the wrong place if the copy or the cd fails.
cp submit_sbatch.sh alignment/work/ &&
  cd alignment/work &&
  sbatch submit_sbatch.sh
```

##### Working with the VCF files

```
# Merge all vcf entries
module load gcc/6.2.0 bcftools/1.10.2

# did not work
# Compress to stdout (-c) so the plain c*.vcf files, which the later steps
# still read, are kept; a bare `bgzip file` replaces the file with file.gz.
for c in c1 c2 c3; do
  bgzip -c "$c.vcf" > "$c.vcf.gz" && bcftools index "$c.vcf.gz"
done
bcftools merge --merge all c1.vcf.gz c2.vcf.gz c3.vcf.gz > merged.vcf

# convert vcf to maf and merge later
conda activate vcf2maf

# Strip the carriage return from each line of every comparison VCF and write
# the result to <name>_clean.vcf.
for c in c1 c2 c3; do
  perl -lne 's/\r//; print "$_";' "$c.vcf" > "${c}_clean.vcf"
done

# copy the vcf2maf.pl
cp ~/softwares/vcf2maf/mskcc-vcf2maf-47c4a18/vcf2maf.pl .

# Run the vcf2maf tool on each cleaned VCF, one MAF per comparison.
vcf_dir=/n/scratch3/users/a/ajn16/VCF/vcf2
ref_fasta=/n/shared_db/bcbio/biodata/genomes/Hsapiens/hg38/seq/hg38.fa.gz
# The output directory has to exist before vcf2maf opens its output file.
mkdir -p maf
for c in c1 c2 c3; do
  perl vcf2maf.pl --input-vcf "$vcf_dir/${c}_clean.vcf" --output-maf "maf/$c.maf" \
    --inhibit-vep --ref-fasta "$ref_fasta" --filter-vcf 0
done


```
