**Unzip all files**

```
# Decompress every .gz archive below the current directory, in place: each
# archive is replaced by its uncompressed file in the same folder.
# Only the archive paths are passed to gunzip (handing it the containing
# directory as well makes it report "is a directory" for every file), and
# find batches the paths itself, so names with spaces or backslashes are safe.
find . -type f -name '*.gz' -exec gunzip -- {} +
```

**Move all the files from inside a folder to outside**
```
# Flatten the tree: move every .fastq file found in a subfolder into the
# current directory.
#   -mindepth 2  skips files that already sit in ".", which mv would reject
#                with "are the same file"
#   mv -n        never overwrites an existing file; a file whose name clashes
#                stays in its subfolder, so that folder is not removed below
find . -mindepth 2 -type f -name '*.fastq' -exec mv -n {} . \;
# Remove the directories that are now empty, deepest first. -mindepth 1 keeps
# find from trying to rmdir "." itself.
find . -mindepth 1 -depth -type d -empty -exec rmdir {} \;
```

**Rename the files**
```
# Run these inside the folder that holds the files to rename. bash is required
# for the ${var/pattern/} and ${var:offset} expansions.

# Remove defined string from name: drop the first "_001". The glob restricts
# the loop to names that contain it (no pointless "mv x x"), and the -e test
# skips the literal pattern when nothing matches.
for file in *_001*; do
  [ -e "$file" ] || continue
  mv -- "$file" "${file/_001/}"
done

# Same for the first "_L001".
for file in *_L001*; do
  [ -e "$file" ] || continue
  mv -- "$file" "${file/_L001/}"
done

# Remove first 3 characters. Names of 3 characters or fewer are skipped because
# the new name would be empty; quoting keeps names with spaces intact.
for file in *; do
  [ "${#file}" -gt 3 ] || continue
  mv -- "$file" "${file:3}"
done
```

**Download the reference**
```
# Fetch the Ensembl release-96 human cDNA FASTA and GTF annotation into
# ./reference and decompress them there, so the files end up where the YAML
# template expects them (.../reference/Homo_sapiens.GRCh38.*).
mkdir -p reference
# -P sets the directory the download is saved into.
wget -P reference ftp://ftp.ensembl.org/pub/release-96/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz
wget -P reference ftp://ftp.ensembl.org/pub/release-96/gtf/homo_sapiens/Homo_sapiens.GRCh38.96.gtf.gz
# Unpack only the downloaded archives; a bare "gunzip *" would be applied to
# every file and folder in the current directory.
gunzip reference/*.gz
```

**Create a sample description file**
```
# Build alignment.csv: a header line, then one line per entry matching
# raw_files/*fastq* of the form
#   <absolute path>,<S-number>
# where <S-number> is the first "S<digits>" token enclosed by underscores in
# the path (e.g. S12 in ..._S12_...). Paths without such a token are written
# unchanged.
(
  echo 'samplename,description'
  for f in raw_files/*fastq*; do
    # Skip the literal pattern left behind when the glob matches nothing.
    [ -e "$f" ] || continue
    # Quoting keeps paths with spaces intact; $1/$2 are Perl's capture
    # variables (the shell does not expand them inside single quotes).
    readlink -f "$f" | perl -pe 's/(.*?_(S[0-9]+)_.*)/$1,$2/'
  done
) > alignment.csv
```

**Create YAML file**
```
# Open O2.yaml in vim and enter the template below; everything after this
# command is file content, not shell.
vim O2.yaml
# Template that is passed to `bcbio_nextgen.py -w template` in the next step.
details:
  - analysis: RNA-seq
    genome_build: hg38
    algorithm:
      # Absolute paths to the unpacked Ensembl release-96 files from the
      # "Download the reference" step.
      # NOTE(review): these paths are specific to one user's directory — adjust before reuse.
      transcriptome_fasta: /n/scratch2/ajit/round0_pickseq/reference/Homo_sapiens.GRCh38.cdna.all.fa
      transcriptome_gtf: /n/scratch2/ajit/round0_pickseq/reference/Homo_sapiens.GRCh38.96.gtf
      aligner: hisat2
      strandedness: unstranded
      # Presumably switches on the bcbioRNASeq step configured just below — confirm in the bcbio docs.
      tools_on: [bcbiornaseq]
      bcbiornaseq:
          organism: homo sapiens
          # NOTE(review): presumably has to name a metadata column; alignment.csv as
          # generated above only has samplename and description — confirm.
          interesting_groups: cells
upload:
  # Relative path — presumably resolved from the directory bcbio is run in; confirm.
  dir: ../final
 ```
 
**Initiate bcBio**
```
# Load the cluster's bcbio module into this shell session.
module load bcbio/latest
# Clear PYTHONPATH — presumably so user-level Python packages cannot shadow
# the ones bcbio uses (confirm against the cluster's bcbio notes).
unset PYTHONPATH
# Set up the project from the template (O2.yaml), the sample sheet
# (alignment.csv) and the raw data folder. The later steps work in
# alignment/work and read alignment/config/alignment.yaml, so the project
# folder is apparently named after the CSV file.
bcbio_nextgen.py -w template O2.yaml alignment.csv raw_files/
```

**Run bcBio**
```
# Work from the project's work directory; the config path inside the batch
# script is relative to it.
cd alignment/work

# Write the Slurm batch script. A here-document is used instead of typing the
# content into an editor so the whole block can be pasted as-is without the
# script body being executed in the current shell. The quoted 'EOF' keeps
# $PATH literal, so it is expanded when the job runs, not while the file is
# written.
cat > submit_bcbio.sh <<'EOF'
#!/bin/sh
#SBATCH -p medium
#SBATCH -J bcbio_O2
#SBATCH -o run.o
#SBATCH -e run.e
#SBATCH -t 1-00:00
#SBATCH --cpus-per-task=3
#SBATCH --mem=64G
#SBATCH --mail-type=END         # Type of email notification- BEGIN,END,FAIL,ALL
#SBATCH --mail-user=ajitj_nirmal@dfci.harvard.edu   # Email to which notifications will be sent

export PATH=/n/app/bcbio/tools/bin:$PATH
bcbio_nextgen.py ../config/alignment.yaml \
    -n 24 -t ipython -s slurm -q medium -r t=1-00:00 --timeout 2000
EOF

# Hand the script to the scheduler.
sbatch submit_bcbio.sh
```

## For manuscript


**Move all the files from inside a folder to outside**

```
# Collect every .gz file below the current directory into ./raw.
# The target folder must exist before anything is moved into it.
mkdir -p raw
# ./raw itself is pruned so files already collected are not moved onto
# themselves; mv -n never overwrites, so a file whose name clashes with one
# already in raw/ stays where it is (and its folder is not removed below).
find . -path ./raw -prune -o -type f -name '*.gz' -exec mv -n {} raw/ \;
# Remove the directories that are now empty, deepest first. -mindepth 1 keeps
# find from trying to rmdir "." itself.
find . -mindepth 1 -depth -type d -empty -exec rmdir {} \;
```

**Rename the files**

```
# Run these inside the folder that holds the files to rename. bash is required
# for the ${var/pattern/} expansion.

# Remove defined string from name: drop the first "_001". The glob restricts
# the loop to names that contain it (no pointless "mv x x"), and the -e test
# skips the literal pattern when nothing matches.
for file in *_001*; do
  [ -e "$file" ] || continue
  mv -- "$file" "${file/_001/}"
done

# Same for the first "_L001".
for file in *_L001*; do
  [ -e "$file" ] || continue
  mv -- "$file" "${file/_L001/}"
done

# Remove everything before the first underscore, and the underscore itself.
# ${file#*_} strips the shortest prefix ending in "_", which is what the
# former echo | sed 's/^[^_]*_//' pipeline did, without word splitting on
# names that contain spaces.
for file in *_*; do
  [ -e "$file" ] || continue
  new_name=${file#*_}
  # A name that ends at its only underscore would become empty; leave it alone.
  [ -n "$new_name" ] || continue
  mv -- "$file" "$new_name"
done
```

**Create a sample description file**

```
# Build alignment.csv: a header line, then one line per entry matching
# raw/*.gz of the form
#   <absolute path>,<S-number>
# where <S-number> is the first "S<digits>" token enclosed by underscores in
# the path (e.g. S12 in ..._S12_...). Paths without such a token are written
# unchanged.
(
  echo 'samplename,description'
  for f in raw/*.gz; do
    # Skip the literal pattern left behind when the glob matches nothing.
    [ -e "$f" ] || continue
    # Quoting keeps paths with spaces intact; $1/$2 are Perl's capture
    # variables (the shell does not expand them inside single quotes).
    readlink -f "$f" | perl -pe 's/(.*?_(S[0-9]+)_.*)/$1,$2/'
  done
) > alignment.csv
```


**Create YAML file**

```
# bcbio template. Save it as O2.yaml, the file name passed to
# `bcbio_nextgen.py -w template` in the next step.
details:
  - analysis: RNA-seq
    genome_build: hg38
    algorithm:
      aligner: hisat2
      strandedness: unstranded
upload:
  # Relative path — presumably resolved from the directory bcbio is run in; confirm.
  dir: ../final
```

**Initiate bcBio**

```
# Load the cluster's bcbio module into this shell session.
module load bcbio/latest
# Clear PYTHONPATH — presumably so user-level Python packages cannot shadow
# the ones bcbio uses (confirm against the cluster's bcbio notes).
unset PYTHONPATH
# Set up the project from the template (O2.yaml), the sample sheet
# (alignment.csv) and the raw data folder. The later steps work in
# alignment/work and read alignment/config/alignment.yaml, so the project
# folder is apparently named after the CSV file.
bcbio_nextgen.py -w template O2.yaml alignment.csv raw/
```

**Run bcBio**

```
#!/bin/sh
# Slurm batch script (saved as submit_bcbio.sh) that runs the bcbio pipeline
# inside a single allocation. Submit it from the project's work directory:
# the config path below is relative to it.
#SBATCH -p priority
#SBATCH -J breast
#SBATCH -o run.o
#SBATCH -e run.e
#SBATCH -t 10-00:00
#SBATCH --cpus-per-task=20
#SBATCH --mem=150G
#SBATCH --mail-type=END         # Type of email notification- BEGIN,END,FAIL,ALL
#SBATCH --mail-user=ajitj_nirmal@dfci.harvard.edu   # Email to which notifications will be sent

export PATH=/n/app/bcbio/tools/bin:$PATH
# With -t local everything runs inside this allocation, so the core count
# given to bcbio follows --cpus-per-task (Slurm exports it as
# SLURM_CPUS_PER_TASK) instead of a hard-coded 24, which was more than the 20
# CPUs requested above. Falls back to 20 when the variable is not set.
bcbio_nextgen.py ../config/alignment.yaml \
    -n "${SLURM_CPUS_PER_TASK:-20}" -t local
```

**Submit job to O2 for processing**

```
# Copy the batch script into the project's work directory and submit it from
# there; the script refers to ../config/alignment.yaml relative to that
# directory. The && chain stops at the first failing step, so nothing is
# submitted from the wrong directory. The trailing slash makes cp fail if
# alignment/work is not an existing directory.
cp submit_bcbio.sh alignment/work/ &&
  cd alignment/work &&
  sbatch submit_bcbio.sh
```
