### File baseNames

In [7]:
# Sample base names: numeric prefixes 04-07 carry the "zfUV" tag and
# prefixes 08-11 carry the "zfS" tag.
fBase = (
    [f"{idx:02d}_zfUV" for idx in range(4, 8)]
    + [f"{idx:02d}_zfS" for idx in range(8, 12)]
)

## Create command for _**trimmomatic**_

In [None]:


class trimCMD:
    """Build the Trimmomatic paired-end command line for one sample.

    Args:
        fname: sample base name; the raw reads are expected at
            ``<dPath>/<fname>_R1.fastq.gz`` and ``<dPath>/<fname>_R2.fastq.gz``.

    Attributes:
        dPath: directory holding the raw reads.
        tPath: directory receiving the trimmed outputs
            (``_fP``/``_fU``/``_rP``/``_rU`` ``.fq.gz`` files).
        cmd: the complete shell command as a single string.
    """

    def __init__(self, fname):
        self.dPath = '/data/angueyraaristjm/20181018_RNAseq'
        self.tPath = '/data/angueyraaristjm/20181018_Trimmed'
        # The paths must be read from the instance: bare ``dPath``/``tPath``
        # are not defined in this scope and raised NameError.
        raw = f"{self.dPath}/{fname}"
        trimmed = f"{self.tPath}/{fname}"
        self.cmd = (
            "java -jar $TRIMMOJAR PE -threads 8 -phred33 "
            f"{raw}_R1.fastq.gz {raw}_R2.fastq.gz "
            f"{trimmed}_fP.fq.gz {trimmed}_fU.fq.gz "
            f"{trimmed}_rP.fq.gz {trimmed}_rU.fq.gz "
            "ILLUMINACLIP:/usr/local/apps/trimmomatic/Trimmomatic-0.36/adapters/TruSeq2-PE.fa:2:30:10 "
            "LEADING:3 TRAILING:3 SLIDINGWINDOW:4:20 MINLEN:30"
        )

# cmd = f"java -jar $TRIMMOJAR PE -phred33 {dPath}/{fBase}_R1.fastq.gz {dPath}/{fBase}_R2.fastq.gz {tPath}/{fBase}_fP.fq.gz {tPath}/{fBase}_fU.fq.gz {tPath}/{fBase}_rP.fq.gz {tPath}/{fBase}_rU.fq.gz ILLUMINACLIP:/usr/local/apps/trimmomatic/Trimmomatic-0.36/adapters/TruSeq2-PE.fa:2:30:10 LEADING:3 TRAILING:3 SLIDINGWINDOW:4:20 MINLEN:7"

In [None]:
# Preview the Trimmomatic command generated for the sample at index 7 of fBase.
a = trimCMD(fBase[7])
a.cmd

### Create file for a single pair of files

In [None]:
# Write a stand-alone bash script that trims the first sample in fBase.
# The context manager closes the file even if building the command or a
# write raises, which the manual open()/close() pair did not guarantee.
with open("/Users/angueyraaristjm/Documents/LiMolec/zfRNAseq/trim_04.sh", "w+") as f:
    f.write("#!/bin/bash\n\n")
    f.write("module load trimmomatic || exit 1\n")
    f.write(trimCMD(fBase[0]).cmd)


### Create **swarm** files for all files

In [None]:
# Write one Trimmomatic command per line, one line per sample in fBase.
# The context manager closes the file even if building a command raises
# part-way through the loop.
with open("/Users/angueyraaristjm/Documents/LiMolec/zfRNAseq/trim_all.swarm", "w+") as f:
    for sample in fBase:
        f.write(trimCMD(sample).cmd)
        f.write('\n')


In [None]:
# sbatch --mem=32g --cpus-per-task=12 rnaseq/trim_04.sh


# java -jar $TRIMMOJAR PE -threads 8 -phred33 input1a input1b [...]

# swarm -f trim_all.swarm -g 32 -t 8 --module trimmomatic

### Run _**fastQC**_ on trimmed sequences

In [None]:
# fastqc -o /data/angueyraaristjm/20181018_RNAseq_fastqc/ -f fastq /data/angueyraaristjm/20181018_RNAseq/08_zfS_R1.fastq.gz
class fqcTrimCMD:
    """Build the FastQC commands for the four trimmed outputs of one sample.

    Args:
        fname: sample base name; inputs are ``<tPath>/<fname>_<suffix>.fq.gz``
            for each suffix in ``tsuf``.

    Attributes:
        tPath: directory holding the trimmed reads.
        fQCPath: FastQC output directory (``<tPath>/fastQC``).
        tsuf: suffixes of the trimmed files, in command order.
        cmds: one ``fastqc`` command string per suffix.
    """

    def __init__(self, fname):
        self.tPath = '/data/angueyraaristjm/20181018_Trimmed'
        self.fQCPath = self.tPath + '/fastQC'
        self.tsuf = ['fP', 'fU', 'rP', 'rU']
        # Read the input directory from the instance: a bare ``tPath`` is not
        # defined in this scope and raised NameError.
        self.cmds = [
            f"fastqc -o {self.fQCPath} -f fastq {self.tPath}/{fname}_{suffix}.fq.gz"
            for suffix in self.tsuf
        ]

    def writeCMD(self, swarmfile):
        """Write every command to ``swarmfile``, one per line.

        Args:
            swarmfile: writable text file-like object.
        """
        for cmd in self.cmds:
            swarmfile.write(cmd)
            swarmfile.write('\n')

In [None]:
# Write the FastQC commands for every sample in fBase, one command per line.
# The context manager closes the file even if a command build or write raises.
with open("/Users/angueyraaristjm/Documents/LiMolec/zfRNAseq/fastQC_trimmed_all.swarm", "w+") as f:
    for sample in fBase:
        fqcTrimCMD(sample).writeCMD(f)

# mkdir /data/angueyraaristjm/20181018_Trimmed/fastQC
# swarm -f rnaseq/fastQC_trimmed_all.swarm -g 10 --module fastqc

### Run _**HiSat2**_ alignment on all files

In [1]:
# hisat2 -p 8 --rna-strandness FR -x /data/angueyraaristjm/zfGenome/danRer11_index -1 /data/angueyraaristjm/20181018_Trimmed/04_zfUV_fP.fq.gz -2 /data/angueyraaristjm/20181018_Trimmed/04_zfUV_rP.fq.gz -S /data/angueyraaristjm/20181018_aligned/04_zfUV_aligned.sam --summary /data/angueyraaristjm/20181018_aligned/alignedStats/04_zfUV_alignStats.txt
class hisatCMD:
    """Assemble the HISAT2 paired-end alignment command for one sample.

    The command aligns ``<tPath>/<fname>_fP.fq.gz`` and ``_rP.fq.gz`` against
    the index at ``indexPath``, writes ``<alignPath>/<fname>_aligned.sam`` and
    stores the summary under ``<alignPath>/alignedStats``.

    NOTE(review): the resulting alignments are later fed to StringTie in this
    file; the StringTie manual recommends running HISAT2 with ``--dta`` for
    that use - confirm whether the flag should be added here.

    Attributes:
        alignPath: directory receiving the SAM output and the summary.
        indexPath: HISAT2 index prefix passed to ``-x``.
        tPath: directory holding the trimmed reads.
        cmd: the complete shell command as a single string.
    """

    def __init__(self, fname):
        self.alignPath = '/data/angueyraaristjm/20181018_aligned'
        self.indexPath = '/data/angueyraaristjm/zfGenome/danRer11_index'
        self.tPath = '/data/angueyraaristjm/20181018_Trimmed'
        trimmed_prefix = f"{self.tPath}/{fname}"
        pieces = (
            "hisat2 -p 8 --rna-strandness FR",
            f"-x {self.indexPath}",
            f"-1 {trimmed_prefix}_fP.fq.gz",
            f"-2 {trimmed_prefix}_rP.fq.gz",
            f"-S {self.alignPath}/{fname}_aligned.sam",
            f"--summary {self.alignPath}/alignedStats/{fname}_alignStats.txt",
        )
        self.cmd = " ".join(pieces)

    def writeCMD(self, swarmfile):
        """Write the command followed by a newline to ``swarmfile``."""
        swarmfile.write(f"{self.cmd}\n")

In [8]:
# Write the HISAT2 alignment command for every sample in fBase, one per line.
# The context manager closes the file even if a command build or write raises.
with open("/Users/angueyraaristjm/Documents/LiMolec/zfRNAseq/rnaseq_cmds/hisat2Align_all.swarm", "w+") as f:
    for sample in fBase:
        hisatCMD(sample).writeCMD(f)

### Sort aligned SAM files by chromosome location and save as .bam

In [9]:
# samtools view -Su /data/angueyraaristjm/20181018_aligned/04_zfUV_aligned.sam | samtools sort -o /data/angueyraaristjm/20181018_bamSorted/04_zfUV_sorted.bam
class bamsortCMD:
    """Assemble the samtools pipeline that turns one SAM file into a sorted BAM.

    The command pipes ``samtools view -Su`` on
    ``<alignPath>/<fname>_aligned.sam`` into ``samtools sort``, which writes
    ``<bamPath>/<fname>_sorted.bam``.

    Attributes:
        alignPath: directory holding the aligned SAM files.
        bamPath: directory receiving the sorted BAM files.
        cmd: the complete shell pipeline as a single string.
    """

    def __init__(self, fname):
        self.alignPath = '/data/angueyraaristjm/20181018_aligned'
        self.bamPath = '/data/angueyraaristjm/20181018_bamSorted'
        view_stage = f"samtools view -Su {self.alignPath}/{fname}_aligned.sam"
        sort_stage = f"samtools sort -o {self.bamPath}/{fname}_sorted.bam"
        self.cmd = " | ".join((view_stage, sort_stage))

    def writeCMD(self, swarmfile):
        """Write the pipeline followed by a newline to ``swarmfile``."""
        swarmfile.write(f"{self.cmd}\n")

In [10]:
# Write the SAM-to-sorted-BAM pipeline for every sample in fBase, one per line.
# The context manager closes the file even if a command build or write raises.
with open("/Users/angueyraaristjm/Documents/LiMolec/zfRNAseq/rnaseq_cmds/sortAligned_all.swarm", "w+") as f:
    for sample in fBase:
        bamsortCMD(sample).writeCMD(f)

### Assemble transcriptome using _**StringTie**_

In [15]:
class stringtieCMD:
    """Assemble the StringTie transcript-assembly command for one sample.

    The command reads ``<bamPath>/<fname>_sorted.bam`` and writes
    ``<gtfPath>/<fname>.gtf``.

    NOTE(review): this passes ``--rf`` while hisatCMD in this file aligns with
    ``--rna-strandness FR``. Per the tools' manuals these appear to describe
    opposite library orientations (fr-firststrand vs fr-secondstrand) -
    confirm which one matches the library prep.

    Attributes:
        gtfPath: directory receiving the assembled GTF files.
        bamPath: directory holding the sorted BAM files.
        cmd: the complete shell command as a single string.
    """

    def __init__(self, fname):
        self.gtfPath = '/data/angueyraaristjm/20181018_assembly'
        self.bamPath = '/data/angueyraaristjm/20181018_bamSorted'
        sorted_bam = f"{self.bamPath}/{fname}_sorted.bam"
        out_gtf = f"{self.gtfPath}/{fname}.gtf"
        self.cmd = " ".join(("stringtie", sorted_bam, "-p 8 --rf -o", out_gtf))

    def writeCMD(self, swarmfile):
        """Write the command followed by a newline to ``swarmfile``."""
        swarmfile.write(f"{self.cmd}\n")


In [16]:
# Write the StringTie assembly command for every sample in fBase, one per line.
# The context manager closes the file even if a command build or write raises.
with open("/Users/angueyraaristjm/Documents/LiMolec/zfRNAseq/rnaseq_cmds/assemble_all.swarm", "w+") as f:
    for sample in fBase:
        stringtieCMD(sample).writeCMD(f)

### Generate StringTie output for use by Ballgown

In [19]:
class stringtiedeCMD:
    """Build the StringTie ``-e -B`` command producing Ballgown input for one sample.

    The command estimates abundances for ``<bamPath>/<fname>_sorted.bam``
    against ``<gtfPath>/zfAssembly_merged.gtf`` and writes its output to a
    per-sample subdirectory ``<dePath>/<fname>/``.

    Args:
        fname: sample base name.

    Attributes:
        gtfPath: directory holding the merged assembly GTF.
        bamPath: directory holding the sorted BAM files.
        dePath: parent directory of the per-sample output directories.
        cmd: the complete shell command as a single string.
    """

    def __init__(self, fname):
        self.gtfPath = '/data/angueyraaristjm/20181018_assembly'
        self.bamPath = '/data/angueyraaristjm/20181018_bamSorted'
        self.dePath = '/data/angueyraaristjm/20181018_DE'
        # With -B, StringTie writes its fixed-name *.ctab tables next to the
        # -o GTF. Each sample therefore needs its own directory; otherwise
        # the samples (run in parallel by swarm) overwrite each other's tables.
        sample_dir = f"{self.dePath}/{fname}"
        self.cmd = (
            f"stringtie -e -B -p 8 -G {self.gtfPath}/zfAssembly_merged.gtf "
            f"-o {sample_dir}/{fname}.gtf {self.bamPath}/{fname}_sorted.bam"
        )

    def writeCMD(self, swarmfile):
        """Write the command followed by a newline to ``swarmfile``.

        Args:
            swarmfile: writable text file-like object.
        """
        swarmfile.write(self.cmd)
        swarmfile.write('\n')

In [21]:
# Write the StringTie/Ballgown command for every sample in fBase, one per line.
# The context manager closes the file even if a command build or write raises.
with open("/Users/angueyraaristjm/Documents/LiMolec/zfRNAseq/rnaseq_cmds/stringtieDE_all.swarm", "w+") as f:
    for sample in fBase:
        stringtiedeCMD(sample).writeCMD(f)

