### Initial data

In [None]:
# Input: the paired-end RNA-seq read files (_1 = forward reads, _2 = reverse reads):
# V300093791_L01_89_1.fq.gz V300093791_L01_89_2.fq.gz
# V300093791_L01_90_1.fq.gz V300093791_L01_90_2.fq.gz
# V300093791_L01_91_1.fq.gz V300093791_L01_91_2.fq.gz
# V300093791_L01_92_1.fq.gz V300093791_L01_92_2.fq.gz
# V300093791_L01_93_1.fq.gz V300093791_L01_93_2.fq.gz
# V300093791_L01_94_1.fq.gz V300093791_L01_94_2.fq.gz
# V300093791_L01_95_1.fq.gz V300093791_L01_95_2.fq.gz
# V300093791_L01_96_1.fq.gz V300093791_L01_96_2.fq.gz

# These files were acquired on the MGISEQ platform. More information is available in this file:
# Probes.xlsx

### Quality control

FastQC

In [None]:
# FastQC does not create the output directory given to -o, so make sure it exists first
mkdir -p ./fastqc_analysis
# one HTML report (plus a zip archive) is written per input file
fastqc *.fq.gz -o ./fastqc_analysis

In [None]:
# Results:
# V300093791_L01_89_1_fastqc.html V300093791_L01_89_2_fastqc.html
# V300093791_L01_90_1_fastqc.html V300093791_L01_90_2_fastqc.html
# V300093791_L01_91_1_fastqc.html V300093791_L01_91_2_fastqc.html
# V300093791_L01_92_1_fastqc.html V300093791_L01_92_2_fastqc.html
# V300093791_L01_93_1_fastqc.html V300093791_L01_93_2_fastqc.html
# V300093791_L01_94_1_fastqc.html V300093791_L01_94_2_fastqc.html
# V300093791_L01_95_1_fastqc.html V300093791_L01_95_2_fastqc.html
# V300093791_L01_96_1_fastqc.html V300093791_L01_96_2_fastqc.html

MultiQC

In [None]:
# summarise the FastQC reports stored in the QC directory
qc_dir=./fastqc_analysis
multiqc "${qc_dir}"

In [None]:
# Result: 
# multiqc_report.html

### Quality correction

Bowtie2 | rRNA filtering

In [None]:
# first, build a Bowtie2 index from the rRNA reference

# reference used:
# dmel-all-rRNA-r6.41.fasta

# the command looks like this
# (the second argument is the basename shared by the generated index files, not a directory):
rrna_fasta=dmel-all-rRNA-r6.41.fasta
index_basename=folder
bowtie2-build "${rrna_fasta}" "${index_basename}"

In [None]:
# then, we can filter out the read pairs that align to rRNA
# the command looks like this:
#   -x             basename of the index created by bowtie2-build (not a directory)
#   --un-conc-gz   gzip-compressed output for the pairs that did NOT align concordantly,
#                  i.e. the reads we keep; bowtie2 derives the two per-mate file names from this path
#   -S /dev/null   discard the SAM alignments, which would otherwise be printed to stdout
bowtie2 -x ./folder -1 file_1.fq.gz -2 file_2.fq.gz --un-conc-gz ./alignments/file --threads 12 -S /dev/null

# we have a .sh script:
# bowtie2_align
# with a following content:

#!/bin/bash
# Filters every read pair <base>_1.fq.gz / <base>_2.fq.gz found in the current directory
# against the rRNA index and stores the non-rRNA pairs under ${outdir}/<base>.
set -euo pipefail       # stop at the first failing sample instead of continuing silently
shopt -s nullglob       # no input files -> the loop body is simply not executed

index=../align/bowtie2_rrna_index/rrna
outdir=../align/bowtie2_rrna_alignments
mkdir -p "${outdir}"    # make sure the output directory exists before bowtie2 writes to it

for infile in *_1.fq.gz
	do
	base=$(basename "${infile}" _1.fq.gz)
	bowtie2 -x "${index}" -1 "${infile}" -2 "${base}_2.fq.gz" --un-conc-gz "${outdir}/${base}" --threads 12 -S /dev/null
	done

Trimmomatic 0.39 | Adapter/quality trimming

In [None]:
# the command looks like this
# (Trimmomatic applies the steps in the order they are written, so the adapter
# clipping step ILLUMINACLIP comes first, followed by the quality trimming steps):
java -jar /usr/share/java/trimmomatic-0.39.jar PE -threads 12 file_1.fq.gz file_2.fq.gz file_1_p.fq.gz file_1_up.fq.gz file_2_p.fq.gz file_2_up.fq.gz ILLUMINACLIP:mgiadapters.fasta:2:30:10 LEADING:20 TRAILING:20 SLIDINGWINDOW:4:15

# we have a .sh script:
# autotrim.sh
# with the following content:

#!/bin/bash
# Trims every read pair <base>_1.fq.gz / <base>_2.fq.gz found in the current directory.
# Per pair, four files are written: <base>_1_p / <base>_2_p (paired survivors) and
# <base>_1_up / <base>_2_up (reads whose mate was dropped).
set -euo pipefail       # stop at the first failing sample instead of continuing silently
shopt -s nullglob       # no input files -> the loop body is simply not executed

trimmomatic_jar=/usr/share/java/trimmomatic-0.39.jar
adapters=mgiadapters.fasta

for infile in *_1.fq.gz
	do
	base=$(basename "${infile}" _1.fq.gz)
	java -jar "${trimmomatic_jar}" PE -threads 12 "${infile}" "${base}_2.fq.gz" "${base}_1_p.fq.gz" "${base}_1_up.fq.gz" "${base}_2_p.fq.gz" "${base}_2_up.fq.gz" ILLUMINACLIP:"${adapters}":2:30:10 LEADING:20 TRAILING:20 SLIDINGWINDOW:4:15
	done

In [None]:
# A reference for adapter trimming:
# mgiadapters.fasta

# Compiled from MGI official file:
# Oligos-and-primers-for-BGISEQ&DNBSEQ-NGS-system.pdf

### Alignment

Salmon | Alignment on transcripts

In [None]:
# first, build a Salmon index from the transcript reference

# reference used:
# dmel-all-transcript-r6.41.fasta

# the command looks like this (the index is written to the directory transcript_index_salmon):
transcript_fasta=dmel-all-transcript-r6.41.fasta
salmon index --transcripts "${transcript_fasta}" --index transcript_index_salmon

In [None]:
# for this alignment we created a file that maps every transcript to its gene:
# tr2gene_transcript.csv

# we have a .sh script:
# create_genemap.sh
# with the following content:

#!/bin/bash
# Builds a two-column, tab-separated transcript-to-gene table from the ID= and parent=
# fields found in the transcript FASTA.
# When doing new research, substitute dmel-all-transcript-r6.41.fasta by any transcript of choice and change the paths.
set -euo pipefail       # abort on the first failing command (e.g. grep finding no match)

transcripts=../ref_rna/functional/dmel-all-transcript-r6.41.fasta

# extract the value after "ID=" and after "parent=", in file order
grep -o "\bID=\w*" "${transcripts}" | cut -d= -f2 > ID
grep -o "\bparent=\w*" "${transcripts}" | cut -d= -f2 > parent

# paste joins the two lists line by line, so they must have the same length;
# otherwise transcripts would silently be paired with the wrong gene
n_id=$(wc -l < ID)
n_parent=$(wc -l < parent)
echo "IDs: ${n_id}, parents: ${n_parent}"
if [ "${n_id}" -ne "${n_parent}" ]; then
	echo "Error: number of IDs and parents differ, gene map was not created" >&2
	rm -f ID parent
	exit 1
fi

paste ID parent > tr2gene_transcript.csv
cp tr2gene_transcript.csv ../ref_rna/functional
rm -f ID parent

In [None]:
# then, we can align

# the command looks like this:
#   -i   index directory created by `salmon index`
#   -l A let Salmon detect the library type automatically
#   -g   transcript-to-gene map, so that gene-level estimates are produced as well
salmon quant -i ./transcript_index_salmon -l A -1 file_1.fq.gz -2 file_2.fq.gz -p 12 -g tr2gene_transcript.csv -o ./alignments

# we have a .sh script:
# autoalign_transcript.sh
# with the following content:

#!/bin/bash
# Quantifies every read pair <base>_1.fq.gz / <base>_2.fq.gz found in the current directory
# and writes the results to ../align/transcript_<base>.
set -euo pipefail       # stop at the first failing sample instead of continuing silently
shopt -s nullglob       # no input files -> the loop body is simply not executed

# directory that holds the Salmon index ("transcript") and the gene map
# that create_genemap.sh copies there
ref_dir=../ref_rna/functional

for infile in *_1.fq.gz
	do
	base=$(basename "${infile}" _1.fq.gz)
	salmon quant -i "${ref_dir}/transcript" -l A -1 "${infile}" -2 "${base}_2.fq.gz" -p 12 -g "${ref_dir}/tr2gene_transcript.csv" -o "../align/transcript_${base}"
	done