# RNA-Seq Workflow by @furkanmtorun 
### [furkanmtorun@gmail.com](mailto:furkanmtorun@gmail.com)  | GitHub: [@furkanmtorun](https://github.com/furkanmtorun)  | [Google Scholar](https://scholar.google.com/citations?user=d5ZyOZ4AAAAJ) | [Personal Website](https://furkanmtorun.github.io/) 

### Libraries, packages, and required functions

In [None]:
# +--------------------------------------------------+
# Import required libraries & packages
# +--------------------------------------------------+
import glob2
import subprocess

# +--------------------------------------------------+
# Define folders and bin for tools
# +--------------------------------------------------+
# Data folders. Every path ends with "/" so that file names and glob
# patterns can be appended by plain string concatenation.
fastq_folder = "./files/fastq/"
genome_folder = "./files/genome/"
index_folder = "./files/index/"
bam_sam_folder = "./files/bam_sam/"
logs_folder = "./files/logs/"
results_folder = "./files/results/"

# Folders that contain the executables of the external tools.
FastQC_bin = "./softs/FastQC/"
STAR_bin = "./softs/STAR-2.7.3a/bin/Linux_x86_64/"
cufflinks_bin = "./softs/cufflinks-2.2.1.Linux_x86_64/"
bowtie_bin = "./softs/bowtie2-2.3.5.1-linux-x86_64/"
TopHat_bin = "./softs/tophat-2.1.1.Linux_x86_64/"
# NOTE(review): this is a Windows path while the other tools point at Linux
# builds -- confirm it matches the machine the workflow runs on.
R_bin = "C:/Program Files/R/R-3.6.1/bin/i386/"


# +--------------------------------------------------+
# Define files
# +--------------------------------------------------+
# Each variable is a single space-separated string of every path matching
# the pattern in its folder (an empty string when nothing matches), ready to
# be pasted into a shell command line.
fasta_files, gtf_files, fastq_files = (
    " ".join(glob2.glob(folder + pattern))
    for folder, pattern in (
        (genome_folder, "*.fa*"),
        (genome_folder, "*.gtf*"),
        (fastq_folder, "*.fastq*"),
    )
)


# +--------------------------------------------------+
# The function for messages
# +--------------------------------------------------+
def msg_output(text):
    """Print `text` between two dashed border lines.

    Each border contains one dash per character of `text` (so `text` must
    support `len()`), and the message line is prefixed with "> ". A blank
    line is printed before and after the box.
    """
    border = "# +" + "-" * len(text) + "+"
    print("\n{0}\n> {1}\n{0}\n".format(border, text))
    

# +--------------------------------------------------+
# Execute and track the shell commands
# +--------------------------------------------------+
def run_command(command):
    """Run `command` through the shell and return what it wrote to stdout.

    Parameters:
        command: the full command line, as a single string. It is passed to
            the shell unchanged (`shell=True`), so it must only be built from
            trusted input.

    Returns:
        The captured standard output as `bytes` when the command exits with
        status 0. When the command cannot be run or exits with a non-zero
        status, an error box is printed and `None` is returned, so callers
        must check for `None` before decoding the result.
    """
    try:
        return subprocess.check_output(command, shell=True)
    except Exception as error:
        # Build the message with format() rather than "+": concatenation would
        # itself raise TypeError here when `command` is not a string.
        # The reason is included so the user can see why the command failed
        # (for a non-zero exit this names the exit status).
        msg_output("! Error!: Your command was:\n\t{}\n! Reason: {}".format(command, error))
        return None

        
# +--------------------------------------------------+
# Ask for confirmation, then execute the shell command
# +--------------------------------------------------+
def confirmation_runCommand(command):
    """Show `command`, ask the user to confirm it, and run it on "YES".

    Parameters:
        command: the shell command line (a string) to display and execute.

    The answer is compared case-insensitively and surrounding whitespace is
    ignored. On "YES" the command is executed with `run_command` and its
    output is printed; on "NO" or any other answer a hint is printed and
    nothing is executed. The function returns nothing.
    """
    msg_output("Your command is:\n\t" + command)
    answer = input("> Are you OK with that command? Type 'YES' or 'NO': ").strip().upper()
    if answer == "YES":
        raw_output = run_command(command)
        if raw_output is None:
            # run_command returns None after reporting a failure; calling
            # .decode() on it would raise AttributeError and bury that report.
            return
        # Tool output is not guaranteed to be valid UTF-8, so undecodable
        # bytes are replaced instead of aborting the cell.
        msg_output(raw_output.decode("utf-8", errors="replace"))
    elif answer == "NO":
        print("! You can change the command and then, re-run the cell")
    else:
        print("! Just type YES or NO: Please, re-run the cell")

### Quality Control using FastQC

In [None]:
import os

# FastQC writes its reports into the folder given with -o but does not
# create that folder itself, so make sure it exists before running.
qc_reports_folder = results_folder + "QC_reports"
os.makedirs(qc_reports_folder, exist_ok=True)

# Run FastQC on every FASTQ file in the input folder.
fastqc_command = "{}fastqc {}*.fastq* -f fastq -o {}".format(FastQC_bin, fastq_folder, qc_reports_folder)
confirmation_runCommand(fastqc_command)

### Generation of Genome Index using STAR

In [None]:
# Build the STAR genome index from the FASTA and GTF files found in the
# genome folder, using thread count, overhang and extra options typed by the user.
run_mode = "genomeGenerate"
number_of_threads = input("> Number Of Threads: ").strip()
overhang_number = input("> Overhang (ideally: ReadLength - 1): ").strip()
extra_option = input("> Paste your extra options: \nUse formal manual: https://raw.githubusercontent.com/alexdobin/STAR/921a50b1b4730a2c8b6bffc03b85081e9de3f777/doc/STARmanual.pdf \nExample: --limitSjdbInsertNsj 4000 --limitGenomeGenerateRAM 269860224 --genomeSAindexNbases 12\n")
# Both values are pasted into the command line, so accept digits only.
if number_of_threads.isdigit() and overhang_number.isdigit():
    # NOTE: the FASTA files are deliberately NOT gzip-compressed here.
    # STAR's --genomeFastaFiles requires plain-text FASTA, and compressing
    # them at this point would also invalidate the paths already collected
    # in `fasta_files`.
    indexing_command = "{}STAR --runThreadN {} --runMode {} --genomeDir {} --genomeFastaFiles {} --sjdbGTFfile {} --sjdbOverhang {} {}" \
                        .format(STAR_bin, number_of_threads, run_mode, index_folder, fasta_files, gtf_files, overhang_number, extra_option)
    confirmation_runCommand(indexing_command)
else:
    msg_output("Check the number of threads and overhang number you have typed!")
