In [1]:
import os
import os.path

In [2]:
settings = {
    "threads": 32,
    "input_file": os.path.abspath("./test_data/NC_000913.fna"),
    "binary_folder": os.path.abspath("./prokka/binaries/linux/"),
    "binary_folder_common": os.path.abspath("./prokka/binaries/common/"),
    "work_folder": os.path.abspath("./"),
    "output_folder": os.path.abspath("./result"),
    "genetic_code": "11",
}

tools_settings = {
    "aragorn_options": {
        "aragorn_binary": os.path.join(settings["binary_folder"], "aragorn"),
        "aragorn_input": settings["input_file"],
        "aragorn_output": os.path.join(settings["output_folder"], "aragorn.out"),
        "aragorn_options": "-l -gc%(genetic_code)s -w" % settings,
    },
    "barrnap_options": {
        "barrnap_binary": os.path.join(settings["binary_folder"], "barrnap"),
        "barrnap_options": "--kingdom bac --threads %(threads)s --quiet" % settings,
        "barrnap_output": os.path.join(settings["output_folder"], "barrnap.out"),
        "barrnap_input": settings["input_file"],
    },
    "parallel_options": {
        "parallel_binary": os.path.join(settings["binary_folder_common"], "parallel"),
        "parallel_input": settings["input_file"],
        "parallel_output": os.path.join(settings["output_folder"], "parallel.out"),
        "parallel_options": "--gnu --plain" % settings,
    },    
    "rnammer_options": {
        "rnammer_binary": os.path.join(settings["binary_folder"], "rnammer"),
        "rnammer_input": settings["input_file"],
        "rnammer_output": os.path.join(settings["output_folder"], "rnammer.out"),
        "rnammer_options": "-S bac -xml ./result/rnammer.xml" % settings,
    },
    "prodigal_options": {
        "prodigal_binary": os.path.join(settings["binary_folder"], "prodigal"),
        "prodigal_input": settings["input_file"],
        "prodigal_output": os.path.join(settings["output_folder"], "prodigal.out"),
        "prodigal_options": "-c -m -g 11 -p single -f sco -q" % settings,
    },
    "signalp_options": {
        "signalp_binary": os.path.join(settings["binary_folder"], "signalp"),
        "signalp_input": settings["input_file"],
        "signalp_output": os.path.join(settings["output_folder"], "signalp.out"),
        "signalp_options": "-tmp ./result -prefix ./result/signalp -org gram+ -format short -fasta" % settings,
    },
}

In [5]:
# create all dirictories 
for key, value in settings.items():
    if "folder" in key:
        if not os.path.isdir(value):
            os.makedirs(value)

In [6]:
# PARALLEL
parallel_command_pl = "parallel --gnu --plain"
parallel_command_py = "%(parallel_binary)s %(parallel_options)s" % tools_settings["parallel_options"]
print(parallel_command_py)
os.system(parallel_command_py)

/Users/miracle/super-duper-annotator/prokka/binaries/common/parallel --gnu --plain


0

In [8]:
# ARAGORN

aragorn_command_pl = "aragorn -l -gc$gcode $aragorn_opt -w \Q$outdir/$prefix.fna\E"

aragorn_command_py = "%(aragorn_binary)s %(aragorn_options)s -o %(aragorn_output)s %(aragorn_input)s" % tools_settings["aragorn_options"]
print(aragorn_command_py)
os.system(aragorn_command_py) #why return not 0

#

/Users/miracle/super-duper-annotator/prokka/binaries/linux/aragorn -l -gc11 -w -o /Users/miracle/super-duper-annotator/result/aragorn.out /Users/miracle/super-duper-annotator/test_data/NC_000913.fna


32256

In [9]:
with open(tools_settings["aragorn_options"]["aragorn_output"]) as fh:
    for line in fh:
        print(line.strip())

>NC_000913.3 Escherichia coli str. K-12 substr. MG1655, complete genome
89 genes found
1   tRNA-Ile               [225381,225457]	35  	(gat)
2   tRNA-Ala               [225500,225575]	34  	(tgc)
3   tRNA-Asp               [228928,229004]	35  	(gtc)
4   tRNA-Asp               [236931,237007]	35  	(gtc)
5   tRNA-Thr               [262871,262946]	34  	(cgt)
6   tRNA-Ser               [345334,345414]	38  	(gga)
7   tRNA-Arg               [564723,564799]	35  	(tct)
8   tRNA-Gln              c[696430,696504]	33  	(ctg)
9   tRNA-Gln              c[696542,696616]	33  	(ctg)
10  tRNA-Met              c[696664,696740]	35  	(cat)
11  tRNA-Gln              c[696756,696830]	33  	(ttg)
12  tRNA-Gln              c[696865,696939]	33  	(ttg)
13  tRNA-Leu              c[696963,697047]	35  	(tag)
14  tRNA-Met              c[697057,697133]	35  	(cat)
15  tRNA-Lys               [780554,780629]	34  	(ttt)
16  tRNA-Val               [780765,780840]	34  	(tac)
17  tRNA-Lys               [780843,780918]	34  	(

In [3]:
# BARRNAP

barrnap_command_pl = "barrnap --kingdom $barrnap_mode --threads $cpus --quiet \Q$outdir/$prefix.fna\E"

barrnap_command_raw_py = "barrnap --kingdom bac -thread 10 -quiet ./results/barrnupoutput." #it false maybe because barrnap_mode change depending on the kingdom

barrnap_command_py = "barrnap %(barrnap_options)s %(barrnap_input)s > %(barrnap_output)s" % tools_settings["barrnap_options"]
print(barrnap_command_py)
os.system(barrnap_command_py)

barrnap --kingdom bac --threads 32 --quiet /Users/miracle/super-duper-annotator/test_data/NC_000913.fna > /Users/miracle/super-duper-annotator/result/barrnap.out


0

In [11]:
with open(tools_settings["barrnap_options"]["barrnap_output"]) as fh:
    for line in fh:
        print(line.strip())

##gff-version 3
NC_000913.3	barrnap:0.9	rRNA	223774	225311	0	+	.	Name=16S_rRNA;product=16S ribosomal RNA
NC_000913.3	barrnap:0.9	rRNA	225761	228661	0	+	.	Name=23S_rRNA;product=23S ribosomal RNA
NC_000913.3	barrnap:0.9	rRNA	228760	228870	1.9e-11	+	.	Name=5S_rRNA;product=5S ribosomal RNA
NC_000913.3	barrnap:0.9	rRNA	2726074	2726184	1.9e-11	-	.	Name=5S_rRNA;product=5S ribosomal RNA
NC_000913.3	barrnap:0.9	rRNA	2726282	2729182	0	-	.	Name=23S_rRNA;product=23S ribosomal RNA
NC_000913.3	barrnap:0.9	rRNA	2729617	2731154	0	-	.	Name=16S_rRNA;product=16S ribosomal RNA
NC_000913.3	barrnap:0.9	rRNA	3423428	3423538	4.4e-11	-	.	Name=5S_rRNA;product=5S ribosomal RNA
NC_000913.3	barrnap:0.9	rRNA	3423673	3423783	1.9e-11	-	.	Name=5S_rRNA;product=5S ribosomal RNA
NC_000913.3	barrnap:0.9	rRNA	3423881	3426781	0	-	.	Name=23S_rRNA;product=23S ribosomal RNA
NC_000913.3	barrnap:0.9	rRNA	3427222	3428759	0	-	.	Name=16S_rRNA;product=16S ribosomal RNA
NC_000913.3	barrnap:0.9	rRNA	3941811	3943348	0	+	.	Name=16S_rRNA

In [44]:
# RNAMMER

rnammer_command_pl = "rnammer -S $rnammer_mode $rnammer_opt -xml \Q$rnammerfn\E \Q$outdir/$prefix.fna\E"

rnammer_mode = 'bac'
rnammer_opt = '$cpus != 1 ? "-multi" : "" '
#and other modes and options change too
rnammer_command_py = "rnammer %(rnammer_options)s %(rnammer_output)s" % tools_settings["rnammer_options"]
print(rnammer_command_py)
os.system(rnammer_command_py)

rnammer -S bac -xml ./result/rnammer.xml -o /Users/miracle/super-duper-annotator/result/rnammer.out /Users/miracle/super-duper-annotator/test_data/NC_000913.fna


32512

In [43]:
# PRODIGAL

prodigal_command_pl = "prodigal -i \Q$outdir/$prefix.fna\E -c -m -g $gcode -p $prodigal_mode -f sco -q"

prodigal_mode_pl = "$prodigal_mode = ($totalbp >= 100000 && !$metagenome) ? 'single' : 'meta'" 
#i think we have to if/else for choice 'single' or 'meta', but imn't sure
    
prodigal_command_py = "%(prodigal_binary)s -i %(prodigal_output)s %(prodigal_options)s" % tools_settings["prodigal_options"]
print(prodigal_command_py)
os.system(prodigal_command_py)

/Users/miracle/super-duper-annotator/prokka/binaries/linux/prodigal -i /Users/miracle/super-duper-annotator/result/prodigal.out -c -m -g 11 -p single -f sco -q


32256

In [52]:
# SIGNALP

signalp_command_pl = "$opts \Q$spoutfn\E 2> /dev/null"

#sigpver is version of SIGNALP
#gram = gram+ or gram-
#$opts = $sigpver==3 ? "signalp -t $gram -f short -m hmm" : ($sigpver==4 ? "signalp -t $gram -f short" : '$(which signalp)'." -tmp $outdir -prefix $outdir/signalp -org $gram -format short -fasta")
signalp_command_py = "signalp %(signalp_options)s %(signalp_output)s 2> /dev/null" % tools_settings["signalp_options"]
    # or signalp_command_py = "signalp -tmp ./result -prefix ./result/signalp -org" + ' ' + gram + ' ' + "-format short -fasta ./result/signalp.faa 2> /dev/null"
print(signalp_command_py)
os.system(signalp_command_py)

signalp -tmp ./result -prefix ./result/signalp -org gram+ -format short -fasta /Users/miracle/super-duper-annotator/result/signalp.out 2> /dev/null


32512

In [None]:
# MINCED

minced_command_pl = '-|', "minced -gff \Q$outdir/$prefix.fna\E"
# E - stop change case (lowercase and uppercase)

minced_command_py = "minced -gff ./result/$prefix.fna"

In [None]:
# CMSCAN

cmscan_command_pl = "cmscan -Z $dbsize --cut_ga --rfam --nohmmonly --fmt 2 --cpu $icpu --tblout /dev/stdout -o /dev/null --noali $cmdb \Q$outdir/$prefix.fna\E"
#total_bp - base pairs
dbsize = total_bp * 2 / 1000000
#my $icpu = $cpus || 1;
if cpus == True:
    icpu = cpus
else:
    icpu = 1
cmdp_pl = "$dbdir/cm/$kingdom"
cmdb = os.path.abspath(./prokka/db) + '/cm/' + kingdom
cmscan_command_pl = "cmscan -Z" + ' ' + dbsize + ' ' +  "--cut_ga --rfam --nohmmonly --fmt 2 --cpu" + ' ' +  icpu + ' ' +  "--tblout /dev/stdout -o /dev/null --noali" + ' ' + cmdb + ' ' +  "./result/$prefix.fna"

In [None]:
# CMPRESS

cmpress_command_pl = "cmpress \Q$cm\E"
cmpress_command_py = "cmpress " + os.path.abspath(./prokka/db) + '/cm/' + kingdom

In [4]:
# HMMER3

hmmer3_command_pl = "hmmscan --noali --notextw --acc -E %e --cpu 1 %d /dev/stdin"
#not input and i don't know if what %e and $d means

hmmscan --noali --notextw --acc -E %e --cpu 1 %d /dev/stdin


256

In [None]:
# HMMPRESS

hmmpress_command_pl = "hmmpress \Q$hmm\E"
hmmpress_command_py = "hmmpress " + os.path.abspath(./prokka/db) + '/cm/' + kingdom

In [1]:
# BLASTP

blast_command_pl = "blastp -query - -db %d -evalue %e -qcov_hsp_perc %c -num_threads 1 -num_descriptions 1 -num_alignments 1 -seg no"


In [None]:
# MAKEBLASTDB

makeblastdb_command1_pl = "makeblastdb -dbtype prot -in \Q$faa_file\E -out \Q$outdir/proteins\E -logfile /dev/null" #about proteins
makeblastdb_command2_pl = "makeblastdb -hash_index -dbtype prot -in \Q$fasta\E -logfile /dev/null" #kingdom
makeblastdb_command3_pl ="makeblastdb -hash_index -dbtype prot -in \Q$genus\E -logfile /dev/null" #genus

In [None]:
# TBL2ASN

#runcmd("tbl2asn -a s -q F -A $prefix -N 1 -y 'Annotated using $EXE $VERSION from $URL' -Z $outdir/$prefix.err -M n -V b -i $outdir/$prefix.fsa -f $outdir/$prefix.tbl 2> /dev/null");
#runcmd("tbl2asn -V b -a s -A $prefix -N 1 -y 'Annotated using $EXE $VERSION from $URL' -Z $outdir/$prefix.err -i $outdir/$prefix.fsa");
#runcmd("tbl2asn -V b -a s -N 1 -y 'Annotated using $EXE $VERSION from $URL' -Z $outdir/$prefix.err -i $outdir/$prefix.fsa 2> /dev/null");

tbl2asn_command_pl = "tbl2asn -V b -a r10k -l paired-ends $tbl2asn_opt -N $accver -y 'Annotated using $EXE $VERSION from $URL'". " -Z \Q$outdir/$prefix.err\E -i \Q$outdir/$prefix.fsa\E 2> /dev/null"