Permalink
Switch branches/tags
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 907 lines (788 sloc) 27 KB
#!/usr/bin/env perl
###############################################################################
#
# annotateM
#
# The idea here is an automated way of annotating your genome based on
# multiple available databases and to produce a tab-delimited file of
# all the annotations, evalues, scores, descriptions.
#
# Suggested workflow:
# 1) run your genome nucleotide fasta file through annotateM
# 2) generate a tab-delimited file
# 3) open the file in ms excel or oo calc
# 4) manually curate the annotations based on evalues/scores/desc etc
# 5) metabolic reconstruction of organism
#
# Copyright (C) Mohamed Fauzi Haroon
# Special appearance from Adam Skarshewski
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################
#pragmas
use strict;
use warnings;
#core Perl modules
use Getopt::Long;
use Carp;
use Data::Dumper;
#CPAN modules
#locally-written modules
BEGIN {
select(STDERR);
$| = 1;
select(STDOUT);
$| = 1;
}
# edit here to log all external commands
my $global_log_commands = 0;
# ext command failure levels
use constant {
IGNORE_FAILURE => 0,
WARN_ON_FAILURE => 1,
DIE_ON_FAILURE => 2
};
# get input params and print copyright
printAtStart();
my $global_options = checkParams();
######################################################################
# CODE HERE
######################################################################
# check that the file exists
checkFileExists($global_options->{'in'});
if (! -e "./prokka_annotation/")
{
print "prokka\n";
# run prokka to generate the ORFs and also prokka annotations
checkAndRunCommand("prokka", [{
"--locustag" => $global_options->{'locustag'},
"--outdir" => "prokka_annotation",
"--prefix" => $global_options->{'locustag'},
"--kingdom" => $global_options->{'kingdom'},
"--cpus" => $global_options->{'threads'},
"--keep_names",
$global_options->{'in'},
}], DIE_ON_FAILURE);
}
# identify the ORF called amino acid fasta file for blast-ing
my $locus = $global_options->{'locustag'};
# blast against img
if (! -e "./$locus.faaVSimg.blastp")
{
print "BLASTing against IMG 4.0 database...............\n";
checkAndRunCommand("cat",
[[
"prokka_annotation/$locus.faa |",
"parallel",
"--block"=> "100k",
"--recstart",
"'>'",
"--pipe",
"blastp",
-db => "/srv/db/img/4.0/dereplicated/img_dereplicated_species.genes.faa",
-outfmt => 6,
-max_target_seqs => 1,
-evalue => $global_options->{'evalue'},
-query => "-",
"> $locus.faaVSimg.blastp",
]], DIE_ON_FAILURE);
}
# reciprocal blast of img positive hits against genome ORF
if (! -e "./subsetimg.faaVS$locus.faa.blastp")
{
print "Reciprocal BLASTing positive IMG hits to $locus.faa ...............\n";
checkAndRunCommand("contig_extractor.pl",
[[
-i => "$locus.faaVSimg.blastp",
-d => "/srv/db/img/4.0/dereplicated/img_dereplicated_species.genes.faa",
-b => '',
-S => '',
-o => "subsetimg.faa",
]], DIE_ON_FAILURE);
checkAndRunCommand("makeblastdb",
[[
-in => "prokka_annotation/$locus.faa",
-dbtype => "prot",
]], DIE_ON_FAILURE);
checkAndRunCommand("blastp",
[[
-query => "subsetimg.faa",
-db => "prokka_annotation/$locus.faa",
-outfmt => 6,
-max_target_seqs => 1,
-evalue => $global_options->{'evalue'},
-num_threads => $global_options->{'threads'},
-out => "subsetimg.faaVS$locus.faa.blastp",
]], DIE_ON_FAILURE);
}
# blast against uniref
if (! -e "./$locus.faaVSuniref90.blastp")
{
print "BLASTing against Uniref90 database................\n";
checkAndRunCommand("cat",[[
"prokka_annotation/$locus.faa |",
"parallel",
"--block"=> "100k",
"--recstart",
"'>'",
"--pipe",
"blastp",
-db => "/srv/db/uniprot/uniref-20140403/uniref90.fasta",
-outfmt => 6,
-max_target_seqs => 1,
-evalue => $global_options->{'evalue'},
-query => "-",
"> $locus.faaVSuniref90.blastp",
#-num_threads => $global_options->{'threads'},
]], DIE_ON_FAILURE);
}
# reciprocal blast of Uniref positive hits against genome ORF
if (! -e "./subsetuniref.faaVS$locus.faa.blastp")
{
print "Reciprocal BLASTing positive Uniref hits to $locus.faa ...............\n";
checkAndRunCommand("contig_extractor.pl",
[[
-i => "$locus.faaVSuniref90.blastp",
-d => "/srv/db/uniprot/uniref-20140403/uniref90.fasta",
-b => '',
-S => '',
-o => "subsetuniref.faa",
]], DIE_ON_FAILURE);
checkAndRunCommand("blastp",
[[
-query => "subsetuniref.faa",
-db => "prokka_annotation/$locus.faa",
-outfmt => 6,
-max_target_seqs => 1,
-evalue => $global_options->{'evalue'},
-num_threads => $global_options->{'threads'},
-out => "subsetuniref.faaVS$locus.faa.blastp",
]], DIE_ON_FAILURE);
}
# blast against COG
if (! -e "./$locus.faaVSCOG.blastp")
{
print "BLASTing against COG database................\n";
checkAndRunCommand("cat",[[
"prokka_annotation/$locus.faa |",
"parallel",
"--block"=> "100k",
"--recstart",
"'>'",
"--pipe",
"blastp",
-db => "/srv/db/cog/cog_blast_prot_db",
-outfmt => 6,
-max_target_seqs => 1,
-evalue => $global_options->{'evalue'},
-query => "-",
"> $locus.faaVSCOG.blastp",
#-num_threads => $global_options->{'threads'},
]], DIE_ON_FAILURE);
}
# HMMSCAN against PFAM
if (! -e "./$locus.faaVSPfam-A.hmm.hmmscanned")
{
print "HMMscanning against Pfam database................\n";
checkAndRunCommand("pfam_scan.pl",[[
-cpu => $global_options->{'threads'},
-e_seq => $global_options->{'evalue'},
-outfile => "$locus.faaVSPfam-A.hmm.hmmscanned",
-fasta => "prokka_annotation/$locus.faa",
-dir => "/srv/db/pfam/27",
]], DIE_ON_FAILURE);
}
# HMMSCAN against TIGRfam
if (! -e "./$locus.faaVStigr_all.hmm.hmmscanned")
{
print "HMMscanning against TIGRfam database................\n";
checkAndRunCommand("hmmscan",[[
"--tblout",
"$locus.faaVStigr_all.hmm.hmmscanned",
"--noali",
-E => $global_options->{'evalue'},
"--cpu",
$global_options->{'threads'},
"/srv/db/tigrfam/14.0/TIGRFAMs_14.0_HMM/tigr_all.hmm",
"prokka_annotation/$locus.faa",
]], DIE_ON_FAILURE);
}
# convert the hmmscan output to tab delimited
checkAndRunCommand("awk",[[
"'{\$1=\$1}{ print }'",
"$locus.faaVSPfam-A.hmm.hmmscanned",
"| sed 's/\\s/\\t/g'",
"> $locus.faaVSPfam-A.hmm.hmmscanned.tab",
]], DIE_ON_FAILURE);
checkAndRunCommand("awk",[[
"'{\$1=\$1}{ print }'",
"$locus.faaVStigr_all.hmm.hmmscanned",
"| sed 's/\\s/\\t/g'",
"> $locus.faaVStigr_all.hmm.hmmscanned.tab",
]], DIE_ON_FAILURE);
# hashes for img
my %hash = ();
my %hash2 = ();
my %hash3 =();
# read the img blast output and store in hash
open my $IMGblast, "./$locus.faaVSimg.blastp", or die "Couldn't open file $locus.faaVSimg.blastp\n";
while (<$IMGblast>)
{
chomp $_;
my @columns = split (/\t/, $_);
# store the orfid which is in columns[0]
my $orfid = $columns[0];
if ($columns[11] > 60)
{
# key is $orfid while the value of the key in $columns[1] is the IMG id
$hash{$orfid} = $columns[1];
$hash{$columns[1]} = $columns[0];
$hash2{$columns[1]} = "$columns[0]\t$columns[10]\t$columns[11]";
}
}
# read img id2names.txt
open my $imgid2names, "/srv/db/img/4.0/dereplicated/id2names.txt", or die "Couldn't open id2names.txt\n";
open my $img_temp_OUT, ">img_output_temp.txt";
while (<$imgid2names>)
{
chomp $_;
my @columns = split (/\t/, $_);
if (exists $hash2{$columns[0]})
{
$hash3{$columns[0]} = "$hash2{$columns[0]}\t$columns[1]\t$columns[2]";
print {$img_temp_OUT} "$hash2{$columns[0]}\t$columns[1]\t$columns[2]\n";
}
}
close($IMGblast);
close($imgid2names);
close($img_temp_OUT);
# read my reciprocal img blast output and store in hash
open my $rIMGblast, "./subsetimg.faaVS$locus.faa.blastp", or die "Couldn't open file subsetimg.faaVS$locus.faa.blastp\n";
open my $img_temp_OUT2, ">img_output_temp2.txt";
while (<$rIMGblast>)
{
chomp $_;
my @columns = split (/\t/, $_);
if (exists $hash{$columns[0]})
{
print {$img_temp_OUT2} "$hash3{$columns[0]}\treciprocal\n";
}
}
close($img_temp_OUT2);
# hashes for uniref
my %hash4 = ();
my %hash5 =();
my %hash6 = ();
# read uniref blast and store in hash
open my $unirefblast, "./$locus.faaVSuniref90.blastp", or die "Couldn't open file $locus.faaVSuniref90.blastp\n";
while (<$unirefblast>)
{
chomp $_;
my @columns = split (/\t/, $_);
if ($columns[11] > 60)
{
$hash4{$columns[0]} = $columns[1];
$hash4{$columns[1]} = $columns[0];
$hash5{$columns[1]} = "$columns[0]\t$columns[10]\t$columns[11]";
}
}
# read uniref id2names.txt
open my $unirefid2names, "/srv/db/uniprot/uniref-20140403/uniref90_id2names.txt", or die "Couldn't open id2names.txt\n";
open my $uniref_temp_OUT, ">uniref_output_temp.txt";
while (<$unirefid2names>)
{
chomp $_;
my @columns = split (/\t/, $_);
if (exists $hash5{$columns[0]})
{
$hash6{$columns[0]} = "$hash5{$columns[0]}\t$columns[1]\t$columns[2]";
print {$uniref_temp_OUT} "$hash5{$columns[0]}\t$columns[1]\t$columns[2]\n";
}
}
close($unirefblast);
close($unirefid2names);
close($uniref_temp_OUT);
# read my reciprocal img blast output and store in hash
open my $runirefblast, "./subsetuniref.faaVS$locus.faa.blastp", or die "Couldn't open file subsetuniref.faaVS$locus.faa.blastp\n";
open my $uniref_temp_OUT2, ">uniref_output_temp2.txt";
while (<$runirefblast>)
{
chomp $_;
my @columns = split (/\t/, $_);
if (exists $hash4{$columns[0]})
{
print {$uniref_temp_OUT2} "$hash6{$columns[0]}\treciprocal\n";
}
}
close($uniref_temp_OUT2);
# hashes for pfam
my %hash7 = ();
my %hash8 = ();
# read pfam hmmscan output and store in hash
open my $pfamoutput, "./$locus.faaVSPfam-A.hmm.hmmscanned.tab", or die "Couldn't open file $locus.faaVSPfam-A.hmm.hmmscanned.tab\n";
while (<$pfamoutput>)
{
next if /^\s*(#.*)?$/;
next if $pfamoutput =~ /^#/;
next if $pfamoutput =~ /^=/;
chomp $_;
my @columns = split (/\t/, $_);
if ($columns[11] > 60)
{
my @pfam_columns = split (/\./, $columns[5]);
my $pfam_id = $pfam_columns[0];
$hash7{$columns[0]} = $pfam_columns[0];
$hash7{$pfam_columns[0]} = $columns[0];
$hash8{$pfam_columns[0]} = "$columns[0]\t$columns[12]\t$columns[11]";
}
}
# read Pfam-A.clans.tsv
open my $pfamid2names, "/srv/db/pfam/27/Pfam-A.clans.tsv", or die "Couldn't open Pfam-A.clans.tsv\n";
open my $pfam_temp_OUT, ">pfam_output_temp.txt";
while (<$pfamid2names>)
{
chomp $_;
my @columns = split (/\t/, $_);
if (exists $hash8{$columns[0]})
{
print {$pfam_temp_OUT} "$hash8{$columns[0]}\t$columns[4]\n";
}
}
close($pfamoutput);
close($pfamid2names);
close($pfam_temp_OUT);
# hashes for tigrfam
my %hash9 = ();
my %hash10 = ();
# read tigrfam hmmscan output and store in hash
open my $tigrfamoutput, "./$locus.faaVStigr_all.hmm.hmmscanned.tab", or die "Couldn't open file $locus.faaVStigr_all.hmm.hmmscanned.tab\n";
while (<$tigrfamoutput>)
{
next if /^\s*(#.*)?$/;
next if $tigrfamoutput =~ /^#/;
chomp $_;
my @columns = split (/\t/, $_);
if ($columns[5] > 10)
{
$hash9{$columns[2]} = $columns[0];
$hash9{$columns[0]} = $columns[2];
$hash10{$columns[0]} = "$columns[2]\t$columns[4]\t$columns[5]";
}
}
# read tigrfam id2names2description
open my $tigrfamid2names, "/srv/db/tigrfam/14.0/TIGRFAMs_14.0_INFO/tigr_info_combined.parsed", or die "Couldn't open tigr_info_combined.parsed\n";
open my $tigrfam_temp_OUT, ">tigrfam_output_temp.txt";
while (<$tigrfamid2names>)
{
chomp $_;
my @columns = split (/\t/, $_);
$columns[0] =~ s/^\s+//;
$columns[0] =~ s/\s+$//;
if (exists $hash10{$columns[0]})
{
print {$tigrfam_temp_OUT} "$hash10{$columns[0]}\t$columns[1]\t$columns[2]\n";
}
}
close($tigrfamoutput);
close($tigrfamid2names);
close($tigrfam_temp_OUT);
# hashes for cog
my %hash11 = ();
my %hash12 = ();
my %hash13 = ();
# read cog blastp output and store in hash
open my $cogblast, "./$locus.faaVSCOG.blastp", or die "Couldn't open file $locus.faaVSCOG.blastp\n";
while (<$cogblast>)
{
chomp $_;
my @columns = split (/\t/, $_);
if ($columns[11] > 60)
{
$hash11{$columns[0]} = $columns[1];
$hash11{$columns[1]} = $columns[0];
$hash12{$columns[1]} = "$columns[0]\t$columns[10]\t$columns[11]";
}
}
# read cog prot2COG.tab
open my $cogid2names, "/srv/db/cog/prot2COG.tab", or die "Couldn't open prot2COG.tab\n";
open my $cog_temp_OUT, "> cog_output_temp.txt";
while (<$cogid2names>)
{
chomp $_;
my @columns = split (/\t/, $_);
if (exists $hash12{$columns[0]})
{
$hash13{$columns[0]} = "$hash12{$columns[0]}\t$columns[1]";
$hash13{$columns[1]} = $hash12{$columns[0]};
print {$cog_temp_OUT} "$hash12{$columns[0]}\t$columns[1]\n";
}
}
close($cogblast);
close($cogid2names);
close($cog_temp_OUT);
# read cog listcogs.txt
open my $cogid2longernames, "/srv/db/cog/listcogs.txt", or die "Couldn't open listcogs.txt\n";
open my $cog_temp_OUT2, "> cog_output_temp2.txt";
while(<$cogid2longernames>)
{
chomp $_;
my @columns = split (/\t/, $_);
if (exists $hash13{$columns[5]})
{
print {$cog_temp_OUT2} "$hash13{$columns[5]}\t$columns[3]\t$columns[4]\t$columns[6]\n";
}
}
close($cog_temp_OUT2);
### now to parse all the temporary files and combine into one tab-delimited-file
# to store the IDs => DB => values/annotations
my %combined_bighash =();
# open file for output
open my $FINAL_OUTPUT, "> ./final_output.txt";
# print header
print {$FINAL_OUTPUT} "ORF_ID\timg_evalue\timg_score\timg_gene\timg_organism\timg_reciprocal\tuniref_evalue\tuniref_score\tuniref_gene\tuniref_organism\tuniref_reciprocal\tprokka_gene\tcog_evalues\tcog_scores\tcog_classes\tcog_gene_acronyms\tcog_genes\tpfam_evalues\tpfam_scores\tpfam_genes\ttigrfam_evalues\ttigrfam_scores\ttigrfam_genes\ttigrfam_descriptions\n";
# img
open my $img_annotation, "./img_output_temp2.txt", or die "Couldn't open img_output_temp2.txt\n";
while (<$img_annotation>)
{
chomp $_;
my @columns = split (/\t/, $_);
my @baba = @columns[1..$#columns];
#print "@baba \n";
#$combined_bighash{$columns[0]}->{'img'} = join("\t", @baba);
push @{$combined_bighash{$columns[0]}->{'a-img'}}, join("\t", @baba);
}
# uniref
open my $uniref_annotation, "./uniref_output_temp2.txt", or die "Couldn't open uniref_output_temp2.txt\n";
while (<$uniref_annotation>)
{
chomp $_;
my @columns = split (/\t/, $_);
my @baba = @columns[1..$#columns];
#$combined_bighash{$columns[0]}->{'uniref'} = join("\t", @baba);
push @{$combined_bighash{$columns[0]}->{'b-uniref'}}, join("\t", @baba);
}
# prokka
# need to parse faa file to give prokka id2names
checkAndRunCommand("grep",[[
"'>'",
"prokka_annotation/$locus.faa |",
"sed",
"'s/>//g' |",
"sed",
-e => "'s/ /\\t/'",
"> prokka_temp_output.txt",
]], DIE_ON_FAILURE);
open my $prokka_annotation, "./prokka_temp_output.txt", or die "Couldn't open prokka_temp_output.txt\n";
while (<$prokka_annotation>)
{
chomp $_;
my @columns = split (/\t/, $_);
my @baba = @columns[1..$#columns];
#$combined_bighash{$columns[0]}->{'prokka'} = join("\t", @baba);
push @{$combined_bighash{$columns[0]}->{'c-prokka'}}, join("\t", @baba);
}
# cog
open my $cog_annotation, "./cog_output_temp2.txt", or die "Couldn't open cog_output_temp2.txt\n";
while (<$cog_annotation>)
{
chomp $_;
my @columns = split (/\t/, $_);
my @baba = @columns[1..$#columns];
#$combined_bighash{$columns[0]}->{'cog'} = join("\t", @baba);
push @{$combined_bighash{$columns[0]}->{'d-cog'}}, join("\t", @baba);
}
# pfam
open my $pfam_annotation, "./pfam_output_temp.txt", or die "Couldn't open pfam_output_temp.txt\n";
while (<$pfam_annotation>)
{
chomp $_;
my @columns = split (/\t/, $_);
my @baba = @columns[1..$#columns];
#$combined_bighash{$columns[0]}->{'pfam'} = join("\t", @baba);
push @{$combined_bighash{$columns[0]}->{'e-pfam'}}, join("\t", @baba);
}
#print Dumper \%combined_bighash;
# tigrfam
open my $tigrfam_annotation, "./tigrfam_output_temp.txt", or die "Couldn't open tigrfam_output_temp.txt\n";
while (<$tigrfam_annotation>)
{
chomp $_;
my @columns = split (/\t/, $_);
my @baba = @columns[1..$#columns];
#$combined_bighash{$columns[0]}->{'tigrfam'} = join("\t", @baba);
push @{$combined_bighash{$columns[0]}->{'f-tigrfam'}}, join("\t", @baba);
}
# to print finally.................
# assign key and value for number of expected columns for each annotation type, important for putting NA in missing annotation types
my %column_lengths = (
'a-img' => 5,
'b-uniref' => 5,
'c-prokka' => 1,
'd-cog' => 5,
'e-pfam' => 3,
'f-tigrfam' => 4,
);
# print the orfids first
foreach my $ID (sort(keys %combined_bighash))
{
print {$FINAL_OUTPUT} "$ID\t";
foreach my $annotation_type (sort(keys %column_lengths))
{
# if the annotation type does not exist, print NA in the columns depending on the %column_lengths hash values above
if (! exists $combined_bighash{$ID}->{$annotation_type})
{
# cool way of printing a certain string multiple times based on the values in the hash
print {$FINAL_OUTPUT} join("\t", ("NA",) x $column_lengths{$annotation_type}), "\t";
}
# check the derefencing made with @{$combined_bighash{$columns[0]}->{'f-tigrfam'}} and so on..
# the derefencing allows the hash be converted into an array so that we can read the hash for the different types of annotation types
elsif (ref($combined_bighash{$ID}->{$annotation_type}) eq 'ARRAY')
{
# place to store the columns in the values of the hash annotation types
my @storage_array;
foreach my $line (@{$combined_bighash{$ID}->{$annotation_type}})
{
# each annotation types have different number of columns, so we need to split the columns first before
# we can add in the extra values if lets say an orfid hits multiple pfam/cog/tigrfam values
my @values = split("\t",$line);
# cool and alternate way of doing columns[1] = values[1], and so on.., repetitively
# what it basically means as long as the value i less than the number of columns in each annotation type,
# add +1 to the string $i and do the push below
for (my $i = 0; $i <= $#values; $i++)
{
push @{$storage_array[$i]}, $values[$i];
}
}
#print Dumper(\@storage_array);
# array to store the multiple hits in each column.. for example test0001 orfid hits multiple pfam values like pf0008 anf pf0010 etc..
# so we would like to have the values combined together in the same column delimited by a comma
my @print_info_array;
for (my $i = 0; $i < $column_lengths{$annotation_type}; $i++)
{
push @print_info_array, join(", ", @{$storage_array[$i]});
}
#print Dumper(\@print_info_array);
print {$FINAL_OUTPUT} join("\t", @print_info_array), "\t";
}
else
{
print {$FINAL_OUTPUT} "$combined_bighash{$ID}{$annotation_type}\t";
}
}
print {$FINAL_OUTPUT} "\n";
}
#close all files
close($img_annotation);
close($uniref_annotation);
close($prokka_annotation);
close($cog_annotation);
close($pfam_annotation);
close($tigrfam_annotation);
close($FINAL_OUTPUT);
######################################################################
# CUSTOM SUBS
######################################################################
# who needs custom subs...
######################################################################
# TEMPLATE SUBS
######################################################################
# PARAMETERS
sub checkParams {
#-----
# Do any and all options checking here...
#
my @standard_options = ( "help|h+", "in|i:s", "locustag|l:s", "kingdom|k:s", "threads|t:s", "evalue|e:s");
my %options;
# Add any other command line options, and the code to handle them
#
GetOptions( \%options, @standard_options );
# if no arguments supplied print the usage and exit
#
exec("pod2usage $0") if (0 == (keys (%options) ));
# If the -help option is set, print the usage and exit
#
exec("pod2usage $0") if $options{'help'};
# Compulsory items
#if(!exists $options{''} ) { printParamError (""); }
if(!exists $options{'in'} ) { printParamError ("You MUST supply a fasta file"); }
return \%options;
}
sub printParamError
{
#-----
# What to do if there's something wrong with a parameter
#
my ($error) = @_;
print "**ERROR: $0 : $error\n"; exec("pod2usage $0");
}
sub overrideDefault
{
#-----
# Set and override default values for parameters
#
my ($default_value, $option_name) = @_;
if(exists $global_options->{$option_name})
{
return $global_options->{$option_name};
}
return $default_value;
}
#####################################################################
# FILE IO
sub openWrite
{
#-----
# Open a file for writing
#
my ($fn) = @_;
open my $fh, ">", $fn or croak "**ERROR: could not open file: $fn for writing $!\n";
return $fh;
}
sub openRead
{
#-----
# Open a file for reading
#
my ($fn) = @_;
open my $fh, "<", $fn or croak "**ERROR: could not open file: $fn for reading $!\n";
return $fh;
}
######################################################################
# EXTERNAL COMMANDS
#
# checkAndRunCommand("ls", {
# -a => ""
# },
# WARN_ON_FAILURE);
sub checkFileExists {
#-----
# Does a file exists?
#
my ($file) = @_;
unless(-e $file) {
croak "**ERROR: $0 : Cannot find:\n$file\n";
}
}
sub logExternalCommand
{
#-----
# Log a command line command to the command line!
#
if(1 == $global_log_commands) {
print $_[0], "\n";
}
}
sub isCommandInPath
{
#-----
# Is this command in the path?
#
my ($cmd, $failure_type) = @_;
if (system("which $cmd |> /dev/null")) {
handleCommandFailure($cmd, $failure_type);
}
}
sub runExternalCommand
{
#-----
# Run a command line command on the command line!
#
my ($cmd) = @_;
logExternalCommand($cmd);
system($cmd);
}
sub checkAndRunCommand
{
#-----
# Run external commands more sanelier
#
my ($cmd, $params, $failure_type) = @_;
isCommandInPath($cmd, $failure_type);
# join the parameters to the command
my $param_str = join " ", map {formatParams($_)} @{$params};
my $cmd_str = $cmd . " " . $param_str;
print "The command currently running:\t$cmd_str\n";
logExternalCommand($cmd_str);
# make sure that all went well
if (system($cmd_str)) {
handleCommandFailure($cmd_str, $failure_type)
}
}
sub formatParams {
#---------
# Handles and formats the different ways of passing parameters to
# checkAndRunCommand
#
my $ref = shift;
if (ref($ref) eq "ARRAY") {
return join(" ", @{$ref});
} elsif (ref($ref) eq "HASH") {
return join(" ", map { $_ . " " . $ref->{$_}} keys %{$ref});
}
croak 'The elements of the $params argument in checkAndRunCommand can ' .
'only contain references to arrays or hashes\n';
}
sub handleCommandFailure {
#-----
# What to do when all goes bad!
#
my ($cmd, $failure_type) = @_;
if (defined($failure_type)) {
if ($failure_type == DIE_ON_FAILURE) {
croak "**ERROR: $0 : " . $! . "\n";
} elsif ($failure_type == WARN_ON_FAILURE) {
carp "**WARNING: $0 : " . $! . "\n";
}
}
}
######################################################################
# MISC
sub printAtStart {
print<<"EOF";
----------------------------------------------------------------
$0
annotateM - annotate my genome
Due to the blast processes against multiple databases, this whole
annotation pipeline will usually take awhile. Please be patient!
----------------------------------------------------------------
EOF
}
__DATA__
=head1 NAME
annotateM
=head1 COPYRIGHT
Copyright (C) Mohamed Fauzi Haroon
Special appearance from Adam Skarshewski
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
=head1 DESCRIPTION
Want to annotate your genome? annotateM!
=head1 SYNOPSIS
annotateM -i [fasta_file] -l [locus] -k [Bacteria|Archaea] -t [threads] -e [evalue]
-i FASTA_FILE Nucleotide fasta file
-l locustag Name of locus tag
-k kingdom (Bacteria/Archaea) Kingdom of genome to be annotated
-t threads Number of threads
-e evalue Evalue for BLAST, recommended 1e-3
[-help -h] Displays basic usage information
=cut