Permalink
Switch branches/tags
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 383 lines (315 sloc) 9.87 KB
#!/usr/bin/env perl
###############################################################################
#
# annotateM
#
# The idea here is to produce a tab-delimited file of all the annotation
# pipelines for manual curation afterwards.
#
# Copyright (C) Mohamed Fauzi Haroon
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################
#pragmas
use strict;
use warnings;
#core Perl modules
use Getopt::Long;
use Carp;
#CPAN modules
#locally-written modules
BEGIN {
    # Switch both standard streams to unbuffered (autoflush) mode.
    # $| applies to the currently selected handle, so each stream is
    # selected in turn; STDOUT comes last and therefore remains the
    # default output handle for the rest of the program.
    for my $stream (\*STDERR, \*STDOUT) {
        select($stream);
        $| = 1;
    }
}
# Set to 1 to have every external command line echoed by
# logExternalCommand() before it is run.
my $global_log_commands = 0;
# Failure levels accepted by checkAndRunCommand() and friends:
#   IGNORE_FAILURE  - carry on silently
#   WARN_ON_FAILURE - emit a warning (carp) and carry on
#   DIE_ON_FAILURE  - abort the program (croak)
use constant IGNORE_FAILURE  => 0;
use constant WARN_ON_FAILURE => 1;
use constant DIE_ON_FAILURE  => 2;
# get input params and print copyright
printAtStart();
my $global_options = checkParams();
######################################################################
# CODE HERE
######################################################################
# Pipeline:
#   1. prokka on the input fasta (ORF calling + prokka annotation)
#   2. blastp of the prokka proteins against IMG
#   3. contig_extractor.pl on the IMG hits
#   4. blastp of the prokka proteins against Uniref90
# Steps 2-4 are skipped when their output file already exists, so an
# interrupted run can be resumed.
#
# NOTE(review): only -i is compulsory in checkParams(); locustag, kingdom,
# threads and evalue are interpolated below as given, so they are undef
# (and trigger warnings / empty arguments) when omitted - confirm whether
# defaults should be applied via overrideDefault().

# check that the file exists
checkFileExists($global_options->{'in'});

# run prokka to generate the ORFs and also prokka annotations.
# The arguments are passed as an ordered list (not a hash) so that the
# value-less flag --keep_names is not mistaken for a key/value pair and
# the input file is always the last argument.
checkAndRunCommand("prokka", [[
    "--locustag" => $global_options->{'locustag'},
    "--outdir"   => "prokka_annotation",
    "--prefix"   => $global_options->{'locustag'},
    "--kingdom"  => $global_options->{'kingdom'},
    "--cpus"     => $global_options->{'threads'},
    "--keep_names",
    $global_options->{'in'},
]], DIE_ON_FAILURE);

# identify the ORF called amino acid fasta file for blast-ing
my $locus = $global_options->{'locustag'};

# blast against img
if (! -e "./$locus.faaVSimg.blastp")
{
    print "BLASTing against IMG 4.0 database...............\n";
    # The whole pipeline (cat | parallel ... blastp > file) is handed to
    # the shell as one string by checkAndRunCommand.
    checkAndRunCommand("cat",
        [[
            "prokka_annotation/$locus.faa |",
            "parallel",
            "--block"=> "100k",
            "--recstart",
            "'>'",
            "--pipe",
            "blastp",
            -db => "/srv/db/img/4.0/dereplicated/img_dereplicated_species.genes.faa",
            -outfmt => 6,
            -max_target_seqs => 1,
            -evalue => $global_options->{'evalue'},
            -query => "-",
            "> $locus.faaVSimg.blastp",
            #-num_threads => $global_options->{'threads'},
        ]], DIE_ON_FAILURE);
}

# reciprocal blast of img positive hits to genome ORF
if (! -e "./subsetimgVS$locus.faa.blastp")
{
    print "Reciprocal BLASTing positive IMG hits to $locus.faa ...............\n";
    # -b and -S take no value here, hence the empty strings.
    checkAndRunCommand("contig_extractor.pl",
        [[
            -i => "$locus.faaVSimg.blastp",
            -d => "/srv/db/img/4.0/dereplicated/img_dereplicated_species.genes.faa",
            -b => '',
            -S => '',
            -o => "subsetimgVS$locus.faa.blastp",
        ]], DIE_ON_FAILURE);
}

# blast against uniref
if (! -e "./$locus.faaVSuniref90.blastp")
{
    print "BLASTing against Uniref90 database................\n";
    checkAndRunCommand("cat",
        [[
            "prokka_annotation/$locus.faa |",
            "parallel",
            "--block"=> "100k",
            "--recstart",
            "'>'",
            "--pipe",
            "blastp",
            -db => "/srv/whitlam/home/users/uqmharoo/Uniref_db/uniref90.fasta",
            -outfmt => 6,
            -max_target_seqs => 1,
            -evalue => $global_options->{'evalue'},
            -query => "-",
            "> $locus.faaVSuniref90.blastp",
            #-num_threads => $global_options->{'threads'},
        ]], DIE_ON_FAILURE);
}
######################################################################
# CUSTOM SUBS
######################################################################
######################################################################
# TEMPLATE SUBS
######################################################################
# PARAMETERS
sub checkParams {
    #-----
    # Parse and validate the command line.
    #
    # Returns a reference to a hash of the options that were supplied
    # (keys: help, in, locustag, kingdom, threads, evalue).
    #
    # The usage text (rendered by pod2usage from this file's POD) is shown
    # and the program is replaced when:
    #   - the command line could not be parsed,
    #   - no options were given at all, or
    #   - -help / -h was given.
    # Croaks if pod2usage itself cannot be started.
    #
    my @standard_options = ( "help|h+", "in|i:s", "locustag|l:s", "kingdom|k:s", "threads|t:s", "evalue|e:s");
    my %options;

    # Add any other command line options, and the code to handle them
    #
    my $parsed_ok = GetOptions( \%options, @standard_options );

    # GetOptions has already reported what was wrong when parsing failed;
    # follow up with the usage text instead of carrying on regardless.
    my $show_usage = !$parsed_ok
                  || 0 == keys(%options)
                  || $options{'help'};
    if ($show_usage) {
        # List form: $0 is passed verbatim, without going through a shell.
        # exec only returns on failure, so anything after it is an error.
        exec("pod2usage", $0)
            or croak "**ERROR: $0 : could not run pod2usage: $!";
    }

    # Compulsory items
    #if(!exists $options{''} ) { printParamError (""); }
    if(!exists $options{'in'} ) { printParamError ("You MUST supply a fasta file"); }

    return \%options;
}
sub printParamError
{
    #-----
    # Report a problem with a command line parameter, then replace this
    # process with pod2usage so the usage text is shown.
    #
    # $error - human readable description of what is wrong
    #
    # Does not return: croaks if pod2usage cannot be started, so the
    # caller never continues with invalid parameters.
    #
    my ($error) = @_;
    print "**ERROR: $0 : $error\n";
    # List form avoids passing $0 through a shell.
    exec("pod2usage", $0)
        or croak "**ERROR: $0 : could not run pod2usage: $!";
}
sub overrideDefault
{
    #-----
    # Pick the effective value for an option.
    #
    # $default_value - value to use when the option was not supplied
    # $option_name   - key to look up in the global options hash
    #
    # Returns the user supplied value when the key exists in
    # $global_options, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    return exists($global_options->{$option_name})
        ? $global_options->{$option_name}
        : $default_value;
}
######################################################################
# FILE IO
sub openWrite
{
    #-----
    # Open (create / truncate) a file for writing.
    #
    # Takes the file name; returns a lexical file handle.
    # Croaks with the system error if the file cannot be opened.
    #
    my $path = shift;
    my $handle;
    unless (open($handle, ">", $path)) {
        croak "**ERROR: could not open file: $path for writing $!\n";
    }
    return $handle;
}
sub openRead
{
    #-----
    # Open an existing file for reading.
    #
    # Takes the file name; returns a lexical file handle.
    # Croaks with the system error if the file cannot be opened.
    #
    my $path = shift;
    my $handle;
    unless (open($handle, "<", $path)) {
        croak "**ERROR: could not open file: $path for reading $!\n";
    }
    return $handle;
}
######################################################################
# EXTERNAL COMMANDS
#
# checkAndRunCommand("ls", [{
# -a => ""
# }],
# WARN_ON_FAILURE);
sub checkFileExists {
    #-----
    # Make sure a path exists on disk.
    #
    # Takes the path to test; croaks, naming the path, when it is missing.
    #
    my $path = shift;
    -e $path
        or croak "**ERROR: $0 : Cannot find:\n$path\n";
}
sub logExternalCommand
{
    #-----
    # Echo a command line to STDOUT, but only when command logging has
    # been switched on via $global_log_commands.
    #
    my ($command_line) = @_;
    return unless 1 == $global_log_commands;
    print $command_line, "\n";
}
sub isCommandInPath
{
    #-----
    # Is this command in the path?
    #
    # $cmd          - program name (looked up in PATH) or a path containing
    #                 a '/' (checked as given)
    # $failure_type - passed on to handleCommandFailure() when the command
    #                 cannot be found
    #
    # The lookup is done in Perl rather than by shelling out to `which`:
    # the previous `which $cmd |> /dev/null` was parsed by the shell as a
    # pipe into a redirection-only command, so its exit status did not
    # reflect whether the command was found.
    #
    my ($cmd, $failure_type) = @_;
    require File::Spec;

    my @candidates = ($cmd =~ m{/})
        ? ($cmd)
        : map { File::Spec->catfile($_, $cmd) } File::Spec->path();

    for my $candidate (@candidates) {
        # must be a regular file we are allowed to execute
        # (`_` reuses the stat result of the preceding -f test)
        return if (-f $candidate and -x _);
    }

    handleCommandFailure($cmd, $failure_type);
    return;
}
sub runExternalCommand
{
    #-----
    # Log a command line (if logging is enabled) and then execute it.
    #
    # Returns whatever system() returns for the command.
    #
    my $command_line = shift;
    logExternalCommand($command_line);
    return system($command_line);
}
sub checkAndRunCommand
{
    #-----
    # Check that a command is available, build its command line and run it.
    #
    # $cmd          - name of the program to run
    # $params       - array ref whose elements are array refs and/or hash
    #                 refs of arguments (see formatParams)
    # $failure_type - IGNORE_FAILURE, WARN_ON_FAILURE or DIE_ON_FAILURE;
    #                 decides what happens if the command is missing or
    #                 returns a non-zero status
    #
    # The command line is passed to system() as a single string, so it is
    # interpreted by the shell.
    #
    my ($cmd, $params, $failure_type) = @_;
    isCommandInPath($cmd, $failure_type);

    # join the parameters to the command
    my $param_str = join " ", map {formatParams($_)} @{$params};
    my $cmd_str = $cmd . " " . $param_str;

    # Echo the command on a line of its own; without the newline it runs
    # straight into the output of the external tool.
    print $cmd_str, "\n";
    logExternalCommand($cmd_str);

    # make sure that all went well
    if (system($cmd_str)) {
        handleCommandFailure($cmd_str, $failure_type);
    }
    return;
}
sub formatParams {
    #---------
    # Handles and formats the different ways of passing parameters to
    # checkAndRunCommand
    #
    # Takes one reference:
    #   ARRAY ref - elements are joined with single spaces, in order
    #   HASH ref  - rendered as "key value" pairs joined with spaces; keys
    #               are sorted so the resulting command line is the same
    #               on every run
    # Returns the formatted string; croaks for anything else.
    #
    my $ref = shift;
    if (ref($ref) eq "ARRAY") {
        return join(" ", @{$ref});
    } elsif (ref($ref) eq "HASH") {
        return join(" ", map { $_ . " " . $ref->{$_} } sort keys %{$ref});
    }
    # Single quotes keep '$params' literal on purpose. No trailing "\n":
    # inside single quotes it would be printed as a backslash and an 'n'.
    croak 'The elements of the $params argument in checkAndRunCommand can ' .
          'only contain references to arrays or hashes';
}
sub handleCommandFailure {
    #-----
    # What to do when all goes bad!
    #
    # $cmd          - the command (line) that failed
    # $failure_type - DIE_ON_FAILURE croaks, WARN_ON_FAILURE carps, any
    #                 other value (or undef) is ignored silently
    #
    # The message names the command and decodes $? (the status left by the
    # last system() call); $! is only meaningful when the command could
    # not be started at all, so it is reported in that case only.
    #
    my ($cmd, $failure_type) = @_;

    # capture both before anything else can overwrite them
    my ($status, $os_error) = ($?, "$!");

    return unless defined($failure_type);

    my $reason;
    if ($status == -1) {
        $reason = "could not be executed: $os_error";
    } elsif ($status & 127) {
        $reason = "terminated by signal " . ($status & 127);
    } elsif ($status >> 8) {
        $reason = "exited with status " . ($status >> 8);
    } else {
        $reason = "not found or failed";
    }
    my $message = "$0 : external command $reason: $cmd";

    if ($failure_type == DIE_ON_FAILURE) {
        croak "**ERROR: $message";
    } elsif ($failure_type == WARN_ON_FAILURE) {
        carp "**WARNING: $message";
    }
    return;
}
######################################################################
# MISC
sub printAtStart {
    #-----
    # Print the start-up banner (program name, copyright and warranty
    # notice) to the currently selected output handle.
    #
    my $banner = <<"EOF";
----------------------------------------------------------------
$0
Copyright (C) Mohamed Fauzi Haroon
This program comes with ABSOLUTELY NO WARRANTY;
This is free software, and you are welcome to redistribute it
under certain conditions: See the source for more details.
----------------------------------------------------------------
EOF
    return print $banner;
}
__DATA__
=head1 NAME
annotateM
=head1 COPYRIGHT
copyright (C) Mohamed Fauzi Haroon
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
=head1 DESCRIPTION
Want to annotate your genome? annotateM!
=head1 SYNOPSIS
annotateM -i fasta_file
-i FASTA_FILE Nucleotide fasta file
-l locustag Name of locus tag
-k kingdom (Bacteria/Archaea) Kingdom of genome to be annotated
-t threads Number of threads
-e evalue Evalue for BLAST, recommend 1e-3
[-help -h] Displays basic usage information
=cut