Permalink
Please sign in to comment.
363
annotateM
| @@ -0,0 +1,363 @@ | ||
| +#!/usr/bin/env perl | ||
| +############################################################################### | ||
| +# | ||
| +# annotateM | ||
| +# | ||
| +# The idea here is to produce a tab-delimited file of all the annotation | ||
| +# pipelines for manual curation afterwards. | ||
| +# | ||
| +# Copyright (C) Mohamed Fauzi Haroon | ||
| +# | ||
| +# This program is free software: you can redistribute it and/or modify | ||
| +# it under the terms of the GNU General Public License as published by | ||
| +# the Free Software Foundation, either version 3 of the License, or | ||
| +# (at your option) any later version. | ||
| +# | ||
| +# This program is distributed in the hope that it will be useful, | ||
| +# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| +# GNU General Public License for more details. | ||
| +# | ||
| +# You should have received a copy of the GNU General Public License | ||
| +# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| +# | ||
| +############################################################################### | ||
| + | ||
| +#pragmas | ||
| +use strict; | ||
| +use warnings; | ||
| + | ||
| +#core Perl modules | ||
| +use Getopt::Long; | ||
| +use Carp; | ||
| + | ||
| +#CPAN modules | ||
| + | ||
| +#locally-written modules | ||
| + | ||
| +BEGIN { | ||
| + select(STDERR); | ||
| + $| = 1; | ||
| + select(STDOUT); | ||
| + $| = 1; | ||
| +} | ||
| + | ||
| +# edit here to log all external commands | ||
| +my $global_log_commands = 0; | ||
| + | ||
| +# ext command failure levels | ||
| +use constant { | ||
| + IGNORE_FAILURE => 0, | ||
| + WARN_ON_FAILURE => 1, | ||
| + DIE_ON_FAILURE => 2 | ||
| +}; | ||
| + | ||
| +# get input params and print copyright | ||
| +printAtStart(); | ||
| +my $global_options = checkParams(); | ||
| + | ||
| +###################################################################### | ||
| +# CODE HERE | ||
| +###################################################################### | ||
| + | ||
| +# check that the file exists | ||
| +checkFileExists($global_options->{'in'}); | ||
| + | ||
| +# run prokka to generate the ORFs and also prokka annotations | ||
| +checkAndRunCommand("prokka", [{ | ||
| + "--locustag" => $global_options->{'locustag'}, | ||
| + "--outdir" => "prokka_annotation", | ||
| + "--prefix" => $global_options->{'locustag'}, | ||
| + "--kingdom" => $global_options->{'kingdom'}, | ||
| + "--cpus" => $global_options->{'threads'}, | ||
| + "--keep_names", | ||
| + $global_options->{'in'}, | ||
| + }], DIE_ON_FAILURE); | ||
| + | ||
| +# identify the ORF called amino acid fasta file for blast-ing | ||
| +my $locus = $global_options->{'locustag'}; | ||
| + | ||
| +# blast against img | ||
| +if (! -e "./$locus.faaVSimg.blastp") | ||
| +{ | ||
| +print "BLASTing against IMG 4.0 database\n"; | ||
| +checkAndRunCommand("cat", | ||
| + [[ | ||
| + "prokka_annotation/$locus.faa |", | ||
| + "parallel", | ||
| + "--block"=> "100k", | ||
| + "--recstart", | ||
| + "'>'", | ||
| + "--pipe", | ||
| + "blastp", | ||
| + -db => "/srv/db/img/4.0/dereplicated/img_dereplicated_species.genes.faa", | ||
| + -outfmt => 6, | ||
| + -max_target_seqs => 1, | ||
| + -query => "-", | ||
| + "> $locus.faaVSimg.blastp", | ||
| + #-num_threads => $global_options->{'threads'}, | ||
| + ]], DIE_ON_FAILURE); | ||
| +} | ||
| + | ||
| +# blast against uniref | ||
| +if (! -e "./$locus.faaVSuniref90.blastp") | ||
| +{ | ||
| +print "BLASTing against Uniref90 database\n"; | ||
| +checkAndRunCommand("cat",[[ | ||
| + "prokka_annotation/$locus.faa |", | ||
| + "parallel", | ||
| + "--block"=> "100k", | ||
| + "--recstart", | ||
| + "'>'", | ||
| + "--pipe", | ||
| + "blastp", | ||
| + -db => "/srv/whitlam/home/users/uqmharoo/Uniref_db/uniref90.fasta", | ||
| + -outfmt => 6, | ||
| + -max_target_seqs => 1, | ||
| + -query => "-", | ||
| + "> $locus.faaVSuniref90.blastp", | ||
| + #-num_threads => $global_options->{'threads'}, | ||
| + ]], DIE_ON_FAILURE); | ||
| +} | ||
| +###################################################################### | ||
| +# CUSTOM SUBS | ||
| +###################################################################### | ||
| + | ||
| +###################################################################### | ||
| +# TEMPLATE SUBS | ||
| + | ||
| +###################################################################### | ||
| +# PARAMETERS | ||
| + | ||
| +sub checkParams { | ||
| + #----- | ||
| + # Do any and all options checking here... | ||
| + # | ||
| + my @standard_options = ( "help|h+", "in|i:s", "locustag|l:s", "kingdom|k:s", "threads|t:s"); | ||
| + my %options; | ||
| + | ||
| + # Add any other command line options, and the code to handle them | ||
| + # | ||
| + GetOptions( \%options, @standard_options ); | ||
| + | ||
| + # if no arguments supplied print the usage and exit | ||
| + # | ||
| + exec("pod2usage $0") if (0 == (keys (%options) )); | ||
| + | ||
| + # If the -help option is set, print the usage and exit | ||
| + # | ||
| + exec("pod2usage $0") if $options{'help'}; | ||
| + | ||
| + # Compulsory items | ||
| + #if(!exists $options{''} ) { printParamError (""); } | ||
| + if(!exists $options{'in'} ) { printParamError ("You MUST supply a fasta file"); } | ||
| + | ||
| + return \%options; | ||
| +} | ||
| + | ||
| +sub printParamError | ||
| +{ | ||
| + #----- | ||
| + # What to do if there's something wrong with a parameter | ||
| + # | ||
| + my ($error) = @_; | ||
| + print "**ERROR: $0 : $error\n"; exec("pod2usage $0"); | ||
| +} | ||
| + | ||
| +sub overrideDefault | ||
| +{ | ||
| + #----- | ||
| + # Set and override default values for parameters | ||
| + # | ||
| + my ($default_value, $option_name) = @_; | ||
| + if(exists $global_options->{$option_name}) | ||
| + { | ||
| + return $global_options->{$option_name}; | ||
| + } | ||
| + return $default_value; | ||
| +} | ||
| + | ||
| +###################################################################### | ||
| +# FILE IO | ||
| + | ||
| +sub openWrite | ||
| +{ | ||
| + #----- | ||
| + # Open a file for writing | ||
| + # | ||
| + my ($fn) = @_; | ||
| + open my $fh, ">", $fn or croak "**ERROR: could not open file: $fn for writing $!\n"; | ||
| + return $fh; | ||
| +} | ||
| + | ||
| +sub openRead | ||
| +{ | ||
| + #----- | ||
| + # Open a file for reading | ||
| + # | ||
| + my ($fn) = @_; | ||
| + open my $fh, "<", $fn or croak "**ERROR: could not open file: $fn for reading $!\n"; | ||
| + return $fh; | ||
| +} | ||
| + | ||
| +###################################################################### | ||
| +# EXTERNAL COMMANDS | ||
| +# | ||
| +# checkAndRunCommand("ls", { | ||
| +# -a => "" | ||
| +# }, | ||
| +# WARN_ON_FAILURE); | ||
| + | ||
| +sub checkFileExists { | ||
| + #----- | ||
| + # Does a file exists? | ||
| + # | ||
| + my ($file) = @_; | ||
| + unless(-e $file) { | ||
| + croak "**ERROR: $0 : Cannot find:\n$file\n"; | ||
| + } | ||
| +} | ||
| + | ||
| +sub logExternalCommand | ||
| +{ | ||
| + #----- | ||
| + # Log a command line command to the command line! | ||
| + # | ||
| + if(1 == $global_log_commands) { | ||
| + print $_[0], "\n"; | ||
| + } | ||
| +} | ||
| + | ||
| +sub isCommandInPath | ||
| +{ | ||
| + #----- | ||
| + # Is this command in the path? | ||
| + # | ||
| + my ($cmd, $failure_type) = @_; | ||
| + if (system("which $cmd |> /dev/null")) { | ||
| + handleCommandFailure($cmd, $failure_type); | ||
| + } | ||
| +} | ||
| + | ||
| +sub runExternalCommand | ||
| +{ | ||
| + #----- | ||
| + # Run a command line command on the command line! | ||
| + # | ||
| + my ($cmd) = @_; | ||
| + logExternalCommand($cmd); | ||
| + system($cmd); | ||
| +} | ||
| + | ||
| +sub checkAndRunCommand | ||
| +{ | ||
| + #----- | ||
| + # Run external commands more sanelier | ||
| + # | ||
| + my ($cmd, $params, $failure_type) = @_; | ||
| + | ||
| + isCommandInPath($cmd, $failure_type); | ||
| + | ||
| + # join the parameters to the command | ||
| + my $param_str = join " ", map {formatParams($_)} @{$params}; | ||
| + | ||
| + my $cmd_str = $cmd . " " . $param_str; | ||
| + | ||
| + print $cmd_str; | ||
| + logExternalCommand($cmd_str); | ||
| + | ||
| + # make sure that all went well | ||
| + if (system($cmd_str)) { | ||
| + handleCommandFailure($cmd_str, $failure_type) | ||
| + } | ||
| +} | ||
| + | ||
| +sub formatParams { | ||
| + | ||
| + #--------- | ||
| + # Handles and formats the different ways of passing parameters to | ||
| + # checkAndRunCommand | ||
| + # | ||
| + my $ref = shift; | ||
| + | ||
| + if (ref($ref) eq "ARRAY") { | ||
| + return join(" ", @{$ref}); | ||
| + } elsif (ref($ref) eq "HASH") { | ||
| + return join(" ", map { $_ . " " . $ref->{$_}} keys %{$ref}); | ||
| + } | ||
| + croak 'The elements of the $params argument in checkAndRunCommand can ' . | ||
| + 'only contain references to arrays or hashes\n'; | ||
| +} | ||
| + | ||
| + | ||
| +sub handleCommandFailure { | ||
| + #----- | ||
| + # What to do when all goes bad! | ||
| + # | ||
| + my ($cmd, $failure_type) = @_; | ||
| + if (defined($failure_type)) { | ||
| + if ($failure_type == DIE_ON_FAILURE) { | ||
| + croak "**ERROR: $0 : " . $! . "\n"; | ||
| + } elsif ($failure_type == WARN_ON_FAILURE) { | ||
| + carp "**WARNING: $0 : " . $! . "\n"; | ||
| + } | ||
| + } | ||
| +} | ||
| + | ||
| + | ||
| +###################################################################### | ||
| +# MISC | ||
| + | ||
| +sub printAtStart { | ||
| +print<<"EOF"; | ||
| +---------------------------------------------------------------- | ||
| + $0 | ||
| + Copyright (C) Mohamed Fauzi Haroon | ||
| + | ||
| + This program comes with ABSOLUTELY NO WARRANTY; | ||
| + This is free software, and you are welcome to redistribute it | ||
| + under certain conditions: See the source for more details. | ||
| +---------------------------------------------------------------- | ||
| +EOF | ||
| +} | ||
| + | ||
| +__DATA__ | ||
| + | ||
| +=head1 NAME | ||
| + | ||
| + annotateM | ||
| + | ||
| +=head1 COPYRIGHT | ||
| + | ||
| + copyright (C) Mohamed Fauzi Haroon | ||
| + | ||
| + This program is free software: you can redistribute it and/or modify | ||
| + it under the terms of the GNU General Public License as published by | ||
| + the Free Software Foundation, either version 3 of the License, or | ||
| + (at your option) any later version. | ||
| + | ||
| + This program is distributed in the hope that it will be useful, | ||
| + but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| + GNU General Public License for more details. | ||
| + | ||
| + You should have received a copy of the GNU General Public License | ||
| + along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| + | ||
| +=head1 DESCRIPTION | ||
| + | ||
| + Insert detailed description here | ||
| + | ||
| +=head1 SYNOPSIS | ||
| + | ||
| + annotateM -i fasta_file | ||
| + | ||
| + -i FASTA_FILE Nucleotide fasta file | ||
| + -l locustag Name of locus tag | ||
| + -k kingdom (Bacteria/Archaea) Kingdom of genome to be annotated | ||
| + -t threads Number of threads | ||
| + [-help -h] Displays basic usage information | ||
| + | ||
| +=cut | ||
| + | ||
| + |
0 comments on commit
fa8a808