Permalink
Cannot retrieve contributors at this time
Fetching contributors…
| #!/usr/bin/env perl | |
| ############################################################################### | |
| # | |
| # annotateM | |
| # | |
| # The idea here is to produce a tab-delimited file of all the annotation | |
| # pipelines for manual curation afterwards. | |
| # | |
| # Copyright (C) Mohamed Fauzi Haroon | |
| # | |
| # This program is free software: you can redistribute it and/or modify | |
| # it under the terms of the GNU General Public License as published by | |
| # the Free Software Foundation, either version 3 of the License, or | |
| # (at your option) any later version. | |
| # | |
| # This program is distributed in the hope that it will be useful, | |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| # GNU General Public License for more details. | |
| # | |
| # You should have received a copy of the GNU General Public License | |
| # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
| # | |
| ############################################################################### | |
| #pragmas | |
| use strict; | |
| use warnings; | |
| #core Perl modules | |
| use Getopt::Long; | |
| use Carp; | |
| #CPAN modules | |
| #locally-written modules | |
| BEGIN { | |
| select(STDERR); | |
| $| = 1; | |
| select(STDOUT); | |
| $| = 1; | |
| } | |
| # edit here to log all external commands | |
| my $global_log_commands = 0; | |
| # ext command failure levels | |
| use constant { | |
| IGNORE_FAILURE => 0, | |
| WARN_ON_FAILURE => 1, | |
| DIE_ON_FAILURE => 2 | |
| }; | |
| # get input params and print copyright | |
| printAtStart(); | |
| my $global_options = checkParams(); | |
| ###################################################################### | |
| # CODE HERE | |
| ###################################################################### | |
| # check that the file exists | |
| checkFileExists($global_options->{'in'}); | |
| # run prokka to generate the ORFs and also prokka annotations | |
| checkAndRunCommand("prokka", [{ | |
| "--locustag" => $global_options->{'locustag'}, | |
| "--outdir" => "prokka_annotation", | |
| "--prefix" => $global_options->{'locustag'}, | |
| "--kingdom" => $global_options->{'kingdom'}, | |
| "--cpus" => $global_options->{'threads'}, | |
| "--keep_names", | |
| $global_options->{'in'}, | |
| }], DIE_ON_FAILURE); | |
| # identify the ORF called amino acid fasta file for blast-ing | |
| my $locus = $global_options->{'locustag'}; | |
| # blast against img | |
| if (! -e "./$locus.faaVSimg.blastp") | |
| { | |
| print "BLASTing against IMG 4.0 database...............\n"; | |
| checkAndRunCommand("cat", | |
| [[ | |
| "prokka_annotation/$locus.faa |", | |
| "parallel", | |
| "--block"=> "100k", | |
| "--recstart", | |
| "'>'", | |
| "--pipe", | |
| "blastp", | |
| -db => "/srv/db/img/4.0/dereplicated/img_dereplicated_species.genes.faa", | |
| -outfmt => 6, | |
| -max_target_seqs => 1, | |
| -evalue => $global_options->{'evalue'}, | |
| -query => "-", | |
| "> $locus.faaVSimg.blastp", | |
| #-num_threads => $global_options->{'threads'}, | |
| ]], DIE_ON_FAILURE); | |
| } | |
| # reciprocal blast of img positive hits to genome ORF | |
| if (! -e "./subsetimgVS$locus.faa.blastp") | |
| { | |
| print "Reciprocal BLASTing positive IMG hits to $locus.faa ...............\n"; | |
| checkAndRunCommand("contig_extractor.pl", | |
| [[ | |
| -i => "$locus.faaVSimg.blastp", | |
| -d => "/srv/db/img/4.0/dereplicated/img_dereplicated_species.genes.faa", | |
| -b => '', | |
| -S => '', | |
| -o => "subsetimgVS$locus.faa.blastp", | |
| ]] | |
| # blast against uniref | |
| if (! -e "./$locus.faaVSuniref90.blastp") | |
| { | |
| print "BLASTing against Uniref90 database................\n"; | |
| checkAndRunCommand("cat",[[ | |
| "prokka_annotation/$locus.faa |", | |
| "parallel", | |
| "--block"=> "100k", | |
| "--recstart", | |
| "'>'", | |
| "--pipe", | |
| "blastp", | |
| -db => "/srv/whitlam/home/users/uqmharoo/Uniref_db/uniref90.fasta", | |
| -outfmt => 6, | |
| -max_target_seqs => 1, | |
| -evalue => $global_options->{'evalue'}, | |
| -query => "-", | |
| "> $locus.faaVSuniref90.blastp", | |
| #-num_threads => $global_options->{'threads'}, | |
| ]], DIE_ON_FAILURE); | |
| } | |
| ###################################################################### | |
| # CUSTOM SUBS | |
| ###################################################################### | |
| ###################################################################### | |
| # TEMPLATE SUBS | |
| ###################################################################### | |
| # PARAMETERS | |
| sub checkParams { | |
| #----- | |
| # Do any and all options checking here... | |
| # | |
| my @standard_options = ( "help|h+", "in|i:s", "locustag|l:s", "kingdom|k:s", "threads|t:s", "evalue|e:s"); | |
| my %options; | |
| # Add any other command line options, and the code to handle them | |
| # | |
| GetOptions( \%options, @standard_options ); | |
| # if no arguments supplied print the usage and exit | |
| # | |
| exec("pod2usage $0") if (0 == (keys (%options) )); | |
| # If the -help option is set, print the usage and exit | |
| # | |
| exec("pod2usage $0") if $options{'help'}; | |
| # Compulsory items | |
| #if(!exists $options{''} ) { printParamError (""); } | |
| if(!exists $options{'in'} ) { printParamError ("You MUST supply a fasta file"); } | |
| return \%options; | |
| } | |
| sub printParamError | |
| { | |
| #----- | |
| # What to do if there's something wrong with a parameter | |
| # | |
| my ($error) = @_; | |
| print "**ERROR: $0 : $error\n"; exec("pod2usage $0"); | |
| } | |
| sub overrideDefault | |
| { | |
| #----- | |
| # Set and override default values for parameters | |
| # | |
| my ($default_value, $option_name) = @_; | |
| if(exists $global_options->{$option_name}) | |
| { | |
| return $global_options->{$option_name}; | |
| } | |
| return $default_value; | |
| } | |
| ###################################################################### | |
| # FILE IO | |
| sub openWrite | |
| { | |
| #----- | |
| # Open a file for writing | |
| # | |
| my ($fn) = @_; | |
| open my $fh, ">", $fn or croak "**ERROR: could not open file: $fn for writing $!\n"; | |
| return $fh; | |
| } | |
| sub openRead | |
| { | |
| #----- | |
| # Open a file for reading | |
| # | |
| my ($fn) = @_; | |
| open my $fh, "<", $fn or croak "**ERROR: could not open file: $fn for reading $!\n"; | |
| return $fh; | |
| } | |
| ###################################################################### | |
| # EXTERNAL COMMANDS | |
| # | |
| # checkAndRunCommand("ls", { | |
| # -a => "" | |
| # }, | |
| # WARN_ON_FAILURE); | |
| sub checkFileExists { | |
| #----- | |
| # Does a file exists? | |
| # | |
| my ($file) = @_; | |
| unless(-e $file) { | |
| croak "**ERROR: $0 : Cannot find:\n$file\n"; | |
| } | |
| } | |
| sub logExternalCommand | |
| { | |
| #----- | |
| # Log a command line command to the command line! | |
| # | |
| if(1 == $global_log_commands) { | |
| print $_[0], "\n"; | |
| } | |
| } | |
| sub isCommandInPath | |
| { | |
| #----- | |
| # Is this command in the path? | |
| # | |
| my ($cmd, $failure_type) = @_; | |
| if (system("which $cmd |> /dev/null")) { | |
| handleCommandFailure($cmd, $failure_type); | |
| } | |
| } | |
| sub runExternalCommand | |
| { | |
| #----- | |
| # Run a command line command on the command line! | |
| # | |
| my ($cmd) = @_; | |
| logExternalCommand($cmd); | |
| system($cmd); | |
| } | |
| sub checkAndRunCommand | |
| { | |
| #----- | |
| # Run external commands more sanelier | |
| # | |
| my ($cmd, $params, $failure_type) = @_; | |
| isCommandInPath($cmd, $failure_type); | |
| # join the parameters to the command | |
| my $param_str = join " ", map {formatParams($_)} @{$params}; | |
| my $cmd_str = $cmd . " " . $param_str; | |
| print $cmd_str; | |
| logExternalCommand($cmd_str); | |
| # make sure that all went well | |
| if (system($cmd_str)) { | |
| handleCommandFailure($cmd_str, $failure_type) | |
| } | |
| } | |
| sub formatParams { | |
| #--------- | |
| # Handles and formats the different ways of passing parameters to | |
| # checkAndRunCommand | |
| # | |
| my $ref = shift; | |
| if (ref($ref) eq "ARRAY") { | |
| return join(" ", @{$ref}); | |
| } elsif (ref($ref) eq "HASH") { | |
| return join(" ", map { $_ . " " . $ref->{$_}} keys %{$ref}); | |
| } | |
| croak 'The elements of the $params argument in checkAndRunCommand can ' . | |
| 'only contain references to arrays or hashes\n'; | |
| } | |
| sub handleCommandFailure { | |
| #----- | |
| # What to do when all goes bad! | |
| # | |
| my ($cmd, $failure_type) = @_; | |
| if (defined($failure_type)) { | |
| if ($failure_type == DIE_ON_FAILURE) { | |
| croak "**ERROR: $0 : " . $! . "\n"; | |
| } elsif ($failure_type == WARN_ON_FAILURE) { | |
| carp "**WARNING: $0 : " . $! . "\n"; | |
| } | |
| } | |
| } | |
| ###################################################################### | |
| # MISC | |
| sub printAtStart { | |
| print<<"EOF"; | |
| ---------------------------------------------------------------- | |
| $0 | |
| Copyright (C) Mohamed Fauzi Haroon | |
| This program comes with ABSOLUTELY NO WARRANTY; | |
| This is free software, and you are welcome to redistribute it | |
| under certain conditions: See the source for more details. | |
| ---------------------------------------------------------------- | |
| EOF | |
| } | |
| __DATA__ | |
| =head1 NAME | |
| annotateM | |
| =head1 COPYRIGHT | |
| copyright (C) Mohamed Fauzi Haroon | |
| This program is free software: you can redistribute it and/or modify | |
| it under the terms of the GNU General Public License as published by | |
| the Free Software Foundation, either version 3 of the License, or | |
| (at your option) any later version. | |
| This program is distributed in the hope that it will be useful, | |
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| GNU General Public License for more details. | |
| You should have received a copy of the GNU General Public License | |
| along with this program. If not, see <http://www.gnu.org/licenses/>. | |
| =head1 DESCRIPTION | |
| Want to annotate your genome? annotateM! | |
| =head1 SYNOPSIS | |
| annotateM -i fasta_file | |
| -i FASTA_FILE Nucleotide fasta file | |
| -l locustag Name of locus tag | |
| -k kingdom (Bacteria/Archaea) Kingdom of genome to be annotated | |
| -t threads Number of threads | |
| -e evalue Evalue for BLAST, recommend 1e-3 | |
| [-help -h] Displays basic usage information | |
| =cut | |