From fa8a808f993d88228f146b48dcc0ccce605e310a Mon Sep 17 00:00:00 2001 From: fauzi Date: Thu, 3 Apr 2014 15:35:16 +1000 Subject: [PATCH] This is my first commit of annotateM --- annotateM | 363 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 363 insertions(+) create mode 100755 annotateM diff --git a/annotateM b/annotateM new file mode 100755 index 0000000..351be48 --- /dev/null +++ b/annotateM @@ -0,0 +1,363 @@ +#!/usr/bin/env perl +############################################################################### +# +# annotateM +# +# The idea here is to produce a tab-delimited file of all the annotation +# pipelines for manual curation afterwards. +# +# Copyright (C) Mohamed Fauzi Haroon +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +############################################################################### + +#pragmas +use strict; +use warnings; + +#core Perl modules +use Getopt::Long; +use Carp; + +#CPAN modules + +#locally-written modules + +BEGIN { + select(STDERR); + $| = 1; + select(STDOUT); + $| = 1; +} + +# edit here to log all external commands +my $global_log_commands = 0; + +# ext command failure levels +use constant { + IGNORE_FAILURE => 0, + WARN_ON_FAILURE => 1, + DIE_ON_FAILURE => 2 +}; + +# get input params and print copyright +printAtStart(); +my $global_options = checkParams(); + +###################################################################### +# CODE HERE +###################################################################### + +# check that the file exists +checkFileExists($global_options->{'in'}); + +# run prokka to generate the ORFs and also prokka annotations +checkAndRunCommand("prokka", [{ + "--locustag" => $global_options->{'locustag'}, + "--outdir" => "prokka_annotation", + "--prefix" => $global_options->{'locustag'}, + "--kingdom" => $global_options->{'kingdom'}, + "--cpus" => $global_options->{'threads'}, + "--keep_names", + $global_options->{'in'}, + }], DIE_ON_FAILURE); + +# identify the ORF called amino acid fasta file for blast-ing +my $locus = $global_options->{'locustag'}; + +# blast against img +if (! -e "./$locus.faaVSimg.blastp") +{ +print "BLASTing against IMG 4.0 database\n"; +checkAndRunCommand("cat", + [[ + "prokka_annotation/$locus.faa |", + "parallel", + "--block"=> "100k", + "--recstart", + "'>'", + "--pipe", + "blastp", + -db => "/srv/db/img/4.0/dereplicated/img_dereplicated_species.genes.faa", + -outfmt => 6, + -max_target_seqs => 1, + -query => "-", + "> $locus.faaVSimg.blastp", + #-num_threads => $global_options->{'threads'}, + ]], DIE_ON_FAILURE); +} + +# blast against uniref +if (! -e "./$locus.faaVSuniref90.blastp") +{ +print "BLASTing against Uniref90 database\n"; +checkAndRunCommand("cat",[[ + "prokka_annotation/$locus.faa |", + "parallel", + "--block"=> "100k", + "--recstart", + "'>'", + "--pipe", + "blastp", + -db => "/srv/whitlam/home/users/uqmharoo/Uniref_db/uniref90.fasta", + -outfmt => 6, + -max_target_seqs => 1, + -query => "-", + "> $locus.faaVSuniref90.blastp", + #-num_threads => $global_options->{'threads'}, + ]], DIE_ON_FAILURE); +} +###################################################################### +# CUSTOM SUBS +###################################################################### + +###################################################################### +# TEMPLATE SUBS + +###################################################################### +# PARAMETERS + +sub checkParams { + #----- + # Do any and all options checking here... + # + my @standard_options = ( "help|h+", "in|i:s", "locustag|l:s", "kingdom|k:s", "threads|t:s"); + my %options; + + # Add any other command line options, and the code to handle them + # + GetOptions( \%options, @standard_options ); + + # if no arguments supplied print the usage and exit + # + exec("pod2usage $0") if (0 == (keys (%options) )); + + # If the -help option is set, print the usage and exit + # + exec("pod2usage $0") if $options{'help'}; + + # Compulsory items + #if(!exists $options{''} ) { printParamError (""); } + if(!exists $options{'in'} ) { printParamError ("You MUST supply a fasta file"); } + + return \%options; +} + +sub printParamError +{ + #----- + # What to do if there's something wrong with a parameter + # + my ($error) = @_; + print "**ERROR: $0 : $error\n"; exec("pod2usage $0"); +} + +sub overrideDefault +{ + #----- + # Set and override default values for parameters + # + my ($default_value, $option_name) = @_; + if(exists $global_options->{$option_name}) + { + return $global_options->{$option_name}; + } + return $default_value; +} + +###################################################################### +# FILE IO + +sub openWrite +{ + #----- + # Open a file for writing + # + my ($fn) = @_; + open my $fh, ">", $fn or croak "**ERROR: could not open file: $fn for writing $!\n"; + return $fh; +} + +sub openRead +{ + #----- + # Open a file for reading + # + my ($fn) = @_; + open my $fh, "<", $fn or croak "**ERROR: could not open file: $fn for reading $!\n"; + return $fh; +} + +###################################################################### +# EXTERNAL COMMANDS +# +# checkAndRunCommand("ls", { +# -a => "" +# }, +# WARN_ON_FAILURE); + +sub checkFileExists { + #----- + # Does a file exists? + # + my ($file) = @_; + unless(-e $file) { + croak "**ERROR: $0 : Cannot find:\n$file\n"; + } +} + +sub logExternalCommand +{ + #----- + # Log a command line command to the command line! + # + if(1 == $global_log_commands) { + print $_[0], "\n"; + } +} + +sub isCommandInPath +{ + #----- + # Is this command in the path? + # + my ($cmd, $failure_type) = @_; + if (system("which $cmd |> /dev/null")) { + handleCommandFailure($cmd, $failure_type); + } +} + +sub runExternalCommand +{ + #----- + # Run a command line command on the command line! + # + my ($cmd) = @_; + logExternalCommand($cmd); + system($cmd); +} + +sub checkAndRunCommand +{ + #----- + # Run external commands more sanelier + # + my ($cmd, $params, $failure_type) = @_; + + isCommandInPath($cmd, $failure_type); + + # join the parameters to the command + my $param_str = join " ", map {formatParams($_)} @{$params}; + + my $cmd_str = $cmd . " " . $param_str; + + print $cmd_str; + logExternalCommand($cmd_str); + + # make sure that all went well + if (system($cmd_str)) { + handleCommandFailure($cmd_str, $failure_type) + } +} + +sub formatParams { + + #--------- + # Handles and formats the different ways of passing parameters to + # checkAndRunCommand + # + my $ref = shift; + + if (ref($ref) eq "ARRAY") { + return join(" ", @{$ref}); + } elsif (ref($ref) eq "HASH") { + return join(" ", map { $_ . " " . $ref->{$_}} keys %{$ref}); + } + croak 'The elements of the $params argument in checkAndRunCommand can ' . + 'only contain references to arrays or hashes\n'; +} + + +sub handleCommandFailure { + #----- + # What to do when all goes bad! + # + my ($cmd, $failure_type) = @_; + if (defined($failure_type)) { + if ($failure_type == DIE_ON_FAILURE) { + croak "**ERROR: $0 : " . $! . "\n"; + } elsif ($failure_type == WARN_ON_FAILURE) { + carp "**WARNING: $0 : " . $! . "\n"; + } + } +} + + +###################################################################### +# MISC + +sub printAtStart { +print<<"EOF"; +---------------------------------------------------------------- + $0 + Copyright (C) Mohamed Fauzi Haroon + + This program comes with ABSOLUTELY NO WARRANTY; + This is free software, and you are welcome to redistribute it + under certain conditions: See the source for more details. +---------------------------------------------------------------- +EOF +} + +__DATA__ + +=head1 NAME + + annotateM + +=head1 COPYRIGHT + + copyright (C) Mohamed Fauzi Haroon + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +=head1 DESCRIPTION + + Insert detailed description here + +=head1 SYNOPSIS + + annotateM -i fasta_file + + -i FASTA_FILE Nucleotide fasta file + -l locustag Name of locus tag + -k kingdom (Bacteria/Archaea) Kingdom of genome to be annotated + -t threads Number of threads + [-help -h] Displays basic usage information + +=cut + +