Permalink
Cannot retrieve contributors at this time
Fetching contributors…
| #!/usr/bin/env perl | |
| ############################################################################### | |
| # | |
| # annotateM | |
| # | |
| # The idea here is to produce a tab-delimited file of all the annotation | |
| # pipelines for manual curation afterwards. | |
| # | |
| # Copyright (C) Mohamed Fauzi Haroon | |
| # | |
| # This program is free software: you can redistribute it and/or modify | |
| # it under the terms of the GNU General Public License as published by | |
| # the Free Software Foundation, either version 3 of the License, or | |
| # (at your option) any later version. | |
| # | |
| # This program is distributed in the hope that it will be useful, | |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| # GNU General Public License for more details. | |
| # | |
| # You should have received a copy of the GNU General Public License | |
| # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
| # | |
| ############################################################################### | |
| #pragmas | |
| use strict; | |
| use warnings; | |
| #core Perl modules | |
| use Getopt::Long; | |
| use Carp; | |
| use Data::Dumper; | |
| #CPAN modules | |
| #locally-written modules | |
BEGIN {
    # Switch on autoflush for both standard streams. STDOUT is handled last
    # so that it is still the selected (default) output handle afterwards.
    for my $stream (\*STDERR, \*STDOUT) {
        select($stream);
        $| = 1;
    }
}
# set this to 1 to have every external command echoed by logExternalCommand
my $global_log_commands = 0;
# how a failed external command is treated by handleCommandFailure
use constant IGNORE_FAILURE  => 0;
use constant WARN_ON_FAILURE => 1;
use constant DIE_ON_FAILURE  => 2;
# get input params and print copyright
printAtStart();
my $global_options = checkParams();
######################################################################
# CODE HERE
######################################################################
# check that the file exists
checkFileExists($global_options->{'in'});
# run prokka to generate the ORFs and also prokka annotations
#
# The arguments are passed as an ordered list. A hash ref has no stable key
# order and can only express "option value" pairs, so the bare flag and the
# positional input file used to be paired up as a fake key/value entry and
# could land anywhere on the command line.
checkAndRunCommand("prokka", [[
        "--locustag", $global_options->{'locustag'},
        "--outdir",   "prokka_annotation",
        "--prefix",   $global_options->{'locustag'},
        "--kingdom",  $global_options->{'kingdom'},
        "--cpus",     $global_options->{'threads'},
        "--keep_names",
        $global_options->{'in'},
    ]], DIE_ON_FAILURE);
# every search below runs on the amino acid fasta that prokka wrote for
# this locus tag
my $locus   = $global_options->{'locustag'};
my $orf_faa = "prokka_annotation/$locus.faa";

# blastp the ORFs against $db, feeding them through GNU parallel in 100k
# chunks; the tabular hits are redirected into $result
my $blastp_in_chunks = sub {
    my ($db, $result) = @_;
    checkAndRunCommand("cat", [[
            "$orf_faa |",
            "parallel", "--block", "100k", "--recstart", "'>'", "--pipe",
            "blastp",
            "-db", $db,
            "-outfmt", 6,
            "-max_target_seqs", 1,
            "-evalue", $global_options->{'evalue'},
            "-query", "-",
            "> $result",
        ]], DIE_ON_FAILURE);
};

# run contig_extractor.pl on the blast table $hits and database $db,
# writing to $subset
my $extract_hit_sequences = sub {
    my ($hits, $db, $subset) = @_;
    checkAndRunCommand("contig_extractor.pl", [[
            "-i", $hits,
            "-d", $db,
            "-b", '',
            "-S", '',
            "-o", $subset,
        ]], DIE_ON_FAILURE);
};

# blastp the sequences in $query back against the genome ORFs
my $blastp_against_orfs = sub {
    my ($query, $result) = @_;
    checkAndRunCommand("blastp", [[
            "-query", $query,
            "-db", $orf_faa,
            "-outfmt", 6,
            "-max_target_seqs", 1,
            "-evalue", $global_options->{'evalue'},
            "-num_threads", $global_options->{'threads'},
            "-out", $result,
        ]], DIE_ON_FAILURE);
};

my $img_db    = "/srv/db/img/4.0/dereplicated/img_dereplicated_species.genes.faa";
my $uniref_db = "/srv/db/uniprot/uniref-20140403/uniref90.fasta";

# blast against img (skipped when the result file is already there)
unless (-e "./$locus.faaVSimg.blastp") {
    print "BLASTing against IMG 4.0 database...............\n";
    $blastp_in_chunks->($img_db, "$locus.faaVSimg.blastp");
}

# reciprocal blast of img positive hits against genome ORF
unless (-e "./subsetimg.faaVS$locus.faa.blastp") {
    print "Reciprocal BLASTing positive IMG hits to $locus.faa ...............\n";
    $extract_hit_sequences->("$locus.faaVSimg.blastp", $img_db, "subsetimg.faa");
    # the protein blast database of the ORFs is only built in this branch
    checkAndRunCommand("makeblastdb", [[
            "-in", $orf_faa,
            "-dbtype", "prot",
        ]], DIE_ON_FAILURE);
    $blastp_against_orfs->("subsetimg.faa", "subsetimg.faaVS$locus.faa.blastp");
}

# blast against uniref
unless (-e "./$locus.faaVSuniref90.blastp") {
    print "BLASTing against Uniref90 database................\n";
    $blastp_in_chunks->($uniref_db, "$locus.faaVSuniref90.blastp");
}

# reciprocal blast of Uniref positive hits against genome ORF
unless (-e "./subsetuniref.faaVS$locus.faa.blastp") {
    print "Reciprocal BLASTing positive Uniref hits to $locus.faa ...............\n";
    $extract_hit_sequences->("$locus.faaVSuniref90.blastp", $uniref_db, "subsetuniref.faa");
    $blastp_against_orfs->("subsetuniref.faa", "subsetuniref.faaVS$locus.faa.blastp");
}

# blast against COG
unless (-e "./$locus.faaVSCOG.blastp") {
    print "BLASTing against COG database................\n";
    $blastp_in_chunks->("/srv/db/cog/cog_blast_prot_db", "$locus.faaVSCOG.blastp");
}

# HMMSCAN against PFAM
unless (-e "./$locus.faaVSPfam-A.hmm.hmmscanned") {
    print "HMMscanning against Pfam database................\n";
    checkAndRunCommand("pfam_scan.pl", [[
            "-cpu", $global_options->{'threads'},
            "-e_seq", $global_options->{'evalue'},
            "-outfile", "$locus.faaVSPfam-A.hmm.hmmscanned",
            "-fasta", $orf_faa,
            "-dir", "/srv/db/pfam/27",
        ]], DIE_ON_FAILURE);
}

# HMMSCAN against TIGRfam
unless (-e "./$locus.faaVStigr_all.hmm.hmmscanned") {
    print "HMMscanning against TIGRfam database................\n";
    checkAndRunCommand("hmmscan", [[
            "--tblout", "$locus.faaVStigr_all.hmm.hmmscanned",
            "--noali",
            "-E", $global_options->{'evalue'},
            "--cpu", $global_options->{'threads'},
            "/srv/db/tigrfam/14.0/TIGRFAMs_14.0_HMM/tigr_all.hmm",
            $orf_faa,
        ]], DIE_ON_FAILURE);
}

# convert both hmmscan outputs to tab delimited: awk rebuilds each record
# with its default separator, sed then turns every whitespace into a tab
for my $scan_output ("$locus.faaVSPfam-A.hmm.hmmscanned",
                     "$locus.faaVStigr_all.hmm.hmmscanned") {
    checkAndRunCommand("awk", [[
            q('{$1=$1}{ print }'),
            $scan_output,
            q(| sed 's/\s/\t/g'),
            "> $scan_output.tab",
        ]], DIE_ON_FAILURE);
}
# Parse the IMG blastp hits, attach the IMG names and flag reciprocal hits.
# Everything lives in a bare block so the working hashes and file handles
# stay local to this step; the results are handed on through
# img_output_temp.txt and img_output_temp2.txt.
{
    # ORF id -> IMG id and IMG id -> ORF id for every hit above the cutoff
    my %img_pair = ();
    # IMG id -> "ORF id <tab> e-value <tab> bit score"
    # NOTE(review): keyed by IMG id, so when several ORFs hit the same IMG
    # gene only the last one read is kept - confirm this is intended
    my %img_hit = ();
    # IMG id -> the %img_hit row plus the two name columns of id2names.txt
    my %img_named = ();

    #read the img blast output and store in hash
    open(my $img_blast, '<', "./$locus.faaVSimg.blastp")
        or die "Couldn't open file $locus.faaVSimg.blastp\n";
    while (my $line = <$img_blast>) {
        chomp $line;
        my @columns = split(/\t/, $line);
        # blank or truncated rows have no score to compare
        next if @columns < 12;
        # columns 11 and 12 of blast -outfmt 6 are e-value and bit score;
        # only hits with a bit score above 60 are kept
        next unless $columns[11] > 60;
        my ($orfid, $img_id) = @columns[0, 1];
        $img_pair{$orfid}  = $img_id;
        $img_pair{$img_id} = $orfid;
        $img_hit{$img_id}  = "$orfid\t$columns[10]\t$columns[11]";
    }
    close($img_blast);

    # read img id2names.txt
    open(my $img_names, '<', "/srv/db/img/4.0/dereplicated/id2names.txt")
        or die "Couldn't open id2names.txt\n";
    open(my $img_out, '>', "img_output_temp.txt")
        or die "Couldn't open img_output_temp.txt for writing\n";
    while (my $line = <$img_names>) {
        chomp $line;
        my @columns = split(/\t/, $line);
        next unless defined $columns[0] && exists $img_hit{$columns[0]};
        my $row = "$img_hit{$columns[0]}\t$columns[1]\t$columns[2]";
        $img_named{$columns[0]} = $row;
        print {$img_out} "$row\n";
    }
    close($img_names);
    close($img_out) or die "Couldn't finish writing img_output_temp.txt\n";

    # read the reciprocal img blast output and mark the hits seen there
    open(my $img_reciprocal, '<', "./subsetimg.faaVS$locus.faa.blastp")
        or die "Couldn't open file subsetimg.faaVS$locus.faa.blastp\n";
    open(my $img_out2, '>', "img_output_temp2.txt")
        or die "Couldn't open img_output_temp2.txt for writing\n";
    while (my $line = <$img_reciprocal>) {
        chomp $line;
        my @columns = split(/\t/, $line);
        next unless defined $columns[0] && exists $img_pair{$columns[0]};
        # ids without a named row used to be printed as an undefined value,
        # which produced a row with an empty ORF id in the final table
        next unless exists $img_named{$columns[0]};
        print {$img_out2} "$img_named{$columns[0]}\treciprocal\n";
    }
    close($img_reciprocal);
    close($img_out2) or die "Couldn't finish writing img_output_temp2.txt\n";
}
# Parse the UniRef90 blastp hits, attach the UniRef names and flag
# reciprocal hits. Everything lives in a bare block so the working hashes
# and file handles stay local to this step; the results are handed on
# through uniref_output_temp.txt and uniref_output_temp2.txt.
{
    # ORF id -> UniRef id and UniRef id -> ORF id for every hit above the cutoff
    my %uniref_pair = ();
    # UniRef id -> "ORF id <tab> e-value <tab> bit score"
    # NOTE(review): keyed by UniRef id, so when several ORFs hit the same
    # entry only the last one read is kept - confirm this is intended
    my %uniref_hit = ();
    # UniRef id -> the %uniref_hit row plus the two name columns of the
    # id2names file
    my %uniref_named = ();

    #read uniref blast and store in hash
    open(my $uniref_blast, '<', "./$locus.faaVSuniref90.blastp")
        or die "Couldn't open file $locus.faaVSuniref90.blastp\n";
    while (my $line = <$uniref_blast>) {
        chomp $line;
        my @columns = split(/\t/, $line);
        # blank or truncated rows have no score to compare
        next if @columns < 12;
        # columns 11 and 12 of blast -outfmt 6 are e-value and bit score;
        # only hits with a bit score above 60 are kept
        next unless $columns[11] > 60;
        my ($orfid, $uniref_id) = @columns[0, 1];
        $uniref_pair{$orfid}     = $uniref_id;
        $uniref_pair{$uniref_id} = $orfid;
        $uniref_hit{$uniref_id}  = "$orfid\t$columns[10]\t$columns[11]";
    }
    close($uniref_blast);

    #read uniref id2names.txt
    open(my $uniref_names, '<', "/srv/db/uniprot/uniref-20140403/uniref90_id2names.txt")
        or die "Couldn't open id2names.txt\n";
    open(my $uniref_out, '>', "uniref_output_temp.txt")
        or die "Couldn't open uniref_output_temp.txt for writing\n";
    while (my $line = <$uniref_names>) {
        chomp $line;
        my @columns = split(/\t/, $line);
        next unless defined $columns[0] && exists $uniref_hit{$columns[0]};
        my $row = "$uniref_hit{$columns[0]}\t$columns[1]\t$columns[2]";
        $uniref_named{$columns[0]} = $row;
        print {$uniref_out} "$row\n";
    }
    close($uniref_names);
    close($uniref_out) or die "Couldn't finish writing uniref_output_temp.txt\n";

    # read the reciprocal uniref blast output and mark the hits seen there
    open(my $uniref_reciprocal, '<', "./subsetuniref.faaVS$locus.faa.blastp")
        or die "Couldn't open file subsetuniref.faaVS$locus.faa.blastp\n";
    open(my $uniref_out2, '>', "uniref_output_temp2.txt")
        or die "Couldn't open uniref_output_temp2.txt for writing\n";
    while (my $line = <$uniref_reciprocal>) {
        chomp $line;
        my @columns = split(/\t/, $line);
        next unless defined $columns[0] && exists $uniref_pair{$columns[0]};
        # ids without a named row used to be printed as an undefined value,
        # which produced a row with an empty ORF id in the final table
        next unless exists $uniref_named{$columns[0]};
        print {$uniref_out2} "$uniref_named{$columns[0]}\treciprocal\n";
    }
    close($uniref_reciprocal);
    close($uniref_out2) or die "Couldn't finish writing uniref_output_temp2.txt\n";
}
# Parse the tab-converted pfam_scan.pl output and attach the description
# column of Pfam-A.clans.tsv; the result goes to pfam_output_temp.txt.
# Kept in a bare block so the working hash and handles stay local.
{
    # Pfam id (accession without version) -> "column 0 <tab> column 12 <tab> column 11"
    # NOTE(review): keyed by Pfam id, so when several ORFs carry the same
    # family only the last one read is kept - confirm this is intended
    my %pfam_hit = ();

    #read pfam hmmscan output and store in hash
    open(my $pfam_scan, '<', "./$locus.faaVSPfam-A.hmm.hmmscanned.tab")
        or die "Couldn't open file $locus.faaVSPfam-A.hmm.hmmscanned.tab\n";
    while (my $line = <$pfam_scan>) {
        # blank lines and comment lines
        next if $line =~ /^\s*(#.*)?$/;
        # lines starting with "="; this used to be matched against the file
        # handle instead of the line and therefore never skipped anything
        next if $line =~ /^=/;
        chomp $line;
        my @columns = split(/\t/, $line);
        # columns 0, 5, 11 and 12 are all needed below
        next if @columns < 13;
        next unless $columns[11] > 60;
        # drop the version suffix of the accession in column 5
        my ($pfam_id) = split(/\./, $columns[5]);
        next unless defined $pfam_id;
        $pfam_hit{$pfam_id} = "$columns[0]\t$columns[12]\t$columns[11]";
    }
    close($pfam_scan);

    #read Pfam-A.clans.tsv
    open(my $pfam_names, '<', "/srv/db/pfam/27/Pfam-A.clans.tsv")
        or die "Couldn't open Pfam-A.clans.tsv\n";
    open(my $pfam_out, '>', "pfam_output_temp.txt")
        or die "Couldn't open pfam_output_temp.txt for writing\n";
    while (my $line = <$pfam_names>) {
        chomp $line;
        my @columns = split(/\t/, $line);
        next unless defined $columns[0] && exists $pfam_hit{$columns[0]};
        print {$pfam_out} "$pfam_hit{$columns[0]}\t$columns[4]\n";
    }
    close($pfam_names);
    close($pfam_out) or die "Couldn't finish writing pfam_output_temp.txt\n";
}
# Parse the tab-converted hmmscan --tblout output for TIGRfam and attach
# two columns from tigr_info_combined.parsed; the result goes to
# tigrfam_output_temp.txt. Kept in a bare block so the working hash and
# handles stay local.
{
    # column 0 of the hmmscan table -> "column 2 <tab> column 5 <tab> column 4"
    # NOTE(review): keyed by column 0, so when several rows share that value
    # only the last one read is kept - confirm this is intended
    my %tigrfam_hit = ();

    #read tigrfam hmmscan output and store in hash
    open(my $tigrfam_scan, '<', "./$locus.faaVStigr_all.hmm.hmmscanned.tab")
        or die "Couldn't open file $locus.faaVStigr_all.hmm.hmmscanned.tab\n";
    while (my $line = <$tigrfam_scan>) {
        # blank lines and comment lines; the second check in the original
        # was matched against the file handle and never did anything
        next if $line =~ /^\s*(#.*)?$/;
        chomp $line;
        my @columns = split(/\t/, $line);
        # columns 0, 2, 4 and 5 are all needed below
        next if @columns < 6;
        next unless $columns[5] > 10;
        $tigrfam_hit{$columns[0]} = "$columns[2]\t$columns[5]\t$columns[4]";
    }
    close($tigrfam_scan);

    #read tigrfam id2names2description
    open(my $tigrfam_names, '<', "/srv/db/tigrfam/14.0/TIGRFAMs_14.0_INFO/tigr_info_combined.parsed")
        or die "Couldn't open tigr_info_combined.parsed\n";
    open(my $tigrfam_out, '>', "tigrfam_output_temp.txt")
        or die "Couldn't open tigrfam_output_temp.txt for writing\n";
    while (my $line = <$tigrfam_names>) {
        chomp $line;
        my @columns = split(/\t/, $line);
        # blank lines have no id to trim or look up
        next unless defined $columns[0];
        # the id column can carry surrounding whitespace
        $columns[0] =~ s/^\s+//;
        $columns[0] =~ s/\s+$//;
        next unless exists $tigrfam_hit{$columns[0]};
        print {$tigrfam_out} "$tigrfam_hit{$columns[0]}\t$columns[1]\t$columns[2]\n";
    }
    close($tigrfam_names);
    close($tigrfam_out) or die "Couldn't finish writing tigrfam_output_temp.txt\n";
}
# Parse the COG blastp hits, map the hit proteins through prot2COG.tab and
# attach three columns of listcogs.txt; results go to cog_output_temp.txt
# and cog_output_temp2.txt. Kept in a bare block so the working hashes and
# handles stay local.
{
    # hit protein id -> "ORF id <tab> e-value <tab> bit score"
    # NOTE(review): keyed by the hit protein, so when several ORFs hit the
    # same protein only the last one read is kept - confirm this is intended
    my %cog_hit = ();
    # filled from prot2COG.tab: column 0 -> hit row plus column 1, and
    # column 1 -> bare hit row (the latter is what listcogs.txt is matched on)
    my %cog_named = ();

    # read cog blastp output and store in hash
    open(my $cog_blast, '<', "./$locus.faaVSCOG.blastp")
        or die "Couldn't open file $locus.faaVSCOG.blastp\n";
    while (my $line = <$cog_blast>) {
        chomp $line;
        my @columns = split(/\t/, $line);
        # blank or truncated rows have no score to compare
        next if @columns < 12;
        # columns 11 and 12 of blast -outfmt 6 are e-value and bit score;
        # only hits with a bit score above 60 are kept
        next unless $columns[11] > 60;
        $cog_hit{$columns[1]} = "$columns[0]\t$columns[10]\t$columns[11]";
    }
    close($cog_blast);

    # read cog prot2COG.tab
    open(my $cog_names, '<', "/srv/db/cog/prot2COG.tab")
        or die "Couldn't open prot2COG.tab\n";
    open(my $cog_out, '>', "cog_output_temp.txt")
        or die "Couldn't open cog_output_temp.txt for writing\n";
    while (my $line = <$cog_names>) {
        chomp $line;
        my @columns = split(/\t/, $line);
        # both the id and the value mapped to it are needed below
        next unless defined $columns[0] && defined $columns[1];
        next unless exists $cog_hit{$columns[0]};
        my $hit_row = $cog_hit{$columns[0]};
        $cog_named{$columns[0]} = "$hit_row\t$columns[1]";
        $cog_named{$columns[1]} = $hit_row;
        print {$cog_out} "$hit_row\t$columns[1]\n";
    }
    close($cog_names);
    close($cog_out) or die "Couldn't finish writing cog_output_temp.txt\n";

    # read cog listcogs.txt
    open(my $cog_long_names, '<', "/srv/db/cog/listcogs.txt")
        or die "Couldn't open listcogs.txt\n";
    open(my $cog_out2, '>', "cog_output_temp2.txt")
        or die "Couldn't open cog_output_temp2.txt for writing\n";
    while (my $line = <$cog_long_names>) {
        chomp $line;
        my @columns = split(/\t/, $line);
        # short rows have no column 5 to look up
        next unless defined $columns[5] && exists $cog_named{$columns[5]};
        print {$cog_out2} "$cog_named{$columns[5]}\t$columns[3]\t$columns[4]\t$columns[6]\n";
    }
    close($cog_long_names);
    close($cog_out2) or die "Couldn't finish writing cog_output_temp2.txt\n";
}
### now to parse all the temporary files and combine into one tab-delimited-file
{
    # to store the IDs => DB => values/annotations
    my %combined_bighash = ();

    # open file for output
    open(my $FINAL_OUTPUT, '>', "./final_output.txt")
        or die "Couldn't open final_output.txt for writing\n";
    # print header
    print {$FINAL_OUTPUT} "ORF_ID\t\n";

    # prokka: the faa headers are turned into an "id <tab> description" table
    checkAndRunCommand("grep",[[
            "'>'",
            "prokka_annotation/$locus.faa |",
            "sed",
            "'s/>//g' |",
            "sed",
            "-e", "'s/ /\\t/'",
            "> prokka_temp_output.txt",
        ]], DIE_ON_FAILURE);

    # One entry per annotation source: name, temporary file, and how many
    # "NA" cells pad the row when an ORF has no annotation from that source.
    # The array also fixes the order of the sources in every output row;
    # iterating over the keys of a hash gave an order that could change
    # from run to run.
    my @annotation_sources = (
        [ 'img',     "img_output_temp2.txt",    5 ],
        [ 'uniref',  "uniref_output_temp2.txt", 5 ],
        [ 'prokka',  "prokka_temp_output.txt",  1 ],
        [ 'cog',     "cog_output_temp2.txt",    5 ],
        [ 'pfam',    "pfam_output_temp.txt",    3 ],
        [ 'tigrfam', "tigrfam_output_temp.txt", 4 ],
    );

    # load every temporary file: first column is the ORF id, the remaining
    # columns are stored tab-joined under the source name
    foreach my $source (@annotation_sources) {
        my ($annotation_type, $file) = @{$source};
        open(my $annotation, '<', "./$file") or die "Couldn't open $file\n";
        while (my $line = <$annotation>) {
            chomp $line;
            my ($orf_id, @values) = split(/\t/, $line);
            # blank lines and rows without an ORF id cannot be attributed
            next unless defined $orf_id && length $orf_id;
            $combined_bighash{$orf_id}->{$annotation_type} = join("\t", @values);
        }
        close($annotation);
    }

    # to print finally.................
    foreach my $item (sort(keys %combined_bighash)) {
        print {$FINAL_OUTPUT} "$item\t";
        foreach my $source (@annotation_sources) {
            my ($annotation_type, undef, $width) = @{$source};
            if (exists $combined_bighash{$item}->{$annotation_type}) {
                print {$FINAL_OUTPUT} "$annotation_type = $combined_bighash{$item}{$annotation_type}\t";
            }
            else {
                print {$FINAL_OUTPUT} join("\t", ("NA",) x $width), "\t";
            }
        }
        print {$FINAL_OUTPUT} "\n";
    }
    close($FINAL_OUTPUT) or die "Couldn't finish writing final_output.txt\n";
}
| # to beautify the output | |
| ###################################################################### | |
| # CUSTOM SUBS | |
| ###################################################################### | |
| ###################################################################### | |
| # TEMPLATE SUBS | |
| ###################################################################### | |
| # PARAMETERS | |
sub checkParams {
    #-----
    # Parse and validate the command line.
    #
    # Prints the usage (via pod2usage) and ends the script when no option
    # or -help is given. -i and -l are compulsory; kingdom, threads and
    # evalue fall back to defaults because the pipeline interpolates all of
    # them into external command lines and an undefined value would yield a
    # malformed command.
    #
    # Returns a reference to the options hash.
    #
    my @standard_options = ( "help|h+", "in|i:s", "locustag|l:s", "kingdom|k:s", "threads|t:s", "evalue|e:s");
    my %options;
    # Add any other command line options, and the code to handle them
    #
    GetOptions( \%options, @standard_options );
    # if no arguments supplied print the usage and exit
    #
    exec("pod2usage $0") if (0 == (keys (%options) ));
    # If the -help option is set, print the usage and exit
    #
    exec("pod2usage $0") if $options{'help'};
    # Compulsory items
    if(!exists $options{'in'} ) { printParamError ("You MUST supply a fasta file"); }
    # the locus tag names the prokka output and every intermediate file;
    # ":s" also accepts the option without a value, which yields ""
    if(!defined $options{'locustag'} || $options{'locustag'} eq '') {
        printParamError ("You MUST supply a locus tag");
    }
    # Optional items: 1e-3 is the e-value the usage text recommends and
    # Bacteria is the first kingdom it lists
    my %defaults = (
        'kingdom' => 'Bacteria',
        'threads' => 1,
        'evalue'  => '1e-3',
    );
    foreach my $name (keys %defaults) {
        if(!defined $options{$name} || $options{$name} eq '') {
            $options{$name} = $defaults{$name};
        }
    }
    return \%options;
}
sub printParamError
{
    #-----
    # Report a bad parameter, then hand over to pod2usage to show the usage.
    # exec replaces this process, so the sub does not return on success.
    #
    my $error = shift;
    print "**ERROR: $0 : $error\n";
    exec("pod2usage $0");
}
sub overrideDefault
{
    #-----
    # Return the value given on the command line for $option_name when the
    # option is present in the global options, otherwise $default_value.
    #
    my ($default_value, $option_name) = @_;
    return exists $global_options->{$option_name}
        ? $global_options->{$option_name}
        : $default_value;
}
| ###################################################################### | |
| # FILE IO | |
sub openWrite
{
    #-----
    # Open $fn for writing (truncating it) and return the file handle;
    # croaks when the file cannot be opened.
    #
    my ($fn) = @_;
    my $fh;
    unless (open $fh, ">", $fn) {
        croak "**ERROR: could not open file: $fn for writing $!\n";
    }
    return $fh;
}
sub openRead
{
    #-----
    # Open $fn for reading and return the file handle; croaks when the
    # file cannot be opened.
    #
    my ($fn) = @_;
    my $fh;
    unless (open $fh, "<", $fn) {
        croak "**ERROR: could not open file: $fn for reading $!\n";
    }
    return $fh;
}
| ###################################################################### | |
| # EXTERNAL COMMANDS | |
| # | |
# checkAndRunCommand("ls", [{
#     -a => ""
# }],
# WARN_ON_FAILURE);
sub checkFileExists {
    #-----
    # Croak unless $file exists on disk.
    #
    my ($file) = @_;
    croak "**ERROR: $0 : Cannot find:\n$file\n" unless -e $file;
}
sub logExternalCommand
{
    #-----
    # Echo the command passed as first argument, but only when command
    # logging has been switched on through $global_log_commands.
    #
    print $_[0], "\n" if 1 == $global_log_commands;
}
sub isCommandInPath
{
    #-----
    # Is this command in the path?
    #
    # $cmd          - name of the executable to look up with "which"
    # $failure_type - failure level handed to handleCommandFailure when
    #                 the command cannot be found
    #
    my ($cmd, $failure_type) = @_;
    # The old form "which $cmd |> /dev/null" piped which into an empty
    # command, so the shell reported the (always successful) redirection
    # instead of which's own exit status and a missing command was never
    # noticed. Redirect the output instead and keep the real status.
    if (system("which $cmd > /dev/null 2>&1")) {
        handleCommandFailure($cmd, $failure_type);
    }
}
sub runExternalCommand
{
    #-----
    # Log the command (when logging is on), run it through system and
    # pass system's return value back to the caller.
    #
    my $cmd = $_[0];
    logExternalCommand($cmd);
    return system($cmd);
}
sub checkAndRunCommand
{
    #-----
    # Check that $cmd can be found, build the full command line from
    # $params (an array ref whose elements are array or hash refs, each
    # formatted by formatParams), announce it, run it and report a
    # non-zero result according to $failure_type.
    #
    my ($cmd, $params, $failure_type) = @_;
    isCommandInPath($cmd, $failure_type);

    # command name first, then every formatted parameter group
    my @formatted = map { formatParams($_) } @{$params};
    my $cmd_str = join(" ", $cmd, join(" ", @formatted));

    print "The command currently running:\t$cmd_str\n";
    logExternalCommand($cmd_str);

    # system returns non-zero when the command did not succeed
    my $result = system($cmd_str);
    handleCommandFailure($cmd_str, $failure_type) if $result;
}
sub formatParams {
    #---------
    # Formats one element of the $params list of checkAndRunCommand.
    #
    # $ref - an ARRAY ref: its elements are joined with single spaces, in
    #        order (use this for bare flags and positional arguments)
    #        a HASH ref: every pair becomes "key value"; the keys are
    #        sorted so that the same hash always gives the same command
    #
    # Returns the formatted string; croaks for anything else.
    #
    my $ref = shift;
    if (ref($ref) eq "ARRAY") {
        return join(" ", @{$ref});
    } elsif (ref($ref) eq "HASH") {
        return join(" ", map { $_ . " " . $ref->{$_}} sort keys %{$ref});
    }
    # the second part is double quoted: in single quotes the "\n" was
    # printed as a literal backslash followed by n
    croak 'The elements of the $params argument in checkAndRunCommand can ' .
          "only contain references to arrays or hashes\n";
}
sub handleCommandFailure {
    #-----
    # What to do when all goes bad!
    #
    # $cmd          - the command (line) that failed
    # $failure_type - DIE_ON_FAILURE croaks, WARN_ON_FAILURE carps, any
    #                 other value (or undef) stays silent
    #
    # Must be called right after the failing system() call, because the
    # report is built from $? (and from $! only when the command could not
    # be started at all). The old message printed $! unconditionally, which
    # says nothing about a command that ran and exited non-zero, and never
    # named the command.
    #
    my ($cmd, $failure_type) = @_;
    # take copies first so nothing below can overwrite them
    my ($status, $os_error) = ($?, "$!");
    return unless defined($failure_type);

    my $reason;
    if ($status == -1) {
        $reason = "could not be executed: $os_error";
    } elsif ($status & 127) {
        $reason = sprintf("died with signal %d", $status & 127);
    } else {
        $reason = sprintf("exited with status %d", $status >> 8);
    }

    if ($failure_type == DIE_ON_FAILURE) {
        croak "**ERROR: $0 : command '$cmd' $reason\n";
    } elsif ($failure_type == WARN_ON_FAILURE) {
        carp "**WARNING: $0 : command '$cmd' $reason\n";
    }
}
| ###################################################################### | |
| # MISC | |
sub printAtStart {
    #-----
    # Print the start-up banner: script name, copyright and warranty note.
    #
    my $banner = <<"EOF";
----------------------------------------------------------------
$0
Copyright (C) Mohamed Fauzi Haroon, Yuji Sekiguchi, Adam Skarshewski
This program comes with ABSOLUTELY NO WARRANTY;
This is free software, and you are welcome to redistribute it
under certain conditions: See the source for more details.
----------------------------------------------------------------
EOF
    print $banner;
}
| __DATA__ | |
=head1 NAME

    annotateM

=head1 COPYRIGHT

    Copyright (C) Mohamed Fauzi Haroon, Yuji Sekiguchi, Adam Skarshewski

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.

=head1 DESCRIPTION

    Want to annotate your genome? annotateM!

=head1 SYNOPSIS

    annotateM -i fasta_file

      -i FASTA_FILE                    Nucleotide fasta file
      -l locustag                      Name of locus tag
      -k kingdom (Bacteria/Archaea)    Kingdom of genome to be annotated
      -t threads                       Number of threads
      -e evalue                        Evalue for BLAST, recommend 1e-3
      [-help -h]                       Displays basic usage information

=cut