Permalink
Browse files

Parsed all the database id2names and also parsed output for Pfam and …

…TIGRfam.

TIGRfam includes description which is super useful.
  • Loading branch information...
1 parent 39bcdac commit 564f8fbae22e1912970c34cd82d3a90c0a52e30f @fauziharoon committed Apr 7, 2014
Showing with 168 additions and 6 deletions.
  1. +168 −6 annotateM
View
174 annotateM
@@ -178,6 +178,71 @@ checkAndRunCommand("blastp",
]], DIE_ON_FAILURE);
}
+# blast against COG
+if (! -e "./$locus.faaVSCOG.blastp")
+{
+print "BLASTing against COG database................\n";
+checkAndRunCommand("cat",[[
+ "prokka_annotation/$locus.faa |",
+ "parallel",
+ "--block"=> "100k",
+ "--recstart",
+ "'>'",
+ "--pipe",
+ "blastp",
+ -db => "/srv/db/cog/cog_blast_prot_db",
+ -outfmt => 6,
+ -max_target_seqs => 1,
+ -evalue => $global_options->{'evalue'},
+ -query => "-",
+ "> $locus.faaVSCOG.blastp",
+ #-num_threads => $global_options->{'threads'},
+ ]], DIE_ON_FAILURE);
+}
+
+# HMMSCAN against PFAM
+if (! -e "./$locus.faaVSPfam-A.hmm.hmmscanned")
+{
+print "HMMscanning against Pfam database................\n";
+checkAndRunCommand("pfam_scan.pl",[[
+ -cpu => $global_options->{'threads'},
+ -e_seq => $global_options->{'evalue'},
+ -outfile => "$locus.faaVSPfam-A.hmm.hmmscanned",
+ -fasta => "prokka_annotation/$locus.faa",
+ -dir => "/srv/db/pfam/27",
+ ]], DIE_ON_FAILURE);
+}
+
+# HMMSCAN against TIGRfam
+if (! -e "./$locus.faaVStigr_all.hmm.hmmscanned")
+{
+print "HMMscanning against TIGRfam database................\n";
+checkAndRunCommand("hmmscan",[[
+ "--tblout" => "$locus.faaVStigr_all.hmm.hmmscanned",
+ "--noali",
+ -E => $global_options->{'evalue'},
+ "--cpu" => $global_options->{'threads'},
+ "/srv/db/tigrfam/14.0/TIGRFAMs_14.0_HMM/tigr_all.hmm",
+ "prokka_annotation/$locus.faa",
+ ]], DIE_ON_FAILURE);
+}
+
+# convert the hmmscan output to tab delimited
+checkAndRunCommand("awk",[[
+ "'{\$1=\$1}{ print }'",
+ "$locus.faaVSPfam-A.hmm.hmmscanned",
+ "| sed 's/\\s/\\t/g'",
+ "> $locus.faaVSPfam-A.hmm.hmmscanned.tab",
+ ]], DIE_ON_FAILURE);
+
+checkAndRunCommand("awk",[[
+ "'{\$1=\$1}{ print }'",
+ "$locus.faaVStigr_all.hmm.hmmscanned",
+ "| sed 's/\\s/\\t/g'",
+ "> $locus.faaVStigr_all.hmm.hmmscanned.tab",
+ ]], DIE_ON_FAILURE);
+
+
# hashes for img
my %hash = ();
my %hash2 = ();
@@ -196,7 +261,7 @@ while (<$IMGblast>)
# key is $orfid while the value of the key in $columns[1] is the IMG id
$hash{$orfid} = $columns[1];
$hash{$columns[1]} = $columns[0];
- $hash2{$columns[1]} = "$columns[0]\t$columns[11]";
+ $hash2{$columns[1]} = "$columns[0]\t$columns[10]\t$columns[11]";
}
}
@@ -231,12 +296,14 @@ while (<$rIMGblast>)
}
}
-#hashes for uniref
+close($img_temp_OUT2);
+
+# hashes for uniref
my %hash4 = ();
my %hash5 =();
my %hash6 = ();
-#read uniref blast and store in hash
+# read uniref blast and store in hash
open my $unirefblast, "./$locus.faaVSuniref90.blastp", or die "Couldn't open file $locus.faaVSuniref90.blastp\n";
while (<$unirefblast>)
{
@@ -246,11 +313,11 @@ while (<$unirefblast>)
{
$hash4{$columns[0]} = $columns[1];
$hash4{$columns[1]} = $columns[0];
- $hash5{$columns[1]} = "$columns[0]\t$columns[11]";
+ $hash5{$columns[1]} = "$columns[0]\t$columns[10]\t$columns[11]";
}
}
-#read uniref id2names.txt
+# read uniref id2names.txt
open my $unirefid2names, "/srv/db/uniprot/uniref-20140403/uniref90_id2names.txt", or die "Couldn't open id2names.txt\n";
open my $uniref_temp_OUT, ">uniref_output_temp.txt";
while (<$unirefid2names>)
@@ -280,7 +347,102 @@ while (<$runirefblast>)
print {$uniref_temp_OUT2} "$hash6{$columns[0]}\treciprocal\n";
}
}
-
+
+close($uniref_temp_OUT2);
+
+# hashes for pfam
+my %hash7 = ();
+my %hash8 = ();
+my %hash9 = ();
+
+# read pfam hmmscan output and store in hash
+open my $pfamoutput, "./$locus.faaVSPfam-A.hmm.hmmscanned.tab", or die "Couldn't open file $locus.faaVSPfam-A.hmm.hmmscanned.tab\n";
+while (<$pfamoutput>)
+{
+ next if /^\s*(#.*)?$/;
+ next if $pfamoutput =~ /^#/;
+ next if $pfamoutput =~ /^=/;
+# if ($pfamoutput =~ /test_.*/)
+# {
+ chomp $_;
+ my @columns = split (/\t/, $_);
+ if ($columns[11] > 60)
+# if ($columns[11])
+ {
+ my @pfam_columns = split (/\./, $columns[5]);
+ my $pfam_id = $pfam_columns[0];
+ $hash7{$columns[0]} = $pfam_columns[0];
+ $hash7{$pfam_columns[0]} = $columns[0];
+ $hash8{$pfam_columns[0]} = "$columns[0]\t$columns[12]\t$columns[11]";
+ }
+# }
+}
+
+#read Pfam-A.clans.tsv
+open my $pfamid2names, "/srv/db/pfam/27/Pfam-A.clans.tsv", or die "Couldn't open Pfam-A.clans.tsv\n";
+open my $pfam_temp_OUT, ">pfam_output_temp.txt";
+while (<$pfamid2names>)
+{
+ chomp $_;
+ my @columns = split (/\t/, $_);
+ if (exists $hash8{$columns[0]})
+ {
+ print {$pfam_temp_OUT} "$hash8{$columns[0]}\t$columns[4]\n";
+ }
+}
+
+close($pfamoutput);
+close($pfamid2names);
+close($pfam_temp_OUT);
+
+#hashes for tigrfam
+my %hash10 = ();
+my %hash11 = ();
+my %hash12 = ();
+
+#read tigrfam hmmscan output and store in hash
+open my $tigrfamoutput, "./$locus.faaVStigr_all.hmm.hmmscanned.tab", or die "Couldn't open file $locus.faaVStigr_all.hmm.hmmscanned.tab\n";
+while (<$tigrfamoutput>)
+{
+ next if /^\s*(#.*)?$/;
+ next if $tigrfamoutput =~ /^#/;
+# next if $pfamoutput =! /TIGR/;
+# if ($tigrfamoutput =~ /TIGR.*/)
+# {
+ chomp $_;
+ my @columns = split (/\t/, $_);
+ if ($columns[5] > 10)
+# if ($columns[5])
+ {
+ $hash10{$columns[2]} = $columns[0];
+ $hash10{$columns[0]} = $columns[2];
+ $hash11{$columns[0]} = "$columns[2]\t$columns[5]\t$columns[4]";
+ }
+# }
+}
+
+#read tigrfam id2names2description
+open my $tigrfamid2names, "/srv/db/tigrfam/14.0/TIGRFAMs_14.0_INFO/tigr_info_combined.parsed", or die "Couldn't open tigr_info_combined.parsed\n";
+open my $tigrfam_temp_OUT, ">tigrfam_output_temp.txt";
+while (<$tigrfamid2names>)
+{
+ chomp $_;
+ my @columns = split (/\t/, $_);
+ #print "$columns[0]\n";
+ $columns[0] =~ s/^\s+//;
+ $columns[0] =~ s/\s+$//;
+ if (exists $hash11{$columns[0]})
+ {
+ print {$tigrfam_temp_OUT} "$hash11{$columns[0]}\t$columns[1]\t$columns[2]\n";
+ }
+}
+
+close($tigrfamoutput);
+close($tigrfamid2names);
+close($tigrfam_temp_OUT);
+
+
+
######################################################################
# CUSTOM SUBS

0 comments on commit 564f8fb

Please sign in to comment.