Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Examples for using Bio::SearchIO::psiblast and Bio::SearchIO::Writer::*
svn path=/bioperl-live/branches/steve_chervitz/; revision=3140
- Loading branch information
Showing
8 changed files
with
634 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
#!/usr/local/bin/perl | ||
|
||
# Example usage of a SearchIO::psiblast parser of traditional format Blast | ||
# and PSI-Blast reports. | ||
# Illustrates how to grab a set of SeqFeatures from a Blast report. | ||
# This parser represents a new and improved version of Bio/Tools/Blast.pm. | ||
# | ||
# Usage: | ||
# STDIN: stream containing one or more BLAST or PSI-BLAST reports. | ||
# STDOUT: feature start, end data | ||
# STDERR: Processing info, such as the number of reports processed | ||
# and the number of hitless reports. | ||
# | ||
# For more documentation about working with Blast result objects, | ||
# see to documentation for these modules: | ||
# Bio::Search::Result::BlastResult | ||
# Bio::Search::Hit::BlastHit | ||
# Bio::Search::HSP::BlastHSP | ||
# | ||
# For more documentation about the PSI-Blast parser, see docs for | ||
# Bio::SearchIO::psiblast | ||
# | ||
# Author: Steve Chervitz <steve_chervitz@affymetrix.com> | ||
# Revision: $Id$ | ||
|
||
use strict; | ||
use lib '../../../'; | ||
use Bio::SearchIO; | ||
|
||
my $in = Bio::SearchIO->new( -format => 'psiblast', | ||
-signif => 0.1, | ||
-verbose => 0 ); | ||
my @hitless_reports = (); | ||
|
||
while ( my $blast = $in->next_result() ) { | ||
|
||
if( $blast->hits ) { | ||
while( my $feature = $blast->next_feature() ) { | ||
print "Feature from ", $feature->start, " to ", $feature->end, "\n"; | ||
} | ||
} | ||
else { | ||
push @hitless_reports, $blast; | ||
} | ||
} | ||
|
||
printf STDERR "\n%d Blast report(s) processed.\n", $in->report_count; | ||
printf STDERR "\n%d reports had no hits:\n", scalar(@hitless_reports); | ||
|
||
foreach my $blast (@hitless_reports) { | ||
print STDERR "No hits for query ", $blast->query_name; | ||
print STDERR ($blast->no_hits_found ? "\n" : "(filtered)\n") | ||
; | ||
} | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
#!/usr/bin/env perl | ||
|
||
# Example usage of a SearchIO::psiblast parser of traditional format Blast reports. | ||
# Here we define a custom SearchWriterI object that ouputs just the data we want | ||
# from each BLAST report. | ||
# | ||
# NOTE: If you just want pick and choose which columns you want | ||
# in the output table, you don't need to create your own custom | ||
# SearchWriterI implementation as we do here. HitTableWriter and HSPTableWriter | ||
# are configurable as to what columns and order you want. | ||
# The hitwriter*.pl and hspwriter.pl examples in this directory | ||
# illustrate this. | ||
# | ||
# For a complete list of columns, see the docs for these modules: | ||
# Bio::SearchIO::Writer::HitTableWriter | ||
# Bio::SearchIO::Writer::HSPTableWriter | ||
# | ||
# This example serves as an illustration of how to use the | ||
# SearchWriterI api and plug it in to a SearchIO parser, | ||
# which you may want to do if you want to generate data column(s) | ||
# not provided by the available writers. | ||
# | ||
# Usage: | ||
# STDIN: stream containing one or more BLAST or PSI-BLAST reports. | ||
# STDOUT: none, but generates an output file "custom_writer.out" | ||
# containing tab-delimited data on a per-hit basis. | ||
# STDERR: Progress info. | ||
# | ||
# Author: Steve Chervitz <steve_chervitz@affymetrix.com> | ||
# Revision: $Id$ | ||
|
||
package MyBlastWriter; | ||
|
||
use strict; | ||
use lib '../../../'; | ||
use Bio::Root::Root; | ||
use Bio::SearchIO::SearchWriterI; | ||
|
||
use vars qw( @ISA ); | ||
@ISA = qw( Bio::Root::Root Bio::SearchIO::SearchWriterI ); | ||
|
||
sub to_string { | ||
my ($self, $result, @args) = @_; | ||
my $str = ''; | ||
|
||
my $hits_reported = 0; | ||
|
||
foreach my $hit($result->hits) { | ||
|
||
# If this is a PSI-BLAST report, only report novel hits | ||
if( $result->psiblast ) { | ||
# Note that we could have supplied this has a -HIT_FILTER function | ||
# when we defined our input SearchIO object. Then we wouldn't need | ||
# to define a custom writer. | ||
next unless $hit->iteration > 1 and not $hit->found_again; | ||
} | ||
|
||
$hits_reported++; | ||
printf STDERR "$hit\n"; | ||
|
||
$str .= sprintf "%s\t%d\t%s\t%d\t%.2f\t%d\t%.1e\t%d\t%d\t%d\t%d\t%s\n", | ||
$result->query_name, $result->query_length, $hit->name, | ||
$hit->length, $hit->frac_identical('query'), $hit->length_aln, | ||
$hit->expect, $hit->score, $hit->bits, | ||
$hit->gaps('total'), $hit->num_hsps, $hit->iteration || '-'; | ||
} | ||
|
||
printf STDERR "\n%d hits written\n", $hits_reported; | ||
|
||
$str; | ||
|
||
} | ||
|
||
package main; | ||
|
||
#=================================================== | ||
# Start of script | ||
#=================================================== | ||
|
||
use strict; | ||
|
||
use lib '../../../'; | ||
use Bio::SearchIO; | ||
|
||
select STDOUT; $|=1; | ||
|
||
my $in = Bio::SearchIO->new( -format => 'psiblast', -signif => 0.1 ); | ||
my $writer = MyBlastWriter->new(); | ||
my $out = Bio::SearchIO->new( -format => 'psiblast', | ||
-writer => $writer, | ||
-file => ">custom_writer.out" ); | ||
|
||
while ( my $result = $in->next_result() ) { | ||
printf STDERR "Report %d: $result\n", $in->report_count; | ||
$out->write_result($result); | ||
} | ||
|
||
printf STDERR "\n%d Results processed.\n", $in->report_count; | ||
printf STDERR "Output sent to file: %s\n", $out->file if $out->file; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
#!/usr/bin/perl | ||
|
||
# Example usage of a SearchIO::psiblast parser of traditional format Blast reports. | ||
# This parser represents a new and improved version of Bio/Tools/Blast.pm. | ||
# | ||
# Usage: | ||
# STDIN: stream containing one or more BLAST or PSI-BLAST reports. | ||
# STDOUT: none, but generates an output file "hitwriter.out" | ||
# containing tab-delimited data on a per-hit basis. | ||
# STDERR: Progress info. | ||
# | ||
# In this example, we create a SearchIO parser that screens out hits | ||
# based on expect (or P) scores and a default HitTableWriter. This writer | ||
# provides the same functionality as the original Bio::Tools::Blast::table() | ||
# function (i.e., a tab-delimited summary of each hit per row). | ||
# HitTableWriter, however, is customizable so you can specify just the columns | ||
# you want to have in the output table. | ||
# | ||
# For more documentation about the writer, including | ||
# a complete list of columns, execute: | ||
# perldoc Bio::SearchIO::Writer::HitTableWriter. | ||
# | ||
# For more documentation about working with Blast result objects, | ||
# see docs for these modules: | ||
# Bio::Search::Result::BlastResult | ||
# Bio::Search::Hit::BlastHit | ||
# Bio::Search::HSP::BlastHSP | ||
# | ||
# For more documentation about the PSI-Blast parser, see docs for | ||
# Bio::SearchIO::psiblast | ||
# | ||
# Author: Steve Chervitz <steve_chervitz@affymetrix.com> | ||
# Revision: $Id$ | ||
|
||
use strict; | ||
use lib '../../../'; | ||
|
||
use Bio::SearchIO; | ||
use Bio::SearchIO::Writer::HitTableWriter; | ||
|
||
# These are the columns that will be in the output table of BLAST results. | ||
my @columns = qw( | ||
query_name | ||
query_length | ||
hit_name | ||
hit_length | ||
num_hsps | ||
expect | ||
frac_identical_query | ||
length_aln_query | ||
gaps_total | ||
strand_query | ||
strand_hit | ||
); | ||
|
||
print STDERR "\nUsing SearchIO->new()\n"; | ||
|
||
# Note that all parameters for the $in, $out, and $writer objects are optional. | ||
# Default in = STDIN; Default out = STDOUT; Default writer = all columns | ||
# In this example, we're reading from STDIN and writing to a file | ||
# called "hitwriter.out" | ||
# TODO: write hitless reports to STDERR and note if filtered. | ||
my $in = Bio::SearchIO->new( -format => 'psiblast', | ||
-signif => 0.1, | ||
-verbose=> 0 ); | ||
my $writer = Bio::SearchIO::Writer::HitTableWriter->new( -columns => \@columns | ||
); | ||
my $out = Bio::SearchIO->new( -format => 'psiblast', | ||
-writer => $writer, | ||
-file => ">hitwriter.out" ); | ||
# Need to keep a separate count of reports with hits | ||
# to know when to include labels. The first report may be hitless, | ||
# so we can't use $in->report_coun | ||
my $hit_count = 0; | ||
while ( my $blast = $in->next_result() ) { | ||
printf STDERR "\nReport %d: $blast\n", $in->report_count; | ||
|
||
if( $blast->hits ) { | ||
$hit_count++; | ||
$out->write_result($blast, $hit_count==1 ); | ||
} | ||
else { | ||
print STDERR "Hitless Blast Report "; | ||
print STDERR ($blast->no_hits_found ? "\n" : "(filtered)\n"); | ||
} | ||
|
||
## For a simple progress monitor, uncomment this line: | ||
#print STDERR "."; print STDERR "\n" if $in->report_count % 50 == 0; | ||
} | ||
|
||
printf STDERR "\n%d Blast report(s) processed.\n", $in->report_count; | ||
printf STDERR "Output sent to file: %s\n", $out->file if $out->file; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#!/usr/local/bin/perl | ||
|
||
# Example usage of a SearchIO::psiblast parser of traditional format Blast reports. | ||
# Same as hitwriter.pl but showing how to use the -hit_filter option. | ||
# | ||
# Usage: | ||
# STDIN: stream containing one or more BLAST or PSI-BLAST reports. | ||
# STDOUT: none, but generates an output file "hitwriter2.out" | ||
# containing tab-delimited data on a per-hit basis. | ||
# STDERR: Progress info. | ||
# | ||
# For more documentation about the writer, including | ||
# a complete list of columns, execute: | ||
# perldoc Bio::SearchIO::Writer::HitTableWriter. | ||
# | ||
# For more documentation about working with Blast result objects, | ||
# see docs foor these modules: | ||
# Bio::Search::Result::BlastResult | ||
# Bio::Search::Hit::BlastHit | ||
# Bio::Search::HSP::BlastHSP | ||
# | ||
# For more documentation about the PSI-Blast parser, see docs for | ||
# Bio::SearchIO::psiblast | ||
# | ||
# Author: Steve Chervitz <steve_chervitz@affymetrix.com> | ||
# Revision: $Id$ | ||
|
||
use strict; | ||
|
||
use lib '../../../'; | ||
|
||
use Bio::SearchIO; | ||
use Bio::SearchIO::Writer::HitTableWriter; | ||
|
||
# These are the columns that will be in the output table of BLAST results. | ||
my @columns = qw( | ||
query_name | ||
query_length | ||
hit_name | ||
hit_length | ||
num_hsps | ||
expect | ||
frac_identical_query | ||
length_aln_query | ||
gaps_total | ||
strand_query | ||
strand_hit | ||
); | ||
|
||
# These labels just override the defaults for the specified columns. | ||
# (first column is 1). | ||
my %labels = ( | ||
1 => 'QUERY_GI', | ||
3 => 'HIT_IDENTIFIER', | ||
); | ||
|
||
|
||
# BLAST hit filtering function. All hits of each BLAST report must satisfy | ||
# this criteria to be retained. If a hit fails this test, it is ignored. | ||
# If all hits of a report fail, the report will be considered hitless, | ||
# but we can output a "(filtered)" string to indicate that this is the reason. | ||
my $filt_func = sub{ my $hit=shift; | ||
$hit->frac_identical('query') >= 0.5 | ||
&& $hit->frac_aligned_query >= 0.50 | ||
}; | ||
|
||
print STDERR "\nUsing SearchIO->new()\n"; | ||
|
||
# Note that all parameters for the $in, $out, and $writer objects are optional. | ||
# Default in = STDIN; Default out = STDOUT; Default writer = all columns | ||
# In this example, we're reading from STDIN and writing to a file | ||
# called "hitwriter.out" | ||
|
||
my $in = Bio::SearchIO->new( -format => 'psiblast', | ||
-hit_filter => $filt_func,); | ||
my $writer = Bio::SearchIO::Writer::HitTableWriter->new( -columns => \@columns, | ||
-labels => \%labels | ||
); | ||
my $out = Bio::SearchIO->new( -format => 'psiblast', | ||
-writer => $writer, | ||
-file => ">hitwriter2.out" ); | ||
my $hit_count = 0; | ||
while ( my $blast = $in->next_result() ) { | ||
printf STDERR "\nReport %d: $blast\n", $in->report_count; | ||
|
||
if( $blast->hits ) { | ||
$hit_count++; | ||
$out->write_result($blast, $hit_count==1 ); | ||
} | ||
else { | ||
print STDERR "Hitless Blast Report: "; | ||
print STDERR ($blast->no_hits_found ? "\n" : "(filtered)\n"); | ||
} | ||
|
||
## For a simple progress monitor, uncomment this line: | ||
#print STDERR "."; print STDERR "\n" if $in->report_count % 50 == 0; | ||
} | ||
|
||
printf STDERR "\n%d Blast report(s) processed.\n", $in->report_count; | ||
printf STDERR "Output sent to file: %s\n", $out->file if $out->file; | ||
|
Oops, something went wrong.