Skip to content

Commit

Permalink
Examples for using Bio::SearchIO::psiblast and Bio::SearchIO::Writer::*
Browse files Browse the repository at this point in the history
svn path=/bioperl-live/branches/steve_chervitz/; revision=3140
  • Loading branch information
trutane committed Dec 28, 2001
1 parent 4aca2a1 commit 55556b6
Show file tree
Hide file tree
Showing 8 changed files with 634 additions and 0 deletions.
56 changes: 56 additions & 0 deletions examples/searchio/psiblast_features.pl
@@ -0,0 +1,56 @@
#!/usr/local/bin/perl

# Example usage of a SearchIO::psiblast parser of traditional format Blast
# and PSI-Blast reports.
# Illustrates how to grab a set of SeqFeatures from a Blast report.
# This parser represents a new and improved version of Bio/Tools/Blast.pm.
#
# Usage:
# STDIN: stream containing one or more BLAST or PSI-BLAST reports.
# STDOUT: feature start, end data
# STDERR: Processing info, such as the number of reports processed
# and the number of hitless reports.
#
# For more documentation about working with Blast result objects,
# see to documentation for these modules:
# Bio::Search::Result::BlastResult
# Bio::Search::Hit::BlastHit
# Bio::Search::HSP::BlastHSP
#
# For more documentation about the PSI-Blast parser, see docs for
# Bio::SearchIO::psiblast
#
# Author: Steve Chervitz <steve_chervitz@affymetrix.com>
# Revision: $Id$

use strict;
use lib '../../../';
use Bio::SearchIO;

my $in = Bio::SearchIO->new( -format => 'psiblast',
-signif => 0.1,
-verbose => 0 );
my @hitless_reports = ();

while ( my $blast = $in->next_result() ) {

if( $blast->hits ) {
while( my $feature = $blast->next_feature() ) {
print "Feature from ", $feature->start, " to ", $feature->end, "\n";
}
}
else {
push @hitless_reports, $blast;
}
}

printf STDERR "\n%d Blast report(s) processed.\n", $in->report_count;
printf STDERR "\n%d reports had no hits:\n", scalar(@hitless_reports);

foreach my $blast (@hitless_reports) {
print STDERR "No hits for query ", $blast->query_name;
print STDERR ($blast->no_hits_found ? "\n" : "(filtered)\n")
;
}


100 changes: 100 additions & 0 deletions examples/searchio/writer/custom_writer.pl
@@ -0,0 +1,100 @@
#!/usr/bin/env perl

# Example usage of a SearchIO::psiblast parser of traditional format Blast reports.
# Here we define a custom SearchWriterI object that ouputs just the data we want
# from each BLAST report.
#
# NOTE: If you just want pick and choose which columns you want
# in the output table, you don't need to create your own custom
# SearchWriterI implementation as we do here. HitTableWriter and HSPTableWriter
# are configurable as to what columns and order you want.
# The hitwriter*.pl and hspwriter.pl examples in this directory
# illustrate this.
#
# For a complete list of columns, see the docs for these modules:
# Bio::SearchIO::Writer::HitTableWriter
# Bio::SearchIO::Writer::HSPTableWriter
#
# This example serves as an illustration of how to use the
# SearchWriterI api and plug it in to a SearchIO parser,
# which you may want to do if you want to generate data column(s)
# not provided by the available writers.
#
# Usage:
# STDIN: stream containing one or more BLAST or PSI-BLAST reports.
# STDOUT: none, but generates an output file "custom_writer.out"
# containing tab-delimited data on a per-hit basis.
# STDERR: Progress info.
#
# Author: Steve Chervitz <steve_chervitz@affymetrix.com>
# Revision: $Id$

package MyBlastWriter;

use strict;
use lib '../../../';
use Bio::Root::Root;
use Bio::SearchIO::SearchWriterI;

use vars qw( @ISA );
@ISA = qw( Bio::Root::Root Bio::SearchIO::SearchWriterI );

sub to_string {
my ($self, $result, @args) = @_;
my $str = '';

my $hits_reported = 0;

foreach my $hit($result->hits) {

# If this is a PSI-BLAST report, only report novel hits
if( $result->psiblast ) {
# Note that we could have supplied this has a -HIT_FILTER function
# when we defined our input SearchIO object. Then we wouldn't need
# to define a custom writer.
next unless $hit->iteration > 1 and not $hit->found_again;
}

$hits_reported++;
printf STDERR "$hit\n";

$str .= sprintf "%s\t%d\t%s\t%d\t%.2f\t%d\t%.1e\t%d\t%d\t%d\t%d\t%s\n",
$result->query_name, $result->query_length, $hit->name,
$hit->length, $hit->frac_identical('query'), $hit->length_aln,
$hit->expect, $hit->score, $hit->bits,
$hit->gaps('total'), $hit->num_hsps, $hit->iteration || '-';
}

printf STDERR "\n%d hits written\n", $hits_reported;

$str;

}

package main;

#===================================================
# Start of script
#===================================================

use strict;

use lib '../../../';
use Bio::SearchIO;

select STDOUT; $|=1;

my $in = Bio::SearchIO->new( -format => 'psiblast', -signif => 0.1 );
my $writer = MyBlastWriter->new();
my $out = Bio::SearchIO->new( -format => 'psiblast',
-writer => $writer,
-file => ">custom_writer.out" );

while ( my $result = $in->next_result() ) {
printf STDERR "Report %d: $result\n", $in->report_count;
$out->write_result($result);
}

printf STDERR "\n%d Results processed.\n", $in->report_count;
printf STDERR "Output sent to file: %s\n", $out->file if $out->file;

93 changes: 93 additions & 0 deletions examples/searchio/writer/hitwriter.pl
@@ -0,0 +1,93 @@
#!/usr/bin/perl

# Example usage of a SearchIO::psiblast parser of traditional format Blast reports.
# This parser represents a new and improved version of Bio/Tools/Blast.pm.
#
# Usage:
# STDIN: stream containing one or more BLAST or PSI-BLAST reports.
# STDOUT: none, but generates an output file "hitwriter.out"
# containing tab-delimited data on a per-hit basis.
# STDERR: Progress info.
#
# In this example, we create a SearchIO parser that screens out hits
# based on expect (or P) scores and a default HitTableWriter. This writer
# provides the same functionality as the original Bio::Tools::Blast::table()
# function (i.e., a tab-delimited summary of each hit per row).
# HitTableWriter, however, is customizable so you can specify just the columns
# you want to have in the output table.
#
# For more documentation about the writer, including
# a complete list of columns, execute:
# perldoc Bio::SearchIO::Writer::HitTableWriter.
#
# For more documentation about working with Blast result objects,
# see docs for these modules:
# Bio::Search::Result::BlastResult
# Bio::Search::Hit::BlastHit
# Bio::Search::HSP::BlastHSP
#
# For more documentation about the PSI-Blast parser, see docs for
# Bio::SearchIO::psiblast
#
# Author: Steve Chervitz <steve_chervitz@affymetrix.com>
# Revision: $Id$

use strict;
use lib '../../../';

use Bio::SearchIO;
use Bio::SearchIO::Writer::HitTableWriter;

# These are the columns that will be in the output table of BLAST results.
my @columns = qw(
query_name
query_length
hit_name
hit_length
num_hsps
expect
frac_identical_query
length_aln_query
gaps_total
strand_query
strand_hit
);

print STDERR "\nUsing SearchIO->new()\n";

# Note that all parameters for the $in, $out, and $writer objects are optional.
# Default in = STDIN; Default out = STDOUT; Default writer = all columns
# In this example, we're reading from STDIN and writing to a file
# called "hitwriter.out"
# TODO: write hitless reports to STDERR and note if filtered.
my $in = Bio::SearchIO->new( -format => 'psiblast',
-signif => 0.1,
-verbose=> 0 );
my $writer = Bio::SearchIO::Writer::HitTableWriter->new( -columns => \@columns
);
my $out = Bio::SearchIO->new( -format => 'psiblast',
-writer => $writer,
-file => ">hitwriter.out" );
# Need to keep a separate count of reports with hits
# to know when to include labels. The first report may be hitless,
# so we can't use $in->report_coun
my $hit_count = 0;
while ( my $blast = $in->next_result() ) {
printf STDERR "\nReport %d: $blast\n", $in->report_count;

if( $blast->hits ) {
$hit_count++;
$out->write_result($blast, $hit_count==1 );
}
else {
print STDERR "Hitless Blast Report ";
print STDERR ($blast->no_hits_found ? "\n" : "(filtered)\n");
}

## For a simple progress monitor, uncomment this line:
#print STDERR "."; print STDERR "\n" if $in->report_count % 50 == 0;
}

printf STDERR "\n%d Blast report(s) processed.\n", $in->report_count;
printf STDERR "Output sent to file: %s\n", $out->file if $out->file;

101 changes: 101 additions & 0 deletions examples/searchio/writer/hitwriter2.pl
@@ -0,0 +1,101 @@
#!/usr/local/bin/perl

# Example usage of a SearchIO::psiblast parser of traditional format Blast reports.
# Same as hitwriter.pl but showing how to use the -hit_filter option.
#
# Usage:
# STDIN: stream containing one or more BLAST or PSI-BLAST reports.
# STDOUT: none, but generates an output file "hitwriter2.out"
# containing tab-delimited data on a per-hit basis.
# STDERR: Progress info.
#
# For more documentation about the writer, including
# a complete list of columns, execute:
# perldoc Bio::SearchIO::Writer::HitTableWriter.
#
# For more documentation about working with Blast result objects,
# see docs foor these modules:
# Bio::Search::Result::BlastResult
# Bio::Search::Hit::BlastHit
# Bio::Search::HSP::BlastHSP
#
# For more documentation about the PSI-Blast parser, see docs for
# Bio::SearchIO::psiblast
#
# Author: Steve Chervitz <steve_chervitz@affymetrix.com>
# Revision: $Id$

use strict;

use lib '../../../';

use Bio::SearchIO;
use Bio::SearchIO::Writer::HitTableWriter;

# These are the columns that will be in the output table of BLAST results.
my @columns = qw(
query_name
query_length
hit_name
hit_length
num_hsps
expect
frac_identical_query
length_aln_query
gaps_total
strand_query
strand_hit
);

# These labels just override the defaults for the specified columns.
# (first column is 1).
my %labels = (
1 => 'QUERY_GI',
3 => 'HIT_IDENTIFIER',
);


# BLAST hit filtering function. All hits of each BLAST report must satisfy
# this criteria to be retained. If a hit fails this test, it is ignored.
# If all hits of a report fail, the report will be considered hitless,
# but we can output a "(filtered)" string to indicate that this is the reason.
my $filt_func = sub{ my $hit=shift;
$hit->frac_identical('query') >= 0.5
&& $hit->frac_aligned_query >= 0.50
};

print STDERR "\nUsing SearchIO->new()\n";

# Note that all parameters for the $in, $out, and $writer objects are optional.
# Default in = STDIN; Default out = STDOUT; Default writer = all columns
# In this example, we're reading from STDIN and writing to a file
# called "hitwriter.out"

my $in = Bio::SearchIO->new( -format => 'psiblast',
-hit_filter => $filt_func,);
my $writer = Bio::SearchIO::Writer::HitTableWriter->new( -columns => \@columns,
-labels => \%labels
);
my $out = Bio::SearchIO->new( -format => 'psiblast',
-writer => $writer,
-file => ">hitwriter2.out" );
my $hit_count = 0;
while ( my $blast = $in->next_result() ) {
printf STDERR "\nReport %d: $blast\n", $in->report_count;

if( $blast->hits ) {
$hit_count++;
$out->write_result($blast, $hit_count==1 );
}
else {
print STDERR "Hitless Blast Report: ";
print STDERR ($blast->no_hits_found ? "\n" : "(filtered)\n");
}

## For a simple progress monitor, uncomment this line:
#print STDERR "."; print STDERR "\n" if $in->report_count % 50 == 0;
}

printf STDERR "\n%d Blast report(s) processed.\n", $in->report_count;
printf STDERR "Output sent to file: %s\n", $out->file if $out->file;

0 comments on commit 55556b6

Please sign in to comment.