Permalink
Browse files

Examples for using Bio::SearchIO::psiblast and Bio::SearchIO::Writer::*

svn path=/bioperl-live/branches/steve_chervitz/; revision=3140
  • Loading branch information...
1 parent 4aca2a1 commit 55556b6333d8ba7a7585030f8015a13cdf3b45fc @trutane trutane committed Dec 28, 2001
@@ -0,0 +1,56 @@
+#!/usr/local/bin/perl
+
+# Example usage of a SearchIO::psiblast parser of traditional format Blast
+# and PSI-Blast reports.
+# Illustrates how to grab a set of SeqFeatures from a Blast report.
+# This parser represents a new and improved version of Bio/Tools/Blast.pm.
+#
+# Usage:
+# STDIN: stream containing one or more BLAST or PSI-BLAST reports.
+# STDOUT: feature start, end data
+# STDERR: Processing info, such as the number of reports processed
+# and the number of hitless reports.
+#
+# For more documentation about working with Blast result objects,
+# see to documentation for these modules:
+# Bio::Search::Result::BlastResult
+# Bio::Search::Hit::BlastHit
+# Bio::Search::HSP::BlastHSP
+#
+# For more documentation about the PSI-Blast parser, see docs for
+# Bio::SearchIO::psiblast
+#
+# Author: Steve Chervitz <steve_chervitz@affymetrix.com>
+# Revision: $Id$
+
+use strict;
+use lib '../../../';
+use Bio::SearchIO;
+
+my $in = Bio::SearchIO->new( -format => 'psiblast',
+ -signif => 0.1,
+ -verbose => 0 );
+my @hitless_reports = ();
+
+while ( my $blast = $in->next_result() ) {
+
+ if( $blast->hits ) {
+ while( my $feature = $blast->next_feature() ) {
+ print "Feature from ", $feature->start, " to ", $feature->end, "\n";
+ }
+ }
+ else {
+ push @hitless_reports, $blast;
+ }
+}
+
+printf STDERR "\n%d Blast report(s) processed.\n", $in->report_count;
+printf STDERR "\n%d reports had no hits:\n", scalar(@hitless_reports);
+
+foreach my $blast (@hitless_reports) {
+ print STDERR "No hits for query ", $blast->query_name;
+ print STDERR ($blast->no_hits_found ? "\n" : "(filtered)\n")
+;
+}
+
+
@@ -0,0 +1,100 @@
+#!/usr/bin/env perl
+
+# Example usage of a SearchIO::psiblast parser of traditional format Blast reports.
+# Here we define a custom SearchWriterI object that ouputs just the data we want
+# from each BLAST report.
+#
+# NOTE: If you just want pick and choose which columns you want
+# in the output table, you don't need to create your own custom
+# SearchWriterI implementation as we do here. HitTableWriter and HSPTableWriter
+# are configurable as to what columns and order you want.
+# The hitwriter*.pl and hspwriter.pl examples in this directory
+# illustrate this.
+#
+# For a complete list of columns, see the docs for these modules:
+# Bio::SearchIO::Writer::HitTableWriter
+# Bio::SearchIO::Writer::HSPTableWriter
+#
+# This example serves as an illustration of how to use the
+# SearchWriterI api and plug it in to a SearchIO parser,
+# which you may want to do if you want to generate data column(s)
+# not provided by the available writers.
+#
+# Usage:
+# STDIN: stream containing one or more BLAST or PSI-BLAST reports.
+# STDOUT: none, but generates an output file "custom_writer.out"
+# containing tab-delimited data on a per-hit basis.
+# STDERR: Progress info.
+#
+# Author: Steve Chervitz <steve_chervitz@affymetrix.com>
+# Revision: $Id$
+
+package MyBlastWriter;
+
+use strict;
+use lib '../../../';
+use Bio::Root::Root;
+use Bio::SearchIO::SearchWriterI;
+
+use vars qw( @ISA );
+@ISA = qw( Bio::Root::Root Bio::SearchIO::SearchWriterI );
+
+sub to_string {
+ my ($self, $result, @args) = @_;
+ my $str = '';
+
+ my $hits_reported = 0;
+
+ foreach my $hit($result->hits) {
+
+ # If this is a PSI-BLAST report, only report novel hits
+ if( $result->psiblast ) {
+ # Note that we could have supplied this has a -HIT_FILTER function
+ # when we defined our input SearchIO object. Then we wouldn't need
+ # to define a custom writer.
+ next unless $hit->iteration > 1 and not $hit->found_again;
+ }
+
+ $hits_reported++;
+ printf STDERR "$hit\n";
+
+ $str .= sprintf "%s\t%d\t%s\t%d\t%.2f\t%d\t%.1e\t%d\t%d\t%d\t%d\t%s\n",
+ $result->query_name, $result->query_length, $hit->name,
+ $hit->length, $hit->frac_identical('query'), $hit->length_aln,
+ $hit->expect, $hit->score, $hit->bits,
+ $hit->gaps('total'), $hit->num_hsps, $hit->iteration || '-';
+ }
+
+ printf STDERR "\n%d hits written\n", $hits_reported;
+
+ $str;
+
+}
+
+package main;
+
+#===================================================
+# Start of script
+#===================================================
+
+use strict;
+
+use lib '../../../';
+use Bio::SearchIO;
+
+select STDOUT; $|=1;
+
+my $in = Bio::SearchIO->new( -format => 'psiblast', -signif => 0.1 );
+my $writer = MyBlastWriter->new();
+my $out = Bio::SearchIO->new( -format => 'psiblast',
+ -writer => $writer,
+ -file => ">custom_writer.out" );
+
+while ( my $result = $in->next_result() ) {
+ printf STDERR "Report %d: $result\n", $in->report_count;
+ $out->write_result($result);
+}
+
+printf STDERR "\n%d Results processed.\n", $in->report_count;
+printf STDERR "Output sent to file: %s\n", $out->file if $out->file;
+
@@ -0,0 +1,93 @@
+#!/usr/bin/perl
+
+# Example usage of a SearchIO::psiblast parser of traditional format Blast reports.
+# This parser represents a new and improved version of Bio/Tools/Blast.pm.
+#
+# Usage:
+# STDIN: stream containing one or more BLAST or PSI-BLAST reports.
+# STDOUT: none, but generates an output file "hitwriter.out"
+# containing tab-delimited data on a per-hit basis.
+# STDERR: Progress info.
+#
+# In this example, we create a SearchIO parser that screens out hits
+# based on expect (or P) scores and a default HitTableWriter. This writer
+# provides the same functionality as the original Bio::Tools::Blast::table()
+# function (i.e., a tab-delimited summary of each hit per row).
+# HitTableWriter, however, is customizable so you can specify just the columns
+# you want to have in the output table.
+#
+# For more documentation about the writer, including
+# a complete list of columns, execute:
+# perldoc Bio::SearchIO::Writer::HitTableWriter.
+#
+# For more documentation about working with Blast result objects,
+# see docs for these modules:
+# Bio::Search::Result::BlastResult
+# Bio::Search::Hit::BlastHit
+# Bio::Search::HSP::BlastHSP
+#
+# For more documentation about the PSI-Blast parser, see docs for
+# Bio::SearchIO::psiblast
+#
+# Author: Steve Chervitz <steve_chervitz@affymetrix.com>
+# Revision: $Id$
+
+use strict;
+use lib '../../../';
+
+use Bio::SearchIO;
+use Bio::SearchIO::Writer::HitTableWriter;
+
+# These are the columns that will be in the output table of BLAST results.
+my @columns = qw(
+ query_name
+ query_length
+ hit_name
+ hit_length
+ num_hsps
+ expect
+ frac_identical_query
+ length_aln_query
+ gaps_total
+ strand_query
+ strand_hit
+ );
+
+print STDERR "\nUsing SearchIO->new()\n";
+
+# Note that all parameters for the $in, $out, and $writer objects are optional.
+# Default in = STDIN; Default out = STDOUT; Default writer = all columns
+# In this example, we're reading from STDIN and writing to a file
+# called "hitwriter.out"
+# TODO: write hitless reports to STDERR and note if filtered.
+my $in = Bio::SearchIO->new( -format => 'psiblast',
+ -signif => 0.1,
+ -verbose=> 0 );
+my $writer = Bio::SearchIO::Writer::HitTableWriter->new( -columns => \@columns
+ );
+my $out = Bio::SearchIO->new( -format => 'psiblast',
+ -writer => $writer,
+ -file => ">hitwriter.out" );
+# Need to keep a separate count of reports with hits
+# to know when to include labels. The first report may be hitless,
+# so we can't use $in->report_coun
+my $hit_count = 0;
+while ( my $blast = $in->next_result() ) {
+ printf STDERR "\nReport %d: $blast\n", $in->report_count;
+
+ if( $blast->hits ) {
+ $hit_count++;
+ $out->write_result($blast, $hit_count==1 );
+ }
+ else {
+ print STDERR "Hitless Blast Report ";
+ print STDERR ($blast->no_hits_found ? "\n" : "(filtered)\n");
+ }
+
+ ## For a simple progress monitor, uncomment this line:
+ #print STDERR "."; print STDERR "\n" if $in->report_count % 50 == 0;
+}
+
+printf STDERR "\n%d Blast report(s) processed.\n", $in->report_count;
+printf STDERR "Output sent to file: %s\n", $out->file if $out->file;
+
@@ -0,0 +1,101 @@
+#!/usr/local/bin/perl
+
+# Example usage of a SearchIO::psiblast parser of traditional format Blast reports.
+# Same as hitwriter.pl but showing how to use the -hit_filter option.
+#
+# Usage:
+# STDIN: stream containing one or more BLAST or PSI-BLAST reports.
+# STDOUT: none, but generates an output file "hitwriter2.out"
+# containing tab-delimited data on a per-hit basis.
+# STDERR: Progress info.
+#
+# For more documentation about the writer, including
+# a complete list of columns, execute:
+# perldoc Bio::SearchIO::Writer::HitTableWriter.
+#
+# For more documentation about working with Blast result objects,
+# see docs foor these modules:
+# Bio::Search::Result::BlastResult
+# Bio::Search::Hit::BlastHit
+# Bio::Search::HSP::BlastHSP
+#
+# For more documentation about the PSI-Blast parser, see docs for
+# Bio::SearchIO::psiblast
+#
+# Author: Steve Chervitz <steve_chervitz@affymetrix.com>
+# Revision: $Id$
+
+use strict;
+
+use lib '../../../';
+
+use Bio::SearchIO;
+use Bio::SearchIO::Writer::HitTableWriter;
+
+# These are the columns that will be in the output table of BLAST results.
+my @columns = qw(
+ query_name
+ query_length
+ hit_name
+ hit_length
+ num_hsps
+ expect
+ frac_identical_query
+ length_aln_query
+ gaps_total
+ strand_query
+ strand_hit
+ );
+
+# These labels just override the defaults for the specified columns.
+# (first column is 1).
+my %labels = (
+ 1 => 'QUERY_GI',
+ 3 => 'HIT_IDENTIFIER',
+ );
+
+
+# BLAST hit filtering function. All hits of each BLAST report must satisfy
+# this criteria to be retained. If a hit fails this test, it is ignored.
+# If all hits of a report fail, the report will be considered hitless,
+# but we can output a "(filtered)" string to indicate that this is the reason.
+my $filt_func = sub{ my $hit=shift;
+ $hit->frac_identical('query') >= 0.5
+ && $hit->frac_aligned_query >= 0.50
+ };
+
+print STDERR "\nUsing SearchIO->new()\n";
+
+# Note that all parameters for the $in, $out, and $writer objects are optional.
+# Default in = STDIN; Default out = STDOUT; Default writer = all columns
+# In this example, we're reading from STDIN and writing to a file
+# called "hitwriter.out"
+
+my $in = Bio::SearchIO->new( -format => 'psiblast',
+ -hit_filter => $filt_func,);
+my $writer = Bio::SearchIO::Writer::HitTableWriter->new( -columns => \@columns,
+ -labels => \%labels
+ );
+my $out = Bio::SearchIO->new( -format => 'psiblast',
+ -writer => $writer,
+ -file => ">hitwriter2.out" );
+my $hit_count = 0;
+while ( my $blast = $in->next_result() ) {
+ printf STDERR "\nReport %d: $blast\n", $in->report_count;
+
+ if( $blast->hits ) {
+ $hit_count++;
+ $out->write_result($blast, $hit_count==1 );
+ }
+ else {
+ print STDERR "Hitless Blast Report: ";
+ print STDERR ($blast->no_hits_found ? "\n" : "(filtered)\n");
+ }
+
+ ## For a simple progress monitor, uncomment this line:
+ #print STDERR "."; print STDERR "\n" if $in->report_count % 50 == 0;
+}
+
+printf STDERR "\n%d Blast report(s) processed.\n", $in->report_count;
+printf STDERR "Output sent to file: %s\n", $out->file if $out->file;
+
Oops, something went wrong.

0 comments on commit 55556b6

Please sign in to comment.