Permalink
Browse files

This commit was manufactured by cvs2svn to create tag 'join-0-04-to-0-05'.

svn path=/bioperl-live/tags/join-0-04-to-0-05/; revision=948
  • Loading branch information...
1 parent de669cf commit 7382c905e89db840b655fbd46447482ca7bb1caa nobody committed Apr 12, 1999
@@ -2,7 +2,7 @@
=head1 NAME
-Bio::DB::BioSeqI - Abstract interface for a sequence database
+Bio::DB::Abstract - Abstract definition of a database
=head1 SYNOPSIS
@@ -12,16 +12,12 @@ Bio::DB::BioSeqI - Abstract interface for a sequence database
$seq = $db->get_Seq_by_id('ROA1_HUMAN');
- #
- # $seq is a Bio::Seq object
- #
-
=head1 DESCRIPTION
-This is a pure interface class - in other words, all this does is define
+This is a purely abstract class - in other words, all this does is define
methods which other (concrete) classes will actually implement.
-The Bio::DB::BioSeqI class defines what methods a generic database class
+The Bio::DB::Abstract class defines what methods a generic database class
should have. At the moment it is just the ability to make Bio::Seq objects
from a name (id) or an accession number.
@@ -47,7 +43,7 @@ The rest of the documentation details each of the object methods. Internal metho
# Let the code begin...
-package Bio::DB::BioSeqI;
+package Bio::DB::Abstract;
use vars qw($AUTOLOAD @ISA @EXPORT_OK);
use strict;
@@ -57,7 +53,6 @@ use Bio::Root::Object;
@ISA = qw(Bio::Root::Object Exporter);
@EXPORT_OK = qw();
-
# new() is inherited from Bio::Root::Object
# _initialize is where the heavy stuff will happen when new is called
View
@@ -1,4 +1,4 @@
-# test
+
#
# BioPerl module for Bio::DB::GenBank
#
@@ -29,10 +29,6 @@ Bio::DB::GenBank - Database object interface to GenBank
Allows the dynamic retrieval of Sequence objects (Bio::Seq) from the GenBank
database at NCBI, via an Entrez query.
-WARNING: Please do NOT spam the Entrez web server with multiple requests.
-NCBI offers Batch Entrez for this purpose. Batch Entrez support will likely
-be supported in a future version of DB::GenBank.
-
=head1 FEEDBACK
=head2 Mailing Lists
@@ -73,13 +69,12 @@ use strict;
# Object preamble - inherits from Bio::DB::Abstract
-use Bio::DB::BioSeqI;
+use Bio::DB::Abstract;
use Bio::SeqIO;
use IO::Socket;
-use IO::File;
-@ISA = qw(Bio::DB::BioSeqI Exporter);
+@ISA = qw(Bio::DB::Abstract Exporter);
@EXPORT_OK = qw();
# new() is inherited from Bio::DB::Abstract
@@ -108,7 +103,7 @@ sub _initialize {
sub get_Seq_by_id {
my $self = shift;
- my $uid = shift or $self->throw("Must supply an identifier!\n");
+ my $uid = shift or $self-throw("Must supply an identifier!\n");
my $entrez = "db=n&form=6&dopt=f&html=no&title=no&uid=$uid";
@@ -130,7 +125,7 @@ sub get_Seq_by_id {
sub get_Seq_by_acc {
my $self = shift;
- my $acc = shift or $self->throw("Must supply an accesion number!\n");
+ my $acc = shift or $self-throw("Must supply an accesion number!\n");
return $self->get_Seq_by_id($acc);
}
@@ -168,82 +163,18 @@ sub get_Stream_by_id {
Returns : a Bio::SeqIO stream object
Args : $ref : a reference to an array of accession numbers for
the desired sequence entries
- Note : For GenBank, this just calls the same code for get_Stream_by_id()
+ Note : For GenPept, this just calls the same code for get_Stream_by_id()
=cut
sub get_Stream_by_acc {
my $self = shift;
- my $acc = shift or $self->throw("Must supply an accession number!\n");
+ my $acc = shift or $self->throw("Must supply an accesion number!\n");
return $self->get_Seq_by_id($acc);
}
-=head2 get_Stream_by_batch
-
- Title : get_Stream_by_batch
- Usage : $seq = $db->get_Stream_by_batch($ref);
- Function: Retrieves Seq objects from Entrez 'en masse', rather than one
- at a time. For large numbers of sequences, this is far superior
- than get_Stream_by_[id/acc]().
- Example :
- Returns : a Bio::SeqIO stream object
- Args : $ref : either an array reference, a filename, or a filehandle
- from which to get the list of unique id's/accession numbers.
-
-
-=cut
-
-sub get_Stream_by_batch {
- my $self = shift;
- my $ref = shift or $self->throw("Must supply an argument!\n");
- my $which = ref($ref);
- my $fh;
- my $filename;
- if ( $which eq 'ARRAY') { # $ref is an array reference
- $fh = new_tmpfile IO::File;
- for ( @{$ref} ) {
- print $fh $_ . "\n";
- }
- $filename = "tempfile.txt";
- } elsif ( $which eq '') { # $ref is a filename
- $fh = new IO::File $ref, "r";
- $filename = $ref;
- } elsif ( $which eq 'GLOB' or $which eq 'IO::File') { # $ref is assumed to be a filehandle
- $fh = $ref;
- $filename = "tempfile.txt";
- }
-
- my $wwwbuf = "DB=n&REQUEST_TYPE=LIST_OF_GIS&FORMAT=1&HTML=FALSE&SAVETO=FALSE&NOHEADER=TRUE&UID=" . join(',', grep { chomp; } <$fh> );
-
- my $sock = $self->_get_sock();
-
- select $sock;
- print "POST /cgi-bin/Entrez/qserver.cgi HTTP/1.0\015\012";
- print "Host: www.ncbi.nlm.nih.gov\015\012";
- print "User-Agent: $0::Bio::DB::GenBank\015\012";
- print "Connection: Keep-Alive\015\012";
- print "Content-type: application/x-www-form-urlencoded\015\012";
- print "Content-length: " . length($wwwbuf) . "\015\012";
- print "\015\012";
- print $wwwbuf;
-
- while (<$sock>) {
- if ( m,^HTTP/\d+\.\d+\s+(\d+)[^\012]\012, ) {
- my $code = $1;
- return undef unless $code =~ /^2/;
- }
- $self->throw("Entrez Error - check query sequences!\n") if m/^ERROR/i;
- last if m/Batch Entrez results/;
- }
-
- return Bio::SeqIO->new('-fh' => $sock, '-format' => 'Fasta');
-
-}
-
-
-
sub _get_stream {
my($self, $entrez) = @_;
@@ -252,11 +183,20 @@ sub _get_stream {
# Gisle Aas and Martijn Koster. They copyleft'ed it, but we should give
# them full credit for this little ditty.
- my $sock = $self->_get_sock();
+ my $sock = IO::Socket::INET->new(PeerAddr => 'www3.ncbi.nlm.nih.gov',
+ PeerPort => 80,
+ Proto => 'tcp',
+ Timeout => 60
+ );
+ unless ($sock) {
+ $@ =~ s/^.*?: //;
+ $self->throw("Can't connect to GenBank ($@)\n");
+ }
+ $sock->autoflush(); # just for safety's sake if they have old IO::Socket
print $sock join("\015\012" =>
"GET /htbin-post/Entrez/query?$entrez HTTP/1.0",
- "Host: www.ncbi.nlm.nih.gov",
+ "Host: www3.ncbi.nlm.nih.gov",
"User-Agent: $0::Bio::DB::GenBank",
"", "");
@@ -269,24 +209,8 @@ sub _get_stream {
last if m/^------/; # Kludgy, but it's how L. Stein does Boulder too
}
- return Bio::SeqIO->new('-fh' => $sock, '-format' => 'Fasta');
-
-}
-
-sub _get_sock {
- my $self = shift;
- my $sock = IO::Socket::INET->new(PeerAddr => 'www.ncbi.nlm.nih.gov',
- PeerPort => 80,
- Proto => 'tcp',
- Timeout => 60
- );
- unless ($sock) {
- $@ =~ s/^.*?: //;
- $self->throw("Can't connect to GenBank ($@)\n");
- }
- $sock->autoflush(); # just for safety's sake if they have old IO::Socket
+ return Bio::SeqIO->new(-fh => $sock, -format => 'Fasta');
- return $sock;
}
View
@@ -29,10 +29,6 @@ Bio::DB::GenPept - Database object interface to GenPept
Allows the dynamic retrieval of Sequence objects (Bio::Seq) from the GenPept
database at NCBI, via an Entrez query.
-WARNING: Please do NOT spam the Entrez web server with multiple requests.
-NCBI offers Batch Entrez for this purpose. Batch Entrez support will likely
-be supported in a future version of DB::GenPept.
-
=head1 FEEDBACK
=head2 Mailing Lists
@@ -71,17 +67,17 @@ package Bio::DB::GenPept;
use vars qw($AUTOLOAD @ISA @EXPORT_OK);
use strict;
-# Object preamble - inherits from Bio::DB::BioSeqI
+# Object preamble - inherits from Bio::DB::Abstract
-use Bio::DB::BioSeqI;
+use Bio::DB::Abstract;
use Bio::SeqIO;
use IO::Socket;
-@ISA = qw(Bio::DB::BioSeqI Exporter);
+@ISA = qw(Bio::DB::Abstract Exporter);
@EXPORT_OK = qw();
-# new() is inherited from Bio::DB::BioSeqI
+# new() is inherited from Bio::DB::Abstract
# _initialize is where the heavy stuff will happen when new is called
@@ -179,68 +175,6 @@ sub get_Stream_by_acc {
return $self->get_Seq_by_id($acc);
}
-=head2 get_Stream_by_batch
-
- Title : get_Stream_by_batch
- Usage : $seq = $db->get_Stream_by_batch($ref);
- Function: Retrieves Seq objects from Entrez 'en masse', rather than one
- at a time. For large numbers of sequences, this is far superior
- than get_Stream_by_[id/acc]().
- Example :
- Returns : a Bio::SeqIO stream object
- Args : $ref : either an array reference, a filename, or a filehandle
- from which to get the list of unique id's/accession numbers.
-
-
-=cut
-
-sub get_Stream_by_batch {
- my $self = shift;
- my $ref = shift or $self->throw("Must supply an argument!\n");
- my $which = ref($ref);
- my $fh;
- my $filename;
- if ( $which eq 'ARRAY') { # $ref is an array reference
- $fh = new_tmpfile IO::File;
- for ( @{$ref} ) {
- print $fh $_ . "\n";
- }
- $filename = "tempfile.txt";
- } elsif ( $which eq '') { # $ref is a filename
- $fh = new IO::File $ref, "r";
- $filename = $ref;
- } elsif ( $which eq 'GLOB' or $which eq 'IO::File') { # $ref is assumed to be a filehandle
- $fh = $ref;
- $filename = "tempfile.txt";
- }
-
- my $wwwbuf = "DB=n&REQUEST_TYPE=LIST_OF_GIS&FORMAT=1&HTML=FALSE&SAVETO=FALSE&NOHEADER=TRUE&UID=" . join(',', grep { chomp; } <$fh> );
-
- my $sock = $self->_get_sock();
-
- select $sock;
- print "POST /cgi-bin/Entrez/qserver.cgi HTTP/1.0\015\012";
- print "Host: www.ncbi.nlm.nih.gov\015\012";
- print "User-Agent: $0::Bio::DB::GenBank\015\012";
- print "Connection: Keep-Alive\015\012";
- print "Content-type: application/x-www-form-urlencoded\015\012";
- print "Content-length: " . length($wwwbuf) . "\015\012";
- print "\015\012";
- print $wwwbuf;
-
- while (<$sock>) {
- if ( m,^HTTP/\d+\.\d+\s+(\d+)[^\012]\012, ) {
- my $code = $1;
- return undef unless $code =~ /^2/;
- }
- $self->throw("Entrez Error - check query sequences!\n") if m/^ERROR/i;
- last if m/Batch Entrez results/;
- }
-
- return Bio::SeqIO->new('-fh' => $sock, '-format' => 'Fasta');
-
-}
-
sub _get_stream {
my($self, $entrez) = @_;
@@ -249,12 +183,21 @@ sub _get_stream {
# Gisle Aas and Martijn Koster. They copyleft'ed it, but we should give
# them full credit for this little ditty.
- my $sock = $self->_get_sock();
+ my $sock = IO::Socket::INET->new(PeerAddr => 'www3.ncbi.nlm.nih.gov',
+ PeerPort => 80,
+ Proto => 'tcp',
+ Timeout => 60
+ );
+ unless ($sock) {
+ $@ =~ s/^.*?: //;
+ $self->throw("Can't connect to GenPept ($@)\n");
+ }
+ $sock->autoflush(); # just for safety's sake if they have old IO::Socket
print $sock join("\015\012" =>
"GET /htbin-post/Entrez/query?$entrez HTTP/1.0",
- "Host: www.ncbi.nlm.nih.gov",
- "User-Agent: $0::Bio::DB::GenPept",
+ "Host: www3.ncbi.nlm.nih.gov",
+ "User-Agent: $0::Bio::DB::GenBank",
"", "");
while(<$sock>) {
@@ -266,41 +209,11 @@ sub _get_stream {
last if m/^------/; # Kludgy, but it's how L. Stein does Boulder too
}
- return Bio::SeqIO->new('-fh' => $sock, '-format' => 'Fasta');
+ return Bio::SeqIO->new(-fh => $sock, -format => 'Fasta');
}
-sub _get_sock {
- my $self = shift;
- my $sock = IO::Socket::INET->new(PeerAddr => 'www.ncbi.nlm.nih.gov',
- PeerPort => 80,
- Proto => 'tcp',
- Timeout => 60
- );
- unless ($sock) {
- $@ =~ s/^.*?: //;
- $self->throw("Can't connect to GenBank ($@)\n");
- }
- $sock->autoflush(); # just for safety's sake if they have old IO::Socket
-
- return $sock;
-}
1;
__END__
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Oops, something went wrong. Retry.

0 comments on commit 7382c90

Please sign in to comment.