Skip to content

Commit

Permalink
This commit was manufactured by cvs2svn to create tag
Browse files Browse the repository at this point in the history
'join-0-04-to-0-05'.

svn path=/bioperl-live/tags/join-0-04-to-0-05/; revision=948
  • Loading branch information
nobody committed Apr 12, 1999
1 parent de669cf commit 7382c90
Show file tree
Hide file tree
Showing 46 changed files with 3,630 additions and 3,830 deletions.
13 changes: 4 additions & 9 deletions Bio/DB/BioSeqI.pm → Bio/DB/Abstract.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

=head1 NAME
Bio::DB::BioSeqI - Abstract interface for a sequence database
Bio::DB::Abstract - Abstract definition of a database
=head1 SYNOPSIS
Expand All @@ -12,16 +12,12 @@ Bio::DB::BioSeqI - Abstract interface for a sequence database
$seq = $db->get_Seq_by_id('ROA1_HUMAN');
#
# $seq is a Bio::Seq object
#
=head1 DESCRIPTION
This is a pure interface class - in other words, all this does is define
This is a purely abstract class - in other words, all this does is define
methods which other (concrete) classes will actually implement.
The Bio::DB::BioSeqI class defines what methods a generic database class
The Bio::DB::Abstract class defines what methods a generic database class
should have. At the moment it is just the ability to make Bio::Seq objects
from a name (id) or an accession number.
Expand All @@ -47,7 +43,7 @@ The rest of the documentation details each of the object methods. Internal metho
# Let the code begin...


package Bio::DB::BioSeqI;
package Bio::DB::Abstract;
use vars qw($AUTOLOAD @ISA @EXPORT_OK);
use strict;

Expand All @@ -57,7 +53,6 @@ use Bio::Root::Object;

@ISA = qw(Bio::Root::Object Exporter);
@EXPORT_OK = qw();

# new() is inherited from Bio::Root::Object

# _initialize is where the heavy stuff will happen when new is called
Expand Down
114 changes: 19 additions & 95 deletions Bio/DB/GenBank.pm
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# test

#
# BioPerl module for Bio::DB::GenBank
#
Expand Down Expand Up @@ -29,10 +29,6 @@ Bio::DB::GenBank - Database object interface to GenBank
Allows the dynamic retrieval of Sequence objects (Bio::Seq) from the GenBank
database at NCBI, via an Entrez query.
WARNING: Please do NOT spam the Entrez web server with multiple requests.
NCBI offers Batch Entrez for this purpose. Batch Entrez support will likely
be supported in a future version of DB::GenBank.
=head1 FEEDBACK
=head2 Mailing Lists
Expand Down Expand Up @@ -73,13 +69,12 @@ use strict;

# Object preamble - inherits from Bio::DB::Abstract

use Bio::DB::BioSeqI;
use Bio::DB::Abstract;

use Bio::SeqIO;
use IO::Socket;
use IO::File;

@ISA = qw(Bio::DB::BioSeqI Exporter);
@ISA = qw(Bio::DB::Abstract Exporter);
@EXPORT_OK = qw();

# new() is inherited from Bio::DB::Abstract
Expand Down Expand Up @@ -108,7 +103,7 @@ sub _initialize {
sub get_Seq_by_id {

my $self = shift;
my $uid = shift or $self->throw("Must supply an identifier!\n");
my $uid = shift or $self-throw("Must supply an identifier!\n");

my $entrez = "db=n&form=6&dopt=f&html=no&title=no&uid=$uid";

Expand All @@ -130,7 +125,7 @@ sub get_Seq_by_id {
# Fetch a sequence by accession number by delegating to get_Seq_by_id().
# Takes one argument (the accession) and throws if it is missing or false.
sub get_Seq_by_acc {

my $self = shift;
# NOTE(review): the next two statements look like the before/after pair of a
# diff hunk rendered without +/- markers; presumably only one of them belongs
# in the real file - confirm against the repository.
my $acc = shift or $self->throw("Must supply an accesion number!\n");
# NOTE(review): "$self-throw(...)" parses as a subtraction whose right-hand
# operand is a call to a plain function named throw; it is not the method
# call $self->throw(...) used on the line above.
my $acc = shift or $self-throw("Must supply an accesion number!\n");

# The accession is passed unchanged to the id lookup.
return $self->get_Seq_by_id($acc);
}
Expand Down Expand Up @@ -168,82 +163,18 @@ sub get_Stream_by_id {
Returns : a Bio::SeqIO stream object
Args : $ref : a reference to an array of accession numbers for
the desired sequence entries
Note : For GenBank, this just calls the same code for get_Stream_by_id()
Note : For GenPept, this just calls the same code for get_Stream_by_id()
=cut

# Look up entries by accession number; throws if no argument is supplied.
sub get_Stream_by_acc {

my $self = shift;
# NOTE(review): the next two statements look like the before/after pair of a
# diff hunk rendered without +/- markers (they differ only in the spelling of
# "accession" in the message); presumably only one belongs in the real file.
my $acc = shift or $self->throw("Must supply an accession number!\n");
my $acc = shift or $self->throw("Must supply an accesion number!\n");

# NOTE(review): the POD for this method says it shares code with
# get_Stream_by_id(), but the call below goes to get_Seq_by_id() - confirm
# which one is intended.
return $self->get_Seq_by_id($acc);
}

=head2 get_Stream_by_batch
Title : get_Stream_by_batch
Usage : $seq = $db->get_Stream_by_batch($ref);
Function: Retrieves Seq objects from Entrez 'en masse', rather than one
at a time. For large numbers of sequences, this is far superior
to get_Stream_by_[id/acc]().
Example :
Returns : a Bio::SeqIO stream object
Args : $ref : either an array reference, a filename, or a filehandle
from which to get the list of unique id's/accession numbers.
=cut

# get_Stream_by_batch
#
# Sends a list of ids/accession numbers to the Entrez batch query CGI in a
# single POST and wraps the response socket in a Fasta Bio::SeqIO stream.
#
# Args    : $ref - an array reference of ids, a filename (plain string)
#           naming a file with one id per line, or an open filehandle
#           (GLOB reference or IO::Handle-derived object) to read ids from.
# Returns : a Bio::SeqIO object reading from the socket, or undef when the
#           HTTP status line of the response is not a 2xx code.
# Throws  : when no argument is given, the id file cannot be opened, the
#           argument is of an unsupported type, no ids are found, or a
#           response line starts with "ERROR".
sub get_Stream_by_batch {
    my $self  = shift;
    my $ref   = shift or $self->throw("Must supply an argument!\n");
    my $which = ref($ref);

    # Collect the raw ids.  Array references are used directly: writing them
    # to a temporary file and reading that handle back without rewinding it
    # yields nothing.
    my @ids;
    if ( $which eq 'ARRAY' ) {
        @ids = @{$ref};
    }
    elsif ( $which eq '' ) {    # $ref is a filename
        my $fh = IO::File->new($ref, "r")
            or $self->throw("Can't open id list file '$ref' ($!)\n");
        @ids = <$fh>;
        $fh->close;
    }
    elsif ( $which eq 'GLOB' or UNIVERSAL::isa($ref, 'IO::Handle') ) {
        @ids = <$ref>;          # $ref is a filehandle
    }
    else {
        $self->throw("Argument must be an array reference, a filename, or a filehandle!\n");
    }

    # Strip line endings and drop empty entries.  Filtering on chomp's return
    # value would discard a final line that lacks a trailing newline.
    my @clean;
    for my $id (@ids) {
        next unless defined $id;
        $id =~ s/[\015\012]+\z//;
        push @clean, $id if length $id;
    }
    $self->throw("No ids/accession numbers supplied!\n") unless @clean;

    my $wwwbuf = "DB=n&REQUEST_TYPE=LIST_OF_GIS&FORMAT=1&HTML=FALSE&SAVETO=FALSE&NOHEADER=TRUE&UID=" . join(',', @clean);

    my $sock = $self->_get_sock();

    # Print to the socket explicitly; select()ing it would leave it as the
    # default output handle for the rest of the program.
    print {$sock} join("\015\012",
        "POST /cgi-bin/Entrez/qserver.cgi HTTP/1.0",
        "Host: www.ncbi.nlm.nih.gov",
        "User-Agent: $0::Bio::DB::GenBank",
        "Connection: Keep-Alive",
        "Content-type: application/x-www-form-urlencoded",
        "Content-length: " . length($wwwbuf),
        "", ""), $wwwbuf;

    # Skip the HTTP header and preamble up to the marker that precedes the
    # sequence data.
    while ( defined( my $line = <$sock> ) ) {
        # [^\012]* lets the reason phrase ("OK", "Not Found", ...) be any
        # length; a single-character class never matches a real status line.
        if ( $line =~ m,^HTTP/\d+\.\d+\s+(\d+)[^\012]*\012, ) {
            my $code = $1;
            return undef unless $code =~ /^2/;
        }
        $self->throw("Entrez Error - check query sequences!\n") if $line =~ m/^ERROR/i;
        last if $line =~ m/Batch Entrez results/;
    }

    return Bio::SeqIO->new('-fh' => $sock, '-format' => 'Fasta');

}



sub _get_stream {

my($self, $entrez) = @_;
Expand All @@ -252,11 +183,20 @@ sub _get_stream {
# Gisle Aas and Martijn Koster. They copyleft'ed it, but we should give
# them full credit for this little diddy.

my $sock = $self->_get_sock();
my $sock = IO::Socket::INET->new(PeerAddr => 'www3.ncbi.nlm.nih.gov',
PeerPort => 80,
Proto => 'tcp',
Timeout => 60
);
unless ($sock) {
$@ =~ s/^.*?: //;
$self->throw("Can't connect to GenBank ($@)\n");
}
$sock->autoflush(); # just for safety's sake if they have old IO::Socket

print $sock join("\015\012" =>
"GET /htbin-post/Entrez/query?$entrez HTTP/1.0",
"Host: www.ncbi.nlm.nih.gov",
"Host: www3.ncbi.nlm.nih.gov",
"User-Agent: $0::Bio::DB::GenBank",
"", "");

Expand All @@ -269,24 +209,8 @@ sub _get_stream {
last if m/^------/; # Kludgy, but it's how L. Stein does Boulder too
}

return Bio::SeqIO->new('-fh' => $sock, '-format' => 'Fasta');

}

# Open a TCP connection to www.ncbi.nlm.nih.gov on port 80 with a 60 second
# timeout; throws via $self->throw() if the socket cannot be created.
sub _get_sock {
my $self = shift;
my $sock = IO::Socket::INET->new(PeerAddr => 'www.ncbi.nlm.nih.gov',
PeerPort => 80,
Proto => 'tcp',
Timeout => 60
);
unless ($sock) {
# Drop everything up to the first ": " of the error text in $@ before
# reporting it.
$@ =~ s/^.*?: //;
$self->throw("Can't connect to GenBank ($@)\n");
}
$sock->autoflush(); # just for safety's sake if they have old IO::Socket
# NOTE(review): as rendered, the return below makes the later
# "return $sock;" unreachable. It looks like a line from a neighbouring diff
# hunk interleaved without +/- markers - confirm against the repository
# which return belongs to this sub.
return Bio::SeqIO->new(-fh => $sock, -format => 'Fasta');

return $sock;
}


Expand Down
121 changes: 17 additions & 104 deletions Bio/DB/GenPept.pm
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@ Bio::DB::GenPept - Database object interface to GenPept
Allows the dynamic retrieval of Sequence objects (Bio::Seq) from the GenPept
database at NCBI, via an Entrez query.
WARNING: Please do NOT spam the Entrez web server with multiple requests.
NCBI offers Batch Entrez for this purpose. Batch Entrez support will likely
be supported in a future version of DB::GenPept.
=head1 FEEDBACK
=head2 Mailing Lists
Expand Down Expand Up @@ -71,17 +67,17 @@ package Bio::DB::GenPept;
use vars qw($AUTOLOAD @ISA @EXPORT_OK);
use strict;

# Object preamble - inherits from Bio::DB::BioSeqI
# Object preamble - inherits from Bio::DB::Abstract

use Bio::DB::BioSeqI;
use Bio::DB::Abstract;

use Bio::SeqIO;
use IO::Socket;

@ISA = qw(Bio::DB::BioSeqI Exporter);
@ISA = qw(Bio::DB::Abstract Exporter);
@EXPORT_OK = qw();

# new() is inherited from Bio::DB::BioSeqI
# new() is inherited from Bio::DB::Abstract

# _initialize is where the heavy stuff will happen when new is called

Expand Down Expand Up @@ -179,68 +175,6 @@ sub get_Stream_by_acc {
return $self->get_Seq_by_id($acc);
}

=head2 get_Stream_by_batch
Title : get_Stream_by_batch
Usage : $seq = $db->get_Stream_by_batch($ref);
Function: Retrieves Seq objects from Entrez 'en masse', rather than one
at a time. For large numbers of sequences, this is far superior
to get_Stream_by_[id/acc]().
Example :
Returns : a Bio::SeqIO stream object
Args : $ref : either an array reference, a filename, or a filehandle
from which to get the list of unique id's/accession numbers.
=cut

# get_Stream_by_batch
#
# Sends a list of ids/accession numbers to the Entrez batch query CGI in a
# single POST and wraps the response socket in a Fasta Bio::SeqIO stream.
#
# Args    : $ref - an array reference of ids, a filename (plain string)
#           naming a file with one id per line, or an open filehandle
#           (GLOB reference or IO::Handle-derived object) to read ids from.
# Returns : a Bio::SeqIO object reading from the socket, or undef when the
#           HTTP status line of the response is not a 2xx code.
# Throws  : when no argument is given, the id file cannot be opened, the
#           argument is of an unsupported type, no ids are found, or a
#           response line starts with "ERROR".
#
# NOTE(review): the query string uses "DB=n" and the User-Agent header names
# Bio::DB::GenBank although this sub lives in the GenPept module; both are
# kept exactly as they were - confirm whether they are intentional.
sub get_Stream_by_batch {
    my $self  = shift;
    my $ref   = shift or $self->throw("Must supply an argument!\n");
    my $which = ref($ref);

    # Collect the raw ids.  Array references are used directly: writing them
    # to a temporary file and reading that handle back without rewinding it
    # yields nothing.
    my @ids;
    if ( $which eq 'ARRAY' ) {
        @ids = @{$ref};
    }
    elsif ( $which eq '' ) {    # $ref is a filename
        my $fh = IO::File->new($ref, "r")
            or $self->throw("Can't open id list file '$ref' ($!)\n");
        @ids = <$fh>;
        $fh->close;
    }
    elsif ( $which eq 'GLOB' or UNIVERSAL::isa($ref, 'IO::Handle') ) {
        @ids = <$ref>;          # $ref is a filehandle
    }
    else {
        $self->throw("Argument must be an array reference, a filename, or a filehandle!\n");
    }

    # Strip line endings and drop empty entries.  Filtering on chomp's return
    # value would discard a final line that lacks a trailing newline.
    my @clean;
    for my $id (@ids) {
        next unless defined $id;
        $id =~ s/[\015\012]+\z//;
        push @clean, $id if length $id;
    }
    $self->throw("No ids/accession numbers supplied!\n") unless @clean;

    my $wwwbuf = "DB=n&REQUEST_TYPE=LIST_OF_GIS&FORMAT=1&HTML=FALSE&SAVETO=FALSE&NOHEADER=TRUE&UID=" . join(',', @clean);

    my $sock = $self->_get_sock();

    # Print to the socket explicitly; select()ing it would leave it as the
    # default output handle for the rest of the program.
    print {$sock} join("\015\012",
        "POST /cgi-bin/Entrez/qserver.cgi HTTP/1.0",
        "Host: www.ncbi.nlm.nih.gov",
        "User-Agent: $0::Bio::DB::GenBank",
        "Connection: Keep-Alive",
        "Content-type: application/x-www-form-urlencoded",
        "Content-length: " . length($wwwbuf),
        "", ""), $wwwbuf;

    # Skip the HTTP header and preamble up to the marker that precedes the
    # sequence data.
    while ( defined( my $line = <$sock> ) ) {
        # [^\012]* lets the reason phrase ("OK", "Not Found", ...) be any
        # length; a single-character class never matches a real status line.
        if ( $line =~ m,^HTTP/\d+\.\d+\s+(\d+)[^\012]*\012, ) {
            my $code = $1;
            return undef unless $code =~ /^2/;
        }
        $self->throw("Entrez Error - check query sequences!\n") if $line =~ m/^ERROR/i;
        last if $line =~ m/Batch Entrez results/;
    }

    return Bio::SeqIO->new('-fh' => $sock, '-format' => 'Fasta');

}

sub _get_stream {

my($self, $entrez) = @_;
Expand All @@ -249,12 +183,21 @@ sub _get_stream {
# Gisle Aas and Martijn Koster. They copyleft'ed it, but we should give
# them full credit for this little diddy.

my $sock = $self->_get_sock();
my $sock = IO::Socket::INET->new(PeerAddr => 'www3.ncbi.nlm.nih.gov',
PeerPort => 80,
Proto => 'tcp',
Timeout => 60
);
unless ($sock) {
$@ =~ s/^.*?: //;
$self->throw("Can't connect to GenPept ($@)\n");
}
$sock->autoflush(); # just for safety's sake if they have old IO::Socket

print $sock join("\015\012" =>
"GET /htbin-post/Entrez/query?$entrez HTTP/1.0",
"Host: www.ncbi.nlm.nih.gov",
"User-Agent: $0::Bio::DB::GenPept",
"Host: www3.ncbi.nlm.nih.gov",
"User-Agent: $0::Bio::DB::GenBank",
"", "");

while(<$sock>) {
Expand All @@ -266,41 +209,11 @@ sub _get_stream {
last if m/^------/; # Kludgy, but it's how L. Stein does Boulder too
}

return Bio::SeqIO->new('-fh' => $sock, '-format' => 'Fasta');
return Bio::SeqIO->new(-fh => $sock, -format => 'Fasta');

}

# _get_sock
#
# Opens a TCP connection to www.ncbi.nlm.nih.gov on port 80 with a 60 second
# timeout.
#
# Returns : the connected IO::Socket::INET object.
# Throws  : via $self->throw() when the socket cannot be created.
sub _get_sock {
    my $self = shift;

    my $sock = IO::Socket::INET->new(
        PeerAddr => 'www.ncbi.nlm.nih.gov',
        PeerPort => 80,
        Proto    => 'tcp',
        Timeout  => 60,
    );
    unless ($sock) {
        # Drop everything up to the first ": " of the error text in $@ so
        # only the trailing reason is reported.
        $@ =~ s/^.*?: //;
        # This sub belongs to the GenPept module, so name GenPept (not
        # GenBank) in the failure message.
        $self->throw("Can't connect to GenPept ($@)\n");
    }
    $sock->autoflush();    # just for safety's sake if they have old IO::Socket

    return $sock;
}

1;
__END__
Loading

0 comments on commit 7382c90

Please sign in to comment.