Skip to content

Commit

Permalink
Merge branch 'master' of github.com:bioperl/bioperl-live
Browse files Browse the repository at this point in the history
  • Loading branch information
majensen committed Mar 5, 2015
2 parents a6172de + 0c08f33 commit 557d24e
Show file tree
Hide file tree
Showing 56 changed files with 858 additions and 389 deletions.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -7,6 +7,7 @@
Build
Build.bat
_build*
_Inline
pm_to_blib*
*.tar.gz
.lwpcookies
Expand Down
6 changes: 6 additions & 0 deletions .travis.yml
Expand Up @@ -26,6 +26,8 @@ install:
- "cpanm Bio::Phylo | tail -n 1"
- "cpanm Test::Weaken | tail -n 1"
- "cpanm Test::Memory::Cycle | tail -n 1"
#Test coverage from Coveralls
#- cpanm --quiet --notest Devel::Cover::Report::Coveralls
#for some reason tests and deps aren't skipped here. Will have to look into it more...
#git repos, seems to only work for simple checkouts, so pure perl only (TODO: look into before_script for more detail)
- "git clone https://github.com/bioperl/Bio-Root.git; export PERL5LIB=$( pwd )/Bio-Root/lib:$PERL5LIB"
Expand All @@ -34,6 +36,10 @@ install:

script:
- "./Build test"
#Devel::Cover coverage options are: statement, branch, condition, path, subroutine, pod, time, all and none
#- "./Build build && cover -test -report coveralls" #complete version coverage test
#- PERL5OPT=-MDevel::Cover=+ignore,prove,-coverage,statement,subroutine prove -lr t #limited version coverage test
#- cover -report coveralls

#TODO - send emails to bioperl-guts-l
notifications:
Expand Down
2 changes: 2 additions & 0 deletions AUTHORS
Expand Up @@ -20,6 +20,8 @@

=item * Brian Osborne <bosborne at bioteam.net>

=item * Francisco J. Ossandon <fco.j.ossandon at gmail.com>

=item * Jason Stajich <jason at bioperl.org>

=item * Lincoln Stein <lstein at cshl.org>
Expand Down
6 changes: 3 additions & 3 deletions Bio/DB/BioFetch.pm
Expand Up @@ -26,14 +26,14 @@ Bio::DB::BioFetch - Database object interface to BioFetch retrieval
$bf = Bio::DB::BioFetch->new();
$seq = $bf->get_Seq_by_id('BUM'); # EMBL or SWALL ID
$seq = $bf->get_Seq_by_id('HSFOS'); # EMBL or SWALL ID
# change formats, storage procedures
$bf = Bio::DB::BioFetch->new(-format => 'fasta',
-retrievaltype => 'tempfile',
-db => 'EMBL');
$stream = $bf->get_Stream_by_id(['BUM','J00231']);
$stream = $bf->get_Stream_by_id(['HSFOS','J00231']);
while (my $s = $stream->next_seq) {
print $s->seq,"\n";
}
Expand Down Expand Up @@ -135,7 +135,7 @@ BEGIN {
fasta => 'fasta',
namespace => 'uniprot',
},
'uniprot' => {
'uniprot' => {
default => 'swiss',
swissprot => 'swiss',
fasta => 'fasta',
Expand Down
2 changes: 1 addition & 1 deletion Bio/DB/EMBL.pm
Expand Up @@ -23,7 +23,7 @@ Bio::DB::EMBL - Database object interface for EMBL entry retrieval
$embl = Bio::DB::EMBL->new();
# remember that EMBL_ID does not equal GenBank_ID!
$seq = $embl->get_Seq_by_id('BUM'); # EMBL ID
$seq = $embl->get_Seq_by_id('HSFOS'); # EMBL ID
print "cloneid is ", $seq->id, "\n";
# or changeing to accession number and Fasta format ...
Expand Down
8 changes: 3 additions & 5 deletions Bio/DB/Fasta.pm
Expand Up @@ -243,7 +243,6 @@ sub _calculate_offsets {
return \%offsets;
}


=head2 seq
Title : seq, sequence, subseq
Expand Down Expand Up @@ -289,8 +288,8 @@ sub subseq {

seek($fh, $filestart,0);
read($fh, $data, $filestop-$filestart+1);
$data =~ s/\n//g;
$data =~ s/\r//g;

$data = Bio::DB::IndexedBase::_strip_crnl($data);

if ($strand == -1) {
# Reverse-complement the sequence
Expand Down Expand Up @@ -332,8 +331,7 @@ sub header {
read($fh, $data, $headerlen);
# On Windows chomp remove '\n' but leaves '\r'
# when reading '\r\n' in binary mode
$data =~ s/\n//g;
$data =~ s/\r//g;
$data = Bio::DB::IndexedBase::_strip_crnl($data);
substr($data, 0, 1) = '';
return $data;
}
Expand Down
4 changes: 2 additions & 2 deletions Bio/DB/Flat.pm
Expand Up @@ -23,9 +23,9 @@ Bio::DB::Flat - Interface for indexed flat files
-write_flag => 1);
$db->build_index('/usr/share/embl/primate.embl',
'/usr/share/embl/protists.embl');
$seq = $db->get_Seq_by_id('BUM');
$seq = $db->get_Seq_by_id('HSFOS');
@sequences = $db->get_Seq_by_acc('DIV' => 'primate');
$raw = $db->fetch_raw('BUM');
$raw = $db->fetch_raw('HSFOS');
=head1 DESCRIPTION
Expand Down
46 changes: 44 additions & 2 deletions Bio/DB/IndexedBase.pm
Expand Up @@ -241,7 +241,7 @@ methods. Internal methods are usually preceded with a _
package Bio::DB::IndexedBase;

BEGIN {
@AnyDBM_File::ISA = qw(DB_File GDBM_File NDBM_File SDBM_File)
@AnyDBM_File::ISA = qw(DB_File GDBM_File NDBM_File SDBM_File)
if(!$INC{'AnyDBM_File.pm'});
}

Expand All @@ -268,6 +268,46 @@ use constant DIE_ON_MISSMATCHED_LINES => 1;
# you can avoid dying if you want but you may get incorrect results


# Compiling the below regular expressions speeds up the Pure Perl
# seq/subseq() from Bio::DB::Fasta by about 7% from 7.76s to 7.22s
# over 32358 calls on Variant Effect Prediction data.
my $nl = qr/\n/;
my $cr = qr/\r/;

# Remove carriage returns (\r) and newlines (\n) from a string. When
# called from subseq, this can take a signficiant portion of time, in
# Variant Effect Prediction. Therefore we compile the match portion.
sub _strip_crnl {
my $str = shift;
$str =~ s/$nl//g;
$str =~ s/$cr//g;
return $str;
}

# C can do perfrom _strip_crnl much faster. But this requires the
# Inline::C module which we don't require people to have. So we make
# this optional by wrapping the C code in an eval. If the eval works,
# the Perl strip_crnl() function is overwritten.
eval q{
use Inline C => <<'END_OF_C_CODE';
/* Strip all new line (\n) and carriage return (\r) characters
from string str
*/
char* _strip_crnl(char* str) {
char *s;
char *s2 = str;
for (s = str; *s; *s++) {
if (*s != '\n' && *s != '\r') {
*s2++ = *s;
}
}
*s2 = '\0';
return str;
}
END_OF_C_CODE
};


=head2 new
Title : new
Expand Down Expand Up @@ -682,6 +722,8 @@ sub _close_index {
return 1;
}

# Compiling the below regular expression speeds up _parse_compound_id
my $compound_id = qr/^ (.+?) (?:\:([\d_]+)(?:,|-|\.\.)([\d_]+))? (?:\/(.+))? $/x;

sub _parse_compound_id {
# Handle compound IDs:
Expand All @@ -699,7 +741,7 @@ sub _parse_compound_id {
if ( (not defined $start ) &&
(not defined $stop ) &&
(not defined $strand) &&
($id =~ /^ (.+?) (?:\:([\d_]+)(?:,|-|\.\.)([\d_]+))? (?:\/(.+))? $/x) ) {
($id =~ m{$compound_id}) ) {
# Start, stop and strand not provided and ID looks like a compound ID
($id, $start, $stop, $strand) = ($1, $2, $3, $4);
}
Expand Down
9 changes: 4 additions & 5 deletions Bio/DB/Qual.pm
Expand Up @@ -335,8 +335,7 @@ sub subqual {
read($fh, $data, $filestop-$filestart+1);

# Process quality score
$data =~ s/\n//g;
$data =~ s/\r//g;
Bio::DB::IndexedBase::_strip_crnl($data);
my $subqual = 0;
$subqual = 1 if ( $start || $stop );
my @data;
Expand Down Expand Up @@ -379,9 +378,9 @@ sub header {
seek($fh, $offset, 0);
read($fh, $data, $headerlen);
# On Windows chomp remove '\n' but leaves '\r'
# when reading '\r\n' in binary mode
$data =~ s/\n//g;
$data =~ s/\r//g;
# when reading '\r\n' in binary mode,
# _strip_crnl removes both
$data = Bio::DB::IndexedBase::_strip_crnl($data);
substr($data, 0, 1) = '';
return $data;
}
Expand Down
4 changes: 3 additions & 1 deletion Bio/DB/Registry.pm
Expand Up @@ -122,7 +122,9 @@ sub _load_registry {
my $self = shift;
eval { $HOME = (getpwuid($>))[7]; } unless $HOME;
if ($@) {
$self->warn("This Perl doesn't implement function getpwuid(), no \$HOME");
# Windows can have Win32::LoginName to get the Username, so check if it works before giving up
( defined &Win32::LoginName ) ? ( $HOME = Win32::LoginName() )
: $self->warn("This Perl doesn't implement function getpwuid(), no \$HOME");
}
my @ini_files = $self->_get_ini_files();

Expand Down
2 changes: 1 addition & 1 deletion Bio/DB/SeqFeature/Store/LoadHelper.pm
Expand Up @@ -40,7 +40,7 @@ use File::Temp 'tempdir';
use File::Spec;
use Fcntl qw(O_CREAT O_RDWR);

our $VERSION = '1.10';
our $VERSION = '1.11';

my %DBHandles;

Expand Down
3 changes: 3 additions & 0 deletions Bio/DB/Taxonomy/flatfile.pm
Expand Up @@ -99,6 +99,9 @@ $DEFAULT_PARENT_INDEX = 'parents';

$DB_BTREE->{'flags'} = R_DUP; # allow duplicate values in DB_File BTREEs

# 8192 bytes; this seems to work to keep OS X from complaining
$DB_HASH->{'bsize'} = 0x2000;

@DIVISIONS = ([qw(BCT Bacteria)],
[qw(INV Invertebrates)],
[qw(MAM Mammals)],
Expand Down
15 changes: 13 additions & 2 deletions Bio/SeqFeatureI.pm
Expand Up @@ -522,12 +522,13 @@ sub spliced_seq {
my @locset = $self->location->each_Location;
my @locs;
if ( not $nosort ) {
@locs = map { $_->[0] }
# @locs = map { $_->[0] }
# sort so that most negative is first basically to order
# the features on the opposite strand 5'->3' on their strand
# rather than they way most are input which is on the fwd strand

sort { $a->[1] <=> $b->[1] } # Yes Tim, Schwartzian transformation
# sort { $a->[1] <=> $b->[1] } # Yes Tim, Schwartzian transformation
my @proc_locs =
map {
$fstrand = $_->strand unless defined $fstrand;
$mixed = 1 if defined $_->strand && $fstrand != $_->strand;
Expand All @@ -538,6 +539,16 @@ sub spliced_seq {
[ $_, $_->start * ($_->strand || 1) ];
} @locset;

my @sort_locs;
if ( $fstrand == 1 ) {
@sort_locs = sort { $a->[1] <=> $b->[1] } @proc_locs; # Yes Tim, Schwartzian transformation
}elsif ( $fstrand == -1 ){
@sort_locs = sort { $b->[1] <=> $a->[1] } @proc_locs; # Yes Tim, Schwartzian transformation
} else {
@sort_locs = @proc_locs;
}
@locs = map { $_->[0] } @sort_locs;

if ( $mixed ) {
$self->warn( "Mixed strand locations, spliced seq using the "
. "input order rather than trying to sort");
Expand Down
2 changes: 1 addition & 1 deletion Bio/SeqIO/embl.pm
Expand Up @@ -1463,7 +1463,7 @@ sub _write_line_EMBL_regex {

CHUNK: while($line) {
foreach my $pat ($regex, '[,;\.\/-]\s|'.$regex, '[,;\.\/-]|'.$regex) {
if ($line =~ m/^(.{0,$subl})($pat)(.*)/ ) {
if ($line =~ m/^(.{1,$subl})($pat)(.*)/ ) {
my $l = $1.$2;
$l =~ s/#/ /g # remove word wrap protection char '#'
if $pre1 eq "RA ";
Expand Down
18 changes: 16 additions & 2 deletions Bio/SeqIO/genbank.pm
Expand Up @@ -499,16 +499,30 @@ sub next_seq {
}
}

# Comments
# Comments may be plain text or Structured Comments.
# Structured Comments are made up of tag/value pairs and have beginning
# and end delimiters like ##*-Data-START## and ##*-Data-END##
elsif ($line =~ /^COMMENT\s+(\S.*)/) {
if ($annotation) {
my $comment = $1;
while ( defined( $line = $self->_readline ) ) {
last if ($line =~ /^\S/);
$comment .= $line;
}
$comment =~ s/\n/ /g;
$comment =~ s/ +/ /g;
# Structured Comment, do not remove returns in the tabular section
if ( my ( $text, $table )= $comment
=~ /([^#]*)(##\S+Data-START##.+?##\S+Data-END##)/is
) {
$text =~ s/\n/ /g if $text;
$table =~ s/START##/START##\n/;
$table =~ s/^\s+//gm;
$comment = $text . "\n" . $table;
}
# Plain text, remove returns
else {
$comment =~ s/\n/ /g;
}
$annotation->add_Annotation(
'comment',
Bio::Annotation::Comment->new(
Expand Down
8 changes: 3 additions & 5 deletions Bio/Tools/Analysis/Protein/GOR4.pm
Expand Up @@ -153,12 +153,12 @@ use Bio::SeqIO;
use HTTP::Request::Common qw(POST);
use Bio::SeqFeature::Generic;
use Bio::Seq::Meta::Array;

$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;

use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);

use constant MIN_STRUC_LEN => 3;
my $URL = 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_gor4.pl';
my $URL = 'https://npsa-prabi.ibcp.fr/cgi-bin/secpred_sopma.pl';
my $ANALYSIS_NAME = 'GOR4';
my $ANALYSIS_SPEC = {name => 'Gor4', type => 'Protein'};
my $INPUT_SPEC = [
Expand Down Expand Up @@ -366,11 +366,9 @@ sub _run {
my $out = 'http://npsa-pbil.ibcp.fr/'.$next;
my $req2 = HTTP::Request->new(GET=>$out);
my $resp2 = $self->request($req2);
$self->status('COMPLETED') if $resp2 ne '';
$self->status('COMPLETED') if $resp2 ne '';
$self->{'_result'} = $resp2->content;
}




1;
7 changes: 4 additions & 3 deletions Bio/Tools/Analysis/Protein/HNN.pm
Expand Up @@ -154,7 +154,7 @@ use Bio::SeqIO;
use HTTP::Request::Common qw (POST);
use Bio::SeqFeature::Generic;
use Bio::Seq::Meta::Array;

$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;

use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);

Expand Down Expand Up @@ -195,9 +195,10 @@ sub _run {
# delay repeated calls by default by 3 sec, set delay() to change
$self->sleep;
$self->status('TERMINATED_BY_ERROR');
my $request = POST 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_hnn.pl',
my $request = POST 'https://npsa-prabi.ibcp.fr/cgi-bin/secpred_hnn.pl',
Content_Type => 'form-data',
Content => [title => "",
Content => [
title => "",
notice => $self->seq->seq,
ali_width => 70,
];
Expand Down
7 changes: 4 additions & 3 deletions Bio/Tools/Analysis/Protein/Sopma.pm
Expand Up @@ -154,12 +154,12 @@ use Bio::SeqIO;
use HTTP::Request::Common qw (POST);
use Bio::SeqFeature::Generic;
use Bio::Seq::Meta::Array;

$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;

use base qw(Bio::Tools::Analysis::SimpleAnalysisBase);

#extends array for 2struc.
my $URL = 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_sopma.pl';
my $URL = 'https://npsa-prabi.ibcp.fr/cgi-bin/secpred_sopma.pl';
my $ANALYSIS_NAME= 'Sopma';
my $ANALYSIS_SPEC= {name => 'Sopma', type => 'Protein'};
my $INPUT_SPEC = [
Expand Down Expand Up @@ -450,7 +450,7 @@ sub _run {
# delay repeated calls by default by 3 sec, set delay() to change
$self->sleep;
$self->status('TERMINATED_BY_ERROR');
my $request = POST 'http://npsa-pbil.ibcp.fr/cgi-bin/secpred_sopma.pl',
my $request = POST 'https://npsa-prabi.ibcp.fr/cgi-bin/secpred_sopma.pl',
Content_Type => 'form-data',
Content => [title => "",
notice => $self->seq->seq,
Expand All @@ -465,6 +465,7 @@ sub _run {

#### get text only version of results ##
my ($next) = $text =~ /Prediction.*?=(.*?)>/;
return $self unless $next;
my $out = "http://npsa-pbil.ibcp.fr/". "$next";
my $req2 = HTTP::Request->new(GET=>$out);
my $resp2 = $self->request ($req2);
Expand Down

0 comments on commit 557d24e

Please sign in to comment.