Skip to content

Commit

Permalink
Merge branch 'master' into topic/cjfields_psl_fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
cjfields committed Dec 18, 2013
2 parents 0cfee80 + ef72a65 commit a458d35
Show file tree
Hide file tree
Showing 46 changed files with 610 additions and 236 deletions.
9 changes: 7 additions & 2 deletions .travis.yml
Expand Up @@ -21,7 +21,7 @@ install:
- "cpanm XML::SAX XML::SAX::Writer XML::Simple XML::LibXML XML::Twig XML::Writer 2>&1 | tail -n 1"
- "cpanm PostScript::TextBlock Set::Scalar Sort::Naturally YAML | tail -n 1"
- "cpanm Math::Random SOAP::Lite Spreadsheet::ParseExcel | tail -n 1"
#- "cpanm Bio::ASN1::EntrezGene | tail -n 1"
- "cpanm Bio::ASN1::EntrezGene | tail -n 1"
- "cpanm Bio::Phylo | tail -n 1"
#for some reason tests and deps aren't skipped here. Will have to look into it more...
#git repos, seems to only work for simple checkouts, so pure perl only (TODO: look into before_script for more detail)
Expand All @@ -34,7 +34,12 @@ script:

#TODO - send emails to bioperl-guts-l
notifications:
email: false
email:
recipients:
- bioperl-guts-l@lists.open-bio.org
- cjfields1@gmail.com
on_success: change
on_failure: change

# whitelist branches
branches:
Expand Down
79 changes: 57 additions & 22 deletions Bio/Align/Utilities.pm
@@ -1,3 +1,20 @@
package Bio::Align::Utilities;
use strict;
use warnings;
use Carp;
use Bio::Root::Version;

use Exporter 'import';
our @EXPORT_OK = qw(
aa_to_dna_aln
bootstrap_replicates
cat
bootstrap_replicates_codons
dna_to_aa_aln
most_common_sequences
);
our %EXPORT_TAGS = (all => \@EXPORT_OK);

#
# BioPerl module for Bio::Align::Utilities
#
Expand Down Expand Up @@ -87,28 +104,9 @@ Internal methods are usually preceded with a _
=cut

#' keep my emacs happy
# Let the code begin...

package Bio::Align::Utilities;
use vars qw(@EXPORT @EXPORT_OK $GAP $CODONGAP %EXPORT_TAGS );
use strict;
use Carp;
use Bio::Root::Version;
require Exporter;

use base qw(Exporter);

@EXPORT = qw();
@EXPORT_OK =
qw(aa_to_dna_aln bootstrap_replicates cat bootstrap_replicates_codons dna_to_aa_aln);
%EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] );

BEGIN {
use constant CODONSIZE => 3;
$GAP = '-';
$CODONGAP = $GAP x CODONSIZE;
}
use constant CODONSIZE => 3;
our $GAP = '-';
our $CODONGAP = $GAP x CODONSIZE;

=head2 aa_to_dna_aln
Expand Down Expand Up @@ -503,4 +501,41 @@ sub cat {
return $aln;
}


=head2 most_common_sequences
Title : most_common_sequences
Usage : @common = most_common_sequences ($align, $case_sensitivity)
Function : Returns an array of the sequences that appear most often in the
alignment (although this probably makes more sense when there is
only a single most common sequence). Sequences are compared after
removing any "-" (gap characters), and ambiguous units (e.g., R
for purines) are only compared to themselves. The returned
sequence is also missing the "-" since they don't actually make
part of the sequence.
Returns : Array of text strings.
Arguments : Optional argument defining whether the comparison between sequences
to find the most common should be case sensitive. Defaults to
false, i.e, not case sensitive.
=cut

sub most_common_sequences {
my $align = shift
or croak ("Must provide Bio::AlignI object to Bio::Align::Utilities::most_common_sequences");
my $case_sensitive = shift; # defaults to false (we get undef if nothing)

## We keep track of the max on this loop. Saves us having to
## transverse the hash table later to find the maximum value.
my $max = 0;
my %counts;
foreach ($align->each_seq) {
(my $seq = $_->seq) =~ tr/-//d;
$seq = uc ($seq) unless $case_sensitive;
$max++ if (++$counts{$seq} > $max);
}
my @common = grep ($counts{$_} == $max, keys %counts);
return @common;
}

1;
4 changes: 4 additions & 0 deletions Bio/Assembly/Singlet.pm
Expand Up @@ -157,6 +157,10 @@ sub _seq_to_singlet {
-start => 1,
#-end => we let Bio::LocatableSeq calculate it (Seq and LocatableSeq)
);
# Get End from $seq if $lseq can't figure it out (e.g. phrap output)
if (not defined $lseq->end) {
$lseq->end($seq->end);
}
# Add new sequence and its coordinates to the contig
my $lcoord = Bio::SeqFeature::Generic->new( -start => $lseq->start,
-end => $lseq->end );
Expand Down
1 change: 1 addition & 0 deletions Bio/DB/Flat/BDB.pm
Expand Up @@ -241,6 +241,7 @@ sub _index_file {
my $fh = $self->_fhcache($file) or $self->throw("could not open $file for indexing: $!");
my $offset = 0;
my $count = 0;

while (!eof($fh)) {
my ($ids,$adjustment) = $self->parse_one_record($fh) or next;
$adjustment ||= 0; # prevent uninit variable warning
Expand Down
2 changes: 1 addition & 1 deletion Bio/DB/Flat/BDB/fasta.pm
Expand Up @@ -85,7 +85,7 @@ sub parse_one_record {
my $fh = shift;

# fasta parses by changing $/ to '\n>', need to adjust accordingly
my $adj = ( $^O =~ /mswin/i ) ? -2 : -1;
my $adj = -1;
my $parser =
$self->{cached_parsers}{fileno($fh)}
||= Bio::SeqIO->new(-fh=>$fh,-format=>$self->default_file_format);
Expand Down
12 changes: 11 additions & 1 deletion Bio/DB/Flat/BinarySearch.pm
Expand Up @@ -840,6 +840,16 @@ sub _index_file {
my %secondary_id;
my $last_one;

# In Windows, text files have '\r\n' as line separator, but when reading in
# text mode Perl will only show the '\n'. This means that for a line "ABC\r\n",
# "length $_" will report 4 although the line is 5 bytes in length.
# We assume that all lines have the same line separator and only read current line.
my $init_pos = tell($fh);
my $curr_line = <$fh>;
my $pos_diff = tell($fh) - $init_pos;
my $correction = $pos_diff - length $curr_line;
seek $fh, $init_pos, 0; # Rewind position to proceed to read the file

while (<$fh>) {
$last_one = $_;
$self->{alphabet} ||= $self->guess_alphabet($_);
Expand All @@ -848,7 +858,7 @@ sub _index_file {
$id = $new_primary_entry;
$self->{alphabet} ||= $self->guess_alphabet($_);

my $tmplen = ( tell $fh ) - length($_);
my $tmplen = ( tell $fh ) - length($_) - $correction;

$length = $tmplen - $pos;

Expand Down
26 changes: 22 additions & 4 deletions Bio/DB/IndexedBase.pm
Expand Up @@ -747,9 +747,18 @@ sub _calc_termination_length {
# Account for crlf-terminated Windows and Mac files
my ($self, $file) = @_;
my $fh = IO::File->new($file) or $self->throw( "Could not open $file: $!");
my $line = <$fh>;

# In Windows, text files have '\r\n' as line separator, but when reading in
# text mode Perl will only show the '\n'. This means that for a line "ABC\r\n",
# "length $_" will report 4 although the line is 5 bytes in length.
# We assume that all lines have the same line separator and only read current line.
my $init_pos = tell($fh);
my $curr_line = <$fh>;
my $pos_diff = tell($fh) - $init_pos;
my $correction = $pos_diff - length $curr_line;
close $fh;
$self->{termination_length} = ($line =~ /\r\n$/) ? 2 : 1;

$self->{termination_length} = ($curr_line =~ /\r\n$/) ? 2 : 1+$correction;
return $self->{termination_length};
}

Expand Down Expand Up @@ -1056,9 +1065,18 @@ sub NEXTKEY {

sub DESTROY {
my $self = shift;

# Close filehandles
while (my ($file, $fh) = each %{ $self->{fhcache} }) {
if (defined $fh) {
$fh->close;
}
}
$self->_close_index($self->{offsets});

if ( $self->{clean} || $self->{indexing} ) {
# Indexing aborted or cleaning requested. Delete the index file.
unlink $self->{index_name};
# Indexing aborted or cleaning requested. Delete the index file.
unlink $self->{index_name};
}
return 1;
}
Expand Down
5 changes: 4 additions & 1 deletion Bio/DB/Qual.pm
Expand Up @@ -378,7 +378,10 @@ sub header {
my $fh = $self->_fh($id) or return;
seek($fh, $offset, 0);
read($fh, $data, $headerlen);
chomp $data;
# On Windows chomp remove '\n' but leaves '\r'
# when reading '\r\n' in binary mode
$data =~ s/\n//g;
$data =~ s/\r//g;
substr($data, 0, 1) = '';
return $data;
}
Expand Down
16 changes: 15 additions & 1 deletion Bio/DB/SeqFeature/Store/DBI/SQLite.pm
Expand Up @@ -146,7 +146,7 @@ use base 'Bio::DB::SeqFeature::Store::DBI::mysql';
use Bio::DB::SeqFeature::Store::DBI::Iterator;
use DBI qw(:sql_types);
use Memoize;
use Cwd 'abs_path';
use Cwd qw(abs_path getcwd);
use Bio::DB::GFF::Util::Rearrange 'rearrange';
use Bio::SeqFeature::Lite;
use File::Spec;
Expand Down Expand Up @@ -254,6 +254,10 @@ sub init {
$dbh->do("PRAGMA synchronous = OFF;"); # makes writes much faster
$dbh->do("PRAGMA temp_store = MEMORY;"); # less disk I/O; some speedup
$dbh->do("PRAGMA cache_size = 20000;"); # less disk I/O; some speedup
# Keep track of database file location
my $cwd = getcwd;
my ($db_file) = ($dsn =~ m/(?:db(?:name)?|database)=(.+)$/);
$self->{dbh_file} = "$cwd/$db_file";
}
$self->{dbh} = $dbh;
$self->{is_temp} = $is_temporary;
Expand Down Expand Up @@ -1196,6 +1200,16 @@ sub _dump_update_location_index {
print $fh join("\t",@args),"\n";
}

sub DESTROY {
my $self = shift;
# Remove filehandles, so temporal files can be properly deleted
if (%DBI::installed_drh) {
DBI->disconnect_all;
%DBI::installed_drh = ();
}
undef $self->{dbh};
}

1;

=head1 AUTHOR
Expand Down
7 changes: 7 additions & 0 deletions Bio/DB/SeqFeature/Store/LoadHelper.pm
Expand Up @@ -59,6 +59,13 @@ sub new {

sub DESTROY {
my $self = shift;
# Destroy all filehandle references
# before trying to delete files and folder
%DBHandles = ();
undef $self->{IndexIt};
undef $self->{TopLevel};
undef $self->{Local2Global};
undef $self->{Parent2Child};
rmtree $self->{tmppath};
# File::Temp::cleanup() unless $self->{keep};
}
Expand Down
24 changes: 22 additions & 2 deletions Bio/DB/SeqFeature/Store/Loader.pm
Expand Up @@ -720,6 +720,28 @@ sub unescape {

sub DESTROY {
my $self = shift;
# Close filehandles, so temporal files can be properly deleted
my $store = $self->store;
if ( $store->isa('Bio::DB::SeqFeature::Store::memory')
or $store->isa('Bio::DB::SeqFeature::Store::berkeleydb3')
) {
$store->private_fasta_file->close;

if ($store->{fasta_db}) {
while (my ($file, $fh) = each %{ $store->{fasta_db}->{fhcache} }) {
$fh->close;
}
$store->{fasta_db}->_close_index($store->{fasta_db}->{offsets});
}
}
elsif ($store->isa('Bio::DB::SeqFeature::Store::DBI::SQLite')) {
if (%DBI::installed_drh) {
DBI->disconnect_all;
%DBI::installed_drh = ();
}
undef $store->{dbh};
}

if (my $ld = $self->{temp_load}) {
unlink $ld;
}
Expand Down Expand Up @@ -755,5 +777,3 @@ This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut
4 changes: 3 additions & 1 deletion Bio/DB/SeqFeature/Store/berkeleydb.pm
Expand Up @@ -621,6 +621,7 @@ sub _open_databases {
return if $ignore_errors; # autoindex set, so defer this
$self->throw("Couldn't tie: ".$self->_features_path . " $!");
}

if ($create) {
%h = ();
$h{'.next_id'} = 1;
Expand Down Expand Up @@ -701,6 +702,7 @@ sub _close_databases {
$self->db(undef);
$self->dna_db(undef);
$self->notes_db(undef);
$self->parentage_db(undef);
$self->index_db($_=>undef) foreach $self->_index_files;
}

Expand Down Expand Up @@ -1449,6 +1451,7 @@ sub db_version {
sub DESTROY {
my $self = shift;
$self->_close_databases();
$self->private_fasta_file->close;
rmtree($self->directory,0,1) if $self->temporary && -e $self->directory;
}

Expand Down Expand Up @@ -1575,4 +1578,3 @@ This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut
11 changes: 11 additions & 0 deletions Bio/DB/TFBS/transfac_pro.pm
Expand Up @@ -1884,4 +1884,15 @@ sub _species_to_taxid {
return $ncbi_taxid;
}

sub DESTROY {
my $self = shift;
# Destroy tied references to close filehandles
# and allow proper temporary files deletion
undef $self->{_tax_db}->{'_nodes'};
undef $self->{_tax_db}->{'_id2name'};
undef $self->{_tax_db}->{'_name2id'};
undef $self->{_tax_db}->{'_parent2children'};
undef $self->{_tax_db}->{'_parentbtree'};
}

1;
9 changes: 7 additions & 2 deletions Bio/Index/Abstract.pm
Expand Up @@ -773,8 +773,13 @@ sub add_record {
=cut

sub pack_record {
my( $self, @args ) = @_;
return join "\034", @args;
my( $self, @args ) = @_;
# Silence undefined warnings
@args = map {
$_ = (defined $_) ? $_ : '';
$_ ;
} @args;
return join "\034", @args;
}

=head2 unpack_record
Expand Down

0 comments on commit a458d35

Please sign in to comment.