Skip to content

Commit

Permalink
Merge pull request #49 from pcantalupo/topic/bug-3375
Browse files Browse the repository at this point in the history
Fixed bug 3375 and added some tests
  • Loading branch information
pcantalupo committed Nov 7, 2012
2 parents fac6f33 + 65bb304 commit 8039fe1
Show file tree
Hide file tree
Showing 3 changed files with 517 additions and 11 deletions.
30 changes: 20 additions & 10 deletions Bio/SeqIO/genbank.pm
Expand Up @@ -587,20 +587,30 @@ sub next_seq {
-version => $version,
-database => $db || 'GenBank',
-tagname => 'dblink'));
} elsif ( $dbsource =~ /(\S+)([\.:])\s*(\d+)/ ) {
my ($id, $db, $version);
} elsif ( $dbsource =~ /(\S+)([\.:])\s*(\S+)/ ) {
my ($db, $version);
my @ids = ();
if ($2 eq ':') {
($db, $id) = ($1, $3);
$db = $1;
# Genbank 192 release notes say this: "The second field can consist of
# multiple comma-separated identifiers, if a sequence record has
# multiple DBLINK cross-references of a given type."
# For example: DBLINK Project:100,200,300
@ids = split (/,/, $3);
} else {
($db, $id, $version) = ('GenBank', $1, $3);
($db, $version) = ('GenBank', $3);
$ids[0] = $1;
}
$annotation->add_Annotation('dblink',
Bio::Annotation::DBLink->new(
-primary_id => $id,
-version => $version,
-database => $db,
-tagname => 'dblink')

foreach my $id (@ids) {
$annotation->add_Annotation('dblink',
Bio::Annotation::DBLink->new(
-primary_id => $id,
-version => $version,
-database => $db,
-tagname => 'dblink')
);
}
} else {
$self->warn("Unrecognized DBSOURCE data: $dbsource\n");
}
Expand Down
19 changes: 18 additions & 1 deletion t/SeqIO/genbank.t
Expand Up @@ -7,7 +7,7 @@ BEGIN {
use lib '.';
use Bio::Root::Test;

test_begin(-tests => 274 );
test_begin(-tests => 281 );

use_ok('Bio::SeqIO::genbank');
}
Expand Down Expand Up @@ -601,3 +601,20 @@ $as = $ast->next_seq;
($cds) = grep { $_->primary_tag eq 'CDS' } $as->get_SeqFeatures();
@notes = $cds->get_tag_values('note');
is(scalar @notes, 2);


# Bug 3375: a single DBLINK line may carry several comma-separated
# identifiers (e.g. "DBLINK Project:100,200,300"). The assertions below check
# the database / primary_id / version of selected 'dblink' annotations.
# Every assertion carries a description so a failure can be located in the
# TAP output; the number of assertions (7) is unchanged, so the test plan
# declared in test_begin() still matches.
my $in = Bio::SeqIO->new(-format => 'genbank',
                         -file   => test_input_file('NC_002058_multDBLINK_bug3375.gb'));
my $seq = $in->next_seq(); # should not throw a warning now
my @dblinks = $seq->annotation->get_Annotations('dblink'); # contains 5 dblink references
# testing DBLINK BioProject: PRJNA15288
is($dblinks[0]->database, 'BioProject', 'bug3375 database is BioProject');
is($dblinks[0]->primary_id, 'PRJNA15288', 'bug3375 primary_id is PRJNA15288');
# testing DBLINK Project:100,200,300
# (index 3 is checked against the last of the comma-separated identifiers)
is($dblinks[3]->database, 'Project', 'bug3375 database is Project');
is($dblinks[3]->primary_id, '300', 'bug3375 primary_id is 300');
# testing DBLINK NC_002058.3
is($dblinks[4]->database, 'GenBank', 'bug3375 database is GenBank');
is($dblinks[4]->primary_id, 'NC_002058', 'bug3375 primary_id is NC_002058');
is($dblinks[4]->version, '3', 'bug3375 version is 3');

0 comments on commit 8039fe1

Please sign in to comment.