Permalink
Browse files

Fixed issue where ambiguous taxa names got the same internal ID

Change Bio::DB::Taxonomy::_handle_internal_id() so that it first looks up taxa
by taxon ID, and then, if unsuccessful and required by the user, by name and rank.
This makes it possible to disambiguate taxa that have the same scientific name
but are really different taxa (different taxon ID and internal ID).
  • Loading branch information...
1 parent 168c098 commit a9a020a58c8abec6ea5f3a2f372091ed99f3216c @fangly fangly committed Jun 27, 2013
Showing with 32 additions and 20 deletions.
  1. +29 −19 Bio/DB/Taxonomy.pm
  2. +3 −1 t/RemoteDB/Taxonomy.t
View
@@ -311,41 +311,51 @@ END
Title : _handle_internal_id
Usage : *INTERNAL Bio::DB::Taxonomy stuff*
- Function: Tries to ensure that when a taxon is requested from any database,
- the Taxon object returned will have the same internal id regardless
- of database.
- Args : Bio::Taxon, and optionally true value to try and do the job using
- scientific name & rank if your ids aren't comparable to other dbs.
+ Function: Add an internal ID to a taxon object, ensuring that the taxon gets
+ the same internal ID, regardless of which database it is retrieved
+ from.
+ Args : * A Bio::Taxon
+ * An optional boolean to decide whether or not to try and do the job
+ using scientific name & rank in addition to taxon ID. This is
+ useful if your IDs are not comparable to those of other databases,
+ e.g. if they are arbitrary, as in the case of Bio::DB::Taxonomy::list
=cut
sub _handle_internal_id {
my ($self, $taxon, $try_name) = @_;
$self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa('Bio::Taxon');
+
my $taxid = $taxon->id || return;
my $sci_name = $taxon->scientific_name || '';
my $rank = $taxon->rank || 'no rank';
-
- if ($try_name && $sci_name && defined $TAXON_IIDS->{names}->{$sci_name}) {
- if (defined $TAXON_IIDS->{names}->{$sci_name}->{$rank}) {
- $TAXON_IIDS->{taxids}->{$taxid} = $TAXON_IIDS->{names}->{$sci_name}->{$rank};
+
+ my $iid = $TAXON_IIDS->{taxids}->{$taxid};
+ if ( (not defined $iid) && $try_name && $sci_name && exists $TAXON_IIDS->{names}->{$sci_name}) {
+ # Try to look up IID based on species name and rank
+ $iid = $TAXON_IIDS->{names}->{$sci_name}->{$rank};
+ if (defined $iid) {
+ $TAXON_IIDS->{taxids}->{$taxid} = $iid;
}
elsif ($rank eq 'no rank') {
# pick the internal id of one named rank taxa at random
- my ($iid) = values %{$TAXON_IIDS->{names}->{$sci_name}};
+ ($iid) = values %{$TAXON_IIDS->{names}->{$sci_name}};
$TAXON_IIDS->{taxids}->{$taxid} = $iid;
}
}
-
- if (defined $TAXON_IIDS->{taxids}->{$taxid}) {
- # a little dangerous to use this internal method of Bio::Tree::Node;
- # but it is how internal_id() is set
- $taxon->_creation_id($TAXON_IIDS->{taxids}->{$taxid});
- }
- else {
- $TAXON_IIDS->{taxids}->{$taxid} = $taxon->internal_id;
- $TAXON_IIDS->{names}->{$sci_name}->{$rank} = $taxon->internal_id if $sci_name;
+
+ if (defined $iid) {
+ # Save existing IID the Bio::Tree::Node way, despite internal method
+ $taxon->_creation_id($iid);
+ } else {
+ # Create a new IID for this taxon and register it
+ $iid = $taxon->internal_id;
+ $TAXON_IIDS->{taxids}->{$taxid} = $iid;
+ if ($sci_name) {
+ $TAXON_IIDS->{names}->{$sci_name}->{$rank} = $iid;
+ }
}
+
}
View
@@ -7,7 +7,7 @@ BEGIN {
use lib '.';
use Bio::Root::Test;
- test_begin(-tests => 159,
+ test_begin(-tests => 160,
-requires_module => 'XML::Twig');
use_ok('Bio::DB::Taxonomy');
@@ -357,6 +357,8 @@ is scalar @taxonids, 2; # multiple taxa would match using $db_list->get_taxon(-n
ok $node = $db_list->get_taxon( -names => ['c__Gammaproteobacteria', 'o__Alteromonadales' , 'f__Alteromonadaceae'] );
is $node->ancestor->node_name, 'o__Alteromonadales';
+my $iid = $node->internal_id;
ok $node = $db_list->get_taxon( -names => ['c__Gammaproteobacteria', 'o__Oceanospirillales', 'f__Alteromonadaceae'] );
is $node->ancestor->node_name, 'o__Oceanospirillales';
+isnt $node->internal_id, $iid;

0 comments on commit a9a020a

Please sign in to comment.