Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

maint: remove files that have been moved to a separate Bio-Coordinate…

… distribution
  • Loading branch information...
commit ee8b48d583f07b575a0c77e7c458c687cbe97b11 1 parent 9b85911
Carnë Draug carandraug authored
217 Bio/Coordinate/Chain.pm
View
@@ -1,217 +0,0 @@
-#
-# bioperl module for Bio::Coordinate::Chain
-#
-# Please direct questions and support issues to <bioperl-l@bioperl.org>
-#
-# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
-#
-# Copyright Heikki Lehvaslaiho
-#
-# You may distribute this module under the same terms as perl itself
-
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::Coordinate::Chain - Mapping locations through a chain of coordinate mappers
-
-=head1 SYNOPSIS
-
- # create Bio::Coordinate::Pairs, or any MapperIs, somehow
- $pair1; $pair2;
-
- # add them into a Chain
- $collection = Bio::Coordinate::Chain->new;
- $collection->add_mapper($pair1);
- $collection->add_mapper($pair2);
-
- # create a position and map it
- $pos = Bio::Location::Simple->new (-start => 5, -end => 9 );
- $match = $collection->map($pos);
- if ($match) {
- sprintf "Matches at %d-%d\n", $match->start, $match->end,
- } else {
- print "No match\n";
- }
-
-=head1 DESCRIPTION
-
-This class assumes that you have built several mappers and want to
-link them together so that output from the previous mapper is the next
-mappers input. This way you can build arbitrarily complex mappers from
-simpler components.
-
-Note that Chain does not do any sanity checking on its mappers. You
-are solely responsible that input and output coordinate systems,
-direction of mapping and parameters internal to mappers make sense
-when chained together.
-
-To put it bluntly, the present class is just a glorified foreach loop
-over an array of mappers calling the map method.
-
-It would be neat to an internal function that would generate a new
-single step mapper from those included in the chain. It should speed
-things up considerably. Any volunteers?
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to the
-Bioperl mailing lists Your participation is much appreciated.
-
- bioperl-l@bioperl.org - General discussion
- http://bioperl.org/wiki/Mailing_lists - About the mailing lists
-
-=head2 Support
-
-Please direct usage questions or support issues to the mailing list:
-
-I<bioperl-l@bioperl.org>
-
-rather than to the module maintainer directly. Many experienced and
-reponsive experts will be able look at the problem and quickly
-address it. Please include a thorough description of the problem
-with code and data examples if at all possible.
-
-=head2 Reporting Bugs
-
-Report bugs to the Bioperl bug tracking system to help us keep track
-the bugs and their resolution. Bug reports can be submitted via the
-web:
-
- https://redmine.open-bio.org/projects/bioperl/
-
-=head1 AUTHOR - Heikki Lehvaslaiho
-
-Email: heikki-at-bioperl-dot-org
-
-=head1 CONTRIBUTORS
-
-Ewan Birney, birney@ebi.ac.uk
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object
-methods. Internal methods are usually preceded with a _
-
-=cut
-
-
-# Let the code begin...
-
-package Bio::Coordinate::Chain;
-use strict;
-
-# Object preamble - inherits from Bio::Root::Root
-use Bio::Root::Root;
-use Bio::Coordinate::Result;
-
-use base qw(Bio::Coordinate::Collection Bio::Coordinate::MapperI);
-
-
-=head2 map
-
- Title : map
- Usage : $newpos = $obj->map($pos);
- Function: Map the location through all the mappers in the chain.
- Example :
- Returns : new Location in the output coordiante system
- Args : a Bio::Location::Simple object
-
-=cut
-
-sub map {
- my ($self,$value) = @_;
-
- $self->throw("Need to pass me a value.")
- unless defined $value;
- $self->throw("I need a Bio::Location, not [$value]")
- unless $value->isa('Bio::LocationI');
- $self->throw("No coordinate mappers!")
- unless $self->each_mapper;
-
- my $res = Bio::Coordinate::Result->new();
-
- foreach my $mapper ($self->each_mapper) {
-
- my $res = $mapper->map($value);
- return unless $res->each_match;
- $value = $res->match;
- }
-
- return $value;
-}
-
-
-=head2 Inherited methods
-
-=cut
-
-=head2 add_mapper
-
- Title : add_mapper
- Usage : $obj->add_mapper($mapper)
- Function: Pushes one Bio::Coodinate::MapperI into the list of mappers.
- Sets _is_sorted() to false.
- Example :
- Returns : 1 when succeeds, 0 for failure.
- Args : mapper object
-
-=cut
-
-=head2 mappers
-
- Title : mappers
- Usage : $obj->mappers();
- Function: Returns or sets a list of mappers.
- Example :
- Returns : array of mappers
- Args : array of mappers
-
-=cut
-
-=head2 each_mapper
-
- Title : each_mapper
- Usage : $obj->each_mapper();
- Function: Returns a list of mappers.
- Example :
- Returns : array of mappers
- Args : none
-
-=cut
-
-=head2 swap
-
- Title : swap
- Usage : $obj->swap;
- Function: Swap the direction of mapping;input <-> output
- Example :
- Returns : 1
- Args :
-
-=cut
-
-=head2 test
-
- Title : test
- Usage : $obj->test;
- Function: test that both components of all pairs are of the same length.
- Ran automatically.
- Example :
- Returns : boolean
- Args :
-
-=cut
-
-
-
-sub sort{
- my ($self) = @_;
- $self->warn("You do not really want to sort your chain, do you!\nDoing nothing.");
-}
-
-1;
-
428 Bio/Coordinate/Collection.pm
View
@@ -1,428 +0,0 @@
-#
-# bioperl module for Bio::Coordinate::Collection
-#
-# Please direct questions and support issues to <bioperl-l@bioperl.org>
-#
-# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
-#
-# Copyright Heikki Lehvaslaiho
-#
-# You may distribute this module under the same terms as perl itself
-
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::Coordinate::Collection - Noncontinuous match between two coordinate sets
-
-=head1 SYNOPSIS
-
- # create Bio::Coordinate::Pairs or other Bio::Coordinate::MapperIs somehow
- $pair1; $pair2;
-
- # add them into a Collection
- $collection = Bio::Coordinate::Collection->new;
- $collection->add_mapper($pair1);
- $collection->add_mapper($pair2);
-
- # create a position and map it
- $pos = Bio::Location::Simple->new (-start => 5, -end => 9 );
- $res = $collection->map($pos);
- $res->match->start == 1;
- $res->match->end == 5;
-
- # if mapping is many to one (*>1) or many-to-many (*>*)
- # you have to give seq_id not get unrelevant entries
- $pos = Bio::Location::Simple->new
- (-start => 5, -end => 9 -seq_id=>'clone1');
-
-=head1 DESCRIPTION
-
-Generic, context neutral mapper to provide coordinate transforms
-between two B<disjoint> coordinate systems. It brings into Bioperl the
-functionality from Ewan Birney's Bio::EnsEMBL::Mapper ported into
-current bioperl.
-
-This class is aimed for representing mapping between whole chromosomes
-and contigs, or between contigs and clones, or between sequencing
-reads and assembly. The submaps are automatically sorted, so they can
-be added in any order.
-
-To map coordinates to the other direction, you have to swap() the
-collection. Keeping track of the direction and ID restrictions
-are left to the calling code.
-
-
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to the
-Bioperl mailing lists Your participation is much appreciated.
-
- bioperl-l@bioperl.org - General discussion
- http://bioperl.org/wiki/Mailing_lists - About the mailing lists
-
-=head2 Support
-
-Please direct usage questions or support issues to the mailing list:
-
-I<bioperl-l@bioperl.org>
-
-rather than to the module maintainer directly. Many experienced and
-reponsive experts will be able look at the problem and quickly
-address it. Please include a thorough description of the problem
-with code and data examples if at all possible.
-
-=head2 Reporting Bugs
-
-report bugs to the Bioperl bug tracking system to help us keep track
-the bugs and their resolution. Bug reports can be submitted via the
-web:
-
- https://redmine.open-bio.org/projects/bioperl/
-
-=head1 AUTHOR - Heikki Lehvaslaiho
-
-Email: heikki-at-bioperl-dot-org
-
-=head1 CONTRIBUTORS
-
-Ewan Birney, birney@ebi.ac.uk
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object
-methods. Internal methods are usually preceded with a _
-
-=cut
-
-
-# Let the code begin...
-
-package Bio::Coordinate::Collection;
-use strict;
-
-# Object preamble - inherits from Bio::Root::Root
-use Bio::Coordinate::Result;
-use Bio::Coordinate::Result::Gap;
-
-use base qw(Bio::Root::Root Bio::Coordinate::MapperI);
-
-
-sub new {
- my($class,@args) = @_;
- my $self = $class->SUPER::new(@args);
-
- $self->{'_mappers'} = [];
-
- my($in, $out, $strict, $mappers, $return_match) =
- $self->_rearrange([qw(IN
- OUT
- STRICT
- MAPPERS
- RETURN_MATCH
- )],
- @args);
-
- $in && $self->in($in);
- $out && $self->out($out);
- $mappers && $self->mappers($mappers);
- $return_match && $self->return_match('return_match');
- return $self; # success - we hope!
-}
-
-
-=head2 add_mapper
-
- Title : add_mapper
- Usage : $obj->add_mapper($mapper)
- Function: Pushes one Bio::Coordinate::MapperI into the list of mappers.
- Sets _is_sorted() to false.
- Example :
- Returns : 1 when succeeds, 0 for failure.
- Args : mapper object
-
-=cut
-
-sub add_mapper {
- my ($self,$value) = @_;
-
- $self->throw("Is not a Bio::Coordinate::MapperI but a [$self]")
- unless defined $value && $value->isa('Bio::Coordinate::MapperI');
-
- # test pair range lengths
- $self->warn("Coordinates in pair [". $value . ":" .
- $value->in->seq_id . "/". $value->out->seq_id .
- "] are not right.")
- unless $value->test;
-
- $self->_is_sorted(0);
- push(@{$self->{'_mappers'}},$value);
-}
-
-=head2 mappers
-
- Title : mappers
- Usage : $obj->mappers();
- Function: Returns or sets a list of mappers.
- Example :
- Returns : array of mappers
- Args : array of mappers
-
-=cut
-
-sub mappers{
- my ($self,@args) = @_;
-
- if (@args) {
-
- $self->throw("Is not a Bio::Coordinate::MapperI but a [$self]")
- unless defined $args[0] && $args[0]->isa('Bio::Coordinate::MapperI');
- push(@{$self->{'_mappers'}}, @args);
- }
-
- return @{$self->{'_mappers'}};
-}
-
-
-=head2 each_mapper
-
- Title : each_mapper
- Usage : $obj->each_mapper();
- Function: Returns a list of mappers.
- Example :
- Returns : list of mappers
- Args : none
-
-=cut
-
-sub each_mapper{
- my ($self) = @_;
- return @{$self->{'_mappers'}};
-}
-
-=head2 mapper_count
-
- Title : mapper_count
- Usage : my $count = $collection->mapper_count;
- Function: Get the count of the number of mappers stored
- in this collection
- Example :
- Returns : integer
- Args : none
-
-
-=cut
-
-sub mapper_count{
- my $self = shift;
- return scalar @{$self->{'_mappers'} || []};
-}
-
-
-=head2 swap
-
- Title : swap
- Usage : $obj->swap;
- Function: Swap the direction of mapping;input <-> output
- Example :
- Returns : 1
- Args :
-
-=cut
-
-sub swap {
- my ($self) = @_;
- use Data::Dumper;
-
- $self->sort unless $self->_is_sorted;
- map {$_->swap;} @{$self->{'_mappers'}};
- ($self->{'_in_ids'}, $self->{'_out_ids'}) =
- ($self->{'_out_ids'}, $self->{'_in_ids'});
- 1;
-}
-
-=head2 test
-
- Title : test
- Usage : $obj->test;
- Function: test that both components of all pairs are of the same length.
- Ran automatically.
- Example :
- Returns : boolean
- Args :
-
-=cut
-
-sub test {
- my ($self) = @_;
-
- my $res = 1;
-
- foreach my $mapper ($self->each_mapper) {
- unless( $mapper->test ) {
- $self->warn("Coordinates in pair [". $mapper . ":" .
- $mapper->in->seq_id . "/". $mapper->out->seq_id .
- "] are not right.");
- $res = 0;
- }
- }
- $res;
-}
-
-
-=head2 map
-
- Title : map
- Usage : $newpos = $obj->map($pos);
- Function: Map the location from the input coordinate system
- to a new value in the output coordinate system.
- Example :
- Returns : new value in the output coordinate system
- Args : integer
-
-=cut
-
-sub map {
- my ($self,$value) = @_;
-
- $self->throw("Need to pass me a value.")
- unless defined $value;
- $self->throw("I need a Bio::Location, not [$value]")
- unless $value->isa('Bio::LocationI');
- $self->throw("No coordinate mappers!")
- unless $self->each_mapper;
-
- $self->sort unless $self->_is_sorted;
-
-
- if ($value->isa("Bio::Location::SplitLocationI")) {
-
- my $result = Bio::Coordinate::Result->new();
- foreach my $loc ( $value->sub_Location(1) ) {
-
- my $res = $self->_map($loc);
- map { $result->add_sub_Location($_) } $res->each_Location;
-
- }
- return $result;
-
- } else {
- return $self->_map($value);
- }
-
-
-}
-
-
-=head2 _map
-
- Title : _map
- Usage : $newpos = $obj->_map($simpleloc);
- Function: Internal method that does the actual mapping. Called multiple times
- by map() if the location to be mapped is a split location
-
- Example :
- Returns : new location in the output coordinate system or undef
- Args : Bio::Location::Simple
-
-=cut
-
-sub _map {
- my ($self,$value) = @_;
-
- my $result = Bio::Coordinate::Result->new(-is_remote=>1);
-
-IDMATCH: {
-
- # bail out now we if are forcing the use of an ID
- # and it is not in this collection
- last IDMATCH if defined $value->seq_id &&
- ! $self->{'_in_ids'}->{$value->seq_id};
-
- foreach my $pair ($self->each_mapper) {
-
- # if we are limiting input to a certain ID
- next if defined $value->seq_id && $value->seq_id ne $pair->in->seq_id;
-
- # if we haven't even reached the start, move on
- next if $pair->in->end < $value->start;
- # if we have over run, break
- last if $pair->in->start > $value->end;
-
- my $subres = $pair->map($value);
- $result->add_result($subres);
- }
- }
-
- $result->seq_id($result->match->seq_id) if $result->match;
- unless ($result->each_Location) {
- #build one gap;
- my $gap = Bio::Location::Simple->new(-start => $value->start,
- -end => $value->end,
- -strand => $value->strand,
- -location_type => $value->location_type
- );
- $gap->seq_id($value->seq_id) if defined $value->seq_id;
- bless $gap, 'Bio::Coordinate::Result::Gap';
- $result->seq_id($value->seq_id) if defined $value->seq_id;
- $result->add_sub_Location($gap);
- }
- return $result;
-}
-
-
-=head2 sort
-
- Title : sort
- Usage : $obj->sort;
- Function: Sort function so that all mappings are sorted by
- input coordinate start
- Example :
- Returns : 1
- Args :
-
-=cut
-
-sub sort{
- my ($self) = @_;
-
- @{$self->{'_mappers'}} = map { $_->[0] }
- sort { $a->[1] <=> $b->[1] }
- map { [ $_, $_->in->start] }
- @{$self->{'_mappers'}};
-
- #create hashes for sequence ids
- $self->{'_in_ids'} = ();
- $self->{'_out_ids'} = ();
- foreach ($self->each_mapper) {
- $self->{'_in_ids'}->{$_->in->seq_id} = 1;
- $self->{'_out_ids'}->{$_->out->seq_id} = 1;
- }
-
- $self->_is_sorted(1);
-}
-
-=head2 _is_sorted
-
- Title : _is_sorted
- Usage : $newpos = $obj->_is_sorted;
- Function: toggle for whether the (internal) coodinate mapper data are sorted
- Example :
- Returns : boolean
- Args : boolean
-
-=cut
-
-sub _is_sorted{
- my ($self,$value) = @_;
-
- $self->{'_is_sorted'} = 1 if defined $value && $value;
- return $self->{'_is_sorted'};
-}
-
-1;
-
244 Bio/Coordinate/ExtrapolatingPair.pm
View
@@ -1,244 +0,0 @@
-#
-# bioperl module for Bio::Coordinate::ExtrapolatingPair
-#
-# Please direct questions and support issues to <bioperl-l@bioperl.org>
-#
-# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
-#
-# Copyright Heikki Lehvaslaiho
-#
-# You may distribute this module under the same terms as perl itself
-
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::Coordinate::ExtrapolatingPair - Continuous match between two coordinate sets
-
-=head1 SYNOPSIS
-
-
- use Bio::Location::Simple;
- use Bio::Coordinate::ExtrapolatingPair;
-
-
- $match1 = Bio::Location::Simple->new
- (-seq_id => 'propeptide', -start => 21, -end => 40, -strand=>1 );
- $match2 = Bio::Location::Simple->new
- (-seq_id => 'peptide', -start => 1, -end => 20, -strand=>1 );
-
- $pair = Bio::Coordinate::ExtrapolatingPair->
- new(-in => $match1,
- -out => $match2,
- -strict => 1
- );
-
- $pos = Bio::Location::Simple->new
- (-start => 40, -end => 60, -strand=> 1 );
- $res = $pair->map($pos);
- $res->start eq 20;
- $res->end eq 20;
-
-=head1 DESCRIPTION
-
-This class represents a one continuous match between two coordinate
-systems represented by Bio::Location::Simple objects. The relationship
-is directed and reversible. It implements methods to ensure internal
-consistency, and map continuous and split locations from one
-coordinate system to another.
-
-This class is an elaboration of Bio::Coordinate::Pair. The map
-function returns only matches which is the mode needed most of
-tehtime. By default the matching regions between coordinate systems
-are boundless, so that you can say e.g. that gene starts from here in
-the chromosomal coordinate system and extends indefinetely in both
-directions. If you want to define the matching regions exactly, you
-can do that and set strict() to true.
-
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to the
-Bioperl mailing lists Your participation is much appreciated.
-
- bioperl-l@bioperl.org - General discussion
- http://bioperl.org/wiki/Mailing_lists - About the mailing lists
-
-=head2 Support
-
-Please direct usage questions or support issues to the mailing list:
-
-I<bioperl-l@bioperl.org>
-
-rather than to the module maintainer directly. Many experienced and
-reponsive experts will be able look at the problem and quickly
-address it. Please include a thorough description of the problem
-with code and data examples if at all possible.
-
-=head2 Reporting Bugs
-
-report bugs to the Bioperl bug tracking system to help us keep track
-the bugs and their resolution. Bug reports can be submitted via the
-web:
-
- https://redmine.open-bio.org/projects/bioperl/
-
-=head1 AUTHOR - Heikki Lehvaslaiho
-
-Email: heikki-at-bioperl-dot-org
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object
-methods. Internal methods are usually preceded with a _
-
-=cut
-
-
-# Let the code begin...
-
-package Bio::Coordinate::ExtrapolatingPair;
-use strict;
-
-# Object preamble - inherits from Bio::Root::Root
-use Bio::Root::Root;
-use Bio::LocationI;
-
-use base qw(Bio::Coordinate::Pair);
-
-
-sub new {
- my($class,@args) = @_;
- my $self = $class->SUPER::new(@args);
-
- my($strict) =
- $self->_rearrange([qw(STRICT
- )],
- @args);
-
- $strict && $self->strict($strict);
- return $self;
-}
-
-
-=head2 strict
-
- Title : strict
- Usage : $obj->strict(1);
- Function: Set and read the strictness of the coordinate system.
- Example :
- Returns : value of input system
- Args : boolean
-
-=cut
-
-sub strict {
- my ($self,$value) = @_;
- if( defined $value) {
- $self->{'_strict'} = 1 if $value;
- }
- return $self->{'_strict'};
-}
-
-
-=head2 map
-
- Title : map
- Usage : $newpos = $obj->map($loc);
- Function: Map the location from the input coordinate system
- to a new value in the output coordinate system.
-
- In extrapolating coodinate system there is no location zero.
- Locations are...
- Example :
- Returns : new location in the output coordinate system or undef
- Args : Bio::Location::Simple
-
-=cut
-
-sub map {
- my ($self,$value) = @_;
-
- $self->throw("Need to pass me a value.")
- unless defined $value;
- $self->throw("I need a Bio::Location, not [$value]")
- unless $value->isa('Bio::LocationI');
- $self->throw("Input coordinate system not set")
- unless $self->in;
- $self->throw("Output coordinate system not set")
- unless $self->out;
-
- my $match;
-
- if ($value->isa("Bio::Location::SplitLocationI")) {
-
- my $split = Bio::Coordinate::Result->new(-seq_id=>$self->out->seq_id);
- foreach my $loc ( sort { $a->start <=> $b->start }
- $value->sub_Location ) {
-
- $match = $self->_map($loc);
- $split->add_sub_Location($match) if $match;
-
- }
- $split->each_Location ? (return $split) : return ;
-
- } else {
- return $self->_map($value);
- }
-}
-
-
-=head2 _map
-
- Title : _map
- Usage : $newpos = $obj->_map($simpleloc);
- Function: Internal method that does the actual mapping. Called
- multiple times by map() if the location to be mapped is a
- split location
-
- Example :
- Returns : new location in the output coordinate system or undef
- Args : Bio::Location::Simple
-
-=cut
-
-sub _map {
- my ($self,$value) = @_;
-
- my ($offset, $start, $end);
-
- if ($self->strand == -1) {
- $offset = $self->in->end + $self->out->start;
- $start = $offset - $value->end;
- $end = $offset - $value->start ;
- } else { # undef, 0 or 1
- $offset = $self->in->start - $self->out->start;
- $start = $value->start - $offset;
- $end = $value->end - $offset;
- }
-
- # strict prevents matches outside stated range
- if ($self->strict) {
- return if $start < 0 and $end < 0;
- return if $start > $self->out->end;
- $start = 1 if $start < 0;
- $end = $self->out->end if $end > $self->out->end;
- }
-
- my $match = Bio::Location::Simple->
- new(-start => $start,
- -end => $end,
- -strand => $self->strand,
- -seq_id => $self->out->seq_id,
- -location_type => $value->location_type
- );
- $match->strand($match->strand * $value->strand) if $value->strand;
- bless $match, 'Bio::Coordinate::Result::Match';
-
- return $match;
-}
-
-1;
1,351 Bio/Coordinate/GeneMapper.pm
View
@@ -1,1351 +0,0 @@
-#
-# bioperl module for Bio::Coordinate::GeneMapper
-#
-# Please direct questions and support issues to <bioperl-l@bioperl.org>
-#
-# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
-#
-# Copyright Heikki Lehvaslaiho
-#
-# You may distribute this module under the same terms as perl itself
-
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::Coordinate::GeneMapper - transformations between gene related coordinate systems
-
-=head1 SYNOPSIS
-
- use Bio::Coordinate::GeneMapper;
-
- # get a Bio::RangeI representing the start, end and strand of the CDS
- # in chromosomal (or entry) coordinates
- my $cds;
-
- # get a Bio::Location::Split or an array of Bio::LocationI objects
- # holding the start, end and strand of all the exons in chromosomal
- # (or entry) coordinates
- my $exons;
-
- # create a gene mapper and set it to map from chromosomal to cds coordinates
- my $gene = Bio::Coordinate::GeneMapper->new(-in =>'chr',
- -out =>'cds',
- -cds =>$cds,
- -exons=>$exons
- );
-
- # get a a Bio::Location or sequence feature in input (chr) coordinates
- my $loc;
-
- # map the location into output coordinates and get a new location object
- $newloc = $gene->map($loc);
-
-
-=head1 DESCRIPTION
-
-Bio::Coordinate::GeneMapper is a module for simplifying the mappings
-of coodinate locations between various gene related locations in human
-genetics. It also adds a special human genetics twist to coordinate
-systems by making it possible to disable the use of zero
-(0). Locations before position one start from -1. See method
-L<nozero>.
-
-It understands by name the following coordinate systems and mapping
-between them:
-
- peptide (peptide length)
- ^
- | -peptide_offset
- |
- frame propeptide (propeptide length)
- ^ ^
- \ |
- translate \ |
- \ |
- cds (transcript start and end)
- ^
- negative_intron | \
- ^ | \ transcribe
- \ | \
- intron exon \
- ^ ^ ^ /
- splice \ \ / | /
- \ \ / | /
- \ inex | /
- \ ^ | /
- \ \ |/
- ----- gene (gene_length)
- ^
- | - gene_offset
- |
- chr (or entry)
-
-
-This structure is kept in the global variable $DAG which is a
-representation of a Directed Acyclic Graph. The path calculations
-traversing this graph are done in a helper class. See
-L<Bio::Coordinate::Graph>.
-
-Of these, two operations are special cases, translate and splice.
-Translating and reverse translating are implemented as internal
-methods that do the simple 1E<lt>-E<gt>3 conversion. Splicing needs
-additional information that is provided by method L<exons> which takes
-in an array of Bio::LocationI objects.
-
-Most of the coordinate system names should be selfexplanatory to
-anyone familiar with genes. Negative intron coordinate system is
-starts counting backwards from -1 as the last nucleotide in the
-intron. This used when only exon and a few flanking intron nucleotides
-are known.
-
-
-This class models coordinates within one transcript of a gene, so to
-tackle multiple transcripts you need several instances of the
-class. It is therefore valid to argue that the name of the class
-should be TranscriptMapper. GeneMapper is a catchier name, so it
-stuck.
-
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to the
-Bioperl mailing lists Your participation is much appreciated.
-
- bioperl-l@bioperl.org - General discussion
- http://bioperl.org/wiki/Mailing_lists - About the mailing lists
-
-=head2 Support
-
-Please direct usage questions or support issues to the mailing list:
-
-I<bioperl-l@bioperl.org>
-
-rather than to the module maintainer directly. Many experienced and
-reponsive experts will be able look at the problem and quickly
-address it. Please include a thorough description of the problem
-with code and data examples if at all possible.
-
-=head2 Reporting Bugs
-
-Report bugs to the Bioperl bug tracking system to help us keep track
-the bugs and their resolution. Bug reports can be submitted via the
-web:
-
- https://redmine.open-bio.org/projects/bioperl/
-
-=head1 AUTHOR - Heikki Lehvaslaiho
-
-Email: heikki-at-bioperl-dot-org
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object
-methods. Internal methods are usually preceded with a _
-
-=cut
-
-
-# Let the code begin...
-
-package Bio::Coordinate::GeneMapper;
-use vars qw(%COORDINATE_SYSTEMS %COORDINATE_INTS $TRANSLATION $DAG
- $NOZERO_VALUES $NOZERO_KEYS);
-use strict;
-
-# Object preamble - inherits from Bio::Root::Root
-
-use Bio::Coordinate::Result;
-use Bio::Location::Simple;
-use Bio::Coordinate::Graph;
-use Bio::Coordinate::Collection;
-use Bio::Coordinate::Pair;
-use Bio::Coordinate::ExtrapolatingPair;
-
-use base qw(Bio::Root::Root Bio::Coordinate::MapperI);
-
-# first set internal values for all translation tables
-
-%COORDINATE_SYSTEMS = (
- peptide => 10,
- propeptide => 9,
- frame => 8,
- cds => 7,
- negative_intron => 6,
- intron => 5,
- exon => 4,
- inex => 3,
- gene => 2,
- chr => 1
- );
-
-%COORDINATE_INTS = (
- 10 => 'peptide',
- 9 => 'propeptide',
- 8 => 'frame',
- 7 => 'cds',
- 6 => 'negative_intron',
- 5 => 'intron',
- 4 => 'exon',
- 3 => 'inex',
- 2 => 'gene',
- 1 => 'chr'
- );
-
-$TRANSLATION = $COORDINATE_SYSTEMS{'cds'}. "-".
- $COORDINATE_SYSTEMS{'propeptide'};
-
-$DAG = {
- 10 => [],
- 9 => [10],
- 8 => [],
- 7 => [8, 9],
- 6 => [],
- 5 => [6],
- 4 => [7],
- 3 => [4, 5],
- 2 => [3, 4, 5, 7],
- 1 => [2]
- };
-
-$NOZERO_VALUES = {0 => 0, 'in' => 1, 'out' => 2, 'in&out' => 3 };
-$NOZERO_KEYS = { 0 => 0, 1 => 'in', 2 => 'out', 3 => 'in&out' };
-
-
-sub new {
- my($class,@args) = @_;
- my $self = $class->SUPER::new(@args);
-
- # prime the graph
- my $graph = Bio::Coordinate::Graph->new();
- $graph->hash_of_arrays($DAG);
- $self->graph($graph);
-
- my($in, $out, $peptide_offset, $exons,
- $cds, $nozero, $strict) =
- $self->_rearrange([qw(IN
- OUT
- PEPTIDE_OFFSET
- EXONS
- CDS
- NOZERO
- STRICT
- )],
- @args);
-
- # direction of mapping when going chr to protein
- $self->{_direction} = 1;
-
- $in && $self->in($in);
- $out && $self->out($out);
- $cds && $self->cds($cds);
- $exons && ref($exons) =~ /ARRAY/i && $self->exons(@$exons);
- $peptide_offset && $self->peptide_offset($peptide_offset);
- $nozero && $self->nozero($nozero);
- $strict && $self->strict($strict);
-
- return $self; # success - we hope!
-}
-
-=head2 in
-
- Title : in
- Usage : $obj->in('peptide');
- Function: Set and read the input coordinate system.
- Example :
- Returns : value of input system
- Args : new value (optional)
-
-=cut
-
-sub in {
- my ($self,$value) = @_;
- if( defined $value) {
- $self->throw("Not a valid input coordinate system name [$value]\n".
- "Valid values are ". join(", ", keys %COORDINATE_SYSTEMS ))
- unless defined $COORDINATE_SYSTEMS{$value};
-
- $self->{'_in'} = $COORDINATE_SYSTEMS{$value};
- }
- return $COORDINATE_INTS{ $self->{'_in'} };
-}
-
-
-=head2 out
-
- Title : out
- Usage : $obj->out('peptide');
- Function: Set and read the output coordinate system.
- Example :
- Returns : value of output system
- Args : new value (optional)
-
-=cut
-
-sub out {
- my ($self,$value) = @_;
- if( defined $value) {
- $self->throw("Not a valid input coordinate system name [$value]\n".
- "Valid values are ". join(", ", keys %COORDINATE_SYSTEMS ))
- unless defined $COORDINATE_SYSTEMS{$value};
-
- $self->{'_out'} = $COORDINATE_SYSTEMS{$value};
- }
- return $COORDINATE_INTS{ $self->{'_out'} };
-}
-
-=head2 strict
-
- Title : strict
- Usage : $obj->strict('peptide');
- Function: Set and read whether strict boundaried of coordinate
- systems are enforced.
- When strict is on, the end of the coordinate range must be defined.
- Example :
- Returns : boolean
- Args : boolean (optional)
-
-=cut
-
-sub strict {
- my ($self,$value) = @_;
- if( defined $value) {
- $value ? ( $self->{'_strict'} = 1 ) : ( $self->{'_strict'} = 0 );
- ## update in each mapper !!
- }
- return $self->{'_strict'} || 0 ;
-}
-
-
-=head2 nozero
-
- Title : nozero
- Usage : $obj->nozero(1);
- Function: Flag to disable the use of zero in the input,
- output or both coordinate systems. Use of coordinate
- systems without zero is a peculiarity common in
- human genetics community.
- Example :
- Returns : 0 (default), or 'in', 'out', 'in&out'
- Args : 0 (default), or 'in', 'out', 'in&out'
-
-=cut
-
-sub nozero {
- my ($self,$value) = @_;
-
- if (defined $value) {
- $self->throw("Not a valid value for nozero [$value]\n".
- "Valid values are ". join(", ", keys %{$NOZERO_VALUES} ))
- unless defined $NOZERO_VALUES->{$value};
- $self->{'_nozero'} = $NOZERO_VALUES->{$value};
- }
-
- my $res = $self->{'_nozero'} || 0;
- return $NOZERO_KEYS->{$res};
-}
-
-=head2 graph
-
- Title : graph
- Usage : $obj->graph($new_graph);
- Function: Set and read the graph object representing relationships
- between coordinate systems
- Example :
- Returns : Bio::Coordinate::Graph object
- Args : new Bio::Coordinate::Graph object (optional)
-
-=cut
-
-sub graph {
- my ($self,$value) = @_;
- if( defined $value) {
- $self->throw("Not a valid graph [$value]\n")
- unless $value->isa('Bio::Coordinate::Graph');
- $self->{'_graph'} = $value;
- }
- return $self->{'_graph'};
-}
-
-=head2 peptide
-
- Title : peptide
- Usage : $obj->peptide_offset($peptide_coord);
- Function: Read and write the offset of peptide from the start of propeptide
- and peptide length
- Returns : a Bio::Location::Simple object
- Args : a Bio::LocationI object
-
-=cut
-
-sub peptide {
- my ($self, $value) = @_;
- if( defined $value) {
- $self->throw("I need a Bio::LocationI, not [". $value. "]")
- unless $value->isa('Bio::LocationI');
-
- $self->throw("Peptide start not defined")
- unless defined $value->start;
- $self->{'_peptide_offset'} = $value->start - 1;
-
- $self->throw("Peptide end not defined")
- unless defined $value->end;
- $self->{'_peptide_length'} = $value->end - $self->{'_peptide_offset'};
-
-
- my $a = $self->_create_pair
- ('propeptide', 'peptide', $self->strict,
- $self->{'_peptide_offset'}, $self->{'_peptide_length'} );
- my $mapper = $COORDINATE_SYSTEMS{'propeptide'}. "-". $COORDINATE_SYSTEMS{'peptide'};
- $self->{'_mappers'}->{$mapper} = $a;
- }
- return Bio::Location::Simple->new
- (-seq_id => 'propeptide',
- -start => $self->{'_peptide_offset'} + 1 ,
- -end => $self->{'_peptide_length'} + $self->{'_peptide_offset'},
- -strand => 1,
- -verbose => $self->verbose,
- );
-}
-
-=head2 peptide_offset
-
- Title : peptide_offset
- Usage : $obj->peptide_offset(20);
- Function: Set and read the offset of peptide from the start of propeptide
- Returns : set value or 0
- Args : new value (optional)
-
-=cut
-
-sub peptide_offset {
- my ($self,$offset, $len) = @_;
- if( defined $offset) {
- $self->throw("I need an integer, not [$offset]")
- unless $offset =~ /^[+-]?\d+$/;
- $self->{'_peptide_offset'} = $offset;
-
- if (defined $len) {
- $self->throw("I need an integer, not [$len]")
- unless $len =~ /^[+-]?\d+$/;
- $self->{'_peptide_length'} = $len;
- }
-
- my $a = $self->_create_pair
- ('propeptide', 'peptide', $self->strict, $offset, $self->{'_peptide_length'} );
- my $mapper = $COORDINATE_SYSTEMS{'propeptide'}. "-". $COORDINATE_SYSTEMS{'peptide'};
- $self->{'_mappers'}->{$mapper} = $a;
- }
- return $self->{'_peptide_offset'} || 0;
-}
-
-=head2 peptide_length
-
- Title : peptide_length
- Usage : $obj->peptide_length(20);
- Function: Set and read the offset of peptide from the start of propeptide
- Returns : set value or 0
- Args : new value (optional)
-
-=cut
-
-
-sub peptide_length {
- my ($self, $len) = @_;
- if( defined $len) {
- $self->throw("I need an integer, not [$len]")
- if defined $len && $len !~ /^[+-]?\d+$/;
- $self->{'_peptide_length'} = $len;
- }
- return $self->{'_peptide_length'};
-}
-
-
-=head2 exons
-
- Title : exons
- Usage : $obj->exons(@exons);
- Function: Set and read the offset of CDS from the start of transcript
- You do not have to sort the exons before calling this method as
- they will be sorted automatically.
- If you have not defined the CDS, is will be set to span all
- exons here.
- Returns : array of Bio::LocationI exons in genome coordinates or 0
- Args : array of Bio::LocationI exons in genome (or entry) coordinates
-
-=cut
-
-sub exons {
- my ($self,@value) = @_;
- my $cds_mapper = $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'cds'};
- my $inex_mapper =
- $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'inex'};
- my $exon_mapper =
- $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'exon'};
- my $intron_mapper =
- $COORDINATE_SYSTEMS{'gene'}. "-". $COORDINATE_SYSTEMS{'intron'};
- my $negative_intron_mapper =
- $COORDINATE_SYSTEMS{'intron'}. "-". $COORDINATE_SYSTEMS{'negative_intron'};
- my $exon_cds_mapper = $COORDINATE_SYSTEMS{'exon'}. "-". $COORDINATE_SYSTEMS{'cds'};
-
- if(@value) {
- if (ref($value[0]) &&
- $value[0]->isa('Bio::SeqFeatureI') and
- $value[0]->location->isa('Bio::Location::SplitLocationI')) {
- @value = $value[0]->location->each_Location;
- } else {
- $self->throw("I need an array , not [@value]")
- unless ref \@value eq 'ARRAY';
- $self->throw("I need a reference to an array of Bio::LocationIs, not to [".
- $value[0]. "]")
- unless ref $value[0] and $value[0]->isa('Bio::LocationI');
- }
-
- #
- # sort the input array
- #
- # and if the used has not defined CDS assume it is the complete exonic range
- if (defined $value[0]->strand &&
- $value[0]->strand == - 1) { #reverse strand
- @value = map { $_->[0] }
- sort { $b->[1] <=> $a->[1] }
- map { [ $_, $_->start] }
- @value;
-
- unless ($self->cds) {
- $self->cds(Bio::Location::Simple->new
- (-start => $value[-1]->start,
- -end => $value[0]->end,
- -strand => $value[0]->strand,
- -seq_id => $value[0]->seq_id,
- -verbose => $self->verbose,
- )
- );
- }
- } else { # undef or forward strand
- @value = map { $_->[0] }
- sort { $a->[1] <=> $b->[1] }
- map { [ $_, $_->start] }
- @value;
- unless ($self->cds) {
- $self->cds(Bio::Location::Simple->new
- (-start => $value[0]->start,
- -end => $value[-1]->end,
- -strand => $value[0]->strand,
- -seq_id => $value[0]->seq_id,
- -verbose => $self->verbose,
- )
- );
- }
-
- }
-
- $self->{'_chr_exons'} = \@value;
-
- # transform exons from chromosome to gene coordinates
- # but only if gene coordinate system has been set
- my @exons ;
- #my $gene_mapper = $self->$COORDINATE_SYSTEMS{'chr'}. "-". $COORDINATE_SYSTEMS{'gene'};
- my $gene_mapper = "1-2";
- if (defined $self->{'_mappers'}->{$gene_mapper} ) {
-
- my $tmp_in = $self->{'_in'};
- my $tmp_out = $self->{'_out'};
- my $tmp_verb = $self->verbose;
- $self->verbose(0);
-
- $self->in('chr');
- $self->out('gene');
- @exons = map {$self->map($_) } @value;
-
- $self->{'_in'} = ($tmp_in);
- $self->{'_out'} = ($tmp_out);
- $self->verbose($tmp_verb);
- } else {
- @exons = @value;
- }
-
- my $cds_map = Bio::Coordinate::Collection->new;
- my $inex_map = Bio::Coordinate::Collection->new;
- my $exon_map = Bio::Coordinate::Collection->new;
- my $exon_cds_map = Bio::Coordinate::Collection->new;
- my $intron_map = Bio::Coordinate::Collection->new;
- my $negative_intron_map = Bio::Coordinate::Collection->new;
-
- my $tr_end = 0;
- my $coffset;
- my $exon_counter;
- my $prev_exon_end;
-
- for my $exon ( @exons ) {
- $exon_counter++;
-
- #
- # gene -> cds
- #
-
- my $match1 = Bio::Location::Simple->new
- (-seq_id =>'gene' ,
- -start => $exon->start,
- -end => $exon->end,
- -strand => 1,
- -verbose=> $self->verbose);
-
- my $match2 = Bio::Location::Simple->new
- (-seq_id => 'cds',
- -start => $tr_end + 1,
- -end => $tr_end + $exon->end - $exon->start +1,
- -strand=>$exon->strand,
- -verbose=>$self->verbose);
-
- $cds_map->add_mapper(Bio::Coordinate::Pair->new
- (-in => $match1,
- -out => $match2,
- )
- );
-
- if ($exon->start <= 1 and $exon->end >= 1) {
- $coffset = $tr_end - $exon->start + 1;
- }
- $tr_end = $tr_end + $exon->end - $exon->start + 1;
-
- #
- # gene -> intron
- #
-
- if (defined $prev_exon_end) {
- my $match3 = Bio::Location::Simple->new
- (-seq_id => 'gene',
- -start => $prev_exon_end + 1,
- -end => $exon->start -1,
- -strand => $exon->strand,
- -verbose => $self->verbose);
-
- my $match4 = Bio::Location::Simple->new
- (-seq_id => 'intron'. ($exon_counter -1),
- -start => 1,
- -end => $exon->start - 1 - $prev_exon_end,
- -strand =>$exon->strand,
- -verbose => $self->verbose,);
-
- # negative intron coordinates
- my $match5 = Bio::Location::Simple->new
- (-seq_id => 'intron'. ($exon_counter -1),
- -start => -1 * ($exon->start - 2 - $prev_exon_end) -1,
- -end => -1,
- -strand => $exon->strand,
- -verbose => $self->verbose);
-
- $inex_map->add_mapper(Bio::Coordinate::Pair->new
- (-in => $match3,
- -out => $match4
- )
- );
- $intron_map->add_mapper(Bio::Coordinate::Pair->new
- (-in => $self->_clone_loc($match3),
- -out => $self->_clone_loc($match4)
- )
- );
- $negative_intron_map->add_mapper(Bio::Coordinate::Pair->new
- (-in => $self->_clone_loc($match4),
- -out => $match5
- ));
-
- }
-
- # store the value
- $prev_exon_end = $exon->end;
-
- #
- # gene -> exon
- #
- my $match6 = Bio::Location::Simple->new
- (-seq_id => 'exon'. $exon_counter,
- -start => 1,
- -end => $exon->end - $exon->start +1,
- -strand => $exon->strand,
- -verbose=> $self->verbose,);
-
- my $pair2 = Bio::Coordinate::Pair->new(-in => $self->_clone_loc($match1),
- -out => $match6
- );
- my $pair3 = Bio::Coordinate::Pair->new(-in => $self->_clone_loc($match6),
- -out => $self->_clone_loc($match2)
- );
- $inex_map->add_mapper(Bio::Coordinate::Pair->new
- (-in => $self->_clone_loc($match1),
- -out => $match6
- )
- );
- $exon_map->add_mapper(Bio::Coordinate::Pair->new
- (-in => $self->_clone_loc($match1),
- -out => $self->_clone_loc($match6)
- )
- );
- $exon_cds_map->add_mapper(Bio::Coordinate::Pair->new
- (-in => $self->_clone_loc($match6),
- -out => $self->_clone_loc($match2)
- )
- );
-
- }
-
- # move coordinate start if exons have negative values
- if ($coffset) {
- foreach my $m ($cds_map->each_mapper) {
- $m->out->start($m->out->start - $coffset);
- $m->out->end($m->out->end - $coffset);
- }
-
- }
-
- $self->{'_mappers'}->{$cds_mapper} = $cds_map;
- $self->{'_mappers'}->{$exon_cds_mapper} = $exon_cds_map;
- $self->{'_mappers'}->{$inex_mapper} = $inex_map;
- $self->{'_mappers'}->{$exon_mapper} = $exon_map;
- $self->{'_mappers'}->{$intron_mapper} = $intron_map;
- $self->{'_mappers'}->{$negative_intron_mapper} = $negative_intron_map;
- }
- return @{$self->{'_chr_exons'}} || 0;
-}
-
-=head2 _clone_loc
-
- Title : _clone_loc
- Usage : $copy_of_loc = $obj->_clone_loc($loc);
- Function: Make a deep copy of a simple location
- Returns : a Bio::Location::Simple object
- Args : a Bio::Location::Simple object to be cloned
-
-=cut
-
-
-sub _clone_loc { # clone a simple location
- my ($self,$loc) = @_;
-
- $self->throw("I need a Bio::Location::Simple , not [". ref $loc. "]")
- unless $loc->isa('Bio::Location::Simple');
-
- return Bio::Location::Simple->new
- (-verbose => $self->verbose,
- -seq_id => $loc->seq_id,
- -start => $loc->start,
- -end => $loc->end,
- -strand => $loc->strand,
- -location_type => $loc->location_type
- );
-}
-
-
-=head2 cds
-
- Title : cds
- Usage : $obj->cds(20);
- Function: Set and read the offset of CDS from the start of transcipt
-
- Simple input can be an integer which gives the start of the
- coding region in genomic coordinate. If you want to provide
- the end of the coding region or indicate the use of the
- opposite strand, you have to pass a Bio::RangeI
- (e.g. Bio::Location::Simple or Bio::SegFeature::Generic)
- object to this method.
-
- Returns : set value or 0
- Args : new value (optional)
-
-=cut
-
-sub cds {
- my ($self,$value) = @_;
- if( defined $value) {
- if ($value =~ /^[+-]?\d+$/ ) {
- my $loc = Bio::Location::Simple->new(-start=>$value, -end => $value,
- -verbose=>$self->verbose);
- $self->{'_cds'} = $loc;
- }
- elsif (ref $value && $value->isa('Bio::RangeI') ) {
- $self->{'_cds'} = $value;
- } else {
- $self->throw("I need an integer or Bio::RangeI, not [$value]")
- }
- # strand !!
- my $len;
-
- $len = $self->{'_cds'}->end - $self->{'_cds'}->start +1
- if defined $self->{'_cds'}->end;
-
- my $a = $self->_create_pair
- ('chr', 'gene', 0,
- $self->{'_cds'}->start-1,
- $len,
- $self->{'_cds'}->strand);
- my $mapper = $COORDINATE_SYSTEMS{'chr'}. "-". $COORDINATE_SYSTEMS{'gene'};
- $self->{'_mappers'}->{$mapper} = $a;
-
- # recalculate exon-based mappers
- if ( defined $self->{'_chr_exons'} ) {
- $self->exons(@{$self->{'_chr_exons'}});
- }
-
- }
- return $self->{'_cds'} || 0;
-}
-
-
-=head2 map
-
- Title : map
- Usage : $newpos = $obj->map(5);
- Function: Map the location from the input coordinate system
- to a new value in the output coordinate system.
- Example :
- Returns : new value in the output coordiante system
- Args : a Bio::Location::Simple
-
-=cut
-
-sub map {
- my ($self,$value) = @_;
- my ($res);
- $self->throw("Need to pass me a Bio::Location::Simple or ".
- "Bio::Location::Simple or Bio::SeqFeatureI, not [".
- ref($value). "]")
- unless ref($value) && ($value->isa('Bio::Location::Simple') or
- $value->isa('Bio::Location::SplitLocationI') or
- $value->isa('Bio::SeqFeatureI'));
- $self->throw("Input coordinate system not set")
- unless $self->{'_in'};
- $self->throw("Output coordinate system not set")
- unless $self->{'_out'};
- $self->throw("Do not be silly. Input and output coordinate ".
- "systems are the same!")
- unless $self->{'_in'} != $self->{'_out'};
-
- $self->_check_direction();
-
- $value = $value->location if $value->isa('Bio::SeqFeatureI');
- $self->debug( "=== Start location: ". $value->start. ",".
- $value->end. " (". ($value->strand || ''). ")\n");
-
- # if nozero coordinate system is used in the input values
- if ( defined $self->{'_nozero'} &&
- ( $self->{'_nozero'} == 1 || $self->{'_nozero'} == 3 ) ) {
- $value->start($value->start + 1)
- if defined $value->start && $value->start < 1;
- $value->end($value->end + 1)
- if defined $value->end && $value->end < 1;
- }
-
- my @steps = $self->_get_path();
- $self->debug( "mapping ". $self->{'_in'}. "->". $self->{'_out'}.
- " Mappers: ". join(", ", @steps). "\n");
-
- foreach my $mapper (@steps) {
- if ($mapper eq $TRANSLATION) {
- if ($self->direction == 1) {
-
- $value = $self->_translate($value);
- $self->debug( "+ $TRANSLATION cds -> propeptide (translate) \n");
- } else {
- $value = $self->_reverse_translate($value);
- $self->debug("+ $TRANSLATION propeptide -> cds (reverse translate) \n");
- }
- }
- # keep the start and end values, and go on to next iteration
- # if this mapper is not set
- elsif ( ! defined $self->{'_mappers'}->{$mapper} ) {
- # update mapper name
- $mapper =~ /\d+-(\d+)/; my ($counter) = $1;
- $value->seq_id($COORDINATE_INTS{$counter});
- $self->debug( "- $mapper\n");
- } else {
- #
- # the DEFAULT : generic mapping
- #
-
- $value = $self->{'_mappers'}->{$mapper}->map($value);
-
- $value->purge_gaps
- if ($value && $value->isa('Bio::Location::SplitLocationI') &&
- $value->can('gap'));
-
- $self->debug( "+ $mapper (". $self->direction. "): start ".
- $value->start. " end ". $value->end. "\n")
- if $value && $self->verbose > 0;
- }
- }
-
- # if nozero coordinate system is asked to be used in the output values
- if ( defined $value && defined $self->{'_nozero'} &&
- ( $self->{'_nozero'} == 2 || $self->{'_nozero'} == 3 ) ) {
-
- $value->start($value->start - 1)
- if defined $value->start && $value->start < 1;
- $value->end($value->end - 1)
- if defined $value->end && $value->end < 1;
- }
-
- # handle merging of adjacent split locations!
-
- if (ref $value eq "Bio::Coordinate::Result" && $value->each_match > 1 ) {
- my $prevloc;
- my $merging = 0;
- my $newvalue;
- my @matches;
- foreach my $loc ( $value->each_Location(1) ) {
- unless ($prevloc) {
- $prevloc = $loc;
- push @matches, $prevloc;
- next;
- }
- if ($prevloc->end == ($loc->start - 1) &&
- $prevloc->seq_id eq $loc->seq_id) {
- $prevloc->end($loc->end);
- $merging = 1;
- } else {
- push @matches, $loc;
- $prevloc = $loc;
- }
- }
- if ($merging) {
- if (@matches > 1 ) {
- $newvalue = Bio::Coordinate::Result->new;
- map {$newvalue->add_sub_Location} @matches;
- } else {
- $newvalue = Bio::Coordinate::Result::Match->new
- (-seq_id => $matches[0]->seq_id,
- -start => $matches[0]->start,
- -end => $matches[0]->end,
- -strand => $matches[0]->strand,
- -verbose => $self->verbose,);
- }
- $value = $newvalue;
- }
- }
- elsif (ref $value eq "Bio::Coordinate::Result" &&
- $value->each_match == 1 ){
- $value = $value->match;
- }
-
- return $value;
-}
-
-=head2 direction
-
- Title : direction
- Usage : $obj->direction('peptide');
- Function: Read-only method for the direction of mapping deduced from
- predefined input and output coordinate names.
- Example :
- Returns : 1 or -1, mapping direction
- Args : new value (optional)
-
-=cut
-
-sub direction {
- my ($self) = @_;
- return $self->{'_direction'};
-}
-
-
-=head2 swap
-
- Title : swap
- Usage : $obj->swap;
- Function: Swap the direction of transformation
- (input <-> output)
- Example :
- Returns : 1
- Args :
-
-=cut
-
-sub swap {
- my ($self,$value) = @_;
-
- ($self->{'_in'}, $self->{'_out'}) = ($self->{'_out'}, $self->{'_in'});
- map { $self->{'_mappers'}->{$_}->swap } keys %{$self->{'_mappers'}};
-
- # record the changed direction;
- $self->{_direction} *= -1;
-
- return 1;
-}
-
-
-=head2 to_string
-
- Title : to_string
- Usage : $newpos = $obj->to_string(5);
- Function: Dump the internal mapper values into a human readable format
- Example :
- Returns : string
- Args :
-
-=cut
-
-sub to_string {
- my ($self) = shift;
-
- print "-" x 40, "\n";
-
- # chr-gene
- my $mapper_str = 'chr-gene';
- my $mapper = $self->_mapper_string2code($mapper_str);
-
- printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
- if (defined $self->cds) {
- my $end = $self->cds->end -1 if defined $self->cds->end;
- printf "%16s%s: %s (%s)\n", ' ', 'gene offset', $self->cds->start-1 , $end || '';
- printf "%16s%s: %s\n", ' ', 'gene strand', $self->cds->strand || 0;
- }
-
- # gene-intron
- $mapper_str = 'gene-intron';
- $mapper = $self->_mapper_string2code($mapper_str);
- printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
-
- my $i = 1;
- foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
- printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
- printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
- $i++;
- }
-
- # intron-negative_intron
- $mapper_str = 'intron-negative_intron';
- $mapper = $self->_mapper_string2code($mapper_str);
- printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
-
- $i = 1;
- foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
- printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
- printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
- $i++;
- }
-
-
- # gene-exon
- $mapper_str = 'gene-exon';
- $mapper = $self->_mapper_string2code($mapper_str);
- printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
-
- $i = 1;
- foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
- printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
- printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
- $i++;
- }
-
-
- # gene-cds
- $mapper_str = 'gene-cds';
- $mapper = $self->_mapper_string2code($mapper_str);
- printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
-
- $i = 1;
- foreach my $pair ( $self->{'_mappers'}->{$mapper}->each_mapper ) {
- printf "%8s :%8s -> %-12s\n", $i, $pair->in->start, $pair->out->start ;
- printf "%8s :%8s -> %-12s\n", '', $pair->in->end, $pair->out->end ;
- $i++;
- }
-
- # cds-propeptide
- $mapper_str = 'cds-propeptide';
- $mapper = $self->_mapper_string2code($mapper_str);
- printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
- printf "%9s%-12s\n", "", '"translate"';
-
-
- # propeptide-peptide
- $mapper_str = 'propeptide-peptide';
- $mapper = $self->_mapper_string2code($mapper_str);
- printf "\n %-12s (%s)\n", $mapper_str, $mapper ;
- printf "%16s%s: %s\n", ' ', "peptide offset", $self->peptide_offset;
-
-
-
- print "\nin : ", $self->in, "\n";
- print "out: ", $self->out, "\n";
- my $dir;
- $self->direction ? ($dir='forward') : ($dir='reverse');
- printf "direction: %-8s(%s)\n", $dir, $self->direction;
- print "\n", "-" x 40, "\n";
-
- 1;
-}
-
-sub _mapper_code2string {
- my ($self, $code) = @_;
- my ($a, $b) = $code =~ /(\d+)-(\d+)/;
- return $COORDINATE_INTS{$a}. '-'. $COORDINATE_INTS{$b};
-
-}
-
-sub _mapper_string2code {
- my ($self, $string) =@_;
- my ($a, $b) = $string =~ /([^-]+)-(.*)/;
- return $COORDINATE_SYSTEMS{$a}. '-'. $COORDINATE_SYSTEMS{$b};
-}
-
-
-=head2 _create_pair
-
- Title : _create_pair
- Usage : $mapper = $obj->_create_pair('chr', 'gene', 0, 2555, 10000, -1);
- Function: Internal helper method to create a mapper between
- two coordinate systems
- Returns : a Bio::Coordinate::Pair object
- Args : string, input coordinate system name,
- string, output coordinate system name,
- boolean, strict mapping
- positive integer, offset
- positive integer, length
- 1 || -1 , strand
-
-=cut
-
-sub _create_pair {
- my ($self, $in, $out, $strict, $offset, $length, $strand ) = @_;
- $strict ||= 0;
- $strand ||= 1;
- $length ||= 20;
-
- my $match1 = Bio::Location::Simple->new
- (-seq_id => $in,
- -start => $offset+1,
- -end => $offset+$length,
- -strand => 1,
- -verbose => $self->verbose);
-
- my $match2 = Bio::Location::Simple->new
- (-seq_id => $out,
- -start => 1,
- -end => $length,
- -strand => $strand,
- -verbose => $self->verbose);
-
- my $pair = Bio::Coordinate::ExtrapolatingPair->new
- (-in => $match1,
- -out => $match2,
- -strict => $strict,
- -verbose => $self->verbose,
- );
-
- return $pair;
-
-}
-
-
-=head2 _translate
-
- Title : _translate
- Usage : $newpos = $obj->_translate($loc);
- Function: Translate the location from the CDS coordinate system
- to a new value in the propeptide coordinate system.
- Example :
- Returns : new location
- Args : a Bio::Location::Simple or Bio::Location::SplitLocationI
-
-=cut
-
-sub _translate {
- my ($self,$value) = @_;
-
- $self->throw("Need to pass me a Bio::Location::Simple or ".
- "Bio::Location::SplitLocationI, not [". ref($value). "]")
- unless defined $value &&
- ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI'));
-
- my $seqid = 'propeptide';
-
- if ($value->isa("Bio::Location::SplitLocationI") ) {
- my $split = Bio::Location::Split->new(-seq_id=>$seqid);
- foreach my $loc ( $value->each_Location(1) ) {
- my $match = Bio::Location::Simple->new
- (-start => int ($loc->start / 3 ) +1,
- -end => int ($loc->end / 3 ) +1,
- -seq_id => $seqid,
- -strand => 1,
- -verbose => $self->verbose,
- );
- $split->add_sub_Location($match);
- }
- return $split;
-
- } else {
- return new Bio::Location::Simple(-start => int($value->start / 3 )+1,
- -end => int($value->end / 3 )+1,
- -seq_id => $seqid,
- -strand => 1,
- -verbose=> $self->verbose,
- );
- }
-}
-
-sub _frame {
- my ($self,$value) = @_;
-
- $self->throw("Need to pass me a Bio::Location::Simple or ".
- "Bio::Location::SplitLocationI, not [". ref($value). "]")
- unless defined $value &&
- ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI'));
-
- my $seqid = 'propeptide';
-
- if ($value->isa("Bio::Location::SplitLocationI")) {
- my $split = Bio::Location::Split->new(-seq_id=>$seqid);
- foreach my $loc ( $value->each_Location(1) ) {
-
- my $match = Bio::Location::Simple->new
- (-start => ($value->start-1) % 3 +1,
- -end => ($value->end-1) % 3 +1,
- -seq_id => 'frame',
- -strand => 1,
- -verbose=> $self->verbose);
- $split->add_sub_Location($match);
- }
- return $split;
- } else {
- return new Bio::Location::Simple(-start => ($value->start-1) % 3 +1,
- -end => ($value->end-1) % 3 +1,
- -seq_id => 'frame',
- -strand => 1,
- -verbose => $self->verbose,
- );
- }
-}
-
-
-=head2 _reverse_translate
-
- Title : _reverse_translate
- Usage : $newpos = $obj->_reverse_translate(5);
- Function: Reverse translate the location from the propeptide
- coordinate system to a new value in the CSD.
- Note that a single peptide location expands to cover
- the codon triplet
- Example :
- Returns : new location in the CDS coordinate system
- Args : a Bio::Location::Simple or Bio::Location::SplitLocationI
-
-=cut
-
-sub _reverse_translate {
- my ($self,$value) = @_;
-
-
- $self->throw("Need to pass me a Bio::Location::Simple or ".
- "Bio::Location::SplitLocationI, not [". ref($value). "]")
- unless defined $value &&
- ($value->isa('Bio::Location::Simple') || $value->isa('Bio::Location::SplitLocationI'));
-
- my $seqid = 'cds';
-
- if ($value->isa("Bio::Location::SplitLocationI")) {
- my $split = Bio::Location::Split->new(-seq_id=>$seqid);
- foreach my $loc ( $value->each_Location(1) ) {
-
- my $match = Bio::Location::Simple->new
- (-start => $value->start * 3 - 2,
- -end => $value->end * 3,
- -seq_id => $seqid,
- -strand => 1,
- -verbose => $self->verbose,
- );
- $split->add_sub_Location($match);
- }
- return $split;
-
- } else {
- return new Bio::Location::Simple(-start => $value->start * 3 - 2,
- -end => $value->end * 3,
- -seq_id => $seqid,
- -strand => 1,
- -verbose => $self->verbose,
- );
- }
-}
-
-
-=head2 _check_direction
-
- Title : _check_direction
- Usage : $obj->_check_direction();
- Function: Check and swap when needed the direction the location
- mapping Pairs based on input and output values
- Example :
- Returns : new location
- Args : a Bio::Location::Simple
-
-=cut
-
-sub _check_direction {
- my ($self) = @_;
-
- my $new_direction = 1;
- $new_direction = -1 if $self->{'_in'} > $self->{'_out'};
-
- unless ($new_direction == $self->{_direction} ) {
- map { $self->{'_mappers'}->{$_}->swap } keys %{$self->{'_mappers'}};
- # record the changed direction;
- $self->{_direction} *= -1;
- }
- 1;
-}
-
-
-=head2 _get_path
-
- Title : _get_path
- Usage : $obj->_get_path('peptide');
- Function: internal method for finding that shortest path between
- input and output coordinate systems.
- Calculations and caching are handled by the graph class.
- See L<Bio::Coordinate::Graph>.
- Example :
- Returns : array of the mappers
- Args : none
-
-=cut
-
-sub _get_path {
- my ($self) = @_;
-
- my $start = $self->{'_in'} || 0;
- my $end = $self->{'_out'} || 0;
-
- # note the order
- # always go from smaller to bigger: it makes caching more efficient
- my $reverse;
- if ($start > $end) {
- ($start, $end) = ($end, $start );
- $reverse++;
- }
-
- my @mappers;
- if (exists $self->{'_previous_path'} and
- $self->{'_previous_path'} eq "$start$end" ) {
- # use cache
- @mappers = @{$self->{'_mapper_path'}};
- } else {
- my $mapper;
- my $prev_node = '';
- @mappers =
- map { $mapper = "$prev_node-$_"; $prev_node = $_; $mapper; }
- $self->{'_graph'}->shortest_path($start, $end);
- shift @mappers;
-
- $self->{'_previous_path'} = "$start$end";
- $self->{'_mapper_path'} = \@mappers;
- }
-
- $reverse ? return reverse @mappers : return @mappers;
-}
-
-
-1;
-
409 Bio/Coordinate/Graph.pm
View
@@ -1,409 +0,0 @@
-#
-# bioperl module for Bio::Coordinate::Graph
-#
-# Please direct questions and support issues to <bioperl-l@bioperl.org>
-#
-# Cared for by Heikki Lehvaslaiho <heikki-at-bioperl-dot-org>
-#
-# Copyright Heikki Lehvaslaiho
-#
-# You may distribute this module under the same terms as perl itself
-
-# POD documentation - main docs before the code
-
-=head1 NAME
-
-Bio::Coordinate::Graph - Finds shortest path between nodes in a graph
-
-=head1 SYNOPSIS
-
- # get a hash of hashes representing the graph. E.g.:
- my $hash= {
- '1' => {
- '2' => 1
- },
- '2' => {
- '4' => 1,
- '3' => 1
- },
- '3' => undef,
- '4' => {
- '5' => 1
- },
- '5' => undef
- };
-
- # create the object;
- my $graph = Bio::Coordinate::Graph->new(-graph => $hash);
-
- # find the shortest path between two nodes
- my $a = 1;
- my $b = 6;
- my @path = $graph->shortest_paths($a);
- print join (", ", @path), "\n";
-
-
-=head1 DESCRIPTION
-
-This class calculates the shortest path between input and output
-coordinate systems in a graph that defines the relationships between
-them. This class is primarely designed to analyze gene-related
-coordinate systems. See L<Bio::Coordinate::GeneMapper>.
-
-Note that this module can not be used to manage graphs.
-
-Technically the graph implemented here is known as Directed Acyclic
-Graph (DAG). DAG is composed of vertices (nodes) and edges (with
-optional weights) linking them. Nodes of the graph are the coordinate
-systems in gene mapper.
-
-The shortest path is found using the Dijkstra's algorithm. This
-algorithm is fast and greedy and requires all weights to be
-positive. All weights in the gene coordinate system graph are
-currently equal (1) making the graph unweighted. That makes the use of
-Dijkstra's algorithm an overkill. A simpler and faster breadth-first
-would be enough. Luckily the difference for small graphs is not
-significant and the implementation is capable of taking weights into
-account if needed at some later time.
-
-=head2 Input format
-
-The graph needs to be primed using a hash of hashes where there is a
-key for each node. The second keys are the names of the downstream
-neighboring nodes and values are the weights for reaching them. Here
-is part of the gene coordiante system graph::
-
-
- $hash = {
- '6' => undef,
- '3' => {
- '6' => 1
- },
- '2' => {
- '6' => 1,
- '4' => 1,
- '3' => 1
- },
- '1' => {
- '2' => 1
- },
- '4' => {
- '5' => 1
- },
- '5' => undef
- };
-
-
-Note that the names need to be positive integers. Root should be '1'
-and directness of the graph is taken advantage of to speed
-calculations by assuming that downsream nodes always have larger
-number as name.
-
-An alternative (shorter) way of describing input is to use hash of
-arrays. See L<Bio::Coordinate::Graph::hash_of_arrays>.
-
-
-=head1 FEEDBACK
-
-=head2 Mailing Lists
-
-User feedback is an integral part of the evolution of this and other
-Bioperl modules. Send your comments and suggestions preferably to the
-Bioperl mailing lists Your participation is much appreciated.
-
- bioperl-l@bioperl.org - General discussion
- http://bioperl.org/wiki/Mailing_lists - About the mailing lists
-
-=head2 Support
-
-Please direct usage questions or support issues to the mailing list:
-
-I<bioperl-l@bioperl.org>
-
-rather than to the module maintainer directly. Many experienced and
-reponsive experts will be able look at the problem and quickly
-address it. Please include a thorough description of the problem
-with code and data examples if at all possible.
-
-=head2 Reporting Bugs
-
-report bugs to the Bioperl bug tracking system to help us keep track
-the bugs and their resolution. Bug reports can be submitted via the
-web:
-
- https://redmine.open-bio.org/projects/bioperl/
-
-=head1 AUTHOR - Heikki Lehvaslaiho
-
-Email: heikki-at-bioperl-dot-org
-
-=head1 APPENDIX
-
-The rest of the documentation details each of the object
-methods. Internal methods are usually preceded with a _
-
-=cut
-
-
-# Let the code begin...
-
-package Bio::Coordinate::Graph;
-use strict;
-
-# Object preamble - inherits from Bio::Root::Root
-
-use base qw(Bio::Root::Root);
-
-
-sub new {
- my($class,@args) = @_;
- my $self = $class->SUPER::new(@args);
-
- my($graph, $hasharray) =
- $self->_rearrange([qw(
- GRAPH
- HASHARRAY
- )],
- @args);
-
- $graph && $self->graph($graph);
- $hasharray && $self->hasharray($hasharray);
-
- $self->{'_root'} = undef;
-
- return $self; # success - we hope!
-
-}
-
-=head2 Graph structure input methods
-
-=cut
-
-=head2 graph
-
- Title : graph
- Usage : $obj->graph($my_graph)
- Function: Read/write method for the graph structure
- Example :
- Returns : hash of hashes grah structure
- Args : reference to a hash of hashes
-
-=cut
-
-sub graph {
-
- my ($self,$value) = @_;
-
- if ($value) {
- $self->throw("Need a hash of hashes")
- unless ref($value) eq 'HASH' ;
- $self->{'_dag'} = $value;
-
- # empty the cache
- $self->{'_root'} = undef;
-
- }
-
- return $self->{'_dag'};
-
-}
-
-
-=head2 hash_of_arrays
-
- Title : hash_of_arrays
- Usage : $obj->hash_of_array(%hasharray)
- Function: An alternative method to read in the graph structure.
- Hash arrays are easier to type. This method converts
- arrays into hashes and assigns equal values "1" to
- weights.
-
- Example : Here is an example of simple structure containing a graph.
-
- my $DAG = {
- 6 => [],
- 5 => [],
- 4 => [5],
- 3 => [6],
- 2 => [3, 4, 6],
- 1 => [2]
- };
-
- Returns : hash of hashes graph structure
- Args : reference to a hash of arrays
-
-=cut
-
-sub hash_of_arrays {
-
- my ($self,$value) = @_;
-
- # empty the cache
- $self->{'_root'} = undef;
-
- if ($value) {
-
- $self->throw("Need a hash of hashes")
- unless ref($value) eq 'HASH' ;
-
- #copy the hash of arrays into a hash of hashes;
- my %hash;
- foreach my $start ( keys %{$value}){
- $hash{$start} = undef;
- map { $hash{$start}{$_} = 1 } @{$value->{$start}};
- }
-
- $self->{'_dag'} = \%hash;
- }
-
- return $self->{'_dag'};
-
-}
-
-=head2 Methods for determining the shortest path in the graph
-
-=cut
-
-=head2 shortest_path
-
- Title : shortest_path
- Usage : $obj->shortest_path($a, $b);
- Function: Method for retrieving the shortest path between nodes.
- If the start node remains the same, the method is sometimes
- able to use cached results, otherwise it will recalculate
- the paths.
- Example :
- Returns : array of node names, only the start node name if no path
- Args : name of the start node
- : name of the end node
-
-=cut
-
-
-sub shortest_path {
- my ($self, $root, $end) = @_;
-
- $self->throw("Two arguments needed") unless @_ == 3;
- $self->throw("No node name [$root]")
- unless exists $self->{'_dag'}->{$root};
- $self->throw("No node name [$end]")
- unless exists $self->{'_dag'}->{$end};
-
- my @res; # results
- my $reverse;
-
- if ($root > $end) {
- ($root, $end) = ($end, $root );
- $reverse++;
- }
-
- # try to use cached paths
- $self->dijkstra($root) unless
- defined $self->{'_root'} and $self->{'_root'} eq $root;
-
- return @res unless $self->{'_paths'} ;
-
- # create the list
- my $node = $end;
- my $prev = $self->{'_paths'}->{$end}{'prev'};
- while ($prev) {
- unshift @res, $node;
- $node = $self->{'_paths'}->{$node}{'prev'};
- $prev = $self->{'_paths'}->{$node}{'prev'};
- }
- unshift @res, $node;
-
- $reverse ? return reverse @res : return @res;
-}
-
-
-=head2 dijkstra
-
- Title : dijkstra
- Usage : $graph->dijkstra(1);
- Function: Implements Dijkstra's algorithm.
- Returns or sets a list of mappers. The returned path
- description is always directed down from the root.
- Called from shortest_path().
- Example :
- Returns : Reference to a hash of hashes representing a linked list
- which contains shortest path down to all nodes from the start
- node. E.g.:
-
- $res = {
- '2' => {
- 'prev' => '1',
- 'dist' => 1
- },
- '1' => {
- 'prev' => undef,
- 'dist' => 0
- },
- };
-
- Args : name of the start node
-
-=cut
-
-#' keep emacs happy
-
-sub dijkstra {
- my ($self,$root) = @_;
-
- $self->throw("I need the name of the root node input") unless $root;
- $self->throw("No node name [$root]")
- unless exists $self->{'_dag'}->{$root};
-
- my %est = (); # estimate hash
- my %res = (); # result hash
- my $nodes = keys %{$self->{'_dag'}};
- my $maxdist = 1000000;
-
- # cache the root value
- $self->{'_root'} = $root;
-
- foreach my $node ( keys %{$self->{'_dag'}} ){
- if ($node eq $root) {
- $est{$node}{'prev'} = undef;
- $est{$node}{'dist'} = 0;
- } else {
- $est{$node}{'prev'} = undef;
- $est{$node}{'dist'} = $maxdist;
- }
- }
-
- # remove nodes from %est until it is empty
- while (keys %est) {
-
- #select the node closest to current one, or root node
- my $min_node;
- my $min = $maxdist;
- foreach my $node (reverse sort keys %est) {
- if ( $est{$node}{'dist'} < $min ) {
- $min = $est{$node}{'dist'};
- $min_node = $node;
- }
- }
-
- # no more links between nodes
- last unless ($min_node);
-
- # move the node from %est into %res;
- $res{$min_node} = delete $est{$min_node};
-
- # recompute distances to the neighbours
- my $dist = $res{$min_node}{'dist'};
- foreach my $neighbour ( keys %{$self->{'_dag'}->{$min_node}} ){
- next unless $est