Permalink
Browse files

Added methods for in-silico molecular cloning to Bio::SeqUtils.

delete: remove a segment from a sequence object, preserving annotations
and features.
insert: insert a fragment sequence object into a recipient sequence object,
preserving features and annotations
ligate: combine delete and insert to simulate digestion of a recipient
and ligation of a fragment into the recipient.
  • Loading branch information...
1 parent 4295078 commit f489238a9faa84fd7eaa6d238f8f20f37c5f17d5 Frank Schwach committed Jan 10, 2012
Showing with 564 additions and 1 deletion.
  1. +564 −1 Bio/SeqUtils.pm
View
565 Bio/SeqUtils.pm
@@ -136,7 +136,7 @@ package Bio::SeqUtils;
use vars qw(%ONECODE %THREECODE);
use strict;
use Carp;
-
+use Scalar::Util qw(blessed);
use base qw(Bio::Root::Root);
# new inherited from RootI
@@ -492,6 +492,569 @@ sub trunc_with_features{
}
+=head2 delete
+
+ Title : delete
+ Function: cuts a segment out of a sequence and re-joins the left and right fragments
+ (like splicing or digesting and re-ligating a molecule).
+ Positions (and types) of sequence features are adjusted accordingly:
+ Features that span the cut site are converted to split featuress to
+ indicate the disruption.
+ Features that extend into the cut-out fragment are truncated and
+ the end that was cut off becomes a fuzzy location to indicate that it
+ is no longer the true end of the original feature.
+ A new molecule is created and returned.
+ Usage : my $cutseq = Bio::SeqUtils::PbrTools->cut( $seq, 1000, 1100 );
+ Args : a Bio::PrimarySeqI compliant object to cut,
+ first nt of the segment to be deleted
+ last nt of the segment to be deleted
+ Returns : a new Bio::Seq object
+
+=cut
+
+sub delete{
+ my $self = shift;
+ my ( $seq, $left, $right) = @_ ;
+ $self->throw( 'was expecting 3 paramters but got '.@_) unless @_ == 3;
+
+ $self->throw( 'Object of class ['
+ . ref($seq)
+ . '] should be a Bio::PrimarySeqI ' )
+ unless blessed ($seq) && $seq->isa('Bio::PrimarySeqI');
+
+ $self->throw("Left coordinate ($left) must be >= 1") if $left < 1;
+ if ($right > $seq->length ){
+ $self->throw("Right coordinate ($right) must be less than "
+ . 'sequence length ('. $seq->length .')');
+ }
+
+ # piece together the sequence string of the remaining fragments
+ my $left_seq = $seq->subseq( 1, $left - 1) ;
+ my $right_seq = $seq->subseq( $right + 1, $seq->length);
+ if (!$left_seq || !$right_seq ){
+ $self->throw('could not assemble sequences. At least one of the fragments is empty');
+ }
+ my $seq_str = $left_seq . $right_seq;
+
+ # create the new seq object
+ my $seqclass;
+ if ( $seq->can_call_new ) {
+ $seqclass = ref($seq);
+ } else {
+ $seqclass = 'Bio::PrimarySeq';
+ $self->_attempt_to_load_Seq;
+ }
+ my $product = $seqclass->new(
+ -seq => $seq_str,
+ -display_id => $seq->display_id.
+ -accession_number => $seq->accession_number,
+ -alphabet => $seq->alphabet,
+ -desc => $seq->desc,
+ -verbose => $seq->verbose,
+ -is_circular => $seq->is_circular,
+ );
+
+ # move annotations
+ if ( $product->isa("Bio::AnnotatableI") && $seq->isa("Bio::AnnotatableI") ) {
+ foreach my $key ( $seq->annotation->get_all_annotation_keys ) {
+ foreach my $value ( $seq->annotation->get_Annotations($key) ) {
+ $product->annotation->add_Annotation( $key, $value );
+ }
+ }
+ }
+
+ # move sequence features
+ # or delete them
+ if ( $product->isa('Bio::SeqI') && $seq->isa('Bio::SeqI') ) {
+ for my $feat ( $seq->get_SeqFeatures ) {
+ my $adjfeat = $self->_coord_adjust_deletion( $feat, $left, $right );
+ $product->add_SeqFeature( $adjfeat ) if $adjfeat;
+ }
+ }
+
+ return $product;
+}
+
+=head2 insert
+
+ Title : insert
+ Function: inserts a fragment (a Bio::Seq object) into a nother sequence object
+ adding all annotations and features to the final product.
+ Features that span the insertion site are converted to split
+ features to indicate the disruption.
+ A new feature is added to indicate the inserted fragment itself.
+ A new molecule is created and returned.
+ Usage : # insert a fragment after pos 1000
+ my $insert_seq = Bio::SeqUtils::PbrTools->insert(
+ $recipient_seq,
+ $fragment_seq,
+ 1000
+ );
+ Args : recipient sequence (a Bio::PrimarySeqI compliant object),
+ a fragmetn to insert (Bio::PrimarySeqI compliant object),
+ insertion position (fragment is inserted to the right of this pos)
+ pos=0 will prepend the fragment to the recipient
+ Returns : a new Bio::Seq object
+
+=cut
+
+sub insert{
+ my $self = shift;
+ my ( $recipient, $fragment, $insert_pos) = @_ ;
+ $self->throw( 'was expecting 3 paramters but got '.@_) unless @_ == 3;
+
+ $self->throw( 'Recipient object of class ['
+ . ref($recipient)
+ . '] should be a Bio::PrimarySeqI ' )
+ unless blessed ($recipient) && $recipient->isa('Bio::PrimarySeqI');
+
+ $self->throw( 'Fragment object of class ['
+ . ref($fragment)
+ . '] should be a Bio::PrimarySeqI ' )
+ unless blessed ($fragment) && $fragment->isa('Bio::PrimarySeqI');
+
+ $self->throw( 'Can\'t concatenate sequences with different alphabets: '
+ . 'recipient is '.$recipient->alphabet
+ . ' and fragment is '. $fragment->alphabet )
+ unless $recipient->alphabet eq $fragment->alphabet;
+
+ if ($insert_pos < 0 or $insert_pos > $recipient->length ){
+ $self->throw("insertion position ($insert_pos) must be between 0 and "
+ . 'recipient sequence length ('. $recipient->length .')');
+ }
+
+ if ($fragment->can('is_circular') && $fragment->is_circular){
+ $self->throw('Can\'t insert circular fragments');
+ }
+
+ if ( !$recipient->seq ){
+ $self->throw('Recipient has no sequence, can not insert into this object');
+ }
+
+ # construct raw sequence of the new molecule
+ my $left_seq = $insert_pos > 0
+ ? $recipient->subseq( 1, $insert_pos )
+ : '';
+ my $mid_seq = $fragment->seq;
+ my $right_seq = $insert_pos < $recipient->length
+ ? $recipient->subseq( $insert_pos + 1, $recipient->length)
+ : '';
+ my $seq_str = $left_seq . $mid_seq . $right_seq;
+
+ # create the new seq object with the same class as the recipient
+ my $seqclass;
+ if ( $recipient->can_call_new ) {
+ $seqclass = ref($recipient);
+ } else {
+ $seqclass = 'Bio::Primaryseq';
+ $self->_attempt_to_load_seq;
+ }
+
+ my @desc;
+ push @desc, 'Inserted fragment: '.$fragment->desc if defined $fragment->desc;
+ push @desc, 'Recipient: '.$recipient->desc if defined $recipient->desc;
+ my $product = $seqclass->new(
+ -seq => $seq_str,
+ -display_id => $recipient->display_id,
+ -accession_number => $recipient->accession_number || '',
+ -alphabet => $recipient->alphabet,
+ -desc => join('; ', @desc),
+ -verbose => $recipient->verbose || $fragment->verbose,
+ -is_circular => $recipient->is_circular || 0,
+ );
+
+ # move annotations from recipient and fragment to product
+ if ( $product->isa("Bio::AnnotatableI") ) {
+ foreach ( $recipient, $fragment ) {
+ if ( $_->isa("Bio::AnnotatableI") ) {
+ foreach my $key ( $_->annotation->get_all_annotation_keys ) {
+ foreach my $value ( $_->annotation->get_Annotations($key) ) {
+ $product->annotation->add_Annotation( $key, $value );
+ }
+ }
+ }
+ }
+ }
+
+ # move sequence features to product with adjusted coordinates
+ if ( $product->isa('Bio::SeqI') ) {
+ # for the fragment, just shift the features to new position
+ if ( $fragment->isa('Bio::SeqI') ) {
+ for my $feat ( $fragment->get_SeqFeatures ) {
+ $product->add_SeqFeature( $self->_coord_adjust( $feat, $insert_pos ) );
+ }
+ }
+ # for recipient, shift and modify features according to insertion
+ if ( $recipient->isa('Bio::SeqI') ) {
+ for my $feat ( $recipient->get_SeqFeatures ) {
+ my $adjfeat = $self->_coord_adjust_insertion( $feat, $insert_pos, $fragment->length );
+ $product->add_SeqFeature($adjfeat) if $adjfeat;
+ }
+ }
+ }
+
+ # add a feautre to annotate the insertion
+ my $insertion_feature = Bio::SeqFeature::Generic->new(
+ -start => $insert_pos + 1,
+ -end => $insert_pos + $fragment->length,
+ -primary_tag => 'misc_feature',
+ -tag => {note => 'inserted fragment'},
+ );
+ $product->add_SeqFeature( $insertion_feature );
+
+ return $product;
+}
+
+=head2 ligate
+
+ title : ligate
+ function: pastes a fragment (which can also have features) into a recipient
+ sequence between two "cut" sites, preserving features and adjusting
+ their locations.
+ This is a shortcut for deleting a segment from a sequence object followed
+ by an insertion of a fragmnet and is supposed to be used to simulate
+ in-vitro cloning where a recipient (a vector) is digested and a fragment
+ is then ligated into the recipient molecule. The fragment can be flipped
+ (reverse-complemented with all its features).
+ A new sequence object is returned to represent the product of the reaction.
+ Features and annotations are transferred from the insert to the product
+ and features on the recipient are adjusted according to the methods
+ L</"delete"> amd L</"insert">:
+ Features spanning the insertion site will be split up into two sub-locations.
+ (Sub-)features in the deleted region are themselves deleted.
+ Start/edn positions of (Sub-)features that extend into the deleted region
+ are turned into "fuzzy" positions to indicate that the true start/end was lost.
+ The class of the product object depends on the class of the recipient (vector)
+ sequence object. if it is not possible to instantiate a new
+ object of that class, a Bio::Primaryseq object is created instead.
+ usage : # insert the flipped fragment between positions 1000 and 1100 of the
+ # vector, i.e. everything between these two positions is deleted and
+ # replaced by the fragment
+ my $new_molecule = Bio::Sequtils::Pbrtools->ligate(
+ -vector => $vector,
+ -fragment => $fragment,
+ -left => 1000,
+ -right => 1100,
+ -flip => 1
+ );
+ args : vector: the recipient molecule
+ fragment: molecule that is to be ligated into the vector
+ left: left cut site (fragment will be inserted to the right of
+ this position)
+ optional:
+ right: right cut site (fragment will be inseterted to the
+ left of this position). defaults to left+1
+ flip: boolean, if true, the fragment is reverse-complemented
+ (including features) before inserting
+ returns : a new Bio::Seq object of the ligated fragments
+
+=cut
+
+sub ligate {
+ my $self = shift;
+ my ($recipient, $fragment, $left, $right, $flip) = $self->_rearrange(
+ [qw(RECIPIENT FRAGMENT LEFT RIGHT FLIP)],@_);
+ $self->throw("missing required parameter 'recipient'") unless $recipient;
+ $self->throw("missing required parameter 'fragment'") unless $fragment;
+ $self->throw("missing required parameter 'left'") unless defined $left;
+
+ $right ||= $left + 1;
+
+ $self->throw("Fragment must be a Bio::PrimarySeqI compliant object but it is a ".
+ ref($fragment)) unless blessed($fragment) && $fragment->isa('Bio::PrimarySeqI');
+
+ $fragment = $fragment->revcom_with_features if $flip;
+
+ # clone in two steps: first delete between the insertion sites,
+ # then insert the fragment
+ my ($product1, $product2) ;
+ eval { $product1 = $self->delete($recipient, $left + 1, $right - 1 ) };
+ $self->throw("Failed in step 1 (cut recipient): ".$@) if $@;
+ eval { $product2 = $self->insert( $product1, $fragment, $left ) };
+ $self->throw("Failed in step 2 (insert fragment): ".$@) if $@;
+
+ return $product2;
+
+}
+
+
+=head2 _coord_adjust_deletion
+
+ title : _coord_adjust_deletion
+ function: recursively adjusts coordinates of seqfeatures on a molecule
+ where a segment has been deleted.
+ (sub)features that span the deletion site become split features.
+ (sub)features that extend into the deletion site are truncated
+ and the end that was cut off becomes a fuzzy location.
+ usage : my $adjusted_feature = Bio::Sequtils::_coord_adjust_deletion(
+ $feature,
+ $start,
+ $end
+ );
+ args : a Bio::SeqFeatureI compliant object,
+ start (inclusive) position of the deletion site,
+ end (inclusive) position of the deletion site
+ returns : a Bio::SeqFeatureI compliant object
+
+=cut
+
+sub _coord_adjust_deletion {
+ my ($self, $feat, $left, $right)=@_;
+
+ $self->throw('object [$feat] '. 'of class ['. ref($feat)
+ . '] should be a Bio::SeqFeatureI ')
+ unless $feat->isa('Bio::SeqFeatureI');
+ $self->throw('missing coordinates: need a left and a right position')
+ unless defined $left && defined $right;
+
+ if ($left > $right){
+ if ($feat->can('is_circular') && $feat->is_circular){
+ # todo handle circular molecules
+ $self->throw('can not yet handle deletions in circular molecules if deletion spans origin');
+ } else {
+ $self->throw("left coordinate ($left) must be less than right ($right)"
+ . " but it was greater");
+ }
+ }
+
+ my $deletion = Bio::Location::Simple->new(
+ -start => $left,
+ -end => $right,
+ );
+ my $del_length = $right - $left + 1;
+
+ my @adjsubfeat;
+ for my $subfeat ($feat->remove_SeqFeatures) {
+ my $adjsubfeat = $self->_coord_adjust_deletion($subfeat, $left, $right);
+ push @adjsubfeat, $adjsubfeat if $adjsubfeat;
+ }
+
+ my @loc;
+ for ($feat->location->each_Location) {
+ next if $deletion->contains( $_ ); # this location will be deleted;
+ my $strand = $_->strand;
+ my $type = $_->location_type;
+ my $start = $_->start;
+ my $start_type = $_->can('start_pos_type') ? $_->start_pos_type : undef;
+ my $end = $_->end;
+ my $end_type = $_->can('end_pos_type') ? $_->end_pos_type : undef;
+ my @newcoords=();
+ if ($_->contains( $deletion )){ # split the feature
+ @newcoords = (
+ [ $start, '>'.($deletion->start - 1), $start_type, 'AFTER' ],
+ [ '<'.($deletion->start ), $end - $del_length, 'BEFORE', $end_type ]
+ );
+ } elsif ($_->contains( $deletion->start )){ # truncate feature end
+ @newcoords = ([$start, ($deletion->start - 1), $start_type, 'AFTER' ]);
+ } elsif ($_->contains( $deletion->end )){ # truncate feature start and shift end
+ @newcoords = ([($deletion->start), $end - $del_length, 'BEFORE', $end_type]);
+ } elsif ($start >= $deletion->end){ # just shift entire location
+ @newcoords = ([$start - $del_length, $end - $del_length, $start_type, $end_type ]);
+ } else { # not affected by deletion
+ @newcoords = ( [$start, $end, $start_type, $end_type ]);
+ }
+
+ # if we have no coordinates, we return nothing
+ # the feature is deleted
+ return unless @newcoords;
+
+ my @subloc = $self->_location_objects_from_coordinate_list(
+ \@newcoords,
+ $strand,
+ $type
+ );
+
+ # put together final location which could be a split now
+ push @loc, $self->_single_loc_object_from_collection( @subloc );
+ } # each location
+
+ # create new feature based on original one and move annotation across
+ my $newfeat=Bio::SeqFeature::Generic->new(-primary=>$feat->primary_tag);
+ foreach my $key ( $feat->annotation->get_all_annotation_keys() ) {
+ foreach my $value ( $feat->annotation->get_Annotations($key) ) {
+ $newfeat->annotation->add_Annotation($key, $value);
+ }
+ }
+ foreach my $key ( $feat->get_all_tags() ) {
+ $newfeat->add_tag_value($key, $feat->get_tag_values($key));
+ }
+
+ # set modified location(s) for the new feature and
+ # add its subfeatures if any
+ my $loc = $self->_single_loc_object_from_collection( @loc );
+ $loc ? $newfeat->location( $loc ) : return ;
+ $newfeat->add_SeqFeature($_) for @adjsubfeat;
+
+ return $newfeat;
+
+}
+
+=head2 _coord_adjust_insertion
+
+ title : _coord_adjust_insertion
+ function: recursively adjusts coordinates of seqfeatures on a molecule
+ where another sequence has been inserted.
+ (sub)features that span the insertion site become split features.
+ In contrast to _coord_adjust_deletion, affected feature start/ends
+ are not turned fuzzy because no part of the feature is lost.
+ usage : my $adjusted_feature = Bio::Sequtils::_coord_adjust_insertion(
+ $feature,
+ $insert_pos,
+ $insert_length
+ );
+ args : a Bio::SeqFeatureI compliant object,
+ insertion position (insert to the right of this position)
+ length of inserted fragment
+ returns : a Bio::SeqFeatureI compliant object
+
+=cut
+
+sub _coord_adjust_insertion {
+ my ($self, $feat, $insert_pos, $insert_len)=@_;
+
+ $self->throw('object [$feat] '. 'of class ['. ref($feat)
+ . '] should be a Bio::SeqFeatureI ')
+ unless $feat->isa('Bio::SeqFeatureI');
+ $self->throw('missing insert position') unless defined $insert_pos;
+ $self->throw('missing insert length') unless defined $insert_len;
+
+ my @adjsubfeat;
+ for my $subfeat ($feat->remove_SeqFeatures) {
+ push @adjsubfeat, $self->_coord_adjust_insertion($subfeat, $insert_pos, $insert_len);
+ }
+
+ my @loc;
+ for ($feat->location->each_Location) {
+ my $strand = $_->strand;
+ my $type = $_->location_type;
+ my $start = $_->start;
+ my $start_type = $_->can('start_pos_type') ? $_->start_pos_type : undef;
+ my $end = $_->end;
+ my $end_type = $_->can('end_pos_type') ? $_->end_pos_type : undef;
+ my @newcoords=();
+ if ($start <= $insert_pos && $end > $insert_pos){ # split the feature
+ @newcoords = (
+ [ $start, $insert_pos, $start_type, $end_type ],
+ [ ($insert_pos + $insert_len ), $end + $insert_len, $start_type, $end_type ]
+ );
+ } elsif ($start > $insert_pos){ # just shift entire location
+ @newcoords = ([$start + $insert_len, $end + $insert_len, $start_type, $end_type ]);
+ } else { # not affected
+ @newcoords = ( [$start, $end, $start_type, $end_type ]);
+ }
+
+ my @subloc = $self->_location_objects_from_coordinate_list(
+ \@newcoords,
+ $strand,
+ $type
+ );
+ # put together final location which could be a split now
+ push @loc, $self->_single_loc_object_from_collection( @subloc );
+ } # each location
+
+ # create new feature based on original one and move annotation across
+ my $newfeat=Bio::SeqFeature::Generic->new(-primary=>$feat->primary_tag);
+ foreach my $key ( $feat->annotation->get_all_annotation_keys() ) {
+ foreach my $value ( $feat->annotation->get_Annotations($key) ) {
+ $newfeat->annotation->add_Annotation($key, $value);
+ }
+ }
+ foreach my $key ( $feat->get_all_tags() ) {
+ $newfeat->add_tag_value($key, $feat->get_tag_values($key));
+ }
+
+ # set modified location(s) for the new feature and
+ # add its subfeatures if any
+ $newfeat->location( $self->_single_loc_object_from_collection( @loc ) ) ;
+ $newfeat->add_SeqFeature($_) for @adjsubfeat;
+
+ return $newfeat;
+
+}
+
+=head2 _single_loc_object_from_collection
+
+ Title : _single_loc_object_from_collection
+ Function: takes an array of location objects. Returns either a split
+ location object if there are more than one locations in the
+ array or returns the single location if there is only one
+ Usage : my $loc = _single_loc_object_from_collection( @sublocs );
+ Args : array of Bio::Location objects
+ Returns : a single Bio:;Location object containing all locations
+
+=cut
+
+sub _single_loc_object_from_collection {
+ my ($self, @locs) = @_;
+ my $loc;
+ if (@locs > 1){
+ $loc = Bio::Location::Split->new;
+ $loc->add_sub_Location( @locs);
+ } elsif (@locs == 1) {
+ $loc = shift @locs;
+ }
+ return $loc;
+} # _single_loc_object_from_collection
+
+=head2 _location_objects_from_coordinate_list
+
+ Title : _location_objects_from_coordinate_list
+ Function: takes an array-ref of start/end coordinates, a strand and a
+ type and returns a list of Bio::Location objects (Fuzzy by
+ default, Simple in case of in-between coordinates).
+ If location type is not "IN-BETWEEN", individual types may be
+ passed in for start and end location as per Bio::Location::Fuzzy
+ documentation.
+ Usage : my @loc_objs = $self->_location_objects_from_coordinate_list(
+ \@coords,
+ $strand,
+ $type
+ );
+ Args : array-ref of array-refs each containing:
+ start, end [, start-type, end-type]
+ where types are optional. If given, must be
+ a one of ('BEFORE', 'AFTER', 'EXACT','WITHIN', 'BETWEEN')
+ strand (all locations must be on same strand)
+ location-type (EXACT, IN-BETWEEN etc)
+ Returns : list of Bio::Location objects
+
+=cut
+
+sub _location_objects_from_coordinate_list {
+ my $self = shift;
+ my ($coords_ref, $strand, $type) = @_;
+ $self->throw('expected 3 parameters but got '.@_) unless @_ == 3;
+ $self->throw('first argument must be an ARRAY reference#')
+ unless ref($coords_ref) eq 'ARRAY';
+
+ my @loc;
+ foreach my $coords_set (@$coords_ref){
+ my ($start, $end, $start_type, $end_type ) = @$coords_set;
+ # taken from Bio::SeqUtils::_coord_adjust
+ unless ( $type eq 'IN-BETWEEN' ) {
+ my $loc = Bio::Location::Fuzzy->new(
+ -start => $start,
+ -end => $end,
+ -strand => $strand,
+ -location_type => $type
+ );
+ $loc->start_pos_type( $start_type ) if $start_type;
+ $loc->end_pos_type( $end_type ) if $end_type;
+ push @loc, $loc;
+ }
+ else {
+ push @loc,
+ Bio::Location::Simple->new(
+ -start => $start,
+ -end => $end,
+ -strand => $strand,
+ -location_type => $type
+ );
+ }
+ } # each coords_set
+ return @loc;
+} # _location_objects_from_coordinate_list
+
=head2 _coord_adjust

0 comments on commit f489238

Please sign in to comment.