Skip to content
Browse files

allow additional nuc IUPAC when guessing, this was borking MAKER based on assembly consensus; single test case where this failed is ambiguous, making an executive decision to modify that result
  • Loading branch information...
1 parent 9dc87e5 commit 4143bd3510ac6a6d9731301d7351fcd65f5b048a Chris Fields committed Nov 25, 2012
Showing with 14 additions and 13 deletions.
  1. +9 −10 Bio/PrimarySeq.pm
  2. +5 −3 t/Seq/PrimarySeq.t
View
19 Bio/PrimarySeq.pm
@@ -1,7 +1,7 @@
#
# bioperl module for Bio::PrimarySeq
#
-# Please direct questions and support issues to <bioperl-l@bioperl.org>
+# Please direct questions and support issues to <bioperl-l@bioperl.org>
#
# Cared for by Ewan Birney <birney@ebi.ac.uk>
#
@@ -91,15 +91,15 @@ of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/wiki/Mailing_lists - About the mailing lists
-=head2 Support
+=head2 Support
Please direct usage questions or support issues to the mailing list:
I<bioperl-l@bioperl.org>
-rather than to the module maintainer directly. Many experienced and
-reponsive experts will be able look at the problem and quickly
-address it. Please include a thorough description of the problem
+rather than to the module maintainer directly. Many experienced and
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem
with code and data examples if at all possible.
=head2 Reporting Bugs
@@ -268,7 +268,7 @@ sub seq {
my ($seq_str, $alphabet) = @args;
if (@args) {
- $self->_set_seq_by_ref(\$seq_str, $alphabet);
+ $self->_set_seq_by_ref(\$seq_str, $alphabet);
}
return $self->{'seq'};
@@ -352,7 +352,7 @@ sub validate_seq {
$substring = $seqobj->subseq($location_obj);
$substring = $seqobj->subseq($location_obj, -nogap => 1);
Function: Return the subseq from start to end, where the first sequence
- character has coordinate 1 number is inclusive, ie 1-2 are the
+ character has coordinate 1; numbering is inclusive, i.e. 1-2 are the
first two characters of the sequence. The given start coordinate
has to be larger than the end, even if the sequence is circular.
Returns : a string
@@ -373,7 +373,7 @@ sub subseq {
END
NOGAP
REPLACE_WITH)], @args);
-
+
# If -replace_with is specified, validate the replacement sequence
if (defined $replace) {
$self->validate_seq( $replace ) ||
@@ -919,7 +919,7 @@ sub _guess_alphabet_from_string {
# protein sequence can contain at this stage. Make our best guess
# based on sequence composition. If it contains over 70% of ACGTUNWSKM,
# it is likely nucleic.
- if( ($str =~ tr/ATUGCNatugcn//) / $total > 0.7 ) {
+ if( ($str =~ tr/ATUGCNWSKMatugcnwskm//) / $total > 0.7 ) {
if ( $str =~ m/U/i ) {
$alphabet = 'rna';
} else {
@@ -948,4 +948,3 @@ sub accession {
}
1;
-
View
8 t/Seq/PrimarySeq.t
@@ -7,7 +7,7 @@ use Data::Dumper;
BEGIN {
use lib '.';
use Bio::Root::Test;
- test_begin( -tests => 179 );
+ test_begin( -tests => 181 );
use_ok('Bio::PrimarySeq');
use_ok('Bio::Location::Simple');
@@ -298,12 +298,14 @@ ok $seq->seq('AAACTYAAAAGAATTGRCGG'); # valid degenerate DNA PCR primer sequence
is $seq->alphabet, 'dna';
ok $seq->seq('AAACTYAAAKGAATTGRCGG'); # another primer previously detected as protein (85% ACGTN)
is $seq->alphabet, 'dna';
-ok $seq->seq('YWACTYAAAKGARTTGRCGG'); # 70% ACGTN. Everything <= 70% is considered a protein
-is $seq->alphabet, 'protein';
+ok $seq->seq('YWACTYAAAKGARTTGRCGG'); # 70% ACGTN, but 80% ACGTNWSKM. Everything <= 70% is considered a protein
+is $seq->alphabet, 'dna';
ok $seq->seq('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'); # Bug 2438
is $seq->alphabet, 'protein', 'Bug 2438';
ok $seq->seq('CAGTCXXXXXXXXXXXXXXXXXXXXXXXXXXXCAGCG');
is $seq->alphabet, 'protein';
+ok $seq->seq('WTGGGGCTATGAAAAAAAAAWTTKMGMMAAAAAWTTWTKRWMRATC'); # showed up on MAKER list
+is $seq->alphabet, 'dna';
ok $seq->seq('actgn', 'protein'); # accept specified alphabet, no matter what
is $seq->alphabet, 'protein';

0 comments on commit 4143bd3

Please sign in to comment.
Something went wrong with that request. Please try again.