Skip to content

Commit

Permalink
Adding simple string like startswith and endswith methods to the Seq …
Browse files Browse the repository at this point in the history
…object (Bug 2809)
  • Loading branch information
peterc committed Apr 16, 2009
1 parent e80c1fb commit 49096ec
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 2 deletions.
63 changes: 61 additions & 2 deletions Bio/Seq.py
Expand Up @@ -323,7 +323,7 @@ def rfind(self, sub, start=0, end=sys.maxint):
Returns -1 if the subsequence is NOT found.
e.g. Locating the last typical start codon, AUG, in an RNA sequence:
>>> from Bio.Seq import Seq
>>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
>>> my_rna.rfind("AUG")
Expand All @@ -333,6 +333,65 @@ def rfind(self, sub, start=0, end=sys.maxint):
sub_str = self._get_seq_str_and_check_alphabet(sub)
return str(self).rfind(sub_str, start, end)

def startswith(self, prefix, start=0, end=sys.maxint):
    """Report whether the Seq begins with the given prefix (True/False).

    Modelled on the python string method of the same name.

    Arguments:
     - prefix - a string or another Seq object; a tuple of these may
       be given instead, in which case each entry is tried.
     - start - optional position at which the test begins.
     - end - optional position at which comparison stops.

    Each prefix is first passed through the object's
    _get_seq_str_and_check_alphabet method, and the value it returns is
    what gets handed to the string method.

    e.g.

    >>> from Bio.Seq import Seq
    >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
    >>> my_rna.startswith("GUC")
    True
    >>> my_rna.startswith("AUG")
    False
    >>> my_rna.startswith("AUG", 3)
    True
    >>> my_rna.startswith(("UCC","UCA","UCG"),1)
    True
    """
    validate = self._get_seq_str_and_check_alphabet
    if not isinstance(prefix, tuple):
        # Single candidate: validate it, then defer to str.startswith.
        wanted = validate(prefix)
        return str(self).startswith(wanted, start, end)
    # Several candidates: validate each one, keeping the tuple form that
    # str.startswith accepts.
    wanted = tuple(map(validate, prefix))
    return str(self).startswith(wanted, start, end)

def endswith(self, suffix, start=0, end=sys.maxint):
    """Report whether the Seq finishes with the given suffix (True/False).

    Modelled on the python string method of the same name.

    Arguments:
     - suffix - a string or another Seq object; a tuple of these may
       be given instead, in which case each entry is tried.
     - start - optional position at which the test begins.
     - end - optional position at which comparison stops.

    Each suffix is first passed through the object's
    _get_seq_str_and_check_alphabet method, and the value it returns is
    what gets handed to the string method.

    e.g.

    >>> from Bio.Seq import Seq
    >>> my_rna = Seq("GUCAUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAGUUG")
    >>> my_rna.endswith("UUG")
    True
    >>> my_rna.endswith("AUG")
    False
    >>> my_rna.endswith("AUG", 0, 18)
    True
    """
    validate = self._get_seq_str_and_check_alphabet
    if not isinstance(suffix, tuple):
        # Single candidate: validate it, then defer to str.endswith.
        wanted = validate(suffix)
        return str(self).endswith(wanted, start, end)
    # Several candidates: validate each one, keeping the tuple form that
    # str.endswith accepts.
    wanted = tuple(map(validate, suffix))
    return str(self).endswith(wanted, start, end)


def split(self, sep=None, maxsplit=-1) :
"""Split method, like that of a python string.
Expand Down Expand Up @@ -756,7 +815,7 @@ class UnknownSeq(Seq):
>>> unk_four + unk_five
UnknownSeq(9, alphabet = Alphabet(), character = '?')
If the alphabet to characters don't match up, the addition gives an
If the alphabet or characters don't match up, the addition gives an
ordinary Seq object:
>>> unk_nnnn = UnknownSeq(4, character = "N")
Expand Down
42 changes: 42 additions & 0 deletions Tests/test_Seq_objs.py
Expand Up @@ -131,6 +131,48 @@ def test_rfind(self) :
"""Check matches the python string rfind method."""
self._test_method("rfind", start_end=True)

def test_startswith(self):
    """Check startswith agrees with the python string method."""
    self._test_method("startswith", start_end=True)
    # Repeat the comparison, this time passing a tuple of candidates.
    for seq in self._examples:
        if not hasattr(seq, "startswith"):
            # e.g. MutableSeq does not support this
            continue
        # Two-letter slices taken every third position of the example.
        pieces = tuple(seq[pos:pos + 2]
                       for pos in range(0, len(seq) - 2, 3))
        pieces_as_str = tuple(str(piece) for piece in pieces)

        # (candidates given to the sequence method, extra start/end args)
        checks = [
            (pieces, ()),
            (pieces_as_str, ()),  # plain strings rather than slices
            (pieces, (3,)),
            (pieces, (2, 6)),
        ]
        for candidates, bounds in checks:
            expected = str(seq).startswith(pieces_as_str, *bounds)
            self.assertEqual(expected,
                             seq.startswith(candidates, *bounds))

def test_endswith(self):
    """Check matches the python string endswith method."""
    self._test_method("endswith", start_end=True)
    # Now check with a tuple of sub sequences
    for example1 in self._examples:
        if not hasattr(example1, "endswith"):
            # e.g. MutableSeq does not support this
            continue
        # Two-letter slices taken every third position of the example.
        subs = tuple(example1[start:start + 2]
                     for start in range(0, len(example1) - 2, 3))
        subs_str = tuple(str(s) for s in subs)

        self.assertEqual(str(example1).endswith(subs_str),
                         example1.endswith(subs))
        # Same check with a tuple of plain strings. This must call
        # endswith: the earlier version called startswith here, so the
        # tuple-of-strings case of endswith was never exercised.
        self.assertEqual(str(example1).endswith(subs_str),
                         example1.endswith(subs_str))
        self.assertEqual(str(example1).endswith(subs_str, 3),
                         example1.endswith(subs, 3))
        self.assertEqual(str(example1).endswith(subs_str, 2, 6),
                         example1.endswith(subs, 2, 6))

def test_strip(self) :
"""Check matches the python string strip method."""
self._test_method("strip", pre_comp_function=str)
Expand Down

0 comments on commit 49096ec

Please sign in to comment.