Browse files

Added the functions 'complement' and 'reverse_complement' to Bio.Seq'…

…s Seq and

MutableSeq objects. Similar functions previous existed in various locations in
BioPython:
- forward_complement, reverse_complement in Bio.GFF.easy
- complement, antiparallel in Bio.SeqUtils
These functions have now been deprecated, and will generate a DeprecationWarning
when used.
The functions complement and reverse_complement, when applied to a Seq object,
will return a new Seq object. The same function applied to a MutableSeq object
will modify the MutableSeq object itself, and don't return anything.
  • Loading branch information...
1 parent 9cf3372 commit 34b4f31ed222645fe8df7727bdcdcf9b44f8f358 mdehoon committed Aug 14, 2004
Showing with 131 additions and 20 deletions.
  1. +19 −0 Bio/Data/IUPACData.py
  2. +20 −3 Bio/GFF/easy.py
  3. +55 −0 Bio/Seq.py
  4. +26 −12 Bio/SeqUtils/__init__.py
  5. +11 −5 DEPRECATED
View
19 Bio/Data/IUPACData.py
@@ -72,6 +72,25 @@
"N": "N",
}
+ambiguous_rna_complement = {
+ "A": "U",
+ "C": "G",
+ "G": "C",
+ "U": "A",
+ "M": "K",
+ "R": "Y",
+ "W": "W",
+ "S": "S",
+ "Y": "R",
+ "K": "M",
+ "V": "B",
+ "H": "D",
+ "D": "H",
+ "B": "V",
+ "X": "X",
+ "N": "N",
+ }
+
def _make_ranges(dict):
d = {}
View
23 Bio/GFF/easy.py
@@ -11,7 +11,7 @@
from __future__ import generators # requires Python 2.2
-__version__ = "$Revision: 1.3 $"
+__version__ = "$Revision: 1.4 $"
# $Source: /home/bartek/cvs2bzr/biopython_fastimport/cvs_repo/biopython/Bio/GFF/easy.py,v $
import copy
@@ -568,10 +568,15 @@ def record_coords(record, start, end, strand=0, upper=0):
subseq = record_sequence(record)[start:end]
subseq_str = subseq.tostring()
subseq_str = subseq_str.upper()
+ subseq = Seq(subseq_str, subseq.alphabet)
if strand == '-' or strand == 1:
- return Seq(SeqUtils.antiparallel(subseq_str), subseq.alphabet)
+ return subseq.reverse_complement()
else:
- return Seq(subseq_str, subseq.alphabet)
+ return subseq
+
+### The following section was replaced by the functions complement,
+### forward_complement in Bio.Seq. The code here can be removed eventually
+### (using deprecation warnings for now).
def forward_complement(seq):
"""
@@ -584,6 +589,10 @@ def forward_complement(seq):
>>> forward_complement(Seq('aaauuuc'))
Seq('UUUAAAG', Alphabet())
"""
+ import warnings
+ warnings.warn(
+ "forward_complement in Bio.GFF is deprecated; please use complement in Bio.Seq instead",
+ category=DeprecationWarning)
return _complement(seq, reverse=0)
def _forward_complement_list_with_table(table, seq):
@@ -625,8 +634,16 @@ def reverse_complement(seq):
>>> reverse_complement(Seq('aaatttc'))
Seq('GAAATTT', Alphabet())
"""
+ import warnings
+ warnings.warn(
+ "reverse_complement in Bio.GFF is deprecated; please use reverse_complement in Bio.Seq instead",
+ category=DeprecationWarning)
return _complement(seq, reverse=1)
+###
+### End section to be removed.
+###
+
class TempFastaWriter(FastaWriter):
def __init__(self, *args, **keywds):
self.file = GenericTools.TempFile()
View
55 Bio/Seq.py
@@ -1,6 +1,8 @@
import string, array
import Alphabet
+from Data.IUPACData import ambiguous_dna_complement, ambiguous_rna_complement
+
class Seq:
def __init__(self, data, alphabet = Alphabet.generic_alphabet):
@@ -62,6 +64,45 @@ def tomutable(self): # Needed? Or use a function?
def count(self, item):
return len([x for x in self.data if x == item])
+ def complement(self):
+ """Returns the complement sequence.
+ """
+ if seq.alphabet in (IUPAC.ambiguous_dna, IUPAC.unambiguous_dna):
+ d = ambiguous_dna_complement
+ elif seq.alphabet in (IUPAC.ambiguous_rna, IUPAC.unambiguous_rna):
+ d = ambiguous_rna_complement
+ elif 'U' in self.data:
+ d = ambiguous_rna_complement
+ else:
+ d = ambiguous_dna_complement
+ before = ''.join(d.keys())
+ after = ''.join(d.values())
+ ttable = maketrans(before, after)
+ #Much faster on really long sequences than the previous loop based one.
+ #thx to Michael Palmer, University of Waterloo
+ s = self.data.translate(ttable)
+ return Seq(s, self.alphabet)
+
+ def reverse_complement(self):
+ """Returns the reverse complement sequence.
+ """
+ if seq.alphabet in (IUPAC.ambiguous_dna, IUPAC.unambiguous_dna):
+ d = ambiguous_dna_complement
+ elif seq.alphabet in (IUPAC.ambiguous_rna, IUPAC.unambiguous_rna):
+ d = ambiguous_rna_complement
+ elif 'U' in self.data:
+ d = ambiguous_rna_complement
+ else:
+ d = ambiguous_dna_complement
+ before = ''.join(d.keys())
+ after = ''.join(d.values())
+ ttable = maketrans(before, after)
+ #Much faster on really long sequences than the previous loop based one.
+ #thx to Michael Palmer, University of Waterloo
+ s = self.data[-1::-1].translate(ttable)
+ return Seq(s, self.alphabet)
+
+
class MutableSeq:
def __init__(self, data, alphabet = Alphabet.generic_alphabet):
if type(data) == type(""):
@@ -158,6 +199,20 @@ def index(self, item):
raise ValueError, "MutableSeq.index(x): x not in list"
def reverse(self):
self.data.reverse()
+ def complement(self):
+ if seq.alphabet in (IUPAC.ambiguous_dna, IUPAC.unambiguous_dna):
+ d = ambiguous_dna_complement
+ elif seq.alphabet in (IUPAC.ambiguous_rna, IUPAC.unambiguous_rna):
+ d = ambiguous_rna_complement
+ elif 'U' in self.data:
+ d = ambiguous_rna_complement
+ else:
+ d = ambiguous_dna_complement
+ self.data = map(lambda c: d[c], self.data)
+ self.data = array.array('c', self.data)
+ def reverse_complement(self):
+ self.complement()
+ self.data.reverse()
## Sorting a sequence makes no sense.
# def sort(self, *args): self.data.sort(*args)
View
38 Bio/SeqUtils/__init__.py
@@ -16,40 +16,54 @@
from Bio.Alphabet import IUPAC
from Bio.Data import IUPACData, CodonTable
-######################################
-# DNA
-######################
-# {{{
+### The following section was replaced by the functions complement,
+### forward_complement in Bio.Seq. The code here can be removed eventually
+### (using deprecation warnings for now).
_before = ''.join(IUPACData.ambiguous_dna_complement.keys())
_after = ''.join(IUPACData.ambiguous_dna_complement.values())
_ttable = maketrans(_before, _after)
def complement(seq):
"""Returns the complementary sequence (NOT antiparallel).
-
+
This works on string sequences, not on Bio.Seq objects.
"""
#Much faster on really long sequences than the previous loop based one.
#thx to Michael Palmer, University of Waterloo
+ import warnings
+ warnings.warn(
+ "complement in Bio.SeqUtils is deprecated; please use complement in Bio.Seq instead",
+ category=DeprecationWarning)
return seq.translate(_ttable)
-def reverse(seq):
- """Reverse the sequence. Works on string sequences.
- """
- r = map(None, seq)
- r.reverse()
- return ''.join(r)
-
def antiparallel(seq):
"""returns reversed complementary sequence ( = other strand )
This works on string sequences, not on Bio.Seq objects.
"""
+ import warnings
+ warnings.warn(
+ "antiparallel in Bio.SeqUtils is deprecated; please use reverse_complement in Bio.Seq instead",
+ category=DeprecationWarning)
s = complement(seq)
s = reverse(s)
return s
+### End section to be removed.
+
+######################################
+# DNA
+######################
+# {{{
+
+def reverse(seq):
+ """Reverse the sequence. Works on string sequences.
+ """
+ r = map(None, seq)
+ r.reverse()
+ return ''.join(r)
+
def GC(seq):
""" calculates G+C content """
# 19/8/03: Iddo: added provision for lowercase
View
16 DEPRECATED
@@ -2,11 +2,17 @@ This file provides documentation for modules in Biopython that have been moved
or deprecated in favor of other modules. This provides some quick and easy
to find documentation about how to update your code to work again.
-Bio.Blast.NCBIWWW.blast
-=======================
-Deprecated as of Release 1.31.
-Use Bio.Blast.NCBIWWW.qblast instead. The web blast interface is very
-unstable, and qblast should be more reliable.
+Bio.SeqUtils
+============
+The functions 'complement' and 'antiparallel' in Bio.SeqUtils have been
+deprecated as of Release 1.31. Use the functions 'complement' and
+'reverse_complement' in Bio.Seq instead.
+
+Bio.GFF
+=======
+The functions 'forward_complement' and 'antiparallel' in Bio.GFF.easy have been
+deprecated as of Release 1.31. Use the functions 'complement' and
+'reverse_complement' in Bio.Seq instead.
Bio.sequtils
============

0 comments on commit 34b4f31

Please sign in to comment.