Skip to content

Commit

Permalink
Added the functions 'complement' and 'reverse_complement' to Bio.Seq'…
Browse files Browse the repository at this point in the history
…s Seq and

MutableSeq objects. Similar functions previous existed in various locations in
BioPython:
- forward_complement, reverse_complement in Bio.GFF.easy
- complement, antiparallel in Bio.SeqUtils
These functions have now been deprecated, and will generate a DeprecationWarning
when used.
The functions complement and reverse_complement, when applied to a Seq object,
will return a new Seq object. The same function applied to a MutableSeq object
will modify the MutableSeq object itself, and don't return anything.
  • Loading branch information
mdehoon committed Aug 14, 2004
1 parent 9cf3372 commit 34b4f31
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 20 deletions.
19 changes: 19 additions & 0 deletions Bio/Data/IUPACData.py
Expand Up @@ -72,6 +72,25 @@
"N": "N",
}

ambiguous_rna_complement = {
"A": "U",
"C": "G",
"G": "C",
"U": "A",
"M": "K",
"R": "Y",
"W": "W",
"S": "S",
"Y": "R",
"K": "M",
"V": "B",
"H": "D",
"D": "H",
"B": "V",
"X": "X",
"N": "N",
}


def _make_ranges(dict):
d = {}
Expand Down
23 changes: 20 additions & 3 deletions Bio/GFF/easy.py
Expand Up @@ -11,7 +11,7 @@

from __future__ import generators # requires Python 2.2

__version__ = "$Revision: 1.3 $"
__version__ = "$Revision: 1.4 $"
# $Source: /home/bartek/cvs2bzr/biopython_fastimport/cvs_repo/biopython/Bio/GFF/easy.py,v $

import copy
Expand Down Expand Up @@ -568,10 +568,15 @@ def record_coords(record, start, end, strand=0, upper=0):
subseq = record_sequence(record)[start:end]
subseq_str = subseq.tostring()
subseq_str = subseq_str.upper()
subseq = Seq(subseq_str, subseq.alphabet)
if strand == '-' or strand == 1:
return Seq(SeqUtils.antiparallel(subseq_str), subseq.alphabet)
return subseq.reverse_complement()
else:
return Seq(subseq_str, subseq.alphabet)
return subseq

### The following section was replaced by the functions complement,
### forward_complement in Bio.Seq. The code here can be removed eventually
### (using deprecation warnings for now).

def forward_complement(seq):
"""
Expand All @@ -584,6 +589,10 @@ def forward_complement(seq):
>>> forward_complement(Seq('aaauuuc'))
Seq('UUUAAAG', Alphabet())
"""
import warnings
warnings.warn(
"forward_complement in Bio.GFF is deprecated; please use complement in Bio.Seq instead",
category=DeprecationWarning)
return _complement(seq, reverse=0)

def _forward_complement_list_with_table(table, seq):
Expand Down Expand Up @@ -625,8 +634,16 @@ def reverse_complement(seq):
>>> reverse_complement(Seq('aaatttc'))
Seq('GAAATTT', Alphabet())
"""
import warnings
warnings.warn(
"reverse_complement in Bio.GFF is deprecated; please use reverse_complement in Bio.Seq instead",
category=DeprecationWarning)
return _complement(seq, reverse=1)

###
### End section to be removed.
###

class TempFastaWriter(FastaWriter):
def __init__(self, *args, **keywds):
self.file = GenericTools.TempFile()
Expand Down
55 changes: 55 additions & 0 deletions Bio/Seq.py
@@ -1,6 +1,8 @@
import string, array

import Alphabet
from Data.IUPACData import ambiguous_dna_complement, ambiguous_rna_complement


class Seq:
def __init__(self, data, alphabet = Alphabet.generic_alphabet):
Expand Down Expand Up @@ -62,6 +64,45 @@ def tomutable(self): # Needed? Or use a function?
def count(self, item):
return len([x for x in self.data if x == item])

def complement(self):
"""Returns the complement sequence.
"""
if seq.alphabet in (IUPAC.ambiguous_dna, IUPAC.unambiguous_dna):
d = ambiguous_dna_complement
elif seq.alphabet in (IUPAC.ambiguous_rna, IUPAC.unambiguous_rna):
d = ambiguous_rna_complement
elif 'U' in self.data:
d = ambiguous_rna_complement
else:
d = ambiguous_dna_complement
before = ''.join(d.keys())
after = ''.join(d.values())
ttable = maketrans(before, after)
#Much faster on really long sequences than the previous loop based one.
#thx to Michael Palmer, University of Waterloo
s = self.data.translate(ttable)
return Seq(s, self.alphabet)

def reverse_complement(self):
"""Returns the reverse complement sequence.
"""
if seq.alphabet in (IUPAC.ambiguous_dna, IUPAC.unambiguous_dna):
d = ambiguous_dna_complement
elif seq.alphabet in (IUPAC.ambiguous_rna, IUPAC.unambiguous_rna):
d = ambiguous_rna_complement
elif 'U' in self.data:
d = ambiguous_rna_complement
else:
d = ambiguous_dna_complement
before = ''.join(d.keys())
after = ''.join(d.values())
ttable = maketrans(before, after)
#Much faster on really long sequences than the previous loop based one.
#thx to Michael Palmer, University of Waterloo
s = self.data[-1::-1].translate(ttable)
return Seq(s, self.alphabet)


class MutableSeq:
def __init__(self, data, alphabet = Alphabet.generic_alphabet):
if type(data) == type(""):
Expand Down Expand Up @@ -158,6 +199,20 @@ def index(self, item):
raise ValueError, "MutableSeq.index(x): x not in list"
def reverse(self):
self.data.reverse()
def complement(self):
if seq.alphabet in (IUPAC.ambiguous_dna, IUPAC.unambiguous_dna):
d = ambiguous_dna_complement
elif seq.alphabet in (IUPAC.ambiguous_rna, IUPAC.unambiguous_rna):
d = ambiguous_rna_complement
elif 'U' in self.data:
d = ambiguous_rna_complement
else:
d = ambiguous_dna_complement
self.data = map(lambda c: d[c], self.data)
self.data = array.array('c', self.data)
def reverse_complement(self):
self.complement()
self.data.reverse()

## Sorting a sequence makes no sense.
# def sort(self, *args): self.data.sort(*args)
Expand Down
38 changes: 26 additions & 12 deletions Bio/SeqUtils/__init__.py
Expand Up @@ -16,40 +16,54 @@
from Bio.Alphabet import IUPAC
from Bio.Data import IUPACData, CodonTable

######################################
# DNA
######################
# {{{
### The following section was replaced by the functions complement,
### forward_complement in Bio.Seq. The code here can be removed eventually
### (using deprecation warnings for now).

_before = ''.join(IUPACData.ambiguous_dna_complement.keys())
_after = ''.join(IUPACData.ambiguous_dna_complement.values())
_ttable = maketrans(_before, _after)

def complement(seq):
"""Returns the complementary sequence (NOT antiparallel).
This works on string sequences, not on Bio.Seq objects.
"""
#Much faster on really long sequences than the previous loop based one.
#thx to Michael Palmer, University of Waterloo
import warnings
warnings.warn(
"complement in Bio.SeqUtils is deprecated; please use complement in Bio.Seq instead",
category=DeprecationWarning)
return seq.translate(_ttable)

def reverse(seq):
"""Reverse the sequence. Works on string sequences.
"""
r = map(None, seq)
r.reverse()
return ''.join(r)

def antiparallel(seq):
"""returns reversed complementary sequence ( = other strand )
This works on string sequences, not on Bio.Seq objects.
"""
import warnings
warnings.warn(
"antiparallel in Bio.SeqUtils is deprecated; please use reverse_complement in Bio.Seq instead",
category=DeprecationWarning)
s = complement(seq)
s = reverse(s)
return s

### End section to be removed.

######################################
# DNA
######################
# {{{

def reverse(seq):
"""Reverse the sequence. Works on string sequences.
"""
r = map(None, seq)
r.reverse()
return ''.join(r)

def GC(seq):
""" calculates G+C content """
# 19/8/03: Iddo: added provision for lowercase
Expand Down
16 changes: 11 additions & 5 deletions DEPRECATED
Expand Up @@ -2,11 +2,17 @@ This file provides documentation for modules in Biopython that have been moved
or deprecated in favor of other modules. This provides some quick and easy
to find documentation about how to update your code to work again.

Bio.Blast.NCBIWWW.blast
=======================
Deprecated as of Release 1.31.
Use Bio.Blast.NCBIWWW.qblast instead. The web blast interface is very
unstable, and qblast should be more reliable.
Bio.SeqUtils
============
The functions 'complement' and 'antiparallel' in Bio.SeqUtils have been
deprecated as of Release 1.31. Use the functions 'complement' and
'reverse_complement' in Bio.Seq instead.

Bio.GFF
=======
The functions 'forward_complement' and 'antiparallel' in Bio.GFF.easy have been
deprecated as of Release 1.31. Use the functions 'complement' and
'reverse_complement' in Bio.Seq instead.

Bio.sequtils
============
Expand Down

0 comments on commit 34b4f31

Please sign in to comment.