Skip to content

Commit

Permalink
merged Andrew's Seq package with the tree
Browse files Browse the repository at this point in the history
added MANIFEST.in and setup.py for distutils
  • Loading branch information
jchang committed Apr 30, 2000
1 parent 18dbba1 commit f726249
Show file tree
Hide file tree
Showing 26 changed files with 2,310 additions and 6 deletions.
4 changes: 4 additions & 0 deletions AUTHORS
@@ -0,0 +1,4 @@
Jeffrey Chang <jchang@smi.stanford.edu>
Andrew Dalke <dalke@acm.org>
Katharine Lindner <katel@worldpath.net>

103 changes: 103 additions & 0 deletions Bio/Alphabet/IUPAC.py
@@ -0,0 +1,103 @@
# Define the IUPAC Alphabets you know and love

from Bio import Alphabet
from Bio.Data import IUPACData

##################### Protein

# From the IUPAC definition at:
# http://www.chem.qmw.ac.uk/iupac/AminoAcid/A2021.html#AA21
class IUPACProtein(Alphabet.ProteinAlphabet):
    """Protein alphabet whose letters come from IUPACData.protein_letters."""

    letters = IUPACData.protein_letters


# Module-level instance of IUPACProtein.
protein = IUPACProtein()

# This could be considered the base class for the standard IUPAC
# protein, except that some encodings will use "X" to mean "unknown
# character", which causes a collision. If you use X for
# selenocysteines, then you'll need a new alphabet.

class ExtendedIUPACProtein(Alphabet.ProteinAlphabet):
    """Protein alphabet whose letters come from
    IUPACData.extended_protein_letters.

    Letter codes noted for this alphabet:
      B = "Asx"; aspartic acid or asparagine
      X = "Sec"; selenocysteine
      Z = "Glx"; glutamic acid or glutamine (or substances such as
          4-carboxyglutamic acid and 5-oxoproline that yield glutamic
          acid on acid hydrolysis of peptides)
    """

    letters = IUPACData.extended_protein_letters


# Module-level instance of ExtendedIUPACProtein.
extended_protein = ExtendedIUPACProtein()

##################### DNA

# The next two are the IUPAC definitions, from:
# http://www.chem.qmw.ac.uk/iubmb/misc/naseq.html
class IUPACAmbiguousDNA(Alphabet.DNAAlphabet):
    """DNA alphabet whose letters come from IUPACData.ambiguous_dna_letters."""

    letters = IUPACData.ambiguous_dna_letters


# Module-level instance of IUPACAmbiguousDNA.
ambiguous_dna = IUPACAmbiguousDNA()

class IUPACUnambiguousDNA(IUPACAmbiguousDNA):
    """Subclass of IUPACAmbiguousDNA whose letters come from
    IUPACData.unambiguous_dna_letters.
    """

    letters = IUPACData.unambiguous_dna_letters


# Module-level instance of IUPACUnambiguousDNA.
unambiguous_dna = IUPACUnambiguousDNA()


# Also from the URL, but not part of the standard
class ExtendedIUPACDNA(Alphabet.DNAAlphabet):
    """DNA alphabet whose letters come from IUPACData.extended_dna_letters.

    Letter codes noted for this alphabet:
      B == 5-bromouridine
      D == 5,6-dihydrouridine
      S == thiouridine
      W == wyosine
    """

    letters = IUPACData.extended_dna_letters


# Module-level instance of ExtendedIUPACDNA.
extended_dna = ExtendedIUPACDNA()

##################### RNA

class IUPACAmbiguousRNA(Alphabet.RNAAlphabet):
    """RNA alphabet whose letters come from IUPACData.ambiguous_rna_letters."""

    letters = IUPACData.ambiguous_rna_letters


# Module-level instance of IUPACAmbiguousRNA.
ambiguous_rna = IUPACAmbiguousRNA()

class IUPACUnambiguousRNA(IUPACAmbiguousRNA):
    """Subclass of IUPACAmbiguousRNA whose letters come from
    IUPACData.unambiguous_rna_letters.
    """

    letters = IUPACData.unambiguous_rna_letters


# Module-level instance of IUPACUnambiguousRNA.
unambiguous_rna = IUPACUnambiguousRNA()

# are there extended forms?
#class ExtendedIUPACRNA(Alphabet.RNAAlphabet):
# letters = extended_rna_letters
# # B == 5-bromouridine
# # D == 5,6-dihydrouridine
# # S == thiouridine
# # W == wyosine


# We need to load the property resolution information, but we need to
# wait until after the systems have been loaded. (There's a nasty loop
# where, e.g., translation objects need an alphabet, which needs to be
# associated with translators.)

from Bio.PropertyManager import default_manager

def _bootstrap(manager, klass, property):
    """Placeholder resolver that replaces itself on first use.

    Removes the class_resolver entries of default_manager for all seven
    IUPAC alphabet classes, imports Bio.Encodings.IUPACEncoding, and then
    returns manager.resolve_class(klass, property).

    manager  -- must be default_manager (checked with an assert)
    klass    -- passed through to manager.resolve_class
    property -- passed through to manager.resolve_class

    NOTE(review): presumably importing IUPACEncoding registers the real
    resolvers for these classes -- confirm against Bio.Encodings.
    """
    assert manager is default_manager

    # Drop the placeholder entries, in the same order they were deleted
    # originally, before the encoding module is imported.
    for alphabet_class in (
        IUPACProtein,
        ExtendedIUPACProtein,
        IUPACAmbiguousDNA,
        IUPACUnambiguousDNA,
        ExtendedIUPACDNA,
        IUPACAmbiguousRNA,
        IUPACUnambiguousRNA,
    ):
        del default_manager.class_resolver[alphabet_class]

    # Imported inside the function so the import is deferred until the
    # first resolution request.
    from Bio.Encodings import IUPACEncoding

    return manager.resolve_class(klass, property)

# Install _bootstrap as the class resolver for every IUPAC alphabet class
# defined in this module.
for _alphabet_class in (
    IUPACProtein,
    ExtendedIUPACProtein,
    IUPACAmbiguousDNA,
    IUPACUnambiguousDNA,
    ExtendedIUPACDNA,
    IUPACAmbiguousRNA,
    IUPACUnambiguousRNA,
):
    default_manager.class_resolver[_alphabet_class] = _bootstrap
# Do not leave the loop variable behind in the module namespace.
del _alphabet_class
101 changes: 101 additions & 0 deletions Bio/Alphabet/__init__.py
@@ -0,0 +1,101 @@
import string, re

# This is used by sequences which contain a finite number of similar
# words.

class Alphabet:
    """Base class for alphabets.

    Class attributes:
      size    -- length of each word; None means no fixed size
      letters -- the allowed words, as a list-like object; None means no
                 fixed alphabet
    """

    size = None
    letters = None

    def __repr__(self):
        """Return the class name followed by "()"."""
        return "%s()" % self.__class__.__name__

    def contains(self, other):
        """Return whether other is an instance of this alphabet's class
        (subclass instances included)."""
        own_class = self.__class__
        return isinstance(other, own_class)


# Module-level instance of the base Alphabet.
generic_alphabet = Alphabet()

class SingleLetterAlphabet(Alphabet):
    """Alphabet whose words are one letter long (size is 1)."""

    # Every word is a single character.
    size = 1
    # Subclasses set this to a string of all letters in the alphabet.
    letters = None

########### Protein

class ProteinAlphabet(SingleLetterAlphabet):
    """Single-letter alphabet for proteins; adds nothing to its base."""


# Module-level instance of ProteinAlphabet.
generic_protein = ProteinAlphabet()

########### DNA
class NucleotideAlphabet(SingleLetterAlphabet):
    """Single-letter alphabet for nucleotides; adds nothing to its base."""


# Module-level instance of NucleotideAlphabet.
generic_nucleotide = NucleotideAlphabet()

class DNAAlphabet(NucleotideAlphabet):
    """Nucleotide alphabet for DNA; adds nothing to its base."""


# Module-level instance of DNAAlphabet.
generic_dna = DNAAlphabet()


########### RNA

class RNAAlphabet(NucleotideAlphabet):
    """Nucleotide alphabet for RNA; adds nothing to its base."""


# Module-level instance of RNAAlphabet.
generic_rna = RNAAlphabet()



########### Other per-sequence encodings

class SecondaryStructure(SingleLetterAlphabet):
    """Single-letter alphabet made of the letters H, S, T and C.

    NOTE(review): presumably helix / strand / turn / coil -- confirm.
    """

    letters = "HSTC"

class ThreeLetterProtein(Alphabet):
    """Alphabet of three-letter amino acid codes (size is 3)."""

    size = 3
    # Order is kept exactly as originally listed.
    letters = [
        "Ala", "Asx", "Cys", "Asp", "Glu", "Phe",
        "Gly", "His", "Ile", "Lys", "Leu", "Met",
        "Asn", "Pro", "Gln", "Arg", "Ser", "Thr",
        "Sec", "Val", "Trp", "Xaa", "Tyr", "Glx",
    ]

###### Non per-sequence modifications

# (These are Decorator classes)

class AlphabetEncoder:
    """Decorator that wraps an alphabet and appends extra letters to it.

    alphabet    -- the alphabet being wrapped; stored as self.alphabet
    new_letters -- letters appended to alphabet.letters to form
                   self.letters (self.letters is None when the wrapped
                   alphabet has no fixed letters)

    Attributes not found on the encoder itself are looked up on the
    wrapped alphabet.
    """

    def __init__(self, alphabet, new_letters):
        self.alphabet = alphabet
        if alphabet.letters is not None:
            self.letters = alphabet.letters + new_letters
        else:
            self.letters = None

    def __getattr__(self, key):
        """Delegate a failed attribute lookup to the wrapped alphabet.

        Raises AttributeError when the wrapped alphabet lacks the
        attribute, or when the encoder has no wrapped alphabet yet.
        """
        # __getattr__ only runs after normal lookup has failed.  If the
        # missing name is "alphabet" itself (a bare instance, e.g. while
        # copy or pickle rebuilds the object), evaluating self.alphabet
        # below would call __getattr__ again and recurse without bound.
        if key == "alphabet":
            raise AttributeError(key)
        return getattr(self.alphabet, key)

    def contains(self, other):
        """Base implementation: always returns 0."""
        return 0

class Gapped(AlphabetEncoder):
    """Alphabet decorator that adds a gap character to the wrapped letters.

    alphabet -- the alphabet being wrapped
    gap_char -- the gap symbol appended to the letters (default '-')
    """

    gap_char = '-'

    def __init__(self, alphabet, gap_char = gap_char):
        AlphabetEncoder.__init__(self, alphabet, gap_char)
        # Record the symbol actually used.  Without this, a custom
        # gap_char was appended to self.letters while self.gap_char kept
        # reporting the class default, so contains() compared the wrong
        # value.
        self.gap_char = gap_char

    def contains(self, other):
        """Return a true value when other uses the same gap character and
        the wrapped alphabet contains other's wrapped alphabet.

        An alphabet with no gap_char attribute is not contained (returns
        False instead of raising AttributeError).
        """
        missing = object()
        if getattr(other, "gap_char", missing) != self.gap_char:
            return False
        return self.alphabet.contains(other.alphabet)

class HasStopCodon(AlphabetEncoder):
    """Alphabet decorator that adds a stop symbol to the wrapped letters.

    alphabet    -- the alphabet being wrapped
    stop_symbol -- the symbol appended to the letters (default "*")
    """

    stop_symbol = "*"

    def __init__(self, alphabet, stop_symbol = stop_symbol):
        AlphabetEncoder.__init__(self, alphabet, stop_symbol)
        # Record the symbol actually used.  Without this, a custom
        # stop_symbol was appended to self.letters while self.stop_symbol
        # kept reporting the class default, so __cmp__() and contains()
        # compared the wrong value.
        self.stop_symbol = stop_symbol

    def __cmp__(self, other):
        """Order by wrapped alphabet first, then by stop symbol.

        NOTE: relies on the cmp() builtin, which exists only in Python 2;
        Python 3 never calls __cmp__.
        """
        x = cmp(self.alphabet, other.alphabet)
        if x == 0:
            return cmp(self.stop_symbol, other.stop_symbol)
        return x

    def contains(self, other):
        """Return a true value when other uses the same stop symbol and
        the wrapped alphabet contains other's wrapped alphabet.

        An alphabet with no stop_symbol attribute is not contained
        (returns False instead of raising AttributeError).
        """
        missing = object()
        if getattr(other, "stop_symbol", missing) != self.stop_symbol:
            return False
        return self.alphabet.contains(other.alphabet)

0 comments on commit f726249

Please sign in to comment.