Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
merged Andrew's Seq package with the tree
added MANIFEST.in and setup.py for distutils
- Loading branch information
jchang
committed
Apr 30, 2000
1 parent
18dbba1
commit f726249
Showing
26 changed files
with
2,310 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
Jeffrey Chang <jchang@smi.stanford.edu> | ||
Andrew Dalke <dalke@acm.org> | ||
Katharine Lindner <katel@worldpath.net> | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
# Define the IUPAC Alphabets you know and love | ||
|
||
from Bio import Alphabet | ||
from Bio.Data import IUPACData | ||
|
||
##################### Protein | ||
|
||
# From the IUPAC definition at: | ||
# http://www.chem.qmw.ac.uk/iupac/AminoAcid/A2021.html#AA21 | ||
class IUPACProtein(Alphabet.ProteinAlphabet):
    """Protein alphabet whose letters are IUPACData.protein_letters."""

    letters = IUPACData.protein_letters


# Module-level instance of the alphabet above.
protein = IUPACProtein()
|
||
# This could be considered the base class for the standard IUPAC | ||
# protein, except that some encodings will use "X" to mean "unknown | ||
# character", which causes a collision. If you use X for | ||
# selenocysteines, then you'll need a new alphabet. | ||
|
||
class ExtendedIUPACProtein(Alphabet.ProteinAlphabet):
    """Protein alphabet whose letters are IUPACData.extended_protein_letters.

    Notes on the extra letters:

    B = "Asx"; aspartic acid or asparagine
    X = "Sec"; selenocysteine
    Z = "Glx"; glutamic acid or glutamine (or substances such as
        4-carboxyglutamic acid and 5-oxoproline that yield glutamic
        acid on acid hydrolysis of peptides)
    """

    letters = IUPACData.extended_protein_letters


# Module-level instance of the alphabet above.
extended_protein = ExtendedIUPACProtein()
|
||
##################### DNA | ||
|
||
# The next two are the IUPAC definitions, from: | ||
# http://www.chem.qmw.ac.uk/iubmb/misc/naseq.html | ||
class IUPACAmbiguousDNA(Alphabet.DNAAlphabet):
    """DNA alphabet whose letters are IUPACData.ambiguous_dna_letters."""

    letters = IUPACData.ambiguous_dna_letters


# Module-level instance of the alphabet above.
ambiguous_dna = IUPACAmbiguousDNA()
|
||
class IUPACUnambiguousDNA(IUPACAmbiguousDNA):
    """DNA alphabet whose letters are IUPACData.unambiguous_dna_letters.

    Declared as a subclass of IUPACAmbiguousDNA, so instances of this
    class are also instances of the ambiguous alphabet class.
    """

    letters = IUPACData.unambiguous_dna_letters


# Module-level instance of the alphabet above.
unambiguous_dna = IUPACUnambiguousDNA()
|
||
|
||
# Also from the URL, but not part of the standard | ||
class ExtendedIUPACDNA(Alphabet.DNAAlphabet):
    """DNA alphabet whose letters are IUPACData.extended_dna_letters.

    Notes on the extra letters:

    B == 5-bromouridine
    D == 5,6-dihydrouridine
    S == thiouridine
    W == wyosine
    """

    letters = IUPACData.extended_dna_letters


# Module-level instance of the alphabet above.
extended_dna = ExtendedIUPACDNA()
|
||
##################### RNA | ||
|
||
class IUPACAmbiguousRNA(Alphabet.RNAAlphabet):
    """RNA alphabet whose letters are IUPACData.ambiguous_rna_letters."""

    letters = IUPACData.ambiguous_rna_letters


# Module-level instance of the alphabet above.
ambiguous_rna = IUPACAmbiguousRNA()
|
||
class IUPACUnambiguousRNA(IUPACAmbiguousRNA):
    """RNA alphabet whose letters are IUPACData.unambiguous_rna_letters.

    Declared as a subclass of IUPACAmbiguousRNA, so instances of this
    class are also instances of the ambiguous alphabet class.
    """

    letters = IUPACData.unambiguous_rna_letters


# Module-level instance of the alphabet above.
unambiguous_rna = IUPACUnambiguousRNA()
|
||
# are there extended forms? | ||
#class ExtendedIUPACRNA(Alphabet.RNAAlphabet): | ||
# letters = extended_rna_letters | ||
# # B == 5-bromouridine | ||
# # D == 5,6-dihydrouridine | ||
# # S == thiouridine | ||
# # W == wyosine | ||
|
||
|
||
# We need to load the property resolution information, but we need to | ||
# wait until after the systems have been loaded. (There's a nasty loop | ||
# where, e.g., translation objects need an alphabet, which needs to be | ||
# associated with translators.) | ||
|
||
from Bio.PropertyManager import default_manager | ||
|
||
def _bootstrap(manager, klass, property):
    """Placeholder resolver: load the IUPAC encodings, then resolve for real.

    Removes the placeholder entry of every IUPAC alphabet class from
    ``default_manager.class_resolver``, imports ``Bio.Encodings.IUPACEncoding``
    and finally forwards the request to ``manager.resolve_class``.

    manager  -- the property manager issuing the request; must be the
                ``default_manager`` singleton (checked with an assert).
    klass    -- the alphabet class being resolved.
    property -- the property being looked up; passed through unchanged.

    Returns whatever ``manager.resolve_class(klass, property)`` returns.
    """
    assert manager is default_manager

    # Drop every placeholder before importing the encodings, so the final
    # resolve_class() call below cannot land back in this function.
    for alphabet_class in (
        IUPACProtein,
        ExtendedIUPACProtein,
        IUPACAmbiguousDNA,
        IUPACUnambiguousDNA,
        ExtendedIUPACDNA,
        IUPACAmbiguousRNA,
        IUPACUnambiguousRNA,
    ):
        del default_manager.class_resolver[alphabet_class]

    # Imported here rather than at module top to break the import loop
    # between alphabets and their encodings.
    # NOTE(review): presumably this import registers the real resolvers as
    # a side effect -- confirm against Bio.Encodings.IUPACEncoding.
    from Bio.Encodings import IUPACEncoding

    return manager.resolve_class(klass, property)
|
||
# Register _bootstrap as the initial resolver of every IUPAC alphabet class.
for _alphabet_class in (
    IUPACProtein,
    ExtendedIUPACProtein,
    IUPACAmbiguousDNA,
    IUPACUnambiguousDNA,
    ExtendedIUPACDNA,
    IUPACAmbiguousRNA,
    IUPACUnambiguousRNA,
):
    default_manager.class_resolver[_alphabet_class] = _bootstrap
# Do not leave the loop variable behind in the module namespace.
del _alphabet_class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import string, re | ||
|
||
# This is used by sequences which contain a finite number of similar | ||
# words. | ||
|
||
class Alphabet:
    """Base class for the set of words a sequence may be built from."""

    # Length of one word; None means words have no fixed size.
    size = None
    # The allowed letters; None means there is no fixed alphabet.
    # Subclasses that set it provide something with a list-like interface.
    letters = None

    def __repr__(self):
        """Return the name of the instance's class followed by "()"."""
        return "%s()" % self.__class__.__name__

    def contains(self, other):
        """Return whether other is an instance of this alphabet's class.

        Instances of subclasses count as contained too, because the check
        is a plain isinstance() against this instance's class.
        """
        own_class = self.__class__
        return isinstance(other, own_class)


# Module-level instance of the base alphabet.
generic_alphabet = Alphabet()
|
||
class SingleLetterAlphabet(Alphabet):
    """Alphabet whose words are exactly one letter long."""

    size = 1
    # A string of all letters in the alphabet; None here, set by subclasses.
    letters = None
|
||
########### Protein | ||
|
||
class ProteinAlphabet(SingleLetterAlphabet):
    """Single-letter alphabet for proteins; adds nothing to its base."""


# Module-level instance of the alphabet above.
generic_protein = ProteinAlphabet()
|
||
########### DNA | ||
class NucleotideAlphabet(SingleLetterAlphabet):
    """Single-letter alphabet for nucleotides; base of the DNA and RNA ones."""


# Module-level instance of the alphabet above.
generic_nucleotide = NucleotideAlphabet()
|
||
class DNAAlphabet(NucleotideAlphabet):
    """Nucleotide alphabet for DNA; adds nothing to its base."""


# Module-level instance of the alphabet above.
generic_dna = DNAAlphabet()
|
||
|
||
########### RNA | ||
|
||
class RNAAlphabet(NucleotideAlphabet):
    """Nucleotide alphabet for RNA; adds nothing to its base."""


# Module-level instance of the alphabet above.
generic_rna = RNAAlphabet()
|
||
|
||
|
||
########### Other per-sequence encodings | ||
|
||
class SecondaryStructure(SingleLetterAlphabet):
    """Single-letter alphabet with the four letters H, S, T and C."""

    letters = "HSTC"
|
||
class ThreeLetterProtein(Alphabet):
    """Alphabet whose words are three-letter amino acid codes."""

    size = 3
    # Kept as a list: each "letter" of this alphabet is a 3-character word.
    letters = [
        "Ala", "Asx", "Cys", "Asp", "Glu", "Phe",
        "Gly", "His", "Ile", "Lys", "Leu", "Met",
        "Asn", "Pro", "Gln", "Arg", "Ser", "Thr",
        "Sec", "Val", "Trp", "Xaa", "Tyr", "Glx",
    ]
|
||
###### Non per-sequence modifications | ||
|
||
# (These are Decorator classes) | ||
|
||
class AlphabetEncoder:
    """Decorator that wraps an alphabet and extends its letters.

    Attributes that the encoder does not define itself are looked up on
    the wrapped alphabet, so an encoder can be used in its place.
    """

    def __init__(self, alphabet, new_letters):
        """Wrap alphabet and append new_letters to its letters.

        When the wrapped alphabet has no fixed letters (``letters`` is
        None), the encoder's ``letters`` is None as well.
        """
        self.alphabet = alphabet
        if alphabet.letters is not None:
            self.letters = alphabet.letters + new_letters
        else:
            self.letters = None

    def __getattr__(self, key):
        """Forward the lookup of a missing attribute to the wrapped alphabet.

        Raises AttributeError if the wrapped alphabet is not set yet or
        does not have the attribute either.
        """
        # __getattr__ only runs after normal lookup has failed.  If the
        # missing name is "alphabet" itself (an instance created without
        # __init__, e.g. while copy/pickle rebuild it), reading
        # self.alphabet below would re-enter this method without end.
        if key == "alphabet":
            raise AttributeError(key)
        return getattr(self.alphabet, key)

    def contains(self, other):
        """Return 0: a bare encoder contains no other alphabet."""
        return 0


class Gapped(AlphabetEncoder):
    """Encoder that adds a gap character to the wrapped alphabet."""

    gap_char = '-'

    def __init__(self, alphabet, gap_char=gap_char):
        """Wrap alphabet, adding gap_char (default '-') to its letters."""
        AlphabetEncoder.__init__(self, alphabet, gap_char)
        # Remember the character actually used; without this a custom
        # gap_char would be added to the letters but contains() would
        # still compare against the class default.
        self.gap_char = gap_char

    def contains(self, other):
        """Return whether other is gapped the same way over a contained alphabet.

        True only if other uses the same gap character and the wrapped
        alphabet contains other's wrapped alphabet.  An other without a
        gap character or a wrapped alphabet is not contained.
        """
        try:
            other_gap = other.gap_char
            other_alphabet = other.alphabet
        except AttributeError:
            # An undecorated alphabet is simply not contained.
            return False
        return other_gap == self.gap_char and \
               self.alphabet.contains(other_alphabet)


class HasStopCodon(AlphabetEncoder):
    """Encoder that adds a stop symbol to the wrapped alphabet."""

    stop_symbol = "*"

    def __init__(self, alphabet, stop_symbol=stop_symbol):
        """Wrap alphabet, adding stop_symbol (default "*") to its letters."""
        AlphabetEncoder.__init__(self, alphabet, stop_symbol)
        # Remember the symbol actually used so that contains() and
        # __cmp__ see a custom stop_symbol instead of the class default.
        self.stop_symbol = stop_symbol

    def __cmp__(self, other):
        """Order by wrapped alphabet first, then by stop symbol.

        Uses the builtin cmp(), so it can only be called on interpreters
        that still provide that builtin.
        """
        x = cmp(self.alphabet, other.alphabet)
        if x == 0:
            return cmp(self.stop_symbol, other.stop_symbol)
        return x

    def contains(self, other):
        """Return whether other has the same stop symbol over a contained alphabet.

        True only if other uses the same stop symbol and the wrapped
        alphabet contains other's wrapped alphabet.  An other without a
        stop symbol or a wrapped alphabet is not contained.
        """
        try:
            other_stop = other.stop_symbol
            other_alphabet = other.alphabet
        except AttributeError:
            # An undecorated alphabet is simply not contained.
            return False
        return other_stop == self.stop_symbol and \
               self.alphabet.contains(other_alphabet)
Oops, something went wrong.