From 27a04ff8a5924071698cb915c4df3dccb1de4fa5 Mon Sep 17 00:00:00 2001 From: MarkusPiotrowski Date: Wed, 26 Apr 2017 23:51:12 +0200 Subject: [PATCH] Restriction: Bugfixes, Python 3 compatibility, PEP8/PEP257 issues, more tests. --- Bio/Restriction/Restriction.py | 811 ++++++++++++++++----------------- NEWS.rst | 1 + Tests/test_Restriction.py | 124 +++-- 3 files changed, 482 insertions(+), 454 deletions(-) diff --git a/Bio/Restriction/Restriction.py b/Bio/Restriction/Restriction.py index 1b8337b5165..58273dd531d 100644 --- a/Bio/Restriction/Restriction.py +++ b/Bio/Restriction/Restriction.py @@ -93,13 +93,9 @@ import itertools from Bio.Seq import Seq, MutableSeq - from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict from Bio.Restriction.Restriction_Dictionary import typedict from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict -from Bio.Restriction.RanaConfig import ConsoleWidth, NameWidth, Indent, MaxSize -from Bio.Restriction.RanaConfig import ftp_proxy, ftp_Rebase -from Bio.Restriction.RanaConfig import ftp_emb_e, ftp_emb_s, ftp_emb_r from Bio.Restriction.PrintFormat import PrintFormat from Bio import BiopythonWarning @@ -146,15 +142,13 @@ class FormattedSeq(object): Translate a Bio.Seq into a formatted sequence to be used with Restriction. - Roughly: - remove anything which is not IUPAC alphabet and then add a space - in front of the sequence to get a biological index instead of a - python index (i.e. index of the first base is 1 not 0). + Roughly: remove anything which is not IUPAC alphabet and then add a space + in front of the sequence to get a biological index instead of a + python index (i.e. index of the first base is 1 not 0). - Retains information about the shape of the molecule linear (default) - or circular. Restriction sites are search over the edges of circular - sequence. - """ + Retains information about the shape of the molecule linear (default) or + circular. 
Restriction sites are search over the edges of circular sequence. + """ def __init__(self, seq, linear=True): """FormattedSeq(seq, [linear=True])-> new FormattedSeq. @@ -196,43 +190,39 @@ def __eq__(self, other): return False def circularise(self): - """FS.circularise() -> circularise FS""" + """Circularise sequence in place.""" self.linear = False return def linearise(self): - """FS.linearise() -> linearise FS""" + """Linearise sequence in place.""" self.linear = True return def to_linear(self): - """FS.to_linear() -> new linear FS instance""" + """Make a new instnace of sequence as linear.""" new = self.__class__(self) new.linear = True return new def to_circular(self): - """FS.to_circular() -> new circular FS instance""" + """Make a new instance of sequence as circular.""" new = self.__class__(self) new.linear = False return new def is_linear(self): - """FS.is_linear() -> bool. - - True if the sequence will analysed as a linear sequence. - """ + """Return if sequence is linear (True) or circular (False).""" return self.linear def finditer(self, pattern, size): - """FS.finditer(pattern, size) -> list. + """Return a list of a given pattern which occurs in the sequence. - return a list of pattern into the sequence. - the list is made of tuple (location, pattern.group). - the latter is used with non palindromic sites. - pattern is the regular expression pattern corresponding to the + The list is made of tuple (location, pattern.group). + The latter is used with non palindromic sites. + Pattern is the regular expression pattern corresponding to the enzyme restriction site. - size is the size of the restriction enzyme recognition-site size. + Size is the size of the restriction enzyme recognition-site size. """ if self.is_linear(): data = self.data @@ -247,18 +237,17 @@ def __getitem__(self, i): class RestrictionType(type): - """RestrictionType. Type from which derives all enzyme classes. + """RestrictionType. Type from which all enzyme classes are derived. 
Implement the operator methods. """ def __init__(cls, name='', bases=(), dct=None): - """RE(name, bases, dct) -> RestrictionType instance. + """Initialize RestrictionType instance. Not intended to be used in normal operation. The enzymes are instantiated when importing the module. - - see below. + See below. """ if "-" in name: raise ValueError("Problem with hyphen in %s as enzyme name" @@ -268,15 +257,15 @@ def __init__(cls, name='', bases=(), dct=None): # super(RestrictionType, cls).__init__(cls, name, bases, dct) try: cls.compsite = re.compile(cls.compsite) - except Exception as err: + except Exception: raise ValueError("Problem with regular expression, re.compiled(%s)" % repr(cls.compsite)) def __add__(cls, other): - """RE.__add__(other) -> RestrictionBatch(). + """Add restriction enzyme to a RestrictionBatch(). - if other is an enzyme returns a batch of the two enzymes. - if other is already a RestrictionBatch add enzyme to it. + If other is an enzyme returns a batch of the two enzymes. + If other is already a RestrictionBatch add enzyme to it. """ if isinstance(other, RestrictionType): return RestrictionBatch([cls, other]) @@ -286,72 +275,68 @@ def __add__(cls, other): raise TypeError def __div__(cls, other): - """RE.__div__(other) -> list. + """Override '/' operator to use as search method. - RE/other - returns RE.search(other). + >>> EcoRI/Seq('GAATTC') + [2] + Returns RE.search(other). """ return cls.search(other) def __rdiv__(cls, other): - """RE.__rdiv__(other) -> list. + """Override division with reversed operands to use as search method. - other/RE - returns RE.search(other). + >>> Seq('GAATTC')/EcoRI + [2] + Returns RE.search(other). """ return cls.search(other) def __truediv__(cls, other): - """RE.__truediv__(other) -> list. + """Override Python 3 division operator to use as search method. - RE/other - returns RE.search(other). + Like __div__. """ return cls.search(other) def __rtruediv__(cls, other): - """RE.__rtruediv__(other) -> list. 
+ """As __truediv__, with reversed operands. - other/RE - returns RE.search(other). + Like __rdiv__. """ return cls.search(other) def __floordiv__(cls, other): - """RE.__floordiv__(other) -> list. + """Override '//' operator to use as catalyse method. - RE//other - returns RE.catalyse(other). + >>> EcoRI//Seq('GAATTC') + (Seq('G', Alphabet()), Seq('AATTC', Alphabet())) + Returns RE.catalyse(other). """ return cls.catalyse(other) def __rfloordiv__(cls, other): - """RE.__rfloordiv__(other) -> list. + """As __floordiv__, with reversed operands. - other//RE - returns RE.catalyse(other). + >>> Seq('GAATTC')//EcoRI + (Seq('G', Alphabet()), Seq('AATTC', Alphabet())) + Returns RE.catalyse(other). """ return cls.catalyse(other) def __str__(cls): - """RE.__str__() -> str. - - return the name of the enzyme. - """ + """Return the name of the enzyme as string.""" return cls.__name__ def __repr__(cls): - """RE.__repr__() -> str. + """Implement repr method. - used with eval or exec will instantiate the enzyme. + Used with eval or exec will instantiate the enzyme. """ return "%s" % cls.__name__ def __len__(cls): - """RE.__len__() -> int. - - length of the recognition site. - """ + """Return length of recognition site of enzyme as int.""" return cls.size def __hash__(cls): @@ -360,7 +345,7 @@ def __hash__(cls): return id(cls) def __eq__(cls, other): - """RE == other -> bool + """Override '==' operator. True if RE and other are the same enzyme. @@ -370,11 +355,16 @@ def __eq__(cls, other): return id(cls) == id(other) def __ne__(cls, other): - """RE != other -> bool. - isoschizomer strict, same recognition site, same restriction -> False - all the other-> True + """Override '!=' operator. + + Isoschizomer strict (same recognition site, same restriction) -> False + All the other-> True - WARNING - This is not the inverse of the __eq__ method. 
+ WARNING - This is not the inverse of the __eq__ method + >>> SacI != SstI # true isoschizomers + False + >>> SacI == SstI + False """ if not isinstance(other, RestrictionType): return True @@ -384,10 +374,12 @@ def __ne__(cls, other): return True def __rshift__(cls, other): - """RE >> other -> bool. + """Override '>>' operator to test for neoschizomers. neoschizomer : same recognition site, different restriction. -> True all the others : -> False + >>> SmaI >> XmaI + True """ if not isinstance(other, RestrictionType): return False @@ -397,10 +389,11 @@ def __rshift__(cls, other): return False def __mod__(cls, other): - """a % b -> bool. + """Override '%' operator to test for compatible overhangs. - Test compatibility of the overhang of a and b. True if a and b have compatible overhang. + >>> XhoI % SalI + True """ if not isinstance(other, RestrictionType): raise TypeError( @@ -408,11 +401,17 @@ def __mod__(cls, other): return cls._mod1(other) def __ge__(cls, other): - """a >= b -> bool. - - a is greater or equal than b if the a site is longer than b site. - if their site have the same length sort by alphabetical order of their - names. + """Compare length of recognition site of two enzymes. + + Override '>='. a is greater or equal than b if the a site is longer + than b site. If their site have the same length sort by alphabetical + order of their names. + >>> EcoRI.size + 6 + >>> EcoRV.size + 6 + >>> EcoRI >= EcoRV + False """ if not isinstance(other, RestrictionType): raise NotImplementedError @@ -424,12 +423,11 @@ def __ge__(cls, other): return False def __gt__(cls, other): - """a > b -> bool. - - sorting order: - 1. size of the recognition site. - 2. if equal size, alphabetical order of the names. + """Compare length of recognition site of two enzymes. + Override '>'. Sorting order: + 1. size of the recognition site. + 2. if equal size, alphabetical order of the names. 
""" if not isinstance(other, RestrictionType): raise NotImplementedError @@ -441,11 +439,11 @@ def __gt__(cls, other): return False def __le__(cls, other): - """a <= b -> bool. + """Compare length of recognition site of two enzymes. - sorting order: - 1. size of the recognition site. - 2. if equal size, alphabetical order of the names. + Override '<='. Sorting order: + 1. size of the recognition site. + 2. if equal size, alphabetical order of the names. """ if not isinstance(other, RestrictionType): raise NotImplementedError @@ -457,11 +455,11 @@ def __le__(cls, other): return False def __lt__(cls, other): - """a < b -> bool. + """Compare length of recognition site of two enzymes. - sorting order: - 1. size of the recognition site. - 2. if equal size, alphabetical order of the names. + Override '<'. Sorting order: + 1. size of the recognition site. + 2. if equal size, alphabetical order of the names. """ if not isinstance(other, RestrictionType): raise NotImplementedError @@ -483,14 +481,13 @@ class AbstractCut(RestrictionType): @classmethod def search(cls, dna, linear=True): - """RE.search(dna, linear=True) -> list. + """Return a list of cutting sites of the enzyme in the sequence. - return a list of all the site of RE in dna. Compensate for circular - sequences and so on. + Compensate for circular sequences and so on. dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance. - if linear is False, the restriction sites than span over the boundaries + If linear is False, the restriction sites that span over the boundaries will be included. 
The positions are the first base of the 3' fragment, @@ -512,52 +509,57 @@ def search(cls, dna, linear=True): @classmethod def all_suppliers(cls): - """RE.all_suppliers -> print all the suppliers of R""" + """Print all the suppliers of restriction enzyme.""" supply = sorted(x[0] for x in suppliers_dict.values()) print(",\n".join(supply)) return @classmethod def is_equischizomer(cls, other): - """RE.is_equischizomers(other) -> bool. + """Test for real isoschizomer. - True if other is an isoschizomer of RE. - False else. + True if other is an isoschizomer of RE, but not an neoschizomer, + else False. - equischizomer <=> same site, same position of restriction. + Equischizomer: same site, same position of restriction. + >>> SacI.is_equischizomer(SstI) + True + >>> SmaI.is_equischizomer(XmaI) + False """ return not cls != other @classmethod def is_neoschizomer(cls, other): - """RE.is_neoschizomers(other) -> bool. - - True if other is an isoschizomer of RE. - False else. + """Test for neoschizomer. - neoschizomer <=> same site, different position of restriction. + True if other is an isoschizomer of RE, else False. + Neoschizomer: same site, different position of restriction. """ return cls >> other @classmethod def is_isoschizomer(cls, other): - """RE.is_isoschizomers(other) -> bool. + """Test for same recognition site. - True if other is an isoschizomer of RE. - False else. + True if other has the same recognition site, else False. - isoschizomer <=> same site. + Isoschizomer: same site. + >>> SacI.is_isoschizomer(SstI) + True + >>> SmaI.is_isoschizomer(XmaI) + True """ return (not cls != other) or cls >> other @classmethod def equischizomers(cls, batch=None): - """RE.equischizomers([batch]) -> list. + """List equischizomers of the enzyme. - return a tuple of all the isoschizomers of RE. - if batch is supplied it is used instead of the default AllEnzymes. + Return a tuple of all the isoschizomers of RE. 
+ If batch is supplied it is used instead of the default AllEnzymes. - equischizomer <=> same site, same position of restriction. + Equischizomer: same site, same position of restriction. """ if not batch: batch = AllEnzymes @@ -569,12 +571,12 @@ def equischizomers(cls, batch=None): @classmethod def neoschizomers(cls, batch=None): - """RE.neoschizomers([batch]) -> list. + """List neoschizomers of the enzyme. - return a tuple of all the neoschizomers of RE. - if batch is supplied it is used instead of the default AllEnzymes. + Return a tuple of all the neoschizomers of RE. + If batch is supplied it is used instead of the default AllEnzymes. - neoschizomer <=> same site, different position of restriction. + Neoschizomer: same site, different position of restriction. """ if not batch: batch = AllEnzymes @@ -583,10 +585,10 @@ def neoschizomers(cls, batch=None): @classmethod def isoschizomers(cls, batch=None): - """RE.isoschizomers([batch]) -> list. + """List all isoschizomers of the enzyme. - return a tuple of all the equischizomers and neoschizomers of RE. - if batch is supplied it is used instead of the default AllEnzymes. + Return a tuple of all the equischizomers and neoschizomers of RE. + If batch is supplied it is used instead of the default AllEnzymes. """ if not batch: batch = AllEnzymes @@ -598,9 +600,9 @@ def isoschizomers(cls, batch=None): @classmethod def frequency(cls): - """RE.frequency() -> int. + """Return the theoretically cutting frequency of the enzyme. - frequency of the site. + Frequency of the site, given as 'one cut per x bases' (int). """ return cls.freq @@ -625,7 +627,7 @@ class NoCut(AbstractCut): @classmethod def cut_once(cls): - """RE.cut_once() -> bool. + """Return if the cutting pattern has one cut. True if the enzyme cut the sequence one time on each strand. """ @@ -633,7 +635,7 @@ def cut_once(cls): @classmethod def cut_twice(cls): - """RE.cut_twice() -> bool. + """Return if the cutting pattern has two cuts. 
True if the enzyme cut the sequence twice on each strand. """ @@ -641,15 +643,15 @@ def cut_twice(cls): @classmethod def _modify(cls, location): - """RE._modify(location) -> int. + """Return a generator that moves the cutting position by 1. - for internal use only. + For internal use only. location is an integer corresponding to the location of the match for the enzyme pattern in the sequence. _modify returns the real place where the enzyme will cut. - example:: + Example:: EcoRI pattern : GAATTC EcoRI will cut after the G. @@ -662,25 +664,25 @@ def _modify(cls, location): EcoRI cut after the G so: EcoRI._modify(10) -> 11. - if the enzyme cut twice _modify will returns two integer corresponding + If the enzyme cut twice _modify will returns two integer corresponding to each cutting site. """ yield location @classmethod def _rev_modify(cls, location): - """RE._rev_modify(location) -> generator of int. + """Return a generator that moves the cutting position by 1. - for internal use only. + For internal use only. - as _modify for site situated on the antiparallel strand when the - enzyme is not palindromic + As _modify for site situated on the antiparallel strand when the + enzyme is not palindromic. """ yield location @classmethod def characteristic(cls): - """RE.characteristic() -> tuple. + """Return a list of the enzyme's characteristics as tuple. the tuple contains the attributes: fst5 -> first 5' cut ((current strand) or None @@ -693,7 +695,7 @@ def characteristic(cls): class OneCut(AbstractCut): - """Implement the methods specific to the enzymes that cut the DNA only once + """Implement the methods for enzymes that cut the DNA only once. Correspond to ncuts values of 2 in emboss_e.### @@ -702,7 +704,7 @@ class OneCut(AbstractCut): @classmethod def cut_once(cls): - """RE.cut_once() -> bool. + """Return if the cutting pattern has one cut. True if the enzyme cut the sequence one time on each strand. 
""" @@ -710,7 +712,7 @@ def cut_once(cls): @classmethod def cut_twice(cls): - """RE.cut_twice() -> bool. + """Return if the cutting pattern has two cuts. True if the enzyme cut the sequence twice on each strand. """ @@ -718,15 +720,15 @@ def cut_twice(cls): @classmethod def _modify(cls, location): - """RE._modify(location) -> int. + """Return a generator that moves the cutting position by 1. - for internal use only. + For internal use only. location is an integer corresponding to the location of the match for the enzyme pattern in the sequence. _modify returns the real place where the enzyme will cut. - example:: + Example:: EcoRI pattern : GAATTC EcoRI will cut after the G. @@ -746,31 +748,31 @@ def _modify(cls, location): @classmethod def _rev_modify(cls, location): - """RE._rev_modify(location) -> generator of int. + """Return a generator that moves the cutting position by 1. - for internal use only. + For internal use only. - as _modify for site situated on the antiparallel strand when the + As _modify for site situated on the antiparallel strand when the enzyme is not palindromic """ yield location - cls.fst3 @classmethod def characteristic(cls): - """RE.characteristic() -> tuple. + """Return a list of the enzyme's characteristics as tuple. - the tuple contains the attributes: + The tuple contains the attributes: fst5 -> first 5' cut ((current strand) or None fst3 -> first 3' cut (complementary strand) or None scd5 -> second 5' cut (current strand) or None scd5 -> second 3' cut (complementary strand) or None site -> recognition site. - """ + """ return cls.fst5, cls.fst3, None, None, cls.site class TwoCuts(AbstractCut): - """Implement the methods specific to the enzymes that cut the DNA twice + """Implement the methods for enzymes that cut the DNA twice. Correspond to ncuts values of 4 in emboss_e.### @@ -779,7 +781,7 @@ class TwoCuts(AbstractCut): @classmethod def cut_once(cls): - """RE.cut_once() -> bool. + """Return if the cutting pattern has one cut. 
True if the enzyme cut the sequence one time on each strand. """ @@ -787,7 +789,7 @@ def cut_once(cls): @classmethod def cut_twice(cls): - """RE.cut_twice() -> bool. + """Return if the cutting pattern has two cuts. True if the enzyme cut the sequence twice on each strand. """ @@ -795,9 +797,9 @@ def cut_twice(cls): @classmethod def _modify(cls, location): - """RE._modify(location) -> int. + """Return a generator that moves the cutting position by 1. - for internal use only. + For internal use only. location is an integer corresponding to the location of the match for the enzyme pattern in the sequence. @@ -824,7 +826,7 @@ def _modify(cls, location): @classmethod def _rev_modify(cls, location): - """RE._rev_modify(location) -> generator of int. + """Return a generator that moves the cutting position by 1. for internal use only. @@ -836,7 +838,7 @@ def _rev_modify(cls, location): @classmethod def characteristic(cls): - """RE.characteristic() -> tuple. + """Return a list of the enzyme's characteristics as tuple. the tuple contains the attributes: fst5 -> first 5' cut ((current strand) or None @@ -856,7 +858,7 @@ class Meth_Dep(AbstractCut): @classmethod def is_methylable(cls): - """RE.is_methylable() -> bool. + """Return if recognition site can be methylated. True if the recognition site is a methylable. """ @@ -871,7 +873,7 @@ class Meth_Undep(AbstractCut): @classmethod def is_methylable(cls): - """RE.is_methylable() -> bool. + """Return if recognition site can be methylated. True if the recognition site is a methylable. """ @@ -879,7 +881,7 @@ def is_methylable(cls): class Palindromic(AbstractCut): - """Implement the methods specific to the enzymes which are palindromic + """Implement methods for enzymes with palindromic recognition sites. palindromic means : the recognition site and its reverse complement are identical. @@ -892,11 +894,11 @@ class Palindromic(AbstractCut): @classmethod def _search(cls): - """RE._search() -> list. 
+ """Return a list of cutting sites of the enzyme in the sequence. - for internal use only. + For internal use only. - implement the search method for palindromic and non palindromic enzyme. + Implement the search method for palindromic enzymes. """ siteloc = cls.dna.finditer(cls.compsite, cls.size) cls.results = [r for s, g in siteloc for r in cls._modify(s)] @@ -906,17 +908,14 @@ def _search(cls): @classmethod def is_palindromic(cls): - """RE.is_palindromic() -> bool. - - True if the recognition site is a palindrom. - """ + """Return if the enzyme has a palindromic recognition site.""" return True class NonPalindromic(AbstractCut): - """Implement the methods specific to the enzymes which are not palindromic + """Implement methods for enzymes with non-palindromic recognition sites. - palindromic means : the recognition site and its reverse complement are + Palindromic means : the recognition site and its reverse complement are identical. Internal use only. Not meant to be instantiated. @@ -924,11 +923,11 @@ class NonPalindromic(AbstractCut): @classmethod def _search(cls): - """RE._search() -> list. + """Return a list of cutting sites of the enzyme in the sequence. - for internal use only. + For internal use only. - implement the search method for palindromic and non palindromic enzyme. + Implement the search method for non palindromic enzymes. """ iterator = cls.dna.finditer(cls.compsite, cls.size) cls.results = [] @@ -949,16 +948,12 @@ def _search(cls): @classmethod def is_palindromic(cls): - """RE.is_palindromic() -> bool. - - True if the recognition site is a palindrom. - """ + """Return if the enzyme has a palindromic recognition site.""" return False class Unknown(AbstractCut): - """Implement the methods specific to the enzymes for which the overhang - is unknown. + """Implement methods for enzymes that produce unknown overhangs. These enzymes are also NotDefined and NoCut. 
@@ -967,15 +962,16 @@ class Unknown(AbstractCut): @classmethod def catalyse(cls, dna, linear=True): - """RE.catalyse(dna, linear=True) -> tuple of DNA. + """List the sequence fragments after cutting dna with enzyme. + RE.catalyze(dna, linear=True) -> tuple of DNA. - return a tuple of dna as will be produced by using RE to restrict the + Return a tuple of dna as will be produced by using RE to restrict the dna. dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance. - if linear is False, the sequence is considered to be circular and the + If linear is False, the sequence is considered to be circular and the output will be modified accordingly. """ raise NotImplementedError('%s restriction is unknown.' @@ -984,7 +980,7 @@ def catalyse(cls, dna, linear=True): @classmethod def is_blunt(cls): - """RE.is_blunt() -> bool. + """Return if the enzyme produces blunt ends. True if the enzyme produces blunt end. @@ -998,7 +994,7 @@ def is_blunt(cls): @classmethod def is_5overhang(cls): - """RE.is_5overhang() -> bool. + """Return if the enzymes produces 5' overhanging ends. True if the enzyme produces 5' overhang sticky end. @@ -1012,7 +1008,7 @@ def is_5overhang(cls): @classmethod def is_3overhang(cls): - """RE.is_3overhang() -> bool. + """Return if the enzyme produces 3' overhanging ends. True if the enzyme produces 3' overhang sticky end. @@ -1026,34 +1022,30 @@ def is_3overhang(cls): @classmethod def overhang(cls): - """RE.overhang() -> str. type of overhang of the enzyme., + """Return the type of the enzyme's overhang as string. - can be "3' overhang", "5' overhang", "blunt", "unknown" + Can be "3' overhang", "5' overhang", "blunt", "unknown". """ return 'unknown' @classmethod def compatible_end(cls): - """RE.compatible_end() -> list. - - list of all the enzymes that share compatible end with RE. - """ + """List all enzymes that produce compatible ends for the enzyme.""" return [] @classmethod def _mod1(cls, other): - """RE._mod1(other) -> bool. 
+ """Test if other enzyme produces compatible ends for enzyme. - for internal use only + For internal use only. - test for the compatibility of restriction ending of RE and other. + Test for the compatibility of restriction ending of RE and other. """ return False class Blunt(AbstractCut): - """Implement the methods specific to the enzymes for which the overhang - is blunt. + """Implement methods for enzymes that produce blunt ends. The enzyme cuts the + strand and the - strand of the DNA at the same place. @@ -1063,15 +1055,16 @@ class Blunt(AbstractCut): @classmethod def catalyse(cls, dna, linear=True): - """RE.catalyse(dna, linear=True) -> tuple of DNA. + """List the sequence fragments after cutting dna with enzyme. + RE.catalyze(dna, linear=True) -> tuple of DNA. - return a tuple of dna as will be produced by using RE to restrict the + Return a tuple of dna as will be produced by using RE to restrict the dna. dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance. - if linear is False, the sequence is considered to be circular and the + If linear is False, the sequence is considered to be circular and the output will be modified accordingly. """ r = cls.search(dna, linear) @@ -1113,7 +1106,7 @@ def catalyse(cls, dna, linear=True): @classmethod def is_blunt(cls): - """RE.is_blunt() -> bool. + """Return if the enzyme produces blunt ends. True if the enzyme produces blunt end. @@ -1127,7 +1120,7 @@ def is_blunt(cls): @classmethod def is_5overhang(cls): - """RE.is_5overhang() -> bool. + """Return if the enzymes produces 5' overhanging ends. True if the enzyme produces 5' overhang sticky end. @@ -1141,7 +1134,7 @@ def is_5overhang(cls): @classmethod def is_3overhang(cls): - """RE.is_3overhang() -> bool. + """Return if the enzyme produces 3' overhanging ends. True if the enzyme produces 3' overhang sticky end. @@ -1155,18 +1148,15 @@ def is_3overhang(cls): @classmethod def overhang(cls): - """RE.overhang() -> str. 
type of overhang of the enzyme., + """Return the type of the enzyme's overhang as string. - can be "3' overhang", "5' overhang", "blunt", "unknown" + Can be "3' overhang", "5' overhang", "blunt", "unknown". """ return 'blunt' @classmethod def compatible_end(cls, batch=None): - """RE.compatible_end() -> list. - - list of all the enzymes that share compatible end with RE. - """ + """List all enzymes that produce compatible ends for the enzyme.""" if not batch: batch = AllEnzymes r = sorted(x for x in iter(AllEnzymes) if x.is_blunt()) @@ -1174,18 +1164,17 @@ def compatible_end(cls, batch=None): @staticmethod def _mod1(other): - """RE._mod1(other) -> bool. + """Test if other enzyme produces compatible ends for enzyme. - for internal use only + For internal use only - test for the compatibility of restriction ending of RE and other. + Test for the compatibility of restriction ending of RE and other. """ return issubclass(other, Blunt) class Ov5(AbstractCut): - """Implement the methods specific to the enzymes for which the overhang - is recessed in 3'. + """Implement methods for enzymes that produce 5' overhanging ends. The enzyme cuts the + strand after the - strand of the DNA. @@ -1194,15 +1183,16 @@ class Ov5(AbstractCut): @classmethod def catalyse(cls, dna, linear=True): - """RE.catalyse(dna, linear=True) -> tuple of DNA. + """List the sequence fragments after cutting dna with enzyme. + RE.catalyze(dna, linear=True) -> tuple of DNA. - return a tuple of dna as will be produced by using RE to restrict the + Return a tuple of dna as will be produced by using RE to restrict the dna. dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance. - if linear is False, the sequence is considered to be circular and the + If linear is False, the sequence is considered to be circular and the output will be modified accordingly. 
""" r = cls.search(dna, linear) @@ -1244,7 +1234,7 @@ def catalyse(cls, dna, linear=True): @classmethod def is_blunt(cls): - """RE.is_blunt() -> bool. + """Return if the enzyme produces blunt ends. True if the enzyme produces blunt end. @@ -1258,7 +1248,7 @@ def is_blunt(cls): @classmethod def is_5overhang(cls): - """RE.is_5overhang() -> bool. + """Return if the enzymes produces 5' overhanging ends. True if the enzyme produces 5' overhang sticky end. @@ -1272,7 +1262,7 @@ def is_5overhang(cls): @classmethod def is_3overhang(cls): - """RE.is_3overhang() -> bool. + """Return if the enzyme produces 3' overhanging ends. True if the enzyme produces 3' overhang sticky end. @@ -1286,18 +1276,15 @@ def is_3overhang(cls): @classmethod def overhang(cls): - """RE.overhang() -> str. type of overhang of the enzyme., + """Return the type of the enzyme's overhang as string. - can be "3' overhang", "5' overhang", "blunt", "unknown" + Can be "3' overhang", "5' overhang", "blunt", "unknown". """ return "5' overhang" @classmethod def compatible_end(cls, batch=None): - """RE.compatible_end() -> list. - - list of all the enzymes that share compatible end with RE. - """ + """List all enzymes that produce compatible ends for the enzyme.""" if not batch: batch = AllEnzymes r = sorted(x for x in iter(AllEnzymes) if x.is_5overhang() and @@ -1306,11 +1293,11 @@ def compatible_end(cls, batch=None): @classmethod def _mod1(cls, other): - """RE._mod1(other) -> bool. + """Test if other enzyme produces compatible ends for enzyme. - for internal use only + For internal use only. - test for the compatibility of restriction ending of RE and other. + Test for the compatibility of restriction ending of RE and other. """ if issubclass(other, Ov5): return cls._mod2(other) @@ -1319,8 +1306,7 @@ def _mod1(cls, other): class Ov3(AbstractCut): - """Implement the methods specific to the enzymes for which the overhang - is recessed in 5'. + """Implement methods for enzymes that produce 3' overhanging ends. 
The enzyme cuts the - strand after the + strand of the DNA. @@ -1329,15 +1315,16 @@ class Ov3(AbstractCut): @classmethod def catalyse(cls, dna, linear=True): - """RE.catalyse(dna, linear=True) -> tuple of DNA. + """List the sequence fragments after cutting dna with enzyme. + RE.catalyze(dna, linear=True) -> tuple of DNA. - return a tuple of dna as will be produced by using RE to restrict the + Return a tuple of dna as will be produced by using RE to restrict the dna. dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance. - if linear is False, the sequence is considered to be circular and the + If linear is False, the sequence is considered to be circular and the output will be modified accordingly. """ r = cls.search(dna, linear) @@ -1379,7 +1366,7 @@ def catalyse(cls, dna, linear=True): @classmethod def is_blunt(cls): - """RE.is_blunt() -> bool. + """Return if the enzyme produces blunt ends. True if the enzyme produces blunt end. @@ -1393,7 +1380,7 @@ def is_blunt(cls): @classmethod def is_5overhang(cls): - """RE.is_5overhang() -> bool. + """Return if the enzymes produces 5' overhanging ends. True if the enzyme produces 5' overhang sticky end. @@ -1407,7 +1394,7 @@ def is_5overhang(cls): @classmethod def is_3overhang(cls): - """RE.is_3overhang() -> bool. + """Return if the enzyme produces 3' overhanging ends. True if the enzyme produces 3' overhang sticky end. @@ -1421,18 +1408,15 @@ def is_3overhang(cls): @classmethod def overhang(cls): - """RE.overhang() -> str. type of overhang of the enzyme., + """Return the type of the enzyme's overhang as string. - can be "3' overhang", "5' overhang", "blunt", "unknown" + Can be "3' overhang", "5' overhang", "blunt", "unknown". """ return "3' overhang" @classmethod def compatible_end(cls, batch=None): - """RE.compatible_end() -> list. - - list of all the enzymes that share compatible end with RE. 
- """ + """List all enzymes that produce compatible ends for the enzyme.""" if not batch: batch = AllEnzymes r = sorted(x for x in iter(AllEnzymes) if x.is_3overhang() and @@ -1441,11 +1425,11 @@ def compatible_end(cls, batch=None): @classmethod def _mod1(cls, other): - """RE._mod1(other) -> bool. + """Test if other enzyme produces compatible ends for enzyme. - for internal use only + For internal use only. - test for the compatibility of restriction ending of RE and other. + Test for the compatibility of restriction ending of RE and other. """ # # called by RE._mod1(other) when the one of the enzyme is ambiguous @@ -1457,26 +1441,25 @@ def _mod1(cls, other): class Defined(AbstractCut): - """Implement the methods specific to the enzymes for which the overhang - and the cut are not variable. + """Implement methods for enzymes with defined recognition site and cut. Typical example : EcoRI -> G^AATT_C The overhang will always be AATT Notes: - Blunt enzymes are always defined. even if there site is GGATCCNNN^_N - There overhang is always the same : blunt! + Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N + Their overhang is always the same : blunt! Internal use only. Not meant to be instantiated. """ @classmethod def _drop(cls): - """RE._drop() -> list. + """Remove cuts that are outside of the sequence. - for internal use only. + For internal use only. - drop the site that are situated outside the sequence in linear - sequence. modify the index for site in circular sequences. + Drop the site that are situated outside the sequence in linear + sequence. Modify the index for site in circular sequences. """ # # remove or modify the results that are outside the sequence. 
@@ -1491,7 +1474,7 @@ def _drop(cls): take = itertools.takewhile if cls.dna.is_linear(): cls.results = [x for x in drop(lambda x:x < 1, cls.results)] - cls.results = [x for x in take(lambda x:x < length, cls.results)] + cls.results = [x for x in take(lambda x:x <= length, cls.results)] else: for index, location in enumerate(cls.results): if location < 1: @@ -1507,7 +1490,7 @@ def _drop(cls): @classmethod def is_defined(cls): - """RE.is_defined() -> bool. + """Return if recognition sequence and cut are defined. True if the sequence recognised and cut is constant, i.e. the recognition site is not degenerated AND the enzyme cut inside @@ -1522,7 +1505,7 @@ def is_defined(cls): @classmethod def is_ambiguous(cls): - """RE.is_ambiguous() -> bool. + """Return if recognition sequence and cut may be ambiguous. True if the sequence recognised and cut is ambiguous, i.e. the recognition site is degenerated AND/OR the enzyme cut outside @@ -1537,7 +1520,7 @@ def is_ambiguous(cls): @classmethod def is_unknown(cls): - """RE.is_unknown() -> bool. + """Return if recognition sequence is unknown. True if the sequence is unknown, i.e. the recognition site has not been characterised yet. @@ -1551,9 +1534,9 @@ def is_unknown(cls): @classmethod def elucidate(cls): - """RE.elucidate() -> str + """Return a string representing the recognition site and cuttings. - return a representation of the site with the cut on the (+) strand + Return a representation of the site with the cut on the (+) strand represented as '^' and the cut on the (-) strand as '_'. ie: >>> EcoRI.elucidate() # 5' overhang @@ -1589,11 +1572,11 @@ def elucidate(cls): @classmethod def _mod2(cls, other): - """RE._mod2(other) -> bool. + """Test if other enzyme produces compatible ends for enzyme. - for internal use only + For internal use only. - test for the compatibility of restriction ending of RE and other. + Test for the compatibility of restriction ending of RE and other.
""" # # called by RE._mod1(other) when the one of the enzyme is ambiguous @@ -1607,15 +1590,14 @@ def _mod2(cls, other): class Ambiguous(AbstractCut): - """Implement the methods specific to the enzymes for which the overhang - is variable. + """Implement methods for enzymes that produce variable overhangs. Typical example : BstXI -> CCAN_NNNN^NTGG The overhang can be any sequence of 4 bases. Notes: - Blunt enzymes are always defined. even if there site is GGATCCNNN^_N - There overhang is always the same : blunt! + Blunt enzymes are always defined. Even if their site is GGATCCNNN^_N + Their overhang is always the same : blunt! Internal use only. Not meant to be instantiated. @@ -1623,20 +1605,19 @@ class Ambiguous(AbstractCut): @classmethod def _drop(cls): - """RE._drop() -> list. + """Remove cuts that are outside of the sequence. - for internal use only. + For internal use only. - drop the site that are situated outside the sequence in linear - sequence. modify the index for site in circular sequences. + Drop the site that are situated outside the sequence in linear + sequence. Modify the index for site in circular sequences. """ length = len(cls.dna) drop = itertools.dropwhile take = itertools.takewhile if cls.dna.is_linear(): cls.results = [x for x in drop(lambda x: x < 1, cls.results)] - cls.results = [x for x in take(lambda x: x < - length, cls.results)] + cls.results = [x for x in take(lambda x: x <= length, cls.results)] else: for index, location in enumerate(cls.results): if location < 1: @@ -1652,7 +1633,7 @@ def _drop(cls): @classmethod def is_defined(cls): - """RE.is_defined() -> bool. + """Return if recognition sequence and cut are defined. True if the sequence recognised and cut is constant, i.e. the recognition site is not degenerated AND the enzyme cut inside @@ -1667,7 +1648,7 @@ def is_defined(cls): @classmethod def is_ambiguous(cls): - """RE.is_ambiguous() -> bool. + """Return if recognition sequence and cut may be ambiguous.
True if the sequence recognised and cut is ambiguous, i.e. the recognition site is degenerated AND/OR the enzyme cut outside @@ -1682,7 +1663,7 @@ def is_ambiguous(cls): @classmethod def is_unknown(cls): - """RE.is_unknown() -> bool. + """Return if recognition sequence is unknown. True if the sequence is unknown, i.e. the recognition site has not been characterised yet. @@ -1696,11 +1677,11 @@ def is_unknown(cls): @classmethod def _mod2(cls, other): - """RE._mod2(other) -> bool. + """Test if other enzyme produces compatible ends for enzyme. - for internal use only + For internal use only. - test for the compatibility of restriction ending of RE and other. + Test for the compatibility of restriction ending of RE and other. """ # # called by RE._mod1(other) when the one of the enzyme is ambiguous @@ -1724,9 +1705,9 @@ def _mod2(cls, other): @classmethod def elucidate(cls): - """RE.elucidate() -> str + """Return a string representing the recognition site and cuttings. - return a representation of the site with the cut on the (+) strand + Return a representation of the site with the cut on the (+) strand represented as '^' and the cut on the (-) strand as '_'. ie: >>> EcoRI.elucidate() # 5' overhang @@ -1793,8 +1774,7 @@ def elucidate(cls): class NotDefined(AbstractCut): - """Implement the methods specific to the enzymes for which the overhang - is not characterised. + """Implement methods for enzymes with non-characterized overhangs. Correspond to NoCut and Unknown. @@ -1803,13 +1783,12 @@ class NotDefined(AbstractCut): @classmethod def _drop(cls): - """RE._drop() -> list. - - for internal use only. + """Remove cuts that are outside of the sequence. - drop the site that are situated outside the sequence in linear - sequence. modify the index for site in circular sequences. + For internal use only. + Drop the site that are situated outside the sequence in linear + sequence. Modify the index for site in circular sequences.
""" if cls.dna.is_linear(): return @@ -1829,7 +1808,7 @@ def _drop(cls): @classmethod def is_defined(cls): - """RE.is_defined() -> bool. + """Return if recognition sequence and cut are defined. True if the sequence recognised and cut is constant, i.e. the recognition site is not degenerated AND the enzyme cut inside @@ -1844,7 +1823,7 @@ def is_defined(cls): @classmethod def is_ambiguous(cls): - """RE.is_ambiguous() -> bool. + """Return if recognition sequence and cut may be ambiguous. True if the sequence recognised and cut is ambiguous, i.e. the recognition site is degenerated AND/OR the enzyme cut outside @@ -1859,7 +1838,7 @@ def is_ambiguous(cls): @classmethod def is_unknown(cls): - """RE.is_unknown() -> bool. + """Return if recognition sequence is unknown. True if the sequence is unknown, i.e. the recognition site has not been characterised yet. @@ -1868,16 +1847,16 @@ def is_unknown(cls): RE.is_defined() RE.is_ambiguous() - """ + """ return True @classmethod def _mod2(cls, other): - """RE._mod2(other) -> bool. + """Test if other enzyme produces compatible ends for enzyme. - for internal use only + For internal use only. - test for the compatibility of restriction ending of RE and other. + Test for the compatibility of restriction ending of RE and other. """ # # Normally we should not arrive here. But well better safe than @@ -1891,9 +1870,9 @@ def _mod2(cls, other): @classmethod def elucidate(cls): - """RE.elucidate() -> str + """Return a string representing the recognition site and cuttings. - return a representation of the site with the cut on the (+) strand + Return a representation of the site with the cut on the (+) strand represented as '^' and the cut on the (-) strand as '_'. ie: >>> EcoRI.elucidate() # 5' overhang @@ -1910,42 +1889,39 @@ def elucidate(cls): class Commercially_available(AbstractCut): + """Implement methods for enzymes which are commercially available. + + Internal use only. Not meant to be instantiated. 
+ """ + # # Recent addition to Rebase make this naming convention uncertain. # May be better to says enzymes which have a supplier. # - """Implement the methods specific to the enzymes which are commercially - available. - - Internal use only. Not meant to be instantiated. - """ @classmethod def suppliers(cls): - """RE.suppliers() -> print the suppliers of RE.""" + """Print a list of suppliers of the enzyme.""" for s in cls.suppl: print(suppliers_dict[s][0] + ',') return @classmethod def supplier_list(cls): - """RE.supplier_list() -> list. - - list of the supplier names for RE. - """ + """Return a list of suppliers of the enzyme.""" return [v[0] for k, v in suppliers_dict.items() if k in cls.suppl] @classmethod def buffers(cls, supplier): - """RE.buffers(supplier) -> string. + """Return the recommended buffer of the supplier for this enzyme. - not implemented yet. + Not implemented yet. """ return @classmethod def is_comm(cls): - """RE.iscomm() -> bool. + """Return if enzyme is commercially available. True if RE has suppliers. """ @@ -1953,36 +1929,32 @@ def is_comm(cls): class Not_available(AbstractCut): - """Implement the methods specific to the enzymes which are not commercially - available. + """Implement methods for enzymes which are not commercially available. Internal use only. Not meant to be instantiated. """ @staticmethod def suppliers(): - """RE.suppliers() -> print the suppliers of RE.""" + """Print a list of suppliers of the enzyme.""" return None @classmethod def supplier_list(cls): - """RE.supplier_list() -> list. - - list of the supplier names for RE. - """ + """Return a list of suppliers of the enzyme.""" return [] @classmethod def buffers(cls, supplier): - """RE.buffers(supplier) -> string. + """Return the recommended buffer of the supplier for this enzyme. - not implemented yet. + Not implemented yet. """ raise TypeError("Enzyme not commercially available.") @classmethod def is_comm(cls): - """RE.iscomm() -> bool. 
+ """Return if enzyme is commercially available. True if RE has suppliers. """ @@ -1997,14 +1969,16 @@ def is_comm(cls): class RestrictionBatch(set): + """This class holds methods for operations with more than one enzyme.""" def __init__(self, first=(), suppliers=()): - """RestrictionBatch([sequence]) -> new RestrictionBatch.""" + """Initialize empty RB or pre-fill with enzymes (from supplier).""" first = [self.format(x) for x in first] first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]] set.__init__(self, first) self.mapping = dict.fromkeys(self) self.already_mapped = None + self.suppliers = [x for x in suppliers if x in suppliers_dict] def __str__(self): if len(self) < 5: @@ -2024,17 +1998,33 @@ def __contains__(self, other): return set.__contains__(self, other) def __div__(self, other): + """Override '/' operator to use as search method.""" return self.search(other) def __rdiv__(self, other): + """Override division with reversed operands to use as search method.""" + return self.search(other) + + def __truediv__(self, other): + """Override Python 3 division operator to use as search method. + + Like __div__. + """ + return self.search(other) + + def __rtruediv__(self, other): + """As __truediv__, with reversed operands. + + Like __rdiv__. + """ return self.search(other) def get(self, enzyme, add=False): - """B.get(enzyme[, add]) -> enzyme class. + """Check if enzyme is in batch and return it. - if add is True and enzyme is not in B add enzyme to B. - if add is False (which is the default) only return enzyme. - if enzyme is not a RestrictionType or can not be evaluated to + If add is True and enzyme is not in batch add enzyme to batch. + If add is False (which is the default) only return enzyme. + If enzyme is not a RestrictionType or can not be evaluated to a RestrictionType, raise a ValueError.
""" e = self.format(enzyme) @@ -2048,9 +2038,9 @@ def get(self, enzyme, add=False): % e.__name__) def lambdasplit(self, func): - """B.lambdasplit(func) -> RestrictionBatch . + """Filter enzymes in batch with supplied function. - the new batch will contains only the enzymes for which + The new batch will contain only the enzymes for which func return True. """ d = [x for x in filter(func, self)] @@ -2059,12 +2049,12 @@ def lambdasplit(self, func): return new def add_supplier(self, letter): - """B.add_supplier(letter) -> add a new set of enzyme to B. + """Add all enzymes from a given supplier to batch. letter represents the suppliers as defined in the dictionary RestrictionDictionary.suppliers - return None. - raise a KeyError if letter is not a supplier code. + Returns None. + Raise a KeyError if letter is not a supplier code. """ supplier = suppliers_dict[letter] self.suppliers.append(letter) @@ -2073,55 +2063,60 @@ def add_supplier(self, letter): return def current_suppliers(self): - """B.current_suppliers() -> add a new set of enzyme to B. + """List the current suppliers for the restriction batch. - return a sorted list of the suppliers which have been used to + Return a sorted list of the suppliers which have been used to create the batch. """ suppl_list = sorted(suppliers_dict[x][0] for x in self.suppliers) return suppl_list def __iadd__(self, other): - """ b += other -> add other to b, check the type of other.""" + """Override '+=' for use with sets. + + b += other -> add other to b, check the type of other. + """ self.add(other) return self def __add__(self, other): - """ b + other -> new RestrictionBatch.""" + """Override '+' for use with sets. + + b + other -> new RestrictionBatch. + """ new = self.__class__(self) new.add(other) return new def remove(self, other): - """B.remove(other) -> remove other from B if other is a - RestrictionType. + """Remove enzyme from restriction batch. Safe set.remove method.
Verify that other is a RestrictionType or can be evaluated to a RestrictionType. - raise a ValueError if other can not be evaluated to a RestrictionType. - raise a KeyError if other is not in B. + Raise a ValueError if other can not be evaluated to a RestrictionType. + Raise a KeyError if other is not in B. """ return set.remove(self, self.format(other)) def add(self, other): - """B.add(other) -> add other to B if other is a RestrictionType. + """Add a restriction enzyme to the restriction batch. Safe set.add method. Verify that other is a RestrictionType or can be evaluated to a RestrictionType. - raise a ValueError if other can not be evaluated to a RestrictionType. + Raise a ValueError if other can not be evaluated to a RestrictionType. """ return set.add(self, self.format(other)) def add_nocheck(self, other): - """B.add_nocheck(other) -> add other to B. don't check type of other.""" + """Add restriction enzyme to batch without checking its type.""" return set.add(self, other) def format(self, y): - """B.format(y) -> RestrictionType or raise ValueError. + """Evaluate enzyme (name) and return it (as RestrictionType). - if y is a RestrictionType return y - if y can be evaluated to a RestrictionType return eval(y) - raise a Value Error in all other case. + If y is a RestrictionType return y. + If y can be evaluated to a RestrictionType return eval(y). + Raise a ValueError in all other case. """ try: if isinstance(y, RestrictionType): @@ -2135,17 +2130,19 @@ def format(self, y): raise ValueError('%s is not a RestrictionType' % y.__class__) def is_restriction(self, y): - """B.is_restriction(y) -> bool. + """Return if enzyme (name) is a known enzyme. - True is y or eval(y) is a RestrictionType. + True if y or eval(y) is a RestrictionType. """ return (isinstance(y, RestrictionType) or isinstance(eval(str(y)), RestrictionType)) def split(self, *classes, **bool): - """B.split(class, [class.__name__ = True]) -> new RestrictionBatch. 
+ """Extract enzymes of a certain class and put in new RestrictionBatch. + + B.split(class, [class.__name__ = True]) -> new RestrictionBatch. - it works but it is slow, so it has really an interest when splitting + It works but it is slow, so it has really an interest when splitting over multiple conditions. """ def splittest(element): @@ -2167,38 +2164,38 @@ def splittest(element): return new def elements(self): - """B.elements() -> tuple. + """List the enzymes of the RestrictionBatch as a sorted list. - give all the names of the enzymes in B sorted alphabetically. + Give all the names of the enzymes in B sorted alphabetically. """ l = sorted(str(e) for e in self) return l def as_string(self): - """B.as_string() -> list. + """List the names of the enzymes of the RestrictionBatch. - return a list of the name of the elements of B. + Return a list of the name of the elements of the batch. """ return [str(e) for e in self] @classmethod def suppl_codes(cls): - """B.suppl_codes() -> dict + """Return a dictionary with supplier codes. - letter code for the suppliers + Letter code for the suppliers. """ supply = dict((k, v[0]) for k, v in suppliers_dict.items()) return supply @classmethod def show_codes(cls): - """B.show_codes() -> letter codes for the suppliers""" + """Print a list of supplier codes.""" supply = [' = '.join(i) for i in cls.suppl_codes().items()] print('\n'.join(supply)) return def search(self, dna, linear=True): - """B.search(dna) -> dict.""" + """Return a dict of cutting sites in the seq for the batch enzymes.""" # # here we replace the search method of the individual enzymes # with one unique testing method. @@ -2237,15 +2234,20 @@ def search(self, dna, linear=True): class Analysis(RestrictionBatch, PrintFormat): + """Provide methods for enhanced analysis and pretty printing.""" def __init__(self, restrictionbatch=RestrictionBatch(), sequence=DNA(''), linear=True): - """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.
+ """Initialize an Analysis with RestrictionBatch and sequence. + + Analysis([restrictionbatch [, sequence] linear=True]) + -> New Analysis class. - For most of the method of this class if a dictionary is given it will + For most of the methods of this class if a dictionary is given it will be used as the base to calculate the results. - If no dictionary is given a new analysis using the Restriction Batch - which has been given when the Analysis class has been instantiated. + If no dictionary is given a new analysis using the RestrictionBatch + which has been given when the Analysis class has been instantiated, + will be carried out and used. """ RestrictionBatch.__init__(self, restrictionbatch) self.rb = restrictionbatch @@ -2259,17 +2261,20 @@ def __repr__(self): (repr(self.rb), repr(self.sequence), self.linear) def _sub_set(self, wanted): - """A._sub_set(other_set) -> dict. + """Filter result for keys which are in wanted. + + A._sub_set(other_set) -> dict. Internal use only. - screen the results through wanted set. + Screen the results through wanted set. Keep only the results for which the enzymes is in wanted set. """ + # It seems that this method is not used in the whole class! return dict((k, v) for k, v in self.mapping.items() if k in wanted) def _boundaries(self, start, end): - """A._boundaries(start, end) -> tuple. + """Set boundaries to correct values. Format the boundaries for use with the methods that limit the search to only part of the sequence given to analyse. @@ -2290,23 +2295,23 @@ def _boundaries(self, start, end): return start, end, self._test_normal def _test_normal(self, start, end, site): - """A._test_normal(start, end, site) -> bool. + """Test if site is between start and end. Internal use only - Test if site is in between start and end. """ return start <= site < end def _test_reverse(self, start, end, site): - """A._test_reverse(start, end, site) -> bool. + """Test if site is between end and start (for circular sequences). 
- Internal use only - Test if site is in between end and start (for circular sequences). + Internal use only. """ return start <= site <= len(self.sequence) or 1 <= site < end def format_output(self, dct=None, title='', s1=''): - """A.format_output([dct[, title[, s1]]]) -> dct. + """Collect data and pass to PrintFormat. + + A.format_output([dct[, title[, s1]]]) -> dct. If dct is not given the full dictionary is used. """ @@ -2315,17 +2320,22 @@ def format_output(self, dct=None, title='', s1=''): return PrintFormat.format_output(self, dct, title, s1) def print_that(self, dct=None, title='', s1=''): - """A.print_that([dct[, title[, s1[,print_]]]]) -> print the results + """Print the output of the analysis. + + A.print_that([dct[, title[, s1[,print_]]]]) -> print the results from dct. If dct is not given the full dictionary is used. + s1: Title for non-cutting enzymes This method prints the output of A.format_output() and it is here for backwards compatibility. """ print(self.format_output(dct, title, s1)) def change(self, **what): - """A.change(**attribute_name) -> Change attribute of Analysis. + """Change parameters of print output. + + A.change(**attribute_name) -> Change attribute of Analysis. It is possible to change the width of the shell by setting self.ConsoleWidth to what you want. @@ -2359,85 +2369,64 @@ def change(self, **what): return def full(self, linear=True): - """A.full() -> dict. + """Perform analysis with all enzymes of batch and return all results. + + A.full() -> dict. Full Restriction Map of the sequence. """ return self.mapping def blunt(self, dct=None): - """A.blunt([dct]) -> dict. - - Only the enzymes which have a 3'overhang restriction site. - """ + """Return only cuts that have blunt ends.""" if not dct: dct = self.mapping return dict((k, v) for k, v in dct.items() if k.is_blunt()) def overhang5(self, dct=None): - """A.overhang5([dct]) -> dict. - - Only the enzymes which have a 5' overhang restriction site. 
- """ + """Return only cuts that have 5' overhangs.""" if not dct: dct = self.mapping return dict((k, v) for k, v in dct.items() if k.is_5overhang()) def overhang3(self, dct=None): - """A.Overhang3([dct]) -> dict. - - Only the enzymes which have a 3'overhang restriction site. - """ + """Return only cuts that have 3' overhangs.""" if not dct: dct = self.mapping return dict((k, v) for k, v in dct.items() if k.is_3overhang()) def defined(self, dct=None): - """A.defined([dct]) -> dict. - - Only the enzymes that have a defined restriction site in Rebase. - """ + """Return only results from enzymes that produce defined overhangs.""" if not dct: dct = self.mapping return dict((k, v) for k, v in dct.items() if k.is_defined()) def with_sites(self, dct=None): - """A.with_sites([dct]) -> dict. - - Enzymes which have at least one site in the sequence. - """ + """Return only results from enzyme with at least one cut.""" if not dct: dct = self.mapping return dict((k, v) for k, v in dct.items() if v) def without_site(self, dct=None): - """A.without_site([dct]) -> dict. - - Enzymes which have no site in the sequence. - """ + """Return only results from enzymes that don't cut the sequence.""" if not dct: dct = self.mapping return dict((k, v) for k, v in dct.items() if not v) def with_N_sites(self, N, dct=None): - """A.With_N_Sites(N [, dct]) -> dict. - - Enzymes which cut N times the sequence. - """ + """Return only results from enzymes that cut the sequence N times.""" if not dct: dct = self.mapping return dict((k, v) for k, v in dct.items()if len(v) == N) def with_number_list(self, list, dct=None): + """Return only results from enzymes that cut (x,y,z,...) times.""" if not dct: dct = self.mapping return dict((k, v) for k, v in dct.items() if len(v) in list) def with_name(self, names, dct=None): - """A.with_name(list_of_names [, dct]) -> - - Limit the search to the enzymes named in list_of_names. 
- """ + """Return only results from enzymes which names are listed.""" for i, enzyme in enumerate(names): if enzyme not in AllEnzymes: warnings.warn("no data for the enzyme: %s" % enzyme, @@ -2448,20 +2437,14 @@ def with_name(self, names, dct=None): return dict((n, dct[n]) for n in names if n in dct) def with_site_size(self, site_size, dct=None): - """A.with_site_size(site_size [, dct]) -> - - Limit the search to the enzymes whose site is of size . - """ + """Return only results form enzymes with a given site size.""" sites = [name for name in self if name.size == site_size] if not dct: return RestrictionBatch(sites).search(self.sequence) return dict((k, v) for k, v in dct.items() if k in site_size) def only_between(self, start, end, dct=None): - """A.only_between(start, end[, dct]) -> dict. - - Enzymes that cut the sequence only in between start and end. - """ + """Return only results from enzymes that only cut within start, end.""" start, end, test = self._boundaries(start, end) if not dct: dct = self.mapping @@ -2479,7 +2462,7 @@ def only_between(self, start, end, dct=None): return d def between(self, start, end, dct=None): - """A.between(start, end [, dct]) -> dict. + """Return only results from enzymes that cut at least within borders. Enzymes that cut the sequence at least in between start and end. They may cut outside as well. @@ -2497,10 +2480,10 @@ def between(self, start, end, dct=None): return d def show_only_between(self, start, end, dct=None): - """A.show_only_between(start, end [, dct]) -> dict. + """Return only results from within start, end. - Enzymes that cut the sequence outside of the region - in between start and end but do not cut inside. + Enzymes must cut inside start/end and may also cut outside. However, + only the cutting positions within start/end will be returned. 
""" d = [] if start <= end: @@ -2512,7 +2495,7 @@ def show_only_between(self, start, end, dct=None): return dict(d) def only_outside(self, start, end, dct=None): - """A.only_outside(start, end [, dct]) -> dict. + """Return only results from enzymes that only cut outside start, end. Enzymes that cut the sequence outside of the region in between start and end but do not cut inside. @@ -2534,10 +2517,10 @@ def only_outside(self, start, end, dct=None): return d def outside(self, start, end, dct=None): - """A.outside((start, end [, dct]) -> dict. + """Return only results from enzymes that at least cut outside borders. Enzymes that cut outside the region in between start and end. - No test is made to know if they cut or not inside this region. + They may cut inside as well. """ start, end, test = self._boundaries(start, end) if not dct: @@ -2553,10 +2536,7 @@ def outside(self, start, end, dct=None): return d def do_not_cut(self, start, end, dct=None): - """A.do_not_cut(start, end [, dct]) -> dict. - - Enzymes that do not cut the region in between start and end. - """ + """Return only results from enzymes that don't cut between borders.""" if not dct: dct = self.mapping d = self.without_site() @@ -2631,7 +2611,8 @@ def do_not_cut(self, start, end, dct=None): # # AllEnzymes is a RestrictionBatch with all the enzymes from Rebase. # -AllEnzymes = CommOnly | NonComm +AllEnzymes = CommOnly +AllEnzymes.update(NonComm) # # Now, place the enzymes in locals so they can be imported. 
# diff --git a/NEWS.rst b/NEWS.rst index 7db57e31a71..fc9bfddce9f 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -33,6 +33,7 @@ possible, especially the following contributors: - Jimmy O'Donnell (first contribution) - John Kern (first contribution) - João Rodrigues +- Markus Piotrowski - Mateusz Korycinski (first contribution) - Maximilian Greil - morrme (first contribution) diff --git a/Tests/test_Restriction.py b/Tests/test_Restriction.py index f455388b154..f2f51935028 100644 --- a/Tests/test_Restriction.py +++ b/Tests/test_Restriction.py @@ -3,29 +3,31 @@ # as part of this package. # -"""Testing code for Restriction enzyme classes of Biopython. -""" +"""Testing code for Restriction enzyme classes of Biopython.""" from Bio.Restriction import Analysis, Restriction, RestrictionBatch -from Bio.Restriction import Acc65I, Asp718I, EcoRI, EcoRV, KpnI, SmaI +from Bio.Restriction import CommOnly, NonComm, AllEnzymes +from Bio.Restriction import (Acc65I, Asp718I, EcoRI, EcoRV, KpnI, SmaI, + MluCI, McrI, SacI, AanI) from Bio.Seq import Seq from Bio.Alphabet.IUPAC import IUPACAmbiguousDNA from Bio import BiopythonWarning + from sys import version_info if version_info[0] < 3: try: import unittest2 as unittest except ImportError: from Bio import MissingPythonDependencyError - raise MissingPythonDependencyError("Under Python 2 this test needs the unittest2 library") + raise MissingPythonDependencyError("Under Python 2 this test needs " + "the unittest2 library") else: import unittest class SimpleEnzyme(unittest.TestCase): - """Tests for dealing with basic enzymes using the Restriction package. - """ + """Tests for dealing with basic enzymes using the Restriction package.""" def setUp(self): base_seq = Seq("AAAA", IUPACAmbiguousDNA()) @@ -33,8 +35,7 @@ def setUp(self): IUPACAmbiguousDNA()) + base_seq def test_eco_cutting(self): - """Test basic cutting with EcoRI. 
- """ + """Test basic cutting with EcoRI.""" self.assertEqual(EcoRI.site, 'GAATTC') self.assertFalse(EcoRI.is_blunt()) self.assertTrue(EcoRI.is_5overhang()) @@ -49,29 +50,38 @@ def test_eco_cutting(self): self.assertEqual(len(parts), 2) def test_circular_sequences(self): - """Deal with cutting circular sequences. - """ + """Deal with cutting circular sequences.""" parts = EcoRI.catalyse(self.ecosite_seq, linear=False) self.assertEqual(len(parts), 1) locations = EcoRI.search(parts[0], linear=False) self.assertEqual(locations, [1]) + def test_shortcuts(self): + """Check if '/' and '//' work as '.search' and '.catalyse'.""" + self.assertEqual(EcoRI / self.ecosite_seq, [6]) + self.assertEqual(self.ecosite_seq / EcoRI, [6]) + self.assertEqual(len(EcoRI // self.ecosite_seq), 2) + self.assertEqual(len(self.ecosite_seq // EcoRI), 2) + + def test_cutting_penultimate_position(self): + """Check if cutting penultimate position works.""" + seq = Seq('TATGAGCTC') + # Will be cut here: TATGAGCT|C + self.assertEqual(SacI.search(seq), [9]) + class EnzymeComparison(unittest.TestCase): - """Tests for comparing various enzymes. - """ + """Tests for comparing various enzymes.""" def test_basic_isochizomers(self): - """Test to be sure isochizomer and neoschizomers are as expected. - """ + """Test to be sure isochizomer and neoschizomers are as expected.""" self.assertEqual(Acc65I.isoschizomers(), [Asp718I, KpnI]) self.assertEqual(Acc65I.elucidate(), 'G^GTAC_C') self.assertEqual(Asp718I.elucidate(), 'G^GTAC_C') self.assertEqual(KpnI.elucidate(), 'G_GTAC^C') def test_comparisons(self): - """Comparison operators between iso and neoschizomers. - """ + """Comparison operators between iso and neoschizomers.""" self.assertEqual(Acc65I, Acc65I) self.assertNotEqual(Acc65I, KpnI) self.assertFalse(Acc65I == Asp718I) @@ -87,8 +97,7 @@ def test_comparisons(self): class RestrictionBatchPrintTest(unittest.TestCase): - """Tests Restriction.Analysis printing functionality. 
- """ + """Tests Restriction.Analysis printing functionality.""" def createAnalysis(self, seq_str, batch_ary): """Restriction.Analysis creation helper method.""" @@ -97,7 +106,11 @@ def createAnalysis(self, seq_str, batch_ary): return Restriction.Analysis(rb, seq) def assertAnalysisFormat(self, analysis, expected): - """Asserts that the Restriction.Analysis make_format(print_that) matches some string.""" + """Test make_format. + + Test that the Restriction.Analysis make_format(print_that) matches + some string. + """ dct = analysis.mapping ls, nc = [], [] for k, v in dct.items(): @@ -109,7 +122,9 @@ def assertAnalysisFormat(self, analysis, expected): self.assertEqual(result.replace(' ', ''), expected.replace(' ', '')) def test_make_format_map1(self): - """Make sure print_as('map'); print_that() does not error on wrap round with no markers. + """Test that print_as('map'); print_that() correctly wraps round. + + 1. With no marker. """ analysis = self.createAnalysis( 'CCAGTCTATAATTCG' + @@ -133,7 +148,9 @@ def test_make_format_map1(self): self.assertAnalysisFormat(analysis, '\n'.join(expected)) def test_make_format_map2(self): - """Make sure print_as('map'); print_that() does not error on wrap round with marker. + """Test that print_as('map'); print_that() correctly wraps round. + + 2. With marker. """ analysis = self.createAnalysis( 'CCAGTCTATAATTCG' + @@ -165,7 +182,9 @@ def test_make_format_map2(self): self.assertAnalysisFormat(analysis, '\n'.join(expected)) def test_make_format_map3(self): - """Make sure print_as('map'); print_that() does not error on wrap round with marker restricted. + """Test that print_as('map'); print_that() correctly wraps round. + + 3. With marker restricted. """ analysis = self.createAnalysis( 'CCAGTCTATAATTCG' + @@ -196,12 +215,10 @@ def test_make_format_map3(self): class RestrictionBatches(unittest.TestCase): - """Tests for dealing with batches of restriction enzymes. 
- """ + """Tests for dealing with batches of restriction enzymes.""" def test_creating_batch(self): - """Creating and modifying a restriction batch. - """ + """Creating and modifying a restriction batch.""" batch = RestrictionBatch([EcoRI]) batch.add(KpnI) batch += EcoRV @@ -225,9 +242,19 @@ def test_creating_batch(self): self.assertNotIn(EcoRV, batch) self.assertNotIn('EcoRV', batch) + # Create a batch with suppliers and other supplier related methods + # These tests may be 'update sensitive' since company names and + # products may change often... + batch = RestrictionBatch((), ('S')) # Sigma + self.assertEqual(batch.current_suppliers(), + ['Sigma Chemical Corporation']) + self.assertIn(EcoRI, batch) + self.assertNotIn(AanI, batch) + batch.add_supplier('B') # Life Technologies + self.assertIn(AanI, batch) + def test_batch_analysis(self): - """Sequence analysis with a restriction batch. - """ + """Sequence analysis with a restriction batch.""" seq = Seq("AAAA" + EcoRV.site + "AAAA" + EcoRI.site + "AAAA", IUPACAmbiguousDNA()) batch = RestrictionBatch([EcoRV, EcoRI]) @@ -236,27 +263,46 @@ def test_batch_analysis(self): self.assertEqual(hits[EcoRV], [8]) self.assertEqual(hits[EcoRI], [16]) + def test_premade_batches(self): + """Test search with pre-made batches CommOnly, NonComm, AllEnzymes.""" + seq = Seq('ACCCGAATTCAAAACTGACTGATCGATCGTCGACTG', IUPACAmbiguousDNA()) + search = AllEnzymes.search(seq) + self.assertEqual(search[MluCI], [6]) + # Check if '/' operator works as 'search': + search = CommOnly / seq + self.assertEqual(search[MluCI], [6]) + # Also in reverse order: + search = seq / NonComm + self.assertEqual(search[McrI], [28]) + def test_analysis_restrictions(self): - """Test Fancier restriction analysis - """ - new_seq = Seq('TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA', IUPACAmbiguousDNA()) + """Test Fancier restriction analysis.""" + new_seq = Seq('TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA', + IUPACAmbiguousDNA()) rb = RestrictionBatch([EcoRI, KpnI, EcoRV]) ana =
Analysis(rb, new_seq, linear=False) - self.assertEqual(ana.blunt(), {EcoRV: []}) # output only the result for enzymes which cut blunt + # Output only the result for enzymes which cut blunt: + self.assertEqual(ana.blunt(), {EcoRV: []}) self.assertEqual(ana.full(), {KpnI: [], EcoRV: [], EcoRI: [33]}) - self.assertEqual(ana.with_sites(), {EcoRI: [33]}) # output only the result for enzymes which have a site - self.assertEqual(ana.without_site(), {KpnI: [], EcoRV: []}) # output only the enzymes which have no site + # Output only the result for enzymes which have a site: + self.assertEqual(ana.with_sites(), {EcoRI: [33]}) + # Output only the enzymes which have no site: + self.assertEqual(ana.without_site(), {KpnI: [], EcoRV: []}) self.assertEqual(ana.with_site_size([32]), {}) - self.assertEqual(ana.only_between(1, 20), {}) # the enzymes which cut between position 1 and 20 - self.assertEqual(ana.only_between(20, 34), {EcoRI: [33]}) # etc... - self.assertEqual(ana.only_between(34, 20), {EcoRI: [33]}) # mix start end order + # The enzymes which cut between position x and y: + self.assertEqual(ana.only_between(1, 20), {}) + self.assertEqual(ana.only_between(20, 34), {EcoRI: [33]}) + # Mix start/end order: + self.assertEqual(ana.only_between(34, 20), {EcoRI: [33]}) self.assertEqual(ana.only_outside(20, 34), {}) with self.assertWarns(BiopythonWarning): ana.with_name(['fake']) self.assertEqual(ana.with_name([EcoRI]), {EcoRI: [33]}) self.assertEqual((ana._boundaries(1, 20)[:2]), (1, 20)) - self.assertEqual((ana._boundaries(20, 1)[:2]), (1, 20)) # reverse order - self.assertEqual((ana._boundaries(-1, 20)[:2]), (20, 33)) # fix negative start + # Reverse order: + self.assertEqual((ana._boundaries(20, 1)[:2]), (1, 20)) + # Fix negative start: + self.assertEqual((ana._boundaries(-1, 20)[:2]), (20, 33)) if __name__ == "__main__":