Skip to content
Browse files

Various whitespace fixes, mostly around commas and colons.

  • Loading branch information...
1 parent 9c220e5 commit 83f9b75384d81bb7c8a4b351a5eae81baf271822 @cbrueffer cbrueffer committed with peterjc
View
20 Bio/Affy/CelFile.py
@@ -39,27 +39,27 @@ def read(handle):
for line in handle:
if not line.strip():
continue
- if line[:8]=="[HEADER]":
+ if line[:8] == "[HEADER]":
section = "HEADER"
- elif line[:11]=="[INTENSITY]":
+ elif line[:11] == "[INTENSITY]":
section = "INTENSITY"
record.intensities = numpy.zeros((record.nrows, record.ncols))
record.stdevs = numpy.zeros((record.nrows, record.ncols))
record.npix = numpy.zeros((record.nrows, record.ncols), int)
- elif line[0]=="[":
+ elif line[0] == "[":
section = ""
- elif section=="HEADER":
+ elif section == "HEADER":
keyword, value = line.split("=", 1)
- if keyword=="Cols":
+ if keyword == "Cols":
record.ncols = int(value)
- elif keyword=="Rows":
+ elif keyword == "Rows":
record.nrows = int(value)
- elif section=="INTENSITY":
+ elif section == "INTENSITY":
if "=" in line:
continue
words = line.split()
y, x = map(int, words[:2])
- record.intensities[x,y] = float(words[2])
- record.stdevs[x,y] = float(words[3])
- record.npix[x,y] = int(words[4])
+ record.intensities[x, y] = float(words[2])
+ record.stdevs[x, y] = float(words[3])
+ record.npix[x, y] = int(words[4])
return record
View
10 Bio/Application/__init__.py
@@ -73,7 +73,7 @@ def __init__(self, returncode, cmd, stdout="", stderr=""):
def __str__(self):
#get first line of any stderr message
try:
- msg = self.stderr.lstrip().split("\n",1)[0].rstrip()
+ msg = self.stderr.lstrip().split("\n", 1)[0].rstrip()
except:
msg = ""
if msg:
@@ -195,13 +195,13 @@ def __init__(self, cmd, **kwargs):
#Beware of binding-versus-assignment confusion issues
def getter(name):
- return lambda x : x._get_parameter(name)
+ return lambda x: x._get_parameter(name)
def setter(name):
- return lambda x, value : x.set_parameter(name, value)
+ return lambda x, value: x.set_parameter(name, value)
def deleter(name):
- return lambda x : x._clear_parameter(name)
+ return lambda x: x._clear_parameter(name)
doc = p.description
if isinstance(p, _Switch):
@@ -334,7 +334,7 @@ def _check_value(self, value, name, check_function):
"""
if check_function is not None:
is_good = check_function(value) # May raise an exception
- assert is_good in [0,1,True,False]
+ assert is_good in [0, 1, True, False]
if not is_good:
raise ValueError("Invalid parameter value %r for parameter %s"
% (value, name))
View
14 Bio/Cluster/__init__.py
@@ -83,7 +83,7 @@ def _savetree(jobname, tree, order, transpose):
order1 = nodeorder[index1]
counts1 = nodecounts[index1]
outputfile.write(nodeID[index1]+"\t")
- nodedist[nodeindex] = max(nodedist[nodeindex],nodedist[index1])
+ nodedist[nodeindex] = max(nodedist[nodeindex], nodedist[index1])
else:
order1 = order[min1]
counts1 = 1
@@ -93,7 +93,7 @@ def _savetree(jobname, tree, order, transpose):
order2 = nodeorder[index2]
counts2 = nodecounts[index2]
outputfile.write(nodeID[index2]+"\t")
- nodedist[nodeindex] = max(nodedist[nodeindex],nodedist[index2])
+ nodedist[nodeindex] = max(nodedist[nodeindex], nodedist[index2])
else:
order2 = order[min2]
counts2 = 1
@@ -493,7 +493,7 @@ def save(self, jobname, geneclusters=None, expclusters=None):
calculated by kcluster.
"""
- (ngenes,nexps) = numpy.shape(self.data)
+ (ngenes, nexps) = numpy.shape(self.data)
if self.gorder is None:
gorder = numpy.arange(ngenes)
else:
@@ -538,7 +538,7 @@ def save(self, jobname, geneclusters=None, expclusters=None):
else:
expindex = numpy.argsort(eorder)
filename = filename + postfix
- self._savedata(filename,gid,aid,geneindex,expindex)
+ self._savedata(filename, gid, aid, geneindex, expindex)
def _savekmeans(self, filename, clusterids, order, transpose):
# Save a k-means clustering solution
@@ -582,7 +582,7 @@ def _savedata(self, jobname, gid, aid, geneindex, expindex):
if self.mask is not None:
mask = self.mask
else:
- mask = numpy.ones((ngenes,nexps), int)
+ mask = numpy.ones((ngenes, nexps), int)
if self.gweight is not None:
gweight = self.gweight
else:
@@ -621,8 +621,8 @@ def _savedata(self, jobname, gid, aid, geneindex, expindex):
(self.geneid[i], genename[i], gweight[i]))
for j in expindex:
outputfile.write('\t')
- if mask[i,j]:
- outputfile.write(str(self.data[i,j]))
+ if mask[i, j]:
+ outputfile.write(str(self.data[i, j]))
outputfile.write('\n')
outputfile.close()
View
2 Bio/Compass/__init__.py
@@ -157,7 +157,7 @@ def __read_names(record, line):
record.hit = m.group(2)
-def __read_threshold(record,line):
+def __read_threshold(record, line):
if not line.startswith("Threshold"):
raise ValueError("Line does not start with 'Threshold':\n%s" % line)
m = __regex["threshold"].search(line)
View
24 Bio/Entrez/__init__.py
@@ -94,8 +94,8 @@ def epost(db, **keywds):
Raises an IOError exception if there's a network error.
"""
- cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/epost.fcgi'
- variables = {'db' : db}
+ cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/epost.fcgi'
+ variables = {'db': db}
variables.update(keywds)
return _open(cgi, variables, post=True)
@@ -125,8 +125,8 @@ def efetch(db, **keywds):
Warning: The NCBI changed the default retmode in Feb 2012, so many
databases which previously returned text output now give XML.
"""
- cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
- variables = {'db' : db}
+ cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
+ variables = {'db': db}
keywords = keywds
if "id" in keywds and isinstance(keywds["id"], list):
#Fix for NCBI change (probably part of EFetch 2,0, Feb 2012) where
@@ -168,9 +168,9 @@ def esearch(db, term, **keywds):
True
"""
- cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
- variables = {'db' : db,
- 'term' : term}
+ cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
+ variables = {'db': db,
+ 'term': term}
variables.update(keywds)
return _open(cgi, variables)
@@ -208,7 +208,7 @@ def elink(**keywds):
This is explained in much more detail in the Biopython Tutorial.
"""
- cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi'
+ cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi'
variables = {}
variables.update(keywds)
return _open(cgi, variables)
@@ -236,7 +236,7 @@ def einfo(**keywds):
True
"""
- cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi'
+ cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi'
variables = {}
variables.update(keywds)
return _open(cgi, variables)
@@ -268,7 +268,7 @@ def esummary(**keywds):
Computational biology and chemistry
"""
- cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
+ cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
variables = {}
variables.update(keywds)
return _open(cgi, variables)
@@ -302,7 +302,7 @@ def egquery(**keywds):
True
"""
- cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/egquery.fcgi'
+ cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/egquery.fcgi'
variables = {}
variables.update(keywds)
return _open(cgi, variables)
@@ -331,7 +331,7 @@ def espell(**keywds):
biopython
"""
- cgi='http://eutils.ncbi.nlm.nih.gov/entrez/eutils/espell.fcgi'
+ cgi = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/espell.fcgi'
variables = {}
variables.update(keywds)
return _open(cgi, variables)
View
4 Bio/ExPASy/__init__.py
@@ -80,7 +80,7 @@ def sprot_search_ful(text, make_wild=None, swissprot=1, trembl=None,
Search SwissProt by full text.
"""
- variables = {'SEARCH' : text}
+ variables = {'SEARCH': text}
if make_wild:
variables['makeWild'] = 'on'
if swissprot:
@@ -102,7 +102,7 @@ def sprot_search_de(text, swissprot=1, trembl=None,
organelle.
"""
- variables = {'SEARCH' : text}
+ variables = {'SEARCH': text}
if swissprot:
variables['S'] = 'on'
if trembl:
View
8 Bio/FSSP/FSSPTools.py
@@ -17,13 +17,13 @@ def __init__(self):
self.data = {}
-def mult_align(sum_dict,align_dict):
+def mult_align(sum_dict, align_dict):
"""Returns a biopython multiple alignment instance (Bio.Align.Generic)"""
mult_align_dict = {}
for j in align_dict.abs(1).pos_align_dict:
mult_align_dict[j] = ''
- for i in range(1,len(align_dict)+1):
+ for i in range(1, len(align_dict)+1):
# loop on positions
for j in align_dict.abs(i).pos_align_dict:
# loop within a position
@@ -53,7 +53,7 @@ def mult_align(sum_dict,align_dict):
# Took me ~160 seconds for the largest FSSP file (1reqA.fssp)
#
-def filter(sum_dict,align_dict,filter_attribute,low_bound, high_bound):
+def filter(sum_dict, align_dict, filter_attribute, low_bound, high_bound):
"""filters a passed summary section and alignment section according to a numeric
attribute in the summary section. Returns new summary and alignment sections"""
new_sum_dict = FSSP.FSSPSumDict()
@@ -62,7 +62,7 @@ def filter(sum_dict,align_dict,filter_attribute,low_bound, high_bound):
# new_align_dict[i] = copy.copy(align_dict[i])
# new_align_dict = copy.copy(align_dict)
for prot_num in sum_dict:
- attr_value = getattr(sum_dict[prot_num],filter_attribute)
+ attr_value = getattr(sum_dict[prot_num], filter_attribute)
if attr_value >= low_bound and attr_value <= high_bound:
new_sum_dict[prot_num] = sum_dict[prot_num]
prot_numbers = new_sum_dict.keys()
View
30 Bio/FSSP/__init__.py
@@ -16,7 +16,7 @@
from Bio import Alphabet
fff_rec = fssp_rec.fff_rec
header_records = {
- 'database' : re.compile('^DATABASE'),
+ 'database': re.compile('^DATABASE'),
'pdbid': re.compile('^PDBID'),
'header': re.compile('^HEADER'),
'compnd': re.compile('^COMPND'),
@@ -44,24 +44,24 @@ def __init__(self):
self.seqlength = 0
self.nalign = 0
- def fill_header(self,inline):
+ def fill_header(self, inline):
for i in header_records:
if header_records[i].match(inline):
if i == 'database' or i == 'seqlength' or i == 'nalign':
- setattr(self,i,int(inline.split()[1]))
+ setattr(self, i, int(inline.split()[1]))
elif i == 'compnd' or i == 'author':
- setattr(self,i,inline.split()[1:])
+ setattr(self, i, inline.split()[1:])
elif i == 'source' or i == 'header':
attr = inline[inline.find(' ')+1:].strip()
- setattr(self,i,attr)
+ setattr(self, i, attr)
else:
- setattr(self,i,inline.split()[1])
+ setattr(self, i, inline.split()[1])
class PosAlign(object):
- def __init__(self,inStr):
+ def __init__(self, inStr):
inStr = inStr.strip()
- if len(inStr) != 1 and len(inStr)!= 2:
+ if len(inStr) != 1 and len(inStr) != 2:
raise ValueError('PosAlign: length not 2 chars' + inStr)
if inStr == '..':
self.aa = '-'
@@ -88,7 +88,7 @@ def __repr__(self):
class FSSPSumRec(object):
""" Contains info from an FSSP summary record"""
- def __init__(self,in_str):
+ def __init__(self, in_str):
self.raw = in_str
in_rec = in_str.strip().split()
# print in_rec
@@ -127,7 +127,7 @@ def __repr__(self):
class FSSPAlignRec(object):
- def __init__(self,in_fff_rec):
+ def __init__(self, in_fff_rec):
# print in_fff_rec
self.abs_res_num = int(in_fff_rec[fssp_rec.align.abs_res_num])
self.pdb_res_num = in_fff_rec[fssp_rec.align.pdb_res_num].strip()
@@ -144,7 +144,7 @@ def __init__(self,in_fff_rec):
self.pos_align_dict = {}
self.PosAlignList = []
- def add_align_list(self,align_list):
+ def add_align_list(self, align_list):
for i in align_list:
self.PosAlignList.append(PosAlign(i))
@@ -171,16 +171,16 @@ def build_resnum_list(self):
# Given an absolute residue number & chain, returns the relevant fssp
# record
- def abs(self,num):
+ def abs(self, num):
return self[self.abs_res_dict[num]]
# Given an PDB residue number & chain, returns the relevant fssp
# record
- def pdb(self,num):
+ def pdb(self, num):
return self[self.pdb_res_dict[num]]
# Returns a sequence string
- def sequence(self,num):
+ def sequence(self, num):
s = ''
sorted_pos_nums = self.abs_res_dict.keys()
sorted_pos_nums.sort()
@@ -259,7 +259,7 @@ def read_fssp(fssp_handle):
curline = fssp_handle.readline() # Read the next line
while alignments_rec.match(curline):
align_rec = FSSPAlignRec(fff_rec(curline))
- key = align_rec.chain_id+align_rec.res_name+str(align_rec.pdb_res_num)
+ key = align_rec.chain_id + align_rec.res_name + str(align_rec.pdb_res_num)
align_list = curline[fssp_rec.align.start_aa_list:].strip().split()
if key not in align_dict:
align_dict[key] = align_rec
View
12 Bio/FSSP/fssp_rec.py
@@ -1,6 +1,6 @@
# A superclass for reading [f]ixed-column type [f]lat-[f]ile records. (e.g.
class fff_rec:
- def __init__(self,inrec=''):
+ def __init__(self, inrec=''):
self.data = inrec
def __repr__(self):
@@ -10,7 +10,7 @@ def __repr__(self):
def __len__(self):
return len(self.data)
- def __getitem__(self,index):
+ def __getitem__(self, index):
if isinstance(index, slice):
return self.data[index]
elif (isinstance(index, tuple) or isinstance(index, list)) \
@@ -23,13 +23,13 @@ def __getitem__(self,index):
# Definition of the align section in a FSSP file
class align(object):
- abs_res_num = (0,4)
- pdb_res_num = (4,9)
+ abs_res_num = (0, 4)
+ pdb_res_num = (4, 9)
chain_id = 10
res_name = 12
ss1 = 15
turn3 = 17
turn4 = 18
- turn5 = (20,22)
- acc = (34,37)
+ turn5 = (20, 22)
+ acc = (34, 37)
start_aa_list = 42
View
4 Bio/File.py
@@ -110,7 +110,7 @@ def next(self):
return next
def readlines(self, *args, **keywds):
- lines = self._saved + self._handle.readlines(*args,**keywds)
+ lines = self._saved + self._handle.readlines(*args, **keywds)
self._saved = []
return lines
@@ -118,7 +118,7 @@ def readline(self, *args, **keywds):
if self._saved:
line = self._saved.pop(0)
else:
- line = self._handle.readline(*args,**keywds)
+ line = self._handle.readline(*args, **keywds)
return line
def read(self, size=-1):
View
2 Bio/HotRand.py
@@ -29,7 +29,7 @@ class HotCache(object):
def __init__( self ):
# self.url = 'http://www.fourmilab.ch/cgi-bin/uncgi/Hotbits?num=5000&min=1&max=6&col=1'
self.url = 'http://www.random.org/cgi-bin/randbyte?'
- self.query = { 'nbytes' : 128, 'fmt' : 'h' }
+ self.query = { 'nbytes': 128, 'fmt': 'h' }
self.fill_hot_cache()
def fill_hot_cache( self ):
View
2 Bio/MaxEntropy.py
@@ -332,6 +332,6 @@ def udf3(ts, cl):
user_functions=[udf1, udf2, udf3] # must be an iterable type
xe=train(xcar, ycar, user_functions)
- for xv,yv in zip(xcar, ycar):
+ for xv, yv in zip(xcar, ycar):
xc=classify(xe, xv)
print 'Pred:', xv, 'gives', xc, 'y is', yv
View
4 Bio/NaiveBayes.py
@@ -33,7 +33,7 @@ def _contents(items):
term = 1.0/len(items)
counts = {}
for item in items:
- counts[item] = counts.get(item,0) + term
+ counts[item] = counts.get(item, 0) + term
return counts
@@ -92,7 +92,7 @@ def calculate(nb, observation, scale=0):
lp_observation = 0.0 # P(observation)
if scale: # Only calculate this if requested.
# log P(observation) = log SUM_i P(observation|class_i)P(class_i)
- obs = numpy.exp(numpy.clip(lp_prior+lp_observation_class,-700,+700))
+ obs = numpy.exp(numpy.clip(lp_prior+lp_observation_class, -700, +700))
lp_observation = numpy.log(sum(obs))
# Calculate log P(class|observation).
View
24 Bio/Seq.py
@@ -41,7 +41,7 @@ def _maketrans(complement_mapping):
after = ''.join(complement_mapping.values())
before = before + before.lower()
after = after + after.lower()
- if sys.version_info[0] == 3 :
+ if sys.version_info[0] == 3:
return str.maketrans(before, after)
else:
return string.maketrans(before, after)
@@ -105,7 +105,7 @@ def __init__(self, data, alphabet = Alphabet.generic_alphabet):
# A data property is/was a Seq API requirement
# Note this is read only since the Seq object is meant to be imutable
@property
- def data(self) :
+ def data(self):
"""Sequence as a string (DEPRECATED).
This is a read only property provided for backwards compatility with
@@ -204,7 +204,7 @@ def __len__(self):
"""Returns the length of the sequence, use len(my_seq)."""
return len(self._data) # Seq API requirement
- def __getitem__(self, index) : # Seq API requirement
+ def __getitem__(self, index): # Seq API requirement
"""Returns a subsequence of single letter, use my_seq[index]."""
#Note since Python 2.0, __getslice__ is deprecated
#and __getitem__ is used instead.
@@ -282,7 +282,7 @@ def __add__(self, other):
if isinstance(other, SeqRecord):
#Get the SeqRecord's __radd__ to handle this
return NotImplemented
- else :
+ else:
raise TypeError
def __radd__(self, other):
@@ -837,7 +837,7 @@ def transcribe(self):
alphabet = IUPAC.ambiguous_rna
else:
alphabet = Alphabet.generic_rna
- return Seq(str(self).replace('T','U').replace('t','u'), alphabet)
+ return Seq(str(self).replace('T', 'U').replace('t', 'u'), alphabet)
def back_transcribe(self):
"""Returns the DNA sequence from an RNA sequence. New Seq object.
@@ -1493,9 +1493,9 @@ def ungap(self, gap=None):
"""
#Offload the alphabet stuff
s = Seq(self._character, self.alphabet).ungap()
- if s :
+ if s:
return UnknownSeq(self._length, s.alphabet, self._character)
- else :
+ else:
return Seq("", s.alphabet)
@@ -1807,7 +1807,7 @@ def complement(self):
d = ambiguous_rna_complement
else:
d = ambiguous_dna_complement
- c = dict([(x.lower(), y.lower()) for x,y in d.iteritems()])
+ c = dict([(x.lower(), y.lower()) for x, y in d.iteritems()])
d.update(c)
self.data = map(lambda c: d[c], self.data)
self.data = array.array(self.array_indicator, self.data)
@@ -1890,7 +1890,7 @@ def transcribe(dna):
elif isinstance(dna, MutableSeq):
return dna.toseq().transcribe()
else:
- return dna.replace('T','U').replace('t','u')
+ return dna.replace('T', 'U').replace('t', 'u')
def back_transcribe(rna):
@@ -1912,7 +1912,7 @@ def back_transcribe(rna):
elif isinstance(rna, MutableSeq):
return rna.toseq().back_transcribe()
else:
- return rna.replace('U','T').replace('u','t')
+ return rna.replace('U', 'T').replace('u', 't')
def _translate_str(sequence, table, stop_symbol="*", to_stop=False,
@@ -1989,7 +1989,7 @@ def _translate_str(sequence, table, stop_symbol="*", to_stop=False,
sequence = sequence[3:-3]
amino_acids = ["M"]
n = len(sequence)
- for i in xrange(0,n-n%3,3):
+ for i in xrange(0, n-n%3, 3):
codon = sequence[i:i+3]
try:
amino_acids.append(forward_table[codon])
@@ -2136,7 +2136,7 @@ def reverse_complement(sequence):
def _test():
"""Run the Bio.Seq module's doctests (PRIVATE)."""
- if sys.version_info[0:2] == (3,1):
+ if sys.version_info[0:2] == (3, 1):
print "Not running Bio.Seq doctest on Python 3.1"
print "See http://bugs.python.org/issue7490"
else:
View
13 Bio/SeqFeature.py
@@ -288,14 +288,14 @@ def extract(self, parent_sequence):
#(the MutableSeq reverse complement acts in situ)
parent_sequence = parent_sequence.toseq()
if self.sub_features:
- if self.location_operator!="join":
+ if self.location_operator != "join":
raise ValueError(self.location_operator)
if self.location.strand == -1:
#This is a special case given how the GenBank parser works.
#Must avoid doing the reverse complement twice.
parts = []
for f_sub in self.sub_features[::-1]:
- assert f_sub.location.strand==-1
+ assert f_sub.location.strand == -1
parts.append(f_sub.location.extract(parent_sequence))
else:
#This copes with mixed strand features:
@@ -303,7 +303,8 @@ def extract(self, parent_sequence):
for f_sub in self.sub_features]
#We use addition rather than a join to avoid alphabet issues:
f_seq = parts[0]
- for part in parts[1:] : f_seq += part
+ for part in parts[1:]:
+ f_seq += part
return f_seq
else:
return self.location.extract(parent_sequence)
@@ -1416,15 +1417,15 @@ def _test():
"""
import doctest
import os
- if os.path.isdir(os.path.join("..","Tests")):
+ if os.path.isdir(os.path.join("..", "Tests")):
print "Running doctests..."
cur_dir = os.path.abspath(os.curdir)
- os.chdir(os.path.join("..","Tests"))
+ os.chdir(os.path.join("..", "Tests"))
doctest.testmod()
os.chdir(cur_dir)
del cur_dir
print "Done"
- elif os.path.isdir(os.path.join("Tests")) :
+ elif os.path.isdir(os.path.join("Tests")):
print "Running doctests..."
cur_dir = os.path.abspath(os.curdir)
os.chdir(os.path.join("Tests"))
View
16 Bio/SeqRecord.py
@@ -73,7 +73,7 @@ def __init__(self, length):
def __setitem__(self, key, value):
#The check hasattr(self, "_length") is to cope with pickle protocol 2
#I couldn't seem to avoid this with __getstate__ and __setstate__
- if not hasattr(value,"__len__") or not hasattr(value,"__getitem__") \
+ if not hasattr(value, "__len__") or not hasattr(value, "__getitem__") \
or (hasattr(self, "_length") and len(value) != self._length):
raise TypeError("We only allow python sequences (lists, tuples or "
"strings) of length %i." % self._length)
@@ -240,7 +240,7 @@ def _set_per_letter_annotations(self, value):
self._per_letter_annotations = _RestrictedDict(length=0)
self._per_letter_annotations.update(value)
letter_annotations = property(
- fget=lambda self : self._per_letter_annotations,
+ fget=lambda self: self._per_letter_annotations,
fset=_set_per_letter_annotations,
doc="""Dictionary of per-letter-annotation for the sequence.
@@ -303,7 +303,7 @@ def _set_seq(self, value):
#e.g. seq is None
self._per_letter_annotations = _RestrictedDict(length=0)
- seq = property(fget=lambda self : self._seq,
+ seq = property(fget=lambda self: self._seq,
fset=_set_seq,
doc="The sequence itself, as a Seq or MutableSeq object.")
@@ -829,11 +829,11 @@ def __add__(self, other):
answer.name = self.name
if self.description == other.description:
answer.description = self.description
- for k,v in self.annotations.iteritems():
+ for k, v in self.annotations.iteritems():
if k in other.annotations and other.annotations[k] == v:
answer.annotations[k] = v
#Can append matching per-letter-annotation
- for k,v in self.letter_annotations.iteritems():
+ for k, v in self.letter_annotations.iteritems():
if k in other.letter_annotations:
answer.letter_annotations[k] = v + other.letter_annotations[k]
return answer
@@ -1109,7 +1109,7 @@ def reverse_complement(self, id=False, name=False, description=False,
#so we need to resort in case of overlapping features.
#NOTE - In the common case of gene before CDS (and similar) with
#the exact same locations, this will still maintain gene before CDS
- answer.features.sort(key=lambda x : x.location.start.position)
+ answer.features.sort(key=lambda x: x.location.start.position)
if isinstance(annotations, dict):
answer.annotations = annotations
elif annotations:
@@ -1132,10 +1132,10 @@ def _test():
"""
import doctest
import os
- if os.path.isdir(os.path.join("..","Tests")):
+ if os.path.isdir(os.path.join("..", "Tests")):
print "Running doctests..."
cur_dir = os.path.abspath(os.curdir)
- os.chdir(os.path.join("..","Tests"))
+ os.chdir(os.path.join("..", "Tests"))
doctest.testmod()
os.chdir(cur_dir)
del cur_dir
View
2 Bio/SeqUtils/CheckSum.py
@@ -107,7 +107,7 @@ def seguid(seq):
m.update(_as_bytes(seq.upper()))
try:
#For Python 3+
- return base64.encodebytes(m.digest()).decode().replace("\n","").rstrip("=")
+ return base64.encodebytes(m.digest()).decode().replace("\n", "").rstrip("=")
except AttributeError:
pass
# For all other Pythons
View
26 Bio/SeqUtils/CodonUsage.py
@@ -3,19 +3,19 @@
from Bio import SeqIO # To parse a FASTA file
-CodonsDict = {'TTT':0, 'TTC':0, 'TTA':0, 'TTG':0, 'CTT':0,
-'CTC':0, 'CTA':0, 'CTG':0, 'ATT':0, 'ATC':0,
-'ATA':0, 'ATG':0, 'GTT':0, 'GTC':0, 'GTA':0,
-'GTG':0, 'TAT':0, 'TAC':0, 'TAA':0, 'TAG':0,
-'CAT':0, 'CAC':0, 'CAA':0, 'CAG':0, 'AAT':0,
-'AAC':0, 'AAA':0, 'AAG':0, 'GAT':0, 'GAC':0,
-'GAA':0, 'GAG':0, 'TCT':0, 'TCC':0, 'TCA':0,
-'TCG':0, 'CCT':0, 'CCC':0, 'CCA':0, 'CCG':0,
-'ACT':0, 'ACC':0, 'ACA':0, 'ACG':0, 'GCT':0,
-'GCC':0, 'GCA':0, 'GCG':0, 'TGT':0, 'TGC':0,
-'TGA':0, 'TGG':0, 'CGT':0, 'CGC':0, 'CGA':0,
-'CGG':0, 'AGT':0, 'AGC':0, 'AGA':0, 'AGG':0,
-'GGT':0, 'GGC':0, 'GGA':0, 'GGG':0}
+CodonsDict = {'TTT': 0, 'TTC': 0, 'TTA': 0, 'TTG': 0, 'CTT': 0,
+'CTC': 0, 'CTA': 0, 'CTG': 0, 'ATT': 0, 'ATC': 0,
+'ATA': 0, 'ATG': 0, 'GTT': 0, 'GTC': 0, 'GTA': 0,
+'GTG': 0, 'TAT': 0, 'TAC': 0, 'TAA': 0, 'TAG': 0,
+'CAT': 0, 'CAC': 0, 'CAA': 0, 'CAG': 0, 'AAT': 0,
+'AAC': 0, 'AAA': 0, 'AAG': 0, 'GAT': 0, 'GAC': 0,
+'GAA': 0, 'GAG': 0, 'TCT': 0, 'TCC': 0, 'TCA': 0,
+'TCG': 0, 'CCT': 0, 'CCC': 0, 'CCA': 0, 'CCG': 0,
+'ACT': 0, 'ACC': 0, 'ACA': 0, 'ACG': 0, 'GCT': 0,
+'GCC': 0, 'GCA': 0, 'GCG': 0, 'TGT': 0, 'TGC': 0,
+'TGA': 0, 'TGG': 0, 'CGT': 0, 'CGC': 0, 'CGA': 0,
+'CGG': 0, 'AGT': 0, 'AGC': 0, 'AGA': 0, 'AGG': 0,
+'GGT': 0, 'GGC': 0, 'GGA': 0, 'GGG': 0}
# this dictionary shows which codons encode the same AA
View
16 Bio/SeqUtils/CodonUsageIndices.py
@@ -4,11 +4,11 @@
# sharp Ecoli index for codon adaption index.
# from Sharp & Li, Nucleic Acids Res. 1987
SharpEcoliIndex = {
-'GCA':0.586, 'GCC':0.122, 'GCG':0.424, 'GCT':1, 'AGA':0.004, 'AGG':0.002, 'CGA':0.004,
-'CGC':0.356, 'CGG':0.004, 'CGT':1, 'AAC':1, 'AAT':0.051, 'GAC':1, 'GAT':0.434, 'TGC':1,
-'TGT':0.5, 'CAA':0.124, 'CAG':1, 'GAA':1, 'GAG':0.259, 'GGA':0.01, 'GGC':0.724, 'GGG':0.019,
-'GGT':1, 'CAC':1, 'CAT':0.291, 'ATA':0.003, 'ATC':1, 'ATT':0.185, 'CTA':0.007, 'CTC':0.037,
-'CTG':1, 'CTT':0.042, 'TTA':0.02, 'TTG':0.02, 'AAA':1, 'AAG':0.253, 'ATG':1, 'TTC':1, 'TTT':0.296,
-'CCA':0.135, 'CCC':0.012, 'CCG':1, 'CCT':0.07, 'AGC':0.41, 'AGT':0.085, 'TCA':0.077, 'TCC':0.744,
-'TCG':0.017, 'TCT':1, 'ACA':0.076, 'ACC':1,'ACG':0.099, 'ACT':0.965, 'TGG':1, 'TAC':1, 'TAT':0.239,
-'GTA':0.495, 'GTC':0.066,'GTG':0.221, 'GTT':1}
+'GCA': 0.586, 'GCC': 0.122, 'GCG': 0.424, 'GCT': 1, 'AGA': 0.004, 'AGG': 0.002, 'CGA': 0.004,
+'CGC': 0.356, 'CGG': 0.004, 'CGT': 1, 'AAC': 1, 'AAT': 0.051, 'GAC': 1, 'GAT': 0.434, 'TGC': 1,
+'TGT': 0.5, 'CAA': 0.124, 'CAG': 1, 'GAA': 1, 'GAG': 0.259, 'GGA': 0.01, 'GGC': 0.724, 'GGG': 0.019,
+'GGT': 1, 'CAC': 1, 'CAT': 0.291, 'ATA': 0.003, 'ATC': 1, 'ATT': 0.185, 'CTA': 0.007, 'CTC': 0.037,
+'CTG': 1, 'CTT': 0.042, 'TTA': 0.02, 'TTG': 0.02, 'AAA': 1, 'AAG': 0.253, 'ATG': 1, 'TTC': 1, 'TTT': 0.296,
+'CCA': 0.135, 'CCC': 0.012, 'CCG': 1, 'CCT': 0.07, 'AGC': 0.41, 'AGT': 0.085, 'TCA': 0.077, 'TCC': 0.744,
+'TCG': 0.017, 'TCT': 1, 'ACA': 0.076, 'ACC': 1, 'ACG': 0.099, 'ACT': 0.965, 'TGG': 1, 'TAC': 1, 'TAT': 0.239,
+'GTA': 0.495, 'GTC': 0.066, 'GTG': 0.221, 'GTT': 1}
View
8 Bio/SeqUtils/IsoelectricPoint.py
@@ -20,10 +20,10 @@
"""
-positive_pKs = { 'Nterm': 7.5, 'K': 10.0, 'R': 12.0, 'H':5.98 }
-negative_pKs = { 'Cterm': 3.55, 'D': 4.05, 'E': 4.45, 'C':9.0, 'Y':10.0 }
-pKcterminal= {'D':4.55, 'E':4.75}
-pKnterminal = {'A':7.59, 'M':7.0, 'S':6.93, 'P':8.36, 'T':6.82, 'V':7.44, 'E':7.7}
+positive_pKs = { 'Nterm': 7.5, 'K': 10.0, 'R': 12.0, 'H': 5.98 }
+negative_pKs = { 'Cterm': 3.55, 'D': 4.05, 'E': 4.45, 'C': 9.0, 'Y': 10.0 }
+pKcterminal= { 'D': 4.55, 'E': 4.75 }
+pKnterminal = { 'A': 7.59, 'M': 7.0, 'S': 6.93, 'P': 8.36, 'T': 6.82, 'V': 7.44, 'E': 7.7 }
charged_aas = ('K', 'R', 'H', 'D', 'E', 'C', 'Y')
View
82 Bio/SeqUtils/MeltingTemp.py
@@ -9,7 +9,7 @@
import math
-def Tm_staluc(s,dnac=50,saltc=50,rna=0):
+def Tm_staluc(s, dnac=50, saltc=50, rna=0):
"""Returns DNA/DNA tm using nearest neighbor thermodynamics.
dnac is DNA concentration [nM]
@@ -61,7 +61,7 @@ def Tm_staluc(s,dnac=50,saltc=50,rna=0):
def tercorr(stri):
deltah = 0
deltas = 0
- if rna==0:
+ if rna == 0:
#DNA/DNA
#Allawi and SantaLucia (1997). Biochemistry 36 : 10581-10594
if stri.startswith('G') or stri.startswith('C'):
@@ -78,8 +78,8 @@ def tercorr(stri):
deltas -= 4.1
dhL = dh + deltah
dsL = ds + deltas
- return dsL,dhL
- elif rna==1:
+ return dsL, dhL
+ elif rna == 1:
#RNA
if stri.startswith('G') or stri.startswith('C'):
deltah -= 3.61
@@ -98,17 +98,17 @@ def tercorr(stri):
dhL = dh + deltah
dsL = ds + deltas
# print "delta h=",dhL
- return dsL,dhL
+ return dsL, dhL
else:
raise ValueError("rna = %r not supported" % rna)
- def overcount(st,p):
+ def overcount(st, p):
"""Returns how many p are on st, works even for overlapping"""
ocu = 0
x = 0
while True:
try:
- i = st.index(p,x)
+ i = st.index(p, x)
except ValueError:
break
ocu += 1
@@ -123,46 +123,46 @@ def overcount(st,p):
k = (dnac/4.0)*1e-9
#With complementary check on, the 4.0 should be changed to a variable.
- if rna==0:
+ if rna == 0:
#DNA/DNA
#Allawi and SantaLucia (1997). Biochemistry 36 : 10581-10594
- vh = vh + (overcount(sup,"AA"))*7.9 + (overcount(sup,"TT"))*\
- 7.9 + (overcount(sup,"AT"))*7.2 + (overcount(sup,"TA"))*7.2 \
- + (overcount(sup,"CA"))*8.5 + (overcount(sup,"TG"))*8.5 + \
- (overcount(sup,"GT"))*8.4 + (overcount(sup,"AC"))*8.4
- vh = vh + (overcount(sup,"CT"))*7.8+(overcount(sup,"AG"))*\
- 7.8 + (overcount(sup,"GA"))*8.2 + (overcount(sup,"TC"))*8.2
- vh = vh + (overcount(sup,"CG"))*10.6+(overcount(sup,"GC"))*\
- 9.8 + (overcount(sup,"GG"))*8 + (overcount(sup,"CC"))*8
- vs = vs + (overcount(sup,"AA"))*22.2+(overcount(sup,"TT"))*\
- 22.2 + (overcount(sup,"AT"))*20.4 + (overcount(sup,"TA"))*21.3
- vs = vs + (overcount(sup,"CA"))*22.7+(overcount(sup,"TG"))*\
- 22.7 + (overcount(sup,"GT"))*22.4 + (overcount(sup,"AC"))*22.4
- vs = vs + (overcount(sup,"CT"))*21.0+(overcount(sup,"AG"))*\
- 21.0 + (overcount(sup,"GA"))*22.2 + (overcount(sup,"TC"))*22.2
- vs = vs + (overcount(sup,"CG"))*27.2+(overcount(sup,"GC"))*\
- 24.4 + (overcount(sup,"GG"))*19.9 + (overcount(sup,"CC"))*19.9
+ vh = vh + (overcount(sup, "AA"))*7.9 + (overcount(sup, "TT"))*\
+ 7.9 + (overcount(sup, "AT"))*7.2 + (overcount(sup, "TA"))*7.2 \
+ + (overcount(sup, "CA"))*8.5 + (overcount(sup, "TG"))*8.5 + \
+ (overcount(sup, "GT"))*8.4 + (overcount(sup, "AC"))*8.4
+ vh = vh + (overcount(sup, "CT"))*7.8+(overcount(sup, "AG"))*\
+ 7.8 + (overcount(sup, "GA"))*8.2 + (overcount(sup, "TC"))*8.2
+ vh = vh + (overcount(sup, "CG"))*10.6+(overcount(sup, "GC"))*\
+ 9.8 + (overcount(sup, "GG"))*8 + (overcount(sup, "CC"))*8
+ vs = vs + (overcount(sup, "AA"))*22.2+(overcount(sup, "TT"))*\
+ 22.2 + (overcount(sup, "AT"))*20.4 + (overcount(sup, "TA"))*21.3
+ vs = vs + (overcount(sup, "CA"))*22.7+(overcount(sup, "TG"))*\
+ 22.7 + (overcount(sup, "GT"))*22.4 + (overcount(sup, "AC"))*22.4
+ vs = vs + (overcount(sup, "CT"))*21.0+(overcount(sup, "AG"))*\
+ 21.0 + (overcount(sup, "GA"))*22.2 + (overcount(sup, "TC"))*22.2
+ vs = vs + (overcount(sup, "CG"))*27.2+(overcount(sup, "GC"))*\
+ 24.4 + (overcount(sup, "GG"))*19.9 + (overcount(sup, "CC"))*19.9
ds = vs
dh = vh
- elif rna==1:
+ elif rna == 1:
#RNA/RNA hybridisation of Xia et al (1998)
#Biochemistry 37: 14719-14735
- vh = vh+(overcount(sup,"AA"))*6.82+(overcount(sup,"TT"))*6.6+\
- (overcount(sup,"AT"))*9.38 + (overcount(sup,"TA"))*7.69+\
- (overcount(sup,"CA"))*10.44 + (overcount(sup,"TG"))*10.5+\
- (overcount(sup,"GT"))*11.4 + (overcount(sup,"AC"))*10.2
- vh = vh + (overcount(sup,"CT"))*10.48 + (overcount(sup,"AG"))\
- *7.6+(overcount(sup,"GA"))*12.44+(overcount(sup,"TC"))*13.3
- vh = vh + (overcount(sup,"CG"))*10.64 + (overcount(sup,"GC"))\
- *14.88+(overcount(sup,"GG"))*13.39+(overcount(sup,"CC"))*12.2
- vs = vs + (overcount(sup,"AA"))*19.0 + (overcount(sup,"TT"))*\
- 18.4+(overcount(sup,"AT"))*26.7+(overcount(sup,"TA"))*20.5
- vs = vs + (overcount(sup,"CA"))*26.9 + (overcount(sup,"TG"))*\
- 27.8 + (overcount(sup,"GT"))*29.5 + (overcount(sup,"AC"))*26.2
- vs = vs + (overcount(sup,"CT"))*27.1 + (overcount(sup,"AG"))*\
- 19.2 + (overcount(sup,"GA"))*32.5 + (overcount(sup,"TC"))*35.5
- vs = vs + (overcount(sup,"CG"))*26.7 + (overcount(sup,"GC"))\
- *36.9 + (overcount(sup,"GG"))*32.7 + (overcount(sup,"CC"))*29.7
+ vh = vh+(overcount(sup, "AA"))*6.82+(overcount(sup, "TT"))*6.6+\
+ (overcount(sup, "AT"))*9.38 + (overcount(sup, "TA"))*7.69+\
+ (overcount(sup, "CA"))*10.44 + (overcount(sup, "TG"))*10.5+\
+ (overcount(sup, "GT"))*11.4 + (overcount(sup, "AC"))*10.2
+ vh = vh + (overcount(sup, "CT"))*10.48 + (overcount(sup, "AG"))\
+ *7.6+(overcount(sup, "GA"))*12.44+(overcount(sup, "TC"))*13.3
+ vh = vh + (overcount(sup, "CG"))*10.64 + (overcount(sup, "GC"))\
+ *14.88+(overcount(sup, "GG"))*13.39+(overcount(sup, "CC"))*12.2
+ vs = vs + (overcount(sup, "AA"))*19.0 + (overcount(sup, "TT"))*\
+ 18.4+(overcount(sup, "AT"))*26.7+(overcount(sup, "TA"))*20.5
+ vs = vs + (overcount(sup, "CA"))*26.9 + (overcount(sup, "TG"))*\
+ 27.8 + (overcount(sup, "GT"))*29.5 + (overcount(sup, "AC"))*26.2
+ vs = vs + (overcount(sup, "CT"))*27.1 + (overcount(sup, "AG"))*\
+ 19.2 + (overcount(sup, "GA"))*32.5 + (overcount(sup, "TC"))*35.5
+ vs = vs + (overcount(sup, "CG"))*26.7 + (overcount(sup, "GC"))\
+ *36.9 + (overcount(sup, "GG"))*32.7 + (overcount(sup, "CC"))*29.7
ds = vs
dh = vh
else:
View
32 Bio/SeqUtils/ProtParamData.py
@@ -1,10 +1,10 @@
# This module contains indices to be used with ProtParam
# Kyte & Doolittle index of hydrophobicity
-kd = {'A': 1.8,'R':-4.5,'N':-3.5,'D':-3.5,'C': 2.5,
- 'Q':-3.5,'E':-3.5,'G':-0.4,'H':-3.2,'I': 4.5,
- 'L': 3.8,'K':-3.9,'M': 1.9,'F': 2.8,'P':-1.6,
- 'S':-0.8,'T':-0.7,'W':-0.9,'Y':-1.3,'V': 4.2 }
+kd = {'A': 1.8, 'R':-4.5, 'N':-3.5, 'D':-3.5, 'C': 2.5,
+ 'Q':-3.5, 'E':-3.5, 'G':-0.4, 'H':-3.2, 'I': 4.5,
+ 'L': 3.8, 'K':-3.9, 'M': 1.9, 'F': 2.8, 'P':-1.6,
+ 'S':-0.8, 'T':-0.7, 'W':-0.9, 'Y':-1.3, 'V': 4.2 }
# Flexibility
# Normalized flexibility parameters (B-values), average (Vihinen et al., 1994)
@@ -17,23 +17,23 @@
# Hydrophilicity
# 1 Hopp & Wood
# Proc. Natl. Acad. Sci. U.S.A. 78:3824-3828(1981).
-hw = {'A':-0.5,'R':3.0, 'N':0.2, 'D':3.0, 'C':-1.0,
- 'Q':0.2, 'E':3.0, 'G':0.0, 'H':-0.5,'I':-1.8,
- 'L':-1.8,'K':3.0, 'M':-1.3,'F':-2.5,'P':0.0,
- 'S':0.3, 'T':-0.4,'W':-3.4,'Y':-2.3,'V':-1.5 }
+hw = {'A':-0.5, 'R': 3.0, 'N': 0.2, 'D': 3.0, 'C':-1.0,
+ 'Q': 0.2, 'E': 3.0, 'G': 0.0, 'H':-0.5, 'I':-1.8,
+ 'L':-1.8, 'K': 3.0, 'M':-1.3, 'F':-2.5, 'P': 0.0,
+ 'S': 0.3, 'T':-0.4, 'W':-3.4, 'Y':-2.3, 'V':-1.5 }
# Surface accessibility
# 1 Emini Surface fractional probability
-em = {'A':0.815,'R':1.475,'N':1.296,'D':1.283,'C':0.394,
- 'Q':1.348,'E':1.445,'G':0.714,'H':1.180,'I':0.603,
- 'L':0.603,'K':1.545,'M':0.714,'F':0.695,'P':1.236,
- 'S':1.115,'T':1.184,'W':0.808,'Y':1.089,'V':0.606 }
+em = {'A': 0.815, 'R': 1.475, 'N': 1.296, 'D': 1.283, 'C': 0.394,
+ 'Q': 1.348, 'E': 1.445, 'G': 0.714, 'H': 1.180, 'I': 0.603,
+ 'L': 0.603, 'K': 1.545, 'M': 0.714, 'F': 0.695, 'P': 1.236,
+ 'S': 1.115, 'T': 1.184, 'W': 0.808, 'Y': 1.089, 'V': 0.606 }
# 2 Janin Interior to surface transfer energy scale
-ja = {'A': 0.28,'R':-1.14,'N':-0.55,'D':-0.52,'C': 0.97,
- 'Q':-0.69,'E':-1.01,'G': 0.43,'H':-0.31,'I': 0.60,
- 'L': 0.60,'K':-1.62,'M': 0.43,'F': 0.46,'P':-0.42,
- 'S':-0.19,'T':-0.32,'W': 0.29,'Y':-0.15,'V': 0.60 }
+ja = {'A': 0.28, 'R':-1.14, 'N':-0.55, 'D':-0.52, 'C': 0.97,
+ 'Q':-0.69, 'E':-1.01, 'G': 0.43, 'H':-0.31, 'I': 0.60,
+ 'L': 0.60, 'K':-1.62, 'M': 0.43, 'F': 0.46, 'P':-0.42,
+ 'S':-0.19, 'T':-0.32, 'W': 0.29, 'Y':-0.15, 'V': 0.60 }
# A two dimensional dictionary for calculating the instability index.
View
58 Bio/SeqUtils/__init__.py
@@ -37,7 +37,7 @@ def GC(seq):
Note that this will return zero for an empty sequence.
"""
try:
- gc = sum(map(seq.count,['G','C','g','c','S','s']))
+ gc = sum(map(seq.count, ['G', 'C', 'g', 'c', 'S', 's']))
return gc*100.0/len(seq)
except ZeroDivisionError:
return 0.0
@@ -57,21 +57,21 @@ def GC123(seq):
nucleotides.
"""
d= {}
- for nt in ['A','T','G','C']:
- d[nt] = [0,0,0]
+ for nt in ['A', 'T', 'G', 'C']:
+ d[nt] = [0, 0, 0]
- for i in range(0,len(seq),3):
+ for i in range(0, len(seq), 3):
codon = seq[i:i+3]
if len(codon) < 3:
codon += ' '
- for pos in range(0,3):
- for nt in ['A','T','G','C']:
+ for pos in range(0, 3):
+ for nt in ['A', 'T', 'G', 'C']:
if codon[pos] == nt or codon[pos] == nt.lower():
d[nt][pos] += 1
gc = {}
gcall = 0
nall = 0
- for i in range(0,3):
+ for i in range(0, 3):
try:
n = d['G'][i] + d['C'][i] +d['T'][i] + d['A'][i]
gc[i] = (d['G'][i] + d['C'][i])*100.0/n
@@ -137,7 +137,7 @@ def xGC_skew(seq, window = 1000, zoom = 100,
ty +=20
canvas.create_text(X0, ty, text = 'Accumulated GC Skew', fill = 'magenta')
ty +=20
- canvas.create_oval(x1,y1, x2, y2)
+ canvas.create_oval(x1, y1, x2, y2)
acc = 0
start = 0
@@ -151,7 +151,7 @@ def xGC_skew(seq, window = 1000, zoom = 100,
y1 = Y0 + r1 * cos(alpha)
x2 = X0 + r2 * sin(alpha)
y2 = Y0 + r2 * cos(alpha)
- canvas.create_line(x1,y1,x2,y2, fill = 'blue')
+ canvas.create_line(x1, y1, x2, y2, fill = 'blue')
# accumulated GC skew
r1 = r - 50
r2 = r1 - acc
@@ -159,7 +159,7 @@ def xGC_skew(seq, window = 1000, zoom = 100,
y1 = Y0 + r1 * cos(alpha)
x2 = X0 + r2 * sin(alpha)
y2 = Y0 + r2 * cos(alpha)
- canvas.create_line(x1,y1,x2,y2, fill = 'magenta')
+ canvas.create_line(x1, y1, x2, y2, fill = 'magenta')
canvas.update()
start += window
@@ -209,13 +209,13 @@ def nt_search(seq, subseq):
######################
# {{{
-_THREECODE = {'A':'Ala', 'B':'Asx', 'C':'Cys', 'D':'Asp',
- 'E':'Glu', 'F':'Phe', 'G':'Gly', 'H':'His',
- 'I':'Ile', 'K':'Lys', 'L':'Leu', 'M':'Met',
- 'N':'Asn', 'P':'Pro', 'Q':'Gln', 'R':'Arg',
- 'S':'Ser', 'T':'Thr', 'V':'Val', 'W':'Trp',
- 'Y':'Tyr', 'Z':'Glx', 'X':'Xaa',
- 'U':'Sel', 'O':'Pyl', 'J':'Xle',
+_THREECODE = {'A': 'Ala', 'B': 'Asx', 'C': 'Cys', 'D': 'Asp',
+ 'E': 'Glu', 'F': 'Phe', 'G': 'Gly', 'H': 'His',
+ 'I': 'Ile', 'K': 'Lys', 'L': 'Leu', 'M': 'Met',
+ 'N': 'Asn', 'P': 'Pro', 'Q': 'Gln', 'R': 'Arg',
+ 'S': 'Ser', 'T': 'Thr', 'V': 'Val', 'W': 'Trp',
+ 'Y': 'Tyr', 'Z': 'Glx', 'X': 'Xaa',
+ 'U': 'Sel', 'O': 'Pyl', 'J': 'Xle',
}
@@ -340,7 +340,7 @@ def six_frame_translations(seq, genetic_code = 1):
comp = anti[::-1]
length = len(seq)
frames = {}
- for i in range(0,3):
+ for i in range(0, 3):
frames[i+1] = translate(seq[i:], genetic_code)
frames[-(i+1)] = translate(anti[i:], genetic_code)[::-1]
@@ -350,27 +350,27 @@ def six_frame_translations(seq, genetic_code = 1):
else:
short = seq
header = 'GC_Frame: '
- for nt in ['a','t','g','c']:
+ for nt in ['a', 't', 'g', 'c']:
header += '%s:%d ' % (nt, seq.count(nt.upper()))
- header += '\nSequence: %s, %d nt, %0.2f %%GC\n\n\n' % (short.lower(),length, GC(seq))
+ header += '\nSequence: %s, %d nt, %0.2f %%GC\n\n\n' % (short.lower(), length, GC(seq))
res = header
- for i in range(0,length,60):
+ for i in range(0, length, 60):
subseq = seq[i:i+60]
csubseq = comp[i:i+60]
p = i//3
res = res + '%d/%d\n' % (i+1, i/3+1)
- res = res + ' ' + ' '.join(map(None,frames[3][p:p+20])) + '\n'
- res = res + ' ' + ' '.join(map(None,frames[2][p:p+20])) + '\n'
- res = res + ' '.join(map(None,frames[1][p:p+20])) + '\n'
+ res = res + ' ' + ' '.join(map(None, frames[3][p:p+20])) + '\n'
+ res = res + ' ' + ' '.join(map(None, frames[2][p:p+20])) + '\n'
+ res = res + ' '.join(map(None, frames[1][p:p+20])) + '\n'
# seq
res = res + subseq.lower() + '%5d %%\n' % int(GC(subseq))
res = res + csubseq.lower() + '\n'
# - frames
- res = res + ' '.join(map(None,frames[-2][p:p+20])) +' \n'
- res = res + ' ' + ' '.join(map(None,frames[-1][p:p+20])) + '\n'
- res = res + ' ' + ' '.join(map(None,frames[-3][p:p+20])) + '\n\n'
+ res = res + ' '.join(map(None, frames[-2][p:p+20])) +' \n'
+ res = res + ' ' + ' '.join(map(None, frames[-1][p:p+20])) + '\n'
+ res = res + ' ' + ' '.join(map(None, frames[-3][p:p+20])) + '\n\n'
return res
# }}}
@@ -427,10 +427,10 @@ def _test():
"""Run the module's doctests (PRIVATE)."""
import os
import doctest
- if os.path.isdir(os.path.join("..","Tests")):
+ if os.path.isdir(os.path.join("..", "Tests")):
print "Running doctests..."
cur_dir = os.path.abspath(os.curdir)
- os.chdir(os.path.join("..","Tests"))
+ os.chdir(os.path.join("..", "Tests"))
doctest.testmod()
os.chdir(cur_dir)
del cur_dir
View
12 Bio/SeqUtils/lcc.py
@@ -7,7 +7,7 @@
import math
-def lcc_mult(seq,wsize):
+def lcc_mult(seq, wsize):
"""Local Composition Complexity (LCC) values over sliding window.
Returns a list of floats, the LCC values for a sliding window over
@@ -45,9 +45,9 @@ def lcc_mult(seq,wsize):
tail = seq[0]
for x in range(tamseq-wsize):
window = upper[x+1:wsize+x+1]
- if tail==window[-1]:
+ if tail == window[-1]:
lccsal.append(lccsal[-1])
- elif tail=='A':
+ elif tail == 'A':
cant_a -= 1
if window.endswith('C'):
cant_c += 1
@@ -64,7 +64,7 @@ def lcc_mult(seq,wsize):
term_a = compone[cant_a]
term_g = compone[cant_g]
lccsal.append(-(term_a+term_c+term_t+term_g))
- elif tail=='C':
+ elif tail == 'C':
cant_c -= 1
if window.endswith('A'):
cant_a += 1
@@ -81,7 +81,7 @@ def lcc_mult(seq,wsize):
term_c = compone[cant_c]
term_g = compone[cant_g]
lccsal.append(-(term_a+term_c+term_t+term_g))
- elif tail=='T':
+ elif tail == 'T':
cant_t -= 1
if window.endswith('A'):
cant_a += 1
@@ -98,7 +98,7 @@ def lcc_mult(seq,wsize):
term_t = compone[cant_t]
term_g = compone[cant_g]
lccsal.append(-(term_a+term_c+term_t+term_g))
- elif tail=='G':
+ elif tail == 'G':
cant_g -= 1
if window.endswith('A'):
cant_a += 1
View
6 Bio/SubsMat/FreqTable.py
@@ -61,7 +61,7 @@ def _alphabet_from_input(self):
s += i
return s
- def __init__(self,in_dict,dict_type,alphabet=None):
+ def __init__(self, in_dict, dict_type, alphabet=None):
self.alphabet = alphabet
if dict_type == COUNT:
self.count = in_dict
@@ -81,7 +81,7 @@ def read_count(f):
for line in f:
key, value = line.strip().split()
count[key] = int(value)
- freq_table = FreqTable(count,COUNT)
+ freq_table = FreqTable(count, COUNT)
return freq_table
@@ -90,4 +90,4 @@ def read_freq(f):
for line in f:
key, value = line.strip().split()
freq_dict[key] = float(value)
- return FreqTable(freq_dict,FREQ)
+ return FreqTable(freq_dict, FREQ)
View
2 Bio/SubsMat/MatrixInfo.py
@@ -435,7 +435,7 @@ def _temp():
('F', 'D') : -3, ('X', 'Y') : -1, ('Z', 'R') : 0, ('F', 'H') : -3,
('B', 'F') : -2, ('F', 'L') : 2, ('X', 'Q') : -1, ('B', 'B') : 5
}
-blosum35 =_temp()
+blosum35 = _temp()
del _temp
View
94 Bio/SubsMat/__init__.py
@@ -200,7 +200,7 @@ def _correct_matrix(self):
keylist = self.keys()
for key in keylist:
if key[0] > key[1]:
- self[(key[1],key[0])] = self[key]
+ self[(key[1], key[0])] = self[key]
del self[key]
def _full_to_half(self):
@@ -218,13 +218,13 @@ def _full_to_half(self):
for i in self.ab_list:
for j in self.ab_list[:self.ab_list.index(i)+1]:
if i != j:
- self[j,i] = self[j,i] + self[i,j]
- del self[i,j]
+ self[j, i] = self[j, i] + self[i, j]
+ del self[i, j]
def _init_zero(self):
for i in self.ab_list:
for j in self.ab_list[:self.ab_list.index(i)+1]:
- self[j,i] = 0.
+ self[j, i] = 0.
def make_entropy(self):
self.entropy = 0
@@ -239,15 +239,15 @@ def sum(self):
result[letter] = 0.0
for pair, value in self.iteritems():
i1, i2 = pair
- if i1==i2:
+ if i1 == i2:
result[i1] += value
else:
result[i1] += value / 2
result[i2] += value / 2
return result
- def print_full_mat(self,f=None,format="%4d",topformat="%4s",
- alphabet=None,factor=1,non_sym=None):
+ def print_full_mat(self, f=None, format="%4d", topformat="%4s",
+ alphabet=None, factor=1, non_sym=None):
f = f or sys.stdout
# create a temporary dictionary, which holds the full matrix for
# printing
@@ -256,7 +256,7 @@ def print_full_mat(self,f=None,format="%4d",topformat="%4s",
full_mat = copy.copy(self)
for i in self:
if i[0] != i[1]:
- full_mat[(i[1],i[0])] = full_mat[i]
+ full_mat[(i[1], i[0])] = full_mat[i]
if not alphabet:
alphabet = self.ab_list
topline = ''
@@ -270,7 +270,7 @@ def print_full_mat(self,f=None,format="%4d",topformat="%4s",
if alphabet.index(j) > alphabet.index(i) and non_sym is not None:
val = non_sym
else:
- val = full_mat[i,j]
+ val = full_mat[i, j]
val *= factor
if val <= -999:
cur_str = ' ND'
@@ -278,11 +278,11 @@ def print_full_mat(self,f=None,format="%4d",topformat="%4s",
cur_str = format % val
outline = outline+cur_str
- outline = outline+'\n'
+ outline = outline + '\n'
f.write(outline)
- def print_mat(self,f=None,format="%4d",bottomformat="%4s",
- alphabet=None,factor=1):
+ def print_mat(self, f=None, format="%4d", bottomformat="%4s",
+ alphabet=None, factor=1):
"""Print a nice half-matrix. f=sys.stdout to see on the screen
User may pass own alphabet, which should contain all letters in the
alphabet of the matrix, but may be in a different order. This
@@ -299,17 +299,17 @@ def print_mat(self,f=None,format="%4d",bottomformat="%4s",
outline = i
for j in alphabet[:alphabet.index(i)+1]:
try:
- val = self[j,i]
+ val = self[j, i]
except KeyError:
- val = self[i,j]
+ val = self[i, j]
val *= factor
if val == -999:
cur_str = ' ND'
else:
cur_str = format % val
- outline = outline+cur_str
- outline = outline+'\n'
+ outline = outline + cur_str
+ outline = outline + '\n'
f.write(outline)
f.write(bottomline)
@@ -324,9 +324,9 @@ def __str__(self):
for j in range(i+1):
c2 = alphabet[j]
try:
- val = self[c2,c1]
+ val = self[c2, c1]
except KeyError:
- val = self[c1,c2]
+ val = self[c1, c2]
if val == -999:
output += ' ND'
else:
@@ -335,14 +335,14 @@ def __str__(self):
output += '%4s' * n % tuple(alphabet) + "\n"
return output
- def __sub__(self,other):
+ def __sub__(self, other):
""" returns a number which is the subtraction product of the two matrices"""
mat_diff = 0
for i in self:
mat_diff += (self[i] - other[i])
return mat_diff
- def __mul__(self,other):
+ def __mul__(self, other):
""" returns a matrix for which each entry is the multiplication product of the
two matrices passed"""
new_mat = copy.copy(self)
@@ -378,7 +378,7 @@ def calculate_relative_entropy(self, obs_freq_mat):
relative_entropy = 0.
for key, value in self.iteritems():
if value > EPSILON:
- relative_entropy += obs_freq_mat[key]*log(value)
+ relative_entropy += obs_freq_mat[key] * log(value)
relative_entropy /= log(2)
return relative_entropy
@@ -386,12 +386,12 @@ def calculate_relative_entropy(self, obs_freq_mat):
class LogOddsMatrix(SeqMat):
"""Log odds matrix"""
- def calculate_relative_entropy(self,obs_freq_mat):
+ def calculate_relative_entropy(self, obs_freq_mat):
"""Calculate and return the relative entropy with respect to an
observed frequency matrix"""
relative_entropy = 0.
for key, value in self.iteritems():
- relative_entropy += obs_freq_mat[key]*value/log(2)
+ relative_entropy += obs_freq_mat[key] * value / log(2)
return relative_entropy
@@ -406,7 +406,7 @@ def _build_obs_freq_mat(acc_rep_mat):
obs_freq_mat = ObservedFrequencyMatrix(alphabet=acc_rep_mat.alphabet,
build_later=1)
for i in acc_rep_mat:
- obs_freq_mat[i] = acc_rep_mat[i]/total
+ obs_freq_mat[i] = acc_rep_mat[i] / total
return obs_freq_mat
@@ -420,7 +420,7 @@ def _exp_freq_table_from_obs_freq(obs_freq_mat):
else:
exp_freq_table[i[0]] += obs_freq_mat[i] / 2.
exp_freq_table[i[1]] += obs_freq_mat[i] / 2.
- return FreqTable.FreqTable(exp_freq_table,FreqTable.FREQ)
+ return FreqTable.FreqTable(exp_freq_table, FreqTable.FREQ)
def _build_exp_freq_mat(exp_freq_table):
@@ -440,7 +440,7 @@ def _build_exp_freq_mat(exp_freq_table):
#
# Build the substitution matrix
#
-def _build_subs_mat(obs_freq_mat,exp_freq_mat):
+def _build_subs_mat(obs_freq_mat, exp_freq_mat):
""" Build the substitution matrix """
if obs_freq_mat.ab_list != exp_freq_mat.ab_list:
raise ValueError("Alphabet mismatch in passed matrices")
@@ -453,7 +453,7 @@ def _build_subs_mat(obs_freq_mat,exp_freq_mat):
#
# Build a log-odds matrix
#
-def _build_log_odds_mat(subs_mat,logbase=2,factor=10.0,round_digit=0,keep_nd=0):
+def _build_log_odds_mat(subs_mat, logbase=2, factor=10.0, round_digit=0, keep_nd=0):
"""_build_log_odds_mat(subs_mat,logbase=10,factor=10.0,round_digit=1):
Build a log-odds matrix
logbase=2: base of logarithm used to build (default 2)
@@ -468,7 +468,7 @@ def _build_log_odds_mat(subs_mat,logbase=2,factor=10.0,round_digit=0,keep_nd=0):
if value < EPSILON:
lo_mat[key] = -999
else:
- lo_mat[key] = round(factor*log(value)/log(logbase),round_digit)
+ lo_mat[key] = round(factor*log(value)/log(logbase), round_digit)
mat_min = min(lo_mat.values())
if not keep_nd:
for i in lo_mat:
@@ -483,14 +483,14 @@ def _build_log_odds_mat(subs_mat,logbase=2,factor=10.0,round_digit=0,keep_nd=0):
# and rounding factor. Generates a log-odds matrix, calling internal SubsMat
# functions.
#
-def make_log_odds_matrix(acc_rep_mat,exp_freq_table=None,logbase=2,
- factor=1.,round_digit=9,keep_nd=0):
+def make_log_odds_matrix(acc_rep_mat, exp_freq_table=None, logbase=2,
+ factor=1., round_digit=9, keep_nd=0):
obs_freq_mat = _build_obs_freq_mat(acc_rep_mat)
if not exp_freq_table:
exp_freq_table = _exp_freq_table_from_obs_freq(obs_freq_mat)
exp_freq_mat = _build_exp_freq_mat(exp_freq_table)
subs_mat = _build_subs_mat(obs_freq_mat, exp_freq_mat)
- lo_mat = _build_log_odds_mat(subs_mat,logbase,factor,round_digit,keep_nd)
+ lo_mat = _build_log_odds_mat(subs_mat, logbase, factor, round_digit, keep_nd)
return lo_mat
@@ -529,7 +529,7 @@ def read_text_matrix(data_file):
i = 0
for field in rec[first_col:]:
col = alphabet[i]
- matrix[(row,col)] = float(field)
+ matrix[(row, col)] = float(field)
i += 1
j += 1
# delete entries with an asterisk
@@ -544,7 +544,7 @@ def read_text_matrix(data_file):
diagALL = 3
-def two_mat_relative_entropy(mat_1,mat_2,logbase=2,diag=diagALL):
+def two_mat_relative_entropy(mat_1, mat_2, logbase=2, diag=diagALL):
rel_ent = 0.
key_list_1 = sorted(mat_1)
key_list_2 = sorted(mat_2)
@@ -594,13 +594,13 @@ def two_mat_correlation(mat_1, mat_2):
except KeyError:
raise ValueError("%s is not a common key" % ab_pair)
correlation_matrix = numpy.corrcoef(values, rowvar=0)
- correlation = correlation_matrix[0,1]
+ correlation = correlation_matrix[0, 1]
return correlation
# Jensen-Shannon Distance
# Need to input observed frequency matrices
-def two_mat_DJS(mat_1,mat_2,pi_1=0.5,pi_2=0.5):
+def two_mat_DJS(mat_1, mat_2, pi_1=0.5, pi_2=0.5):
assert mat_1.ab_list == mat_2.ab_list
assert pi_1 > 0 and pi_2 > 0 and pi_1< 1 and pi_2 <1
assert not (pi_1 + pi_2 - 1.0 > EPSILON)
@@ -612,13 +612,13 @@ def two_mat_DJS(mat_1,mat_2,pi_1=0.5,pi_2=0.5):
mat_1.make_entropy()
mat_2.make_entropy()
# print mat_1.entropy, mat_2.entropy
- dJS = sum_mat.entropy - pi_1 * mat_1.entropy - pi_2 *mat_2.entropy
+ dJS = sum_mat.entropy - pi_1 * mat_1.entropy - pi_2 * mat_2.entropy
return dJS
"""
This isn't working yet. Boo hoo!
-def two_mat_print(mat_1, mat_2, f=None,alphabet=None,factor_1=1, factor_2=1,
- format="%4d",bottomformat="%4s",topformat="%4s",
+def two_mat_print(mat_1, mat_2, f=None, alphabet=None, factor_1=1, factor_2=1,
+ format="%4d", bottomformat="%4s", topformat="%4s",
topindent=7*" ", bottomindent=1*" "):
f = f or sys.stdout
if not alphabet:
@@ -636,28 +636,28 @@ def two_mat_print(mat_1, mat_2, f=None,alphabet=None,factor_1=1, factor_2=1,
f.write(topline)
for i in alphabet:
for j in alphabet:
- print_mat[i,j] = -999
+ print_mat[i, j] = -999
diag_1 = {}
diag_2 = {}
for i in alphabet:
for j in alphabet[:alphabet.index(i)+1]:
if i == j:
- diag_1[i] = mat_1[(i,i)]
+ diag_1[i] = mat_1[(i, i)]
diag_2[i] = mat_2[(alphabet[len_alphabet-alphabet.index(i)-1],
alphabet[len_alphabet-alphabet.index(i)-1])]
else:
if i > j:
- key = (j,i)
+ key = (j, i)
else:
- key = (i,j)
+ key = (i, j)
mat_2_key = [alphabet[len_alphabet-alphabet.index(key[0])-1],
alphabet[len_alphabet-alphabet.index(key[1])-1]]
# print mat_2_key
mat_2_key.sort()
mat_2_key = tuple(mat_2_key)
- # print key ,"||", mat_2_key
+ # print key, "||", mat_2_key
print_mat[key] = mat_2[mat_2_key]
- print_mat[(key[1],key[0])] = mat_1[key]
+ print_mat[(key[1], key[0])] = mat_1[key]
for i in alphabet:
outline = i
for j in alphabet:
@@ -672,12 +672,12 @@ def two_mat_print(mat_1, mat_2, f=None,alphabet=None,factor_1=1, factor_2=1,
val_2 = format % (diag_2[i]*factor_2)
cur_str = val_1 + " " + val_2
else:
- if print_mat[(i,j)] == -999:
+ if print_mat[(i, j)] == -999:
val = ' ND'
elif alphabet.index(i) > alphabet.index(j):
- val = format % (print_mat[(i,j)]*factor_1)
+ val = format % (print_mat[(i, j)]*factor_1)
else:
- val = format % (print_mat[(i,j)]*factor_2)
+ val = format % (print_mat[(i, j)]*factor_2)
cur_str = val
outline += cur_str
outline += bottomformat % (alphabet[len_alphabet-alphabet.index(i)-1] +
View
2 Bio/kNN.py
@@ -96,7 +96,7 @@ def calculate(knn, x, weight_fn=equal_weight, distance_fn=None):
# function about twice as fast.
for i in range(len(knn.xs)):
temp[:] = x - knn.xs[i]
- dist = numpy.sqrt(numpy.dot(temp,temp))
+ dist = numpy.sqrt(numpy.dot(temp, temp))
order.append((dist, i))
order.sort()
View
20 Bio/pairwise2.py
@@ -163,27 +163,27 @@ class alignment_function:
"""
# match code -> tuple of (parameters, docstring)
match2args = {
- 'x' : ([], ''),
- 'm' : (['match', 'mismatch'],
+ 'x': ([], ''),
+ 'm': (['match', 'mismatch'],
"""match is the score to given to identical characters. mismatch is
the score given to non-identical ones."""),
- 'd' : (['match_dict'],
+ 'd': (['match_dict'],
"""match_dict is a dictionary where the keys are tuples of pairs of
characters and the values are the scores, e.g. ("A", "C") : 2.5."""),
- 'c' : (['match_fn'],
+ 'c': (['match_fn'],
"""match_fn is a callback function that takes two characters and
returns the score between them."""),
}
# penalty code -> tuple of (parameters, docstring)
penalty2args = {
- 'x' : ([], ''),
- 's' : (['open', 'extend'],
+ 'x': ([], ''),
+ 's': (['open', 'extend'],
"""open and extend are the gap penalties when a gap is opened and
extended. They should be negative."""),
- 'd' : (['openA', 'extendA', 'openB', 'extendB'],
+ 'd': (['openA', 'extendA', 'openB', 'extendB'],
"""openA and extendA are the gap penalties for sequenceA, and openB
+    and extendB for sequenceB. The penalties should be negative."""),
- 'c' : (['gap_A_fn', 'gap_B_fn'],
+ 'c': (['gap_A_fn', 'gap_B_fn'],
"""gap_A_fn and gap_B_fn are callback functions that takes 1) the
index where the gap is opened, and 2) the length of the gap. They
should return a gap penalty."""),
@@ -711,8 +711,8 @@ def _clean_alignments(alignments):
# duplicates, make sure begin and end are set correctly, remove
# empty alignments.
unique_alignments = []
- for align in alignments :
- if align not in unique_alignments :
+ for align in alignments:
+ if align not in unique_alignments:
unique_alignments.append(align)
i = 0