Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Bio/AlignIO: PEP8 whitespace cleanup.

  • Loading branch information...
commit f90bbbdc367df2ea6c2abcc0c38715e1e902c565 1 parent e54c5da
@cbrueffer cbrueffer authored peterjc committed
View
16 Bio/AlignIO/ClustalIO.py
@@ -63,7 +63,7 @@ def write_alignment(self, alignment):
#Make sure we don't get any spaces in the record
#identifier when output in the file by replacing
#them with underscores:
- line = record.id[0:30].replace(" ","_").ljust(36)
+ line = record.id[0:30].replace(" ", "_").ljust(36)
line += str(record.seq[cur_char:(cur_char + show_num)])
output += line + "\n"
@@ -105,7 +105,7 @@ def next(self):
# find the clustal version in the header line
version = None
for word in line.split():
- if word[0]=='(' and word[-1]==')':
+ if word[0] == '(' and word[-1] == ')':
word = word[1:-1]
if word[0] in '0123456789':
version = word
@@ -152,7 +152,7 @@ def next(self):
letters = int(fields[2])
except ValueError:
raise ValueError("Could not parse line, bad sequence number:\n%s" % line)
- if len(fields[1].replace("-","")) != letters:
+ if len(fields[1].replace("-", "")) != letters:
raise ValueError("Could not parse line, invalid sequence number:\n%s" % line)
elif line[0] == " ":
#Sequence consensus line...
@@ -195,7 +195,7 @@ def next(self):
if not line:
break # end of file
- if line.split(None,1)[0] in known_headers:
+ if line.split(None, 1)[0] in known_headers:
#Found concatenated alignment.
done = True
self._header = line
@@ -231,7 +231,7 @@ def next(self):
letters = int(fields[2])
except ValueError:
raise ValueError("Could not parse line, bad sequence number:\n%s" % line)
- if len(seqs[i].replace("-","")) != letters:
+ if len(seqs[i].replace("-", "")) != letters:
raise ValueError("Could not parse line, invalid sequence number:\n%s" % line)
#Read in the next line
@@ -257,7 +257,7 @@ def next(self):
% (len(ids), self.records_per_alignment))
records = (SeqRecord(Seq(s, self.alphabet), id=i, description=i)
- for (i,s) in zip(ids, seqs))
+ for (i, s) in zip(ids, seqs))
alignment = MultipleSeqAlignment(records, self.alphabet)
#TODO - Handle alignment annotation better, for now
#mimic the old parser in Bio.Clustalw
@@ -384,7 +384,7 @@ def next(self):
handle = StringIO()
ClustalWriter(handle).write_file(alignments)
handle.seek(0)
- for i,a in enumerate(ClustalIterator(handle)):
+ for i, a in enumerate(ClustalIterator(handle)):
assert a.get_alignment_length() == alignments[i].get_alignment_length()
handle.seek(0)
@@ -393,7 +393,7 @@ def next(self):
handle = StringIO()
ClustalWriter(handle).write_file([alignment])
handle.seek(0)
- for i,a in enumerate(ClustalIterator(handle)):
+ for i, a in enumerate(ClustalIterator(handle)):
assert a.get_alignment_length() == alignment.get_alignment_length()
assert len(a) == 1
View
20 Bio/AlignIO/EmbossIO.py
@@ -94,7 +94,7 @@ def next(self):
#Read in the rest of this alignment header,
#try and discover the number of records expected
#and their length
- parts = line[1:].split(":",1)
+ parts = line[1:].split(":", 1)
key = parts[0].lower().strip()
if key == "aligned_sequences":
number_of_seqs = int(parts[1].strip())
@@ -102,7 +102,7 @@ def next(self):
# Should now expect the record identifiers...
for i in range(number_of_seqs):
line = handle.readline()
- parts = line[1:].strip().split(":",1)
+ parts = line[1:].strip().split(":", 1)
assert i+1 == int(parts[0].strip())
ids.append(parts[1].strip())
assert len(ids) == number_of_seqs
@@ -136,17 +136,17 @@ def next(self):
#(an aligned seq is broken up into multiple lines)
id, start = id_start
seq, end = seq_end
- if start==end:
+ if start == end:
#Special case, either a single letter is present,
#or no letters at all.
- if seq.replace("-","") == "":
+ if seq.replace("-", "") == "":
start = int(start)
end = int(end)
else:
start = int(start) - 1
end = int(end)
else:
- assert seq.replace("-","") != ""
+ assert seq.replace("-", "") != ""
start = int(start) - 1 # python counting
end = int(end)
@@ -154,7 +154,7 @@ def next(self):
assert 0 <= index and index < number_of_seqs, \
"Expected index %i in range [0,%i)" \
% (index, number_of_seqs)
- assert id==ids[index] or id == ids[index][:len(id)]
+ assert id == ids[index] or id == ids[index][:len(id)]
if len(seq_starts) == index:
#Record the start
@@ -162,7 +162,7 @@ def next(self):
#Check the start...
if start == end:
- assert seq.replace("-","") == "", line
+ assert seq.replace("-", "") == "", line
else:
assert start - seq_starts[index] == len(seqs[index].replace("-","")), \
"Found %i chars so far for sequence %i (%s, %s), line says start %i:\n%s" \
@@ -172,9 +172,9 @@ def next(self):
seqs[index] += seq
#Check the end ...
- assert end == seq_starts[index] + len(seqs[index].replace("-","")), \
+ assert end == seq_starts[index] + len(seqs[index].replace("-", "")), \
"Found %i chars so far for sequence %i (%s, %s, start=%i), file says end %i:\n%s" \
- % (len(seqs[index].replace("-","")), index, id, repr(seqs[index]),
+ % (len(seqs[index].replace("-", "")), index, id, repr(seqs[index]),
seq_starts[index], end, line)
index += 1
@@ -614,6 +614,6 @@ def next(self):
assert len(alignments) == 1
assert len(alignments[0]) == 2
assert [r.id for r in alignments[0]] \
- == ["asis","asis"]
+ == ["asis", "asis"]
print "Done"
View
42 Bio/AlignIO/FastaIO.py
@@ -60,7 +60,7 @@ def _extract_alignment_region(alignment_seq_with_flanking, annotation):
return align_stripped[start:end]
-def FastaM10Iterator(handle, alphabet = single_letter_alphabet):
+def FastaM10Iterator(handle, alphabet=single_letter_alphabet):
"""Alignment iterator for the FASTA tool's pairwise alignment output.
This is for reading the pairwise alignments output by Bill Pearson's
@@ -149,10 +149,10 @@ def build_hsp():
#Query
#=====
record = SeqRecord(Seq(q, alphabet),
- id = query_id,
- name = "query",
- description = query_descr,
- annotations = {"original_length" : int(query_tags["sq_len"])})
+ id=query_id,
+ name="query",
+ description=query_descr,
+ annotations={"original_length": int(query_tags["sq_len"])})
#TODO - handle start/end coordinates properly. Short term hack for now:
record._al_start = int(query_tags["al_start"])
record._al_stop = int(query_tags["al_stop"])
@@ -167,16 +167,16 @@ def build_hsp():
elif query_tags["sq_type"] == "p":
record.seq.alphabet = generic_protein
if "-" in q:
- if not hasattr(record.seq.alphabet,"gap_char"):
+ if not hasattr(record.seq.alphabet, "gap_char"):
record.seq.alphabet = Gapped(record.seq.alphabet, "-")
#Match
#=====
record = SeqRecord(Seq(m, alphabet),
- id = match_id,
- name = "match",
- description = match_descr,
- annotations = {"original_length" : int(match_tags["sq_len"])})
+ id=match_id,
+ name="match",
+ description=match_descr,
+ annotations={"original_length": int(match_tags["sq_len"])})
#TODO - handle start/end coordinates properly. Short term hack for now:
record._al_start = int(match_tags["al_start"])
record._al_stop = int(match_tags["al_stop"])
@@ -189,7 +189,7 @@ def build_hsp():
elif match_tags["sq_type"] == "p":
record.seq.alphabet = generic_protein
if "-" in m:
- if not hasattr(record.seq.alphabet,"gap_char"):
+ if not hasattr(record.seq.alphabet, "gap_char"):
record.seq.alphabet = Gapped(record.seq.alphabet, "-")
return alignment
@@ -215,7 +215,7 @@ def build_hsp():
yield build_hsp()
state = state_NONE
query_descr = line[line.find(">>>")+3:].strip()
- query_id = query_descr.split(None,1)[0]
+ query_id = query_descr.split(None, 1)[0]
match_id = None
header_tags = {}
align_tags = {}
@@ -256,7 +256,7 @@ def build_hsp():
elif line.startswith(">>>"):
#Should be start of a match!
assert query_id is not None
- assert line[3:].split(", ",1)[0] == query_id, line
+ assert line[3:].split(", ", 1)[0] == query_id, line
assert match_id is None
assert not header_tags
assert not align_tags
@@ -277,7 +277,7 @@ def build_hsp():
match_seq = ""
cons_seq = ""
match_descr = line[2:].strip()
- match_id = match_descr.split(None,1)[0]
+ match_id = match_descr.split(None, 1)[0]
state = state_ALIGN_HEADER
elif line.startswith(">--"):
#End of one HSP
@@ -297,13 +297,13 @@ def build_hsp():
#Should be start of query alignment seq...
assert query_id is not None, line
assert match_id is not None, line
- assert query_id.startswith(line[1:].split(None,1)[0]), line
+ assert query_id.startswith(line[1:].split(None, 1)[0]), line
state = state_ALIGN_QUERY
elif state == state_ALIGN_QUERY:
#Should be start of match alignment seq
assert query_id is not None, line
assert match_id is not None, line
- assert match_id.startswith(line[1:].split(None,1)[0]), line
+ assert match_id.startswith(line[1:].split(None, 1)[0]), line
state = state_ALIGN_MATCH
elif state == state_NONE:
#Can get > as the last line of a histogram
@@ -316,14 +316,14 @@ def build_hsp():
#Next line(s) should be consensus seq...
elif line.startswith("; "):
if ": " in line:
- key, value = [s.strip() for s in line[2:].split(": ",1)]
+ key, value = [s.strip() for s in line[2:].split(": ", 1)]
else:
import warnings
#Seen in lalign36, specifically version 36.3.4 Apr, 2011
#Fixed in version 36.3.5b Oct, 2011(preload8)
warnings.warn("Missing colon in line: %r" % line)
try:
- key, value = [s.strip() for s in line[2:].split(" ",1)]
+ key, value = [s.strip() for s in line[2:].split(" ", 1)]
except ValueError:
raise ValueError("Bad line: %r" % line)
if state == state_QUERY_HEADER:
@@ -613,9 +613,9 @@ def build_hsp():
if os.path.splitext(filename)[-1] == ".m10":
print
print filename
- print "="*len(filename)
- for i,a in enumerate(FastaM10Iterator(open(os.path.join(path,filename)))):
- print "#%i, %s" % (i+1,a)
+ print "=" * len(filename)
+ for i, a in enumerate(FastaM10Iterator(open(os.path.join(path, filename)))):
+ print "#%i, %s" % (i+1, a)
for r in a:
if "-" in r.seq:
assert r.seq.alphabet.gap_char == "-"
View
2  Bio/AlignIO/NexusIO.py
@@ -199,7 +199,7 @@ def _classify_alphabet_for_nexus(self, alphabet):
handle = StringIO()
try:
- NexusWriter(handle).write_file([a,a])
+ NexusWriter(handle).write_file([a, a])
assert False, "Should have rejected more than one alignment!"
except ValueError:
pass
View
88 Bio/AlignIO/PhylipIO.py
@@ -60,7 +60,7 @@ def write_alignment(self, alignment, id_width=_PHYLIP_ID_WIDTH):
"""
handle = self.handle
- if len(alignment)==0:
+ if len(alignment) == 0:
raise ValueError("Must have at least one sequence")
length_of_seqs = alignment.get_alignment_length()
for record in alignment:
@@ -93,9 +93,9 @@ def write_alignment(self, alignment, id_width=_PHYLIP_ID_WIDTH):
#Either remove the banned characters, or map them to something
#else like an underscore "_" or pipe "|" character...
for char in "[](),":
- name = name.replace(char,"")
+ name = name.replace(char, "")
for char in ":;":
- name = name.replace(char,"|")
+ name = name.replace(char, "|")
name = name[:id_width]
if name in names:
raise ValueError("Repeated name %r (originally %r), "
@@ -109,10 +109,10 @@ def write_alignment(self, alignment, id_width=_PHYLIP_ID_WIDTH):
# format, separated by blanks". We'll use spaces to keep EMBOSS
# happy.
handle.write(" %i %s\n" % (len(alignment), length_of_seqs))
- block=0
+ block = 0
while True:
for name, record in zip(names, alignment):
- if block==0:
+ if block == 0:
#Write name (truncated/padded to id_width characters)
#Now truncate and right pad to expected length.
handle.write(name[:id_width].ljust(id_width))
@@ -124,7 +124,7 @@ def write_alignment(self, alignment, id_width=_PHYLIP_ID_WIDTH):
if "." in sequence:
raise ValueError("PHYLIP format no longer allows dots in "
"sequence")
- for chunk in range(0,5):
+ for chunk in range(0, 5):
i = block*50 + chunk*10
seq_segment = sequence[i:i+10]
#TODO - Force any gaps to be '-' character? Look at the
@@ -134,7 +134,7 @@ def write_alignment(self, alignment, id_width=_PHYLIP_ID_WIDTH):
if i+10 > length_of_seqs:
break
handle.write("\n")
- block=block+1
+ block = block+1
if block*50 > length_of_seqs:
break
handle.write("\n")
@@ -159,14 +159,14 @@ class PhylipIterator(AlignmentIterator):
def _is_header(self, line):
line = line.strip()
parts = filter(None, line.split())
- if len(parts)!=2:
- return False # First line should have two integers
+ if len(parts) != 2:
+ return False # First line should have two integers
try:
number_of_seqs = int(parts[0])
length_of_seqs = int(parts[1])
return True
except ValueError:
- return False # First line should have two integers
+ return False # First line should have two integers
def _split_id(self, line):
"""
@@ -197,7 +197,7 @@ def next(self):
raise StopIteration
line = line.strip()
parts = filter(None, line.split())
- if len(parts)!=2:
+ if len(parts) != 2:
raise ValueError("First line should have two integers")
try:
number_of_seqs = int(parts[0])
@@ -226,10 +226,10 @@ def next(self):
seqs.append([s])
#Look for further blocks
- line=""
+ line = ""
while True:
#Skip any blank lines between blocks...
- while ""==line.strip():
+ while "" == line.strip():
line = handle.readline()
if not line:
break # end of file
@@ -243,7 +243,7 @@ def next(self):
#print "New block..."
for i in xrange(number_of_seqs):
- s = line.strip().replace(" ","")
+ s = line.strip().replace(" ", "")
if "." in s:
raise ValueError("PHYLIP format no longer allows dots in sequence")
seqs[i].append(s)
@@ -255,7 +255,7 @@ def next(self):
records = (SeqRecord(Seq("".join(s), self.alphabet),
id=i, name=i, description=i)
- for (i,s) in zip(ids, seqs))
+ for (i, s) in zip(ids, seqs))
return MultipleSeqAlignment(records, self.alphabet)
@@ -312,7 +312,7 @@ class SequentialPhylipWriter(SequentialAlignmentWriter):
def write_alignment(self, alignment, id_width=_PHYLIP_ID_WIDTH):
handle = self.handle
- if len(alignment)==0:
+ if len(alignment) == 0:
raise ValueError("Must have at least one sequence")
length_of_seqs = alignment.get_alignment_length()
for record in alignment:
@@ -329,9 +329,9 @@ def write_alignment(self, alignment, id_width=_PHYLIP_ID_WIDTH):
#Either remove the banned characters, or map them to something
#else like an underscore "_" or pipe "|" character...
for char in "[](),":
- name = name.replace(char,"")
+ name = name.replace(char, "")
for char in ":;":
- name = name.replace(char,"|")
+ name = name.replace(char, "|")
name = name[:id_width]
if name in names:
raise ValueError("Repeated name %r (originally %r), "
@@ -382,7 +382,7 @@ def next(self):
raise StopIteration
line = line.strip()
parts = filter(None, line.split())
- if len(parts)!=2:
+ if len(parts) != 2:
raise ValueError("First line should have two integers")
try:
number_of_seqs = int(parts[0])
@@ -431,14 +431,14 @@ def next(self):
records = (SeqRecord(Seq(s, self.alphabet),
id=i, name=i, description=i)
- for (i,s) in zip(ids, seqs))
+ for (i, s) in zip(ids, seqs))
return MultipleSeqAlignment(records, self.alphabet)
-if __name__=="__main__":
+if __name__ == "__main__":
print "Running short mini-test"
- phylip_text=""" 8 286
+ phylip_text = """ 8 286
V_Harveyi_ --MKNWIKVA VAAIA--LSA A--------- ---------T VQAATEVKVG
B_subtilis MKMKKWTVLV VAALLAVLSA CG-------- ----NGNSSS KEDDNVLHVG
B_subtilis MKKALLALFM VVSIAALAAC GAGNDNQSKD NAKDGDLWAS IKKKGVLTVG
@@ -496,24 +496,24 @@ def next(self):
from cStringIO import StringIO
handle = StringIO(phylip_text)
- count=0
+ count = 0
for alignment in PhylipIterator(handle):
for record in alignment:
- count=count+1
+ count = count+1
print record.id
#print str(record.seq)
assert count == 8
- expected="""mkklvlslsl vlafssataa faaipqniri gtdptyapfe sknsqgelvg
+ expected = """mkklvlslsl vlafssataa faaipqniri gtdptyapfe sknsqgelvg
fdidlakelc krintqctfv enpldalips lkakkidaim sslsitekrq qeiaftdkly
aadsrlvvak nsdiqptves lkgkrvgvlq gttqetfgne hwapkgieiv syqgqdniys
dltagridaafqdevaaseg flkqpvgkdy kfggpsvkde klfgvgtgmg lrkednelre
- alnkafaemradgtyeklak kyfdfdvygg""".replace(" ","").replace("\n","").upper()
- assert str(record.seq).replace("-","") == expected
+ alnkafaemradgtyeklak kyfdfdvygg""".replace(" ", "").replace("\n", "").upper()
+ assert str(record.seq).replace("-", "") == expected
#From here:
#http://atgc.lirmm.fr/phyml/usersguide.html
- phylip_text2="""5 60
+ phylip_text2 = """5 60
Tax1 CCATCTCACGGTCGGTACGATACACCTGCTTTTGGCAG
Tax2 CCATCTCACGGTCAGTAAGATACACCTGCTTTTGGCGG
Tax3 CCATCTCCCGCTCAGTAAGATACCCCTGCTGTTGGCGG
@@ -526,7 +526,7 @@ def next(self):
GAAATGGTCAATCTTAAAAGGT
GAAATGGTCAATATTAAAAGGT"""
- phylip_text3="""5 60
+ phylip_text3 = """5 60
Tax1 CCATCTCACGGTCGGTACGATACACCTGCTTTTGGCAGGAAATGGTCAATATTACAAGGT
Tax2 CCATCTCACGGTCAGTAAGATACACCTGCTTTTGGCGGGAAATGGTCAACATTAAAAGAT
Tax3 CCATCTCCCGCTCAGTAAGATACCCCTGCTGTTGGCGGGAAATCGTCAATATTAAAAGGT
@@ -536,23 +536,23 @@ def next(self):
handle = StringIO(phylip_text2)
list2 = list(PhylipIterator(handle))
handle.close()
- assert len(list2)==1
- assert len(list2[0])==5
+ assert len(list2) == 1
+ assert len(list2[0]) == 5
handle = StringIO(phylip_text3)
list3 = list(PhylipIterator(handle))
handle.close()
- assert len(list3)==1
- assert len(list3[0])==5
+ assert len(list3) == 1
+ assert len(list3[0]) == 5
- for i in range(0,5):
+ for i in range(0, 5):
list2[0][i].id == list3[0][i].id
str(list2[0][i].seq) == str(list3[0][i].seq)
#From here:
#http://evolution.genetics.washington.edu/phylip/doc/sequence.html
#Note the lack of any white space between names 2 and 3 and their seqs.
- phylip_text4=""" 5 42
+ phylip_text4 = """ 5 42
Turkey AAGCTNGGGC ATTTCAGGGT
Salmo gairAAGCCTTGGC AGTGCAGGGT
H. SapiensACCGGTTGGC CGTTCAGGGT
@@ -567,7 +567,7 @@ def next(self):
#From here:
#http://evolution.genetics.washington.edu/phylip/doc/sequence.html
- phylip_text5=""" 5 42
+ phylip_text5 = """ 5 42
Turkey AAGCTNGGGC ATTTCAGGGT
GAGCCCGGGC AATACAGGGT AT
Salmo gairAAGCCTTGGC AGTGCAGGGT
@@ -579,7 +579,7 @@ def next(self):
Gorilla AAACCCTTGC CGGTACGCTT
AAACCATTGC CGGTACGCTT AA"""
- phylip_text5a=""" 5 42
+ phylip_text5a = """ 5 42
Turkey AAGCTNGGGC ATTTCAGGGT GAGCCCGGGC AATACAGGGT AT
Salmo gairAAGCCTTGGC AGTGCAGGGT GAGCCGTGGC CGGGCACGGT AT
H. SapiensACCGGTTGGC CGTTCAGGGT ACAGGTTGGC CGTTCAGGGT AA
@@ -589,14 +589,14 @@ def next(self):
handle = StringIO(phylip_text4)
list4 = list(PhylipIterator(handle))
handle.close()
- assert len(list4)==1
- assert len(list4[0])==5
+ assert len(list4) == 1
+ assert len(list4[0]) == 5
handle = StringIO(phylip_text5)
try:
list5 = list(PhylipIterator(handle))
- assert len(list5)==1
- assert len(list5[0])==5
+ assert len(list5) == 1
+ assert len(list5[0]) == 5
print "That should have failed..."
except ValueError:
print "Evil multiline non-interlaced example failed as expected"
@@ -605,8 +605,8 @@ def next(self):
handle = StringIO(phylip_text5a)
list5 = list(PhylipIterator(handle))
handle.close()
- assert len(list5)==1
- assert len(list4[0])==5
+ assert len(list5) == 1
+ assert len(list4[0]) == 5
print "Concatenation"
handle = StringIO(phylip_text4 + "\n" + phylip_text4)
@@ -623,7 +623,7 @@ def next(self):
handle.seek(0)
list6 = list(PhylipIterator(handle))
assert len(list5) == len(list6)
- for a1,a2 in zip(list5, list6):
+ for a1, a2 in zip(list5, list6):
assert len(a1) == len(a2)
for r1, r2 in zip(a1, a2):
assert r1.id == r2.id
View
78 Bio/AlignIO/StockholmIO.py
@@ -141,17 +141,17 @@ class StockholmWriter(SequentialAlignmentWriter):
#These dictionaries should be kept in sync with those
#defined in the StockholmIterator class.
- pfam_gr_mapping = {"secondary_structure" : "SS",
- "surface_accessibility" : "SA",
- "transmembrane" : "TM",
- "posterior_probability" : "PP",
- "ligand_binding" : "LI",
- "active_site" : "AS",
- "intron" : "IN"}
+ pfam_gr_mapping = {"secondary_structure": "SS",
+ "surface_accessibility": "SA",
+ "transmembrane": "TM",
+ "posterior_probability": "PP",
+ "ligand_binding": "LI",
+ "active_site": "AS",
+ "intron": "IN"}
#Following dictionary deliberately does not cover AC, DE or DR
- pfam_gs_mapping = {"organism" : "OS",
- "organism_classification" : "OC",
- "look" : "LO"}
+ pfam_gs_mapping = {"organism": "OS",
+ "organism_classification": "OC",
+ "look": "LO"}
def write_alignment(self, alignment):
"""Use this to write (another) single alignment to an open file.
@@ -192,7 +192,7 @@ def _write_record(self, record):
seq_name = record.name
#In the Stockholm file format, spaces are not allowed in the id
- seq_name = seq_name.replace(" ","_")
+ seq_name = seq_name.replace(" ", "_")
if "start" in record.annotations \
and "end" in record.annotations:
@@ -253,7 +253,7 @@ def _write_record(self, record):
#GR = per row per column sequence annotation
for key, value in record.letter_annotations.iteritems():
- if key in self.pfam_gr_mapping and len(str(value))==len(record.seq):
+ if key in self.pfam_gr_mapping and len(str(value)) == len(record.seq):
data = self.clean(str(value))
if data:
self.handle.write("#=GR %s %s %s\n"
@@ -298,17 +298,17 @@ class StockholmIterator(AlignmentIterator):
#These dictionaries should be kept in sync with those
#defined in the StockholmWriter class.
- pfam_gr_mapping = {"SS" : "secondary_structure",
- "SA" : "surface_accessibility",
- "TM" : "transmembrane",
- "PP" : "posterior_probability",
- "LI" : "ligand_binding",
- "AS" : "active_site",
- "IN" : "intron"}
+ pfam_gr_mapping = {"SS": "secondary_structure",
+ "SA": "surface_accessibility",
+ "TM": "transmembrane",
+ "PP": "posterior_probability",
+ "LI": "ligand_binding",
+ "AS": "active_site",
+ "IN": "intron"}
#Following dictionary deliberately does not cover AC, DE or DR
- pfam_gs_mapping = {"OS" : "organism",
- "OC" : "organism_classification",
- "LO" : "look"}
+ pfam_gs_mapping = {"OS": "organism",
+ "OC": "organism_classification",
+ "LO": "look"}
def next(self):
try:
@@ -354,7 +354,7 @@ def next(self):
#Sequence
#Format: "<seqname> <sequence>"
assert not passed_end_alignment
- parts = [x.strip() for x in line.split(" ",1)]
+ parts = [x.strip() for x in line.split(" ", 1)]
if len(parts) != 2:
#This might be someone attempting to store a zero length sequence?
raise ValueError("Could not split line into identifier "
@@ -363,13 +363,13 @@ def next(self):
if id not in ids:
ids.append(id)
seqs.setdefault(id, '')
- seqs[id] += seq.replace(".","-")
+ seqs[id] += seq.replace(".", "-")
elif len(line) >= 5:
#Comment line or meta-data
if line[:5] == "#=GF ":
#Generic per-File annotation, free text
#Format: #=GF <feature> <free text>
- feature, text = line[5:].strip().split(None,1)
+ feature, text = line[5:].strip().split(None, 1)
#Each feature key could be used more than once,
#so store the entries as a list of strings.
if feature not in gf:
@@ -383,7 +383,7 @@ def next(self):
elif line[:5] == '#=GS ':
#Generic per-Sequence annotation, free text
#Format: "#=GS <seqname> <feature> <free text>"
- id, feature, text = line[5:].strip().split(None,2)
+ id, feature, text = line[5:].strip().split(None, 2)
#if id not in ids:
# ids.append(id)
if id not in gs:
@@ -395,14 +395,14 @@ def next(self):
elif line[:5] == "#=GR ":
#Generic per-Sequence AND per-Column markup
#Format: "#=GR <seqname> <feature> <exactly 1 char per column>"
- id, feature, text = line[5:].strip().split(None,2)
+ id, feature, text = line[5:].strip().split(None, 2)
#if id not in ids:
# ids.append(id)
if id not in gr:
gr[id] = {}
if feature not in gr[id]:
gr[id][feature] = ""
- gr[id][feature] += text.strip() # append to any previous entry
+ gr[id][feature] += text.strip() # append to any previous entry
#TODO - Should we check the length matches the alignment length?
# For interlaced sequences the GR data can be split over
# multiple lines
@@ -432,11 +432,11 @@ def next(self):
raise ValueError("Sequences have different lengths, or repeated identifier")
name, start, end = self._identifier_split(id)
record = SeqRecord(Seq(seq, self.alphabet),
- id = id, name = name, description = id,
- annotations = {"accession":name})
+ id=id, name=name, description=id,
+ annotations={"accession": name})
#Accession will be overridden by _populate_meta_data if an explicit
#accession is provided:
- record.annotations["accession"]=name
+ record.annotations["accession"] = name
if start is not None:
record.annotations["start"] = start
@@ -458,8 +458,8 @@ def next(self):
def _identifier_split(self, identifier):
"""Returns (name,start,end) string tuple from an identifier."""
if '/' in identifier:
- name, start_end = identifier.rsplit("/",1)
- if start_end.count("-")==1:
+ name, start_end = identifier.rsplit("/", 1)
+ if start_end.count("-") == 1:
try:
start, end = map(int, start_end.split("-"))
return (name, start, end)
@@ -488,7 +488,7 @@ def _get_meta_data(self, identifier, meta_dict):
This function will return an empty dictionary if no data is found."""
name, start, end = self._identifier_split(identifier)
- if name==identifier:
+ if name == identifier:
identifier_keys = [identifier]
else:
identifier_keys = [identifier, name]
@@ -509,12 +509,12 @@ def _populate_meta_data(self, identifier, record):
seq_data = self._get_meta_data(identifier, self.seq_annotation)
for feature in seq_data:
#Note this dictionary contains lists!
- if feature=="AC": # ACcession number
- assert len(seq_data[feature])==1
- record.annotations["accession"]=seq_data[feature][0]
- elif feature=="DE": # DEscription
+ if feature == "AC": # ACcession number
+ assert len(seq_data[feature]) == 1
+ record.annotations["accession"] = seq_data[feature][0]
+ elif feature == "DE": # DEscription
record.description = "\n".join(seq_data[feature])
- elif feature=="DR": # Database Reference
+ elif feature == "DR": # Database Reference
#Should we try and parse the strings?
record.dbxrefs = seq_data[feature]
elif feature in self.pfam_gs_mapping:
View
32 Bio/AlignIO/__init__.py
@@ -155,24 +155,24 @@
#Please use the same names as BioPerl and EMBOSS where possible.
_FormatToIterator = { # "fasta" is done via Bio.SeqIO
- "clustal" : ClustalIO.ClustalIterator,
- "emboss" : EmbossIO.EmbossIterator,
- "fasta-m10" : FastaIO.FastaM10Iterator,
- "nexus" : NexusIO.NexusIterator,
- "phylip" : PhylipIO.PhylipIterator,
- "phylip-sequential" : PhylipIO.SequentialPhylipIterator,
- "phylip-relaxed" : PhylipIO.RelaxedPhylipIterator,
- "stockholm" : StockholmIO.StockholmIterator,
+ "clustal": ClustalIO.ClustalIterator,
+ "emboss": EmbossIO.EmbossIterator,
+ "fasta-m10": FastaIO.FastaM10Iterator,
+ "nexus": NexusIO.NexusIterator,
+ "phylip": PhylipIO.PhylipIterator,
+ "phylip-sequential": PhylipIO.SequentialPhylipIterator,
+ "phylip-relaxed": PhylipIO.RelaxedPhylipIterator,
+ "stockholm": StockholmIO.StockholmIterator,
}
_FormatToWriter = { # "fasta" is done via Bio.SeqIO
# "emboss" : EmbossIO.EmbossWriter, (unfinished)
- "nexus" : NexusIO.NexusWriter,
- "phylip" : PhylipIO.PhylipWriter,
- "phylip-sequential" : PhylipIO.SequentialPhylipWriter,
- "phylip-relaxed" : PhylipIO.RelaxedPhylipWriter,
- "stockholm" : StockholmIO.StockholmWriter,
- "clustal" : ClustalIO.ClustalWriter,
+ "nexus": NexusIO.NexusWriter,
+ "phylip": PhylipIO.PhylipWriter,
+ "phylip-sequential": PhylipIO.SequentialPhylipWriter,
+ "phylip-relaxed": PhylipIO.RelaxedPhylipWriter,
+ "stockholm": StockholmIO.StockholmWriter,
+ "clustal": ClustalIO.ClustalWriter,
}
@@ -347,7 +347,7 @@ def parse(handle, format, seq_count=None, alphabet=None):
#Map the file format to a sequence iterator:
if format in _FormatToIterator:
iterator_generator = _FormatToIterator[format]
- if alphabet is None :
+ if alphabet is None:
i = iterator_generator(fp, seq_count)
else:
try:
@@ -432,7 +432,7 @@ def read(handle, format, seq_count=None, alphabet=None):
if second is not None:
raise ValueError("More than one record found in handle")
if seq_count:
- assert len(first)==seq_count
+ assert len(first) == seq_count
return first
Please sign in to comment.
Something went wrong with that request. Please try again.