From dd4f8e1109be42f8a79d34a325f144174e84ddb5 Mon Sep 17 00:00:00 2001 From: Christian Brueffer Date: Wed, 26 Dec 2012 12:32:25 +0800 Subject: [PATCH] Bio/SwissProt: PEP8 whitespace cleanup. --- Bio/SwissProt/KeyWList.py | 4 +-- Bio/SwissProt/__init__.py | 76 +++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/Bio/SwissProt/KeyWList.py b/Bio/SwissProt/KeyWList.py index 46159c17973..99fdb9a1f08 100644 --- a/Bio/SwissProt/KeyWList.py +++ b/Bio/SwissProt/KeyWList.py @@ -63,12 +63,12 @@ def parse(handle): # We have reached the footer break key = line[:2] - if key=="//": + if key == "//": record["DE"] = " ".join(record["DE"]) record["SY"] = " ".join(record["SY"]) yield record record = Record() - elif line[2:5]==" ": + elif line[2:5] == " ": value = line[5:].strip() if key in ("ID", "IC", "AC", "CA"): record[key] = value diff --git a/Bio/SwissProt/__init__.py b/Bio/SwissProt/__init__.py index c0e53b162ab..3407b23cc65 100644 --- a/Bio/SwissProt/__init__.py +++ b/Bio/SwissProt/__init__.py @@ -144,7 +144,7 @@ def _read(handle): if unread: value = unread + " " + value unread = "" - if key=='**': + if key == '**': #See Bug 2353, some files from the EBI have extra lines #starting "**" (two asterisks/stars). They appear #to be unofficial automated annotations. e.g. @@ -152,86 +152,86 @@ def _read(handle): #** ################# INTERNAL SECTION ################## #**HA SAM; Annotated by PicoHamap 1.88; MF_01138.1; 09-NOV-2003. pass - elif key=='ID': + elif key == 'ID': record = Record() _read_id(record, line) _sequence_lines = [] - elif key=='AC': + elif key == 'AC': accessions = [word for word in value.rstrip(";").split("; ")] record.accessions.extend(accessions) - elif key=='DT': + elif key == 'DT': _read_dt(record, line) - elif key=='DE': + elif key == 'DE': record.description.append(value.strip()) - elif key=='GN': + elif key == 'GN': if record.gene_name: record.gene_name += " " record.gene_name += value - elif key=='OS': + elif key == 'OS': record.organism.append(value) - elif key=='OG': + elif key == 'OG': record.organelle += line[5:] - elif key=='OC': + elif key == 'OC': cols = [col for col in value.rstrip(";.").split("; ")] record.organism_classification.extend(cols) - elif key=='OX': + elif key == 'OX': _read_ox(record, line) - elif key=='OH': + elif key == 'OH': _read_oh(record, line) - elif key=='RN': + elif key == 'RN': reference = Reference() _read_rn(reference, value) record.references.append(reference) - elif key=='RP': + elif key == 'RP': assert record.references, "RP: missing RN" record.references[-1].positions.append(value) - elif key=='RC': + elif key == 'RC': assert record.references, "RC: missing RN" reference = record.references[-1] unread = _read_rc(reference, value) - elif key=='RX': + elif key == 'RX': assert record.references, "RX: missing RN" reference = record.references[-1] _read_rx(reference, value) - elif key=='RL': + elif key == 'RL': assert record.references, "RL: missing RN" reference = record.references[-1] reference.location.append(value) # In UniProt release 1.12 of 6/21/04, there is a new RG # (Reference Group) line, which references a group instead of # an author. Each block must have at least 1 RA or RG line. - elif key=='RA': + elif key == 'RA': assert record.references, "RA: missing RN" reference = record.references[-1] reference.authors.append(value) - elif key=='RG': + elif key == 'RG': assert record.references, "RG: missing RN" reference = record.references[-1] reference.authors.append(value) - elif key=="RT": + elif key == "RT": assert record.references, "RT: missing RN" reference = record.references[-1] reference.title.append(value) - elif key=='CC': + elif key == 'CC': _read_cc(record, line) - elif key=='DR': + elif key == 'DR': _read_dr(record, value) - elif key=='PE': + elif key == 'PE': #TODO - Record this information? pass - elif key=='KW': + elif key == 'KW': cols = value.rstrip(";.").split('; ') record.keywords.extend(cols) - elif key=='FT': + elif key == 'FT': _read_ft(record, line) - elif key=='SQ': + elif key == 'SQ': cols = value.split() assert len(cols) == 7, "I don't understand SQ line %s" % line # Do more checking here? record.seqinfo = int(cols[1]), int(cols[3]), cols[5] - elif key==' ': + elif key == ' ': _sequence_lines.append(value.replace(" ", "").rstrip()) - elif key=='//': + elif key == '//': # Join multiline data into one string record.description = " ".join(record.description) record.organism = " ".join(record.organism) @@ -268,7 +268,7 @@ def _read_id(record, line): record.molecule_type = None record.sequence_length = int(cols[2]) else: - raise ValueError("ID line has unrecognised format:\n"+line) + raise ValueError("ID line has unrecognised format:\n" + line) # check if the data class is one of the allowed values allowed = ('STANDARD', 'PRELIMINARY', 'IPI', 'Reviewed', 'Unreviewed') if record.data_class not in allowed: @@ -398,7 +398,7 @@ def _read_oh(record, line): # Line type OH (Organism Host) for viral hosts assert line[5:].startswith("NCBI_TaxID="), "Unexpected %s" % line line = line[16:].rstrip() - assert line[-1]=="." and line.count(";")==1, line + assert line[-1] == "." and line.count(";") == 1, line taxid, name = line[:-1].split(";") record.host_taxonomy_id.append(taxid.strip()) record.host_organism.append(name.strip()) @@ -411,7 +411,7 @@ def _read_rn(reference, rn): def _read_rc(reference, value): cols = value.split(';') - if value[-1]==';': + if value[-1] == ';': unread = "" else: cols, unread = cols[:-1], cols[-1] @@ -420,7 +420,7 @@ def _read_rc(reference, value): return # The token is everything before the first '=' character. i = col.find("=") - if i>=0: + if i >= 0: token, text = col[:i], col[i+1:] comment = token.lstrip(), text reference.comments.append(comment) @@ -440,7 +440,7 @@ def _read_rx(reference, value): # have extraneous information in the RX line. Check for # this and chop it out of the line. # (noticed by katel@worldpath.net) - value = value.replace(' [NCBI, ExPASy, Israel, Japan]','') + value = value.replace(' [NCBI, ExPASy, Israel, Japan]', '') # RX lines can also be used of the form # RX PubMed=9603189; @@ -478,9 +478,9 @@ def _read_rx(reference, value): def _read_cc(record, line): key, value = line[5:8], line[9:].rstrip() - if key=='-!-': # Make a new comment + if key == '-!-': # Make a new comment record.comments.append(value) - elif key==' ': # add to the previous comment + elif key == ' ': # add to the previous comment if not record.comments: # TCMO_STRGA in Release 37 has comment with no topic record.comments.append(value) @@ -509,15 +509,15 @@ def _read_ft(record, line): except ValueError: to_res = line[16:22].lstrip() #if there is a feature_id (FTId), store it away - if line[29:35]==r"/FTId=": + if line[29:35] == r"/FTId=": ft_id = line[35:70].rstrip()[:-1] description = "" else: - ft_id ="" + ft_id = "" description = line[29:70].rstrip() if not name: # is continuation of last one assert not from_res and not to_res - name, from_res, to_res, old_description,old_ft_id = record.features[-1] + name, from_res, to_res, old_description, old_ft_id = record.features[-1] del record.features[-1] description = ("%s %s" % (old_description, description)).strip() @@ -543,7 +543,7 @@ def _read_ft(record, line): second_seq = second_seq.replace(" ", "") # reassemble the description description = first_seq + " -> " + second_seq + extra_info - record.features.append((name, from_res, to_res, description,ft_id)) + record.features.append((name, from_res, to_res, description, ft_id)) if __name__ == "__main__":