Skip to content

Commit

Permalink
Added EMBL parsing.
Browse files Browse the repository at this point in the history
Renamed GenBankIO.py to InsdcIO.py, named after the International Nucleotide
Sequence Database Collaboration (INSDC), which includes EMBL, GenBank and DDBJ.
  • Loading branch information
peterc committed Feb 7, 2007
1 parent 42b83f3 commit a169bb0
Showing 1 changed file with 19 additions and 16 deletions.
35 changes: 19 additions & 16 deletions Bio/SeqIO/__init__.py
Expand Up @@ -99,11 +99,6 @@
# For most file formats reading such files is fine; the Stockholm
# parser would fail.
#
# - EMBL sequence format, ideally combined with GenBank nicely.
# http://www.bioperl.org/wiki/EMBL_sequence_format
# Possibly do this in Bio.GenBank?
# See http://bugzilla.open-bio.org/show_bug.cgi?id=2059#c11
#
# - MSF multiple alignment format, aka GCG, aka PileUp format (*.msf)
# http://www.bioperl.org/wiki/MSF_multiple_alignment_format
#
Expand Down Expand Up @@ -145,8 +140,7 @@
from Bio.Align.Generic import Alignment

import FastaIO
#import EmblGenBankIO
import GenBankIO
import InsdcIO #EMBL and GenBank
import StockholmIO
import ClustalIO
import PhylipIO
Expand All @@ -168,6 +162,7 @@
"genbank" : "genbank",
"gbk" : "genbank", #Used by the NCBI
"gb" : "genbank",
"embl" : "embl",
"aln" : "clustal", #aln is almost always clustal format
"phy" : "phylip", #phy is used by clustal
"phylip" : "phylip",
Expand All @@ -180,12 +175,10 @@
}

_FormatToIterator ={"fasta" : FastaIO.FastaIterator,
"genbank" : GenBankIO.GenBankIterator,
#See http://bugzilla.open-bio.org/show_bug.cgi?id=2059#c11
#"genbank" : EmblGenBankIO.GenBankIterator,
#"genbank-cds" : EmblGenBankIO.GenBankCdsFeatureIterator,
#"embl" : EmblGenBankIO.EmblIterator, #Not written yet
#"embl-cds" : EmblGenBankIO.EmblCdsFeatureIterator,
"genbank" : InsdcIO.GenBankIterator,
"genbank-cds" : InsdcIO.GenBankCdsFeatureIterator,
"embl" : InsdcIO.EmblIterator,
"embl-cds" : InsdcIO.EmblCdsFeatureIterator,
"clustal" : ClustalIO.ClustalIterator,
"phylip" : PhylipIO.PhylipIterator,
"nexus" : NexusIO.NexusIterator,
Expand Down Expand Up @@ -1248,7 +1241,7 @@ def SequencesToAlignment(sequences, alphabet=generic_alphabet, strict=True) :
print "# Sequence Input Tests #"
print "#########################################################"

#ToDo - Check alphabet, or at least DNA/amino acid, or those
#ToDo - Check alphabet, or at least DNA/amino acid, for those
# filetypes that specify it (e.g. Nexus, GenBank)
tests = [
(aln_example, "clustal", 8, "HISJ_E_COLI",
Expand Down Expand Up @@ -1294,6 +1287,12 @@ def SequencesToAlignment(sequences, alphabet=generic_alphabet, strict=True) :
"MESTLGSDLARLVRVWRALIDHRLKPLELTQTHWVTLHNINRLPPEQSQIQLAKAIGIEQ" + \
"PSLVRTLDQLEEKGLITRHTCANDRRAKRIKLTEQSSPIIEQVDGVICSTRKEILGGISP" + \
"DEIELLSGLIDKLERNIIQLQSK", True),
(gbk_example, "genbank-cds", 3, "AAA98667.1",
'MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQFVPINRHPALIDYIEE' + \
'LILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVDKDDQIITETEVFDEFRSS' + \
'LNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNRRVDSLEEKAEIERDSNWVKC' + \
'QEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEKLISGDDKILNGVYSQYEEGESI' + \
'FGSLF', True),
(swiss_example,"swiss", 3, "Q43495",
"MASVKSSSSSSSSSFISLLLLILLVIVLQSQVIECQPQQSCTASLTGLNVCAPFLVPGSP" + \
"TASTECCNAVQSINHDCMCNTMRIAAQIPAQCNLPPLSCSAN", True),
Expand All @@ -1309,8 +1308,12 @@ def SequencesToAlignment(sequences, alphabet=generic_alphabet, strict=True) :
#This uses "for x in iterator" internally.
iterator = SequenceIterator(StringIO(data), format=format)
as_list = list(iterator)
assert len(as_list) == rec_count
assert as_list[-1].id == last_id
assert len(as_list) == rec_count, \
"Expected %i records, found %i" \
% (rec_count, len(as_list))
assert as_list[-1].id == last_id, \
"Expected '%s' as last record ID, found '%s'" \
% (last_id, as_list[-1].id)
if last_seq :
assert as_list[-1].seq.tostring() == last_seq

Expand Down

0 comments on commit a169bb0

Please sign in to comment.