Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Removing modules Bio.SGMLExtractor, Bio.CDD, Bio.Gobase and Bio.Rebase which were deprecated in Biopython 1.46
  • Loading branch information...
commit edb3ac6b74a2fb2dde1b2a877ae6c36c4e198a0a 1 parent 890e1d5
@peterjc peterjc authored
View
108 Bio/CDD/Record.py
@@ -1,108 +0,0 @@
-# Copyright 2001 by Katharine Lindner. All rights reserved.
-# This code is part of the Biopython distribution and governed by its
-# license. Please see the LICENSE file that should have been included
-# as part of this package.
-"""Hold CDD data in a straightforward format.
-
-classes:
-o Record - All of the information in a CDD record.
-"""
-
-# standard library
-import string
-from Bio.Seq import Seq
-
-class Record( dict ):
- """Hold CDD information in a format similar to the original record.
-
- The Record class is meant to make data easy to get to when you are
- just interested in looking at CDD data.
-
- Attributes:
- cd
- description
- status
- source
- date
- reference
- taxonomy
- aligned
- representative
- range
- sequence
- """
- def __init__(self):
- dict.__init__( self )
- self[ 'references' ] = []
- self[ 'alignment_lookup' ] = {}
-
- def __str__( self ):
- output = ''
- keys = self.keys()
- keys.sort()
- for key in keys:
- output = output + '%s:\n\n' % key.upper()
- contents = self[ key ]
- if( type( contents ) == type( '' ) ):
- if( key == 'Sequence' ):
- output = output + out_multiline( contents )
- else:
- output = output + '%s\n' % contents
- elif( type( contents ) == type( {} ) ):
- output = output + output_dict( contents, 1 )
- elif( type( contents ) == type( [] ) ):
- output = output + output_list( contents, 1 )
- elif( isinstance( contents, Seq ) ):
- output = output + out_multiline( contents.data )
- output = output + '\n\n'
- return output
-
-def output_dict( dict, level = 0 ):
- output = ''
- prefix = ''
- for j in range( 0, level ):
- prefix = prefix + ' '
- keys = dict.keys()
- keys.sort()
- for key in keys:
- contents = dict[ key ]
- if( type( contents ) == type( '' ) ):
- output = output + '%s%s = %s\n' % ( prefix, key, contents )
- elif( type( contents ) == type( {} ) ):
- output = output + output_dict( contents, level + 1 )
- elif( type( contents ) == type( [] ) ):
- output = output + output_list( contents, level + 1 )
- output = output + '\n'
- return output
-
-def output_list( items, level = 0 ):
- output = ''
- prefix = ''
- for j in range( 0, level ):
- prefix = prefix + ' '
- for item in items:
- if( type( item ) == type( '' ) ):
- output = output + '%s%s\n' % ( prefix, item )
- elif( type( item ) == type( {} ) ):
- output = output + output_dict( item, level + 1 )
- elif( type( item ) == type( [] ) ):
- output = output + output_list( item, level + 1 )
- output = output + '\n'
- return output
-
-def out_multiline( multiline ):
- output = ''
- for j in range( 0, len( multiline ), 80 ):
- output = output + '%s\n' % multiline[ j: j + 80 ]
- output = output + '\n'
- return output
-
-
-
-
-
-
-
-
-
-
View
338 Bio/CDD/__init__.py
@@ -1,338 +0,0 @@
-# Copyright 2002 by Katharine Lindner. All rights reserved.
-# This code is part of the Biopython distribution and governed by its
-# license. Please see the LICENSE file that should have been included
-# as part of this package.
-"""Deal with Conserved Domain Database (CDD) entries from NCBI.
-"""
-
-import warnings
-warnings.warn("Bio.CDD was deprecated, as it cannot parse recent HTML files from the CDD database. If you want to continue to use this module, please get in contact with the Biopython developers at biopython-dev@biopython.org to avoid permanent removal of this module from Biopython", DeprecationWarning)
-
-
-# standard library
-import string
-import array
-import os
-import re
-import sgmllib
-import urlparse
-
-
-# XML from python 2.0
-from xml.sax import handler
-
-# Martel
-import Martel
-from Martel import RecordReader
-
-from Bio.FilteredReader import FilteredReader
-from Bio.FilteredReader import remove_empty_line
-from Bio.FilteredReader import remove_leading_whitespace
-from Bio.SGMLExtractor import SGMLExtractorHandle
-from Bio import File
-from Bio.Seq import Seq
-from Martel.Dispatch import Dispatcher
-import cdd_format
-import Record
-
-class Iterator:
- """Iterator interface to move over a file of CDD entries one at a time.
- Iterator expects a handle to an sgml file. It extracts data bracketed
- by specified tag pairs, then removes blank lines and leading white space.
- The parser operates on the filtered data.
- """
- def __init__(self, handle, parser = None):
- """Initialize the iterator.
-
- Arguments:
- o handle - A handle with CDD entries to iterate through.
- o parser - An optional parser to pass the entries through before
- returning them. If None, then the raw entry will be returned.
- """
- record_handle = SGMLExtractorHandle( handle, [ 'title', 'table', ] )
- filtered_handle = FilteredReader( record_handle )
- filtered_handle.filter_chain = [ remove_empty_line, remove_leading_whitespace ]
- self.handle = File.UndoHandle( filtered_handle )
- self._reader = RecordReader.Everything( self.handle )
- self._parser = parser
-
- def next(self):
- """Return the next CDD record from the handle.
-
- Will return None if we ran out of records.
- """
- data = self._reader.next()
-
- if self._parser is not None:
- if data:
- dumpfile = open( 'dump', 'w' )
- dumpfile.write( data )
- dumpfile.close()
- return self._parser.parse(File.StringHandle(data))
-
- return data
-
- def __iter__(self):
- return iter(self.next, None)
-
-class _Scanner:
- """Start up Martel to do the scanning of the file.
-
-    This initializes the Martel based parser and connects it to a handler
- that will generate events for a Feature Consumer.
- """
- def __init__(self, debug_level = 0):
- """Initialize the scanner by setting up our caches.
-
- Creating the parser takes a long time, so we want to cache it
- to reduce parsing time.
-
- Arguments:
- o debug - The level of debugging that the parser should
- display. Level 0 is no debugging, Level 2 displays the most
- debugging info (but is much slower). See Martel documentation
- for more info on this.
- """
- # a listing of all tags we are interested in scanning for
- # in the MartelParser
- self.interest_tags = [ "cd_tag", \
- "description_tag", \
- "status_tag", \
- "source_tag", \
- "date_tag", \
- "taxonomy_tag", \
- "aligned_tag", \
- "representative_tag", \
- "range_tag", \
- "sequence_tag", \
- "description_contents_multiline", \
- "status_contents_multiline", \
- "source_contents_multiline", \
- "date_contents_multiline", \
- "reference_contents_multiline", \
- "taxonomy_contents_multiline", \
- "aligned_contents_multiline", \
- "representative_contents_multiline", \
- "range_contents_multiline", \
- "cd_contents_multiline", \
- "sequence_contents_multiline", \
- "table_entry" ]
-
- # make a parser that returns only the tags we are interested in
- expression = Martel.select_names( cdd_format.cdd_record, self.interest_tags)
- self._parser = expression.make_parser(debug_level )
-
- def feed(self, handle, consumer):
-        """Feed a set of data into the scanner.
-
- Arguments:
- o handle - A handle with the information to parse.
- o consumer - The consumer that should be informed of events.
- """
- consumer.set_interest_tags( self.interest_tags )
- self._parser.setContentHandler( consumer )
-# self._parser.setErrorHandler(handle.ErrorHandler())
-
- self._parser.parseFile(handle)
-
-class _RecordConsumer( Dispatcher ):
- """Create a CDD Record object from scanner generated information.
- """
- def __init__(self):
- Dispatcher.__init__( self )
- self.data = Record.Record()
- self._pending_key = ''
-
-
- def set_interest_tags( self, interest_tags ):
- self.interest_tags = interest_tags
-
- def start_cd_tag( self, line, attrs ):
- self.save_characters()
-
- def end_cd_tag( self, cdd_record ):
- key = self.save_key()
-
- def start_cd_contents_multiline( self, text, attrs ):
- self.save_characters()
-
- def end_cd_contents_multiline( self, cdd_record ):
- self.add_entry()
-
- def start_description_tag( self, text, attrs ):
- self.save_characters()
-
- def end_description_tag( self, cdd_record ):
- key = self.save_key()
-
- def start_description_contents_multiline( self, text, attrs ):
- self.save_characters()
-
- def end_description_contents_multiline( self, cdd_record ):
- self.add_entry()
-
- def start_status_tag( self, text, attrs ):
- self.save_characters()
-
- def end_status_tag( self, cdd_record ):
- key = self.save_key()
-
- def start_status_contents_multiline( self, text, attrs ):
- self.save_characters()
-
- def end_status_contents_multiline( self, cdd_record ):
- self.add_entry()
-
- def start_source_tag( self, text, attrs ):
- self.save_characters()
-
- def end_source_tag( self, cdd_record ):
- key = self.save_key()
-
- def start_source_contents_multiline( self, text, attrs ):
- self.save_characters()
-
- def end_source_contents_multiline( self, cdd_record ):
- self.add_entry()
-
- def start_date_tag( self, text, attrs ):
- self.save_characters()
-
- def end_date_tag( self, cdd_record ):
- key = self.save_key()
-
- def start_date_contents_multiline( self, text, attrs ):
- self.save_characters()
-
- def end_date_contents_multiline( self, cdd_record ):
- self.add_entry()
-
- def start_reference_contents_multiline( self, text, attrs ):
- self.save_characters()
-
- def end_reference_contents_multiline( self, cdd_record ):
- reference = self.get_characters()
- self.data[ 'references' ].append( reference )
-
- def start_taxonomy_tag( self, text, attrs ):
- self.save_characters()
-
- def end_taxonomy_tag( self, cdd_record ):
- key = self.save_key()
-
- def start_taxonomy_contents_multiline( self, text, attrs ):
- self.save_characters()
-
- def end_taxonomy_contents_multiline( self, cdd_record ):
- self.add_entry()
-
- def start_aligned_tag( self, text, attrs ):
- self.save_characters()
-
- def end_aligned_tag( self, cdd_record ):
- key = self.save_key()
-
- def start_aligned_contents_multiline( self, text, attrs ):
- self.save_characters()
-
- def end_aligned_contents_multiline( self, cdd_record ):
- self.add_entry()
-
- def start_representative_tag( self, text, attrs ):
- self.save_characters()
-
- def end_representative_tag( self, cdd_record ):
- key = self.save_key()
-
- def start_representative_contents_multiline( self, text, attrs ):
- self.save_characters()
-
- def end_representative_contents_multiline( self, cdd_record ):
- self.add_entry()
-
- def start_range_tag( self, text, attrs ):
- self.save_characters()
-
- def end_range_tag( self, cdd_record ):
- key = self.save_key()
-
- def start_range_contents_multiline( self, text, attrs ):
- self.save_characters()
-
- def end_range_contents_multiline( self, cdd_record ):
- self.add_entry()
-
- def start_sequence_tag( self, text, attrs ):
- self.save_characters()
-
- def end_sequence_tag( self, cdd_record ):
- key = self.save_key()
-
- def start_sequence_contents_multiline( self, text, attrs ):
- self.save_characters()
-
- def end_sequence_contents_multiline( self, cdd_record ):
- line = self.get_characters()
- ( lines ) = line.splitlines()
- key = self._pending_key
- val = ''
- for line in lines:
- line = line.strip()
- val = val + line
- self.data[ key ] = Seq( val )
-
- def start_table_entry( self, text, attrs ):
- self.save_characters()
-
- def end_table_entry( self, cdd_record ):
- line = self.get_characters()
- ( lines ) = line.splitlines()
- key = ''
- val = ''
- state = 'key'
- for line in lines:
- line = line.strip()
- upper_line = line.upper()
- if( upper_line.endswith( '[CD]' ) ):
- line = line[ :-4 ]
- state = 'val'
- elif( len( line ) > 60 ):
- state = 'val'
- else:
- state = 'key'
- if( state == 'key' ):
- key = key + line
- else:
- val = val + line
- self.data[ 'alignment_lookup' ][ key ] = val
-
- def save_key( self ):
- key = self.get_characters()
- self._pending_key = key[ : -1 ]
-
- def add_entry( self ):
- key = self._pending_key
- self._pending_key = ""
- self.data[ key ] = self.get_characters()
-
-class RecordParser:
- """Parse CDD files into Record objects
- """
- def __init__(self, debug_level = 0):
- """Initialize the parser.
-
- Arguments:
- o debug_level - An optional argument that specifies the amount of
- debugging information Martel should spit out. By default we have
- no debugging info (the fastest way to do things), but if you want
- you can set this as high as two and see exactly where a parse fails.
- """
- self._scanner = _Scanner(debug_level)
-
- def parse(self, handle):
- """Parse the specified handle into an NBRF record.
- """
- self._consumer = _RecordConsumer()
- self._scanner.feed(handle, self._consumer)
- return self._consumer.data
-
View
154 Bio/CDD/cdd_format.py
@@ -1,154 +0,0 @@
-# Copyright 2001 by Katharine Lindner. All rights reserved.
-# This code is part of the Biopython distribution and governed by its
-# license. Please see the LICENSE file that should have been included
-# as part of this package.
-
-"""Martel based parser to read CDD formatted files.
-
-This is a huge regular expression for CDD, built using
-the 'regular expressions on steroids' capabilities of Martel.
-
-http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd.shtml
-Notes:
-Just so I remember -- the new end of line syntax is:
- New regexp syntax - \R
- \R means "\n|\r\n?"
- [\R] means "[\n\r]"
-
-This helps us have endlines be consistent across platforms.
-
-# standard library
-http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd.shtml
-"""
-import string
-
-# Martel
-import Martel
-from Martel import RecordReader
-from Martel import Str
-from Martel import AnyEol
-from Martel import ToEol
-from Martel import Group
-from Martel import Alt
-from Martel import Opt
-from Martel import Rep
-from Martel import Rep1
-from Martel import Any
-from Martel import AnyBut
-from Martel import Assert
-from Martel import AssertNot
-
-
-
-
-
-# --- first set up some helper constants and functions
-# Copyright 2002 by Katharine Lindner. All rights reserved.
-# This code is part of the Biopython distribution and governed by its
-# license. Please see the LICENSE file that should have been included
-# as part of this package.
-
-upper_alpha = Any( "ABCDEFGHIJKLMNOPQRSTUVWXYZ" )
-white_space = Any( "\t " )
-eols = chr( 13 ) + chr( 10 )
-white_spaces = Rep( white_space )
-summary_line = Str( "CD summary" ) + ToEol()
-
-cd_tag = Group( "cd_tag", Str( "CD:" ) )
-description_tag = Group( "description_tag", Str( "Description:" ) )
-status_tag = Group( "status_tag", Str( "CD status:" ) )
-source_tag = Group( "source_tag", Str( "Source:" ) )
-date_tag = Group( "date_tag", Str( "Created:" ) )
-reference_tag = Group( "reference_tag", Str( "References:" ) )
-taxonomy_tag = Group( "taxonomy_tag", Str( "Taxonomy spanned:" ) )
-aligned_tag = Group( "aligned_tag", Str( "Aligned sequences:" ) )
-representative_tag = Group( "representative_tag", Str( "Representative:" ) )
-range_tag = Group( "range_tag", Str( "Aligned range:" ) )
-sequence_tag = Group( "sequence_tag", Str( "Sequence:" ) )
-has_tag = Alt( cd_tag, description_tag, status_tag, source_tag, date_tag, \
- reference_tag, taxonomy_tag, aligned_tag, representative_tag, range_tag, sequence_tag )
-
-cd_key_line = cd_tag + white_spaces + AnyEol()
-description_key_line = description_tag + white_spaces + AnyEol()
-status_key_line = status_tag + white_spaces + AnyEol()
-source_key_line = source_tag + white_spaces + AnyEol()
-date_key_line = date_tag + white_spaces + AnyEol()
-reference_key_line = reference_tag + white_spaces + AnyEol()
-taxonomy_key_line = taxonomy_tag + white_spaces + AnyEol()
-aligned_key_line = aligned_tag + white_spaces + AnyEol()
-representative_key_line = representative_tag + white_spaces + AnyEol()
-range_key_line = range_tag + white_spaces + AnyEol()
-sequence_key_line = sequence_tag + white_spaces + AnyEol()
-
-cd_contents_line = Group( "cd_contents_line", AssertNot( has_tag ) + ToEol() )
-description_contents_line = AssertNot( has_tag ) + ToEol()
-status_contents_line = AssertNot( has_tag ) + ToEol()
-source_contents_line = AssertNot( has_tag ) + ToEol()
-date_contents_line = AssertNot( has_tag ) + ToEol()
-reference_contents_line = AssertNot( has_tag ) + ToEol()
-taxonomy_contents_line = AssertNot( has_tag ) + ToEol()
-aligned_contents_line = AssertNot( has_tag ) + ToEol()
-representative_contents_line = AssertNot( has_tag ) + ToEol()
-range_contents_line = AssertNot( has_tag ) + ToEol()
-sequence_contents_line = Group( "sequence_contents_line", \
- white_spaces + Rep1( upper_alpha ) + white_spaces + AnyEol() )
-sentinel_line = white_spaces + Str( "Definition" ) + white_spaces + AnyEol()
-boiler_plate = AssertNot( sentinel_line ) + ToEol()
-definition_line = Group( "definition_line", \
- Rep( AnyBut( eols + '[' ) ) + Str( '[CD]' ) + white_spaces + AnyEol() )
-pdb_id_line = AssertNot( definition_line ) + ToEol()
-pdb_id_multiline = Group( "pdb_id_multiline", Rep1( pdb_id_line ) )
-table_entry = Group( "table_entry", \
- pdb_id_multiline + definition_line )
-table = Group( "table", Rep1( table_entry ) )
-
-cd_contents_multiline = Group( "cd_contents_multiline", \
- Rep( cd_contents_line ) )
-description_contents_multiline = Group( "description_contents_multiline", \
- Rep( description_contents_line ) )
-status_contents_multiline = Group( "status_contents_multiline", \
- Rep( status_contents_line ) )
-source_contents_multiline = Group( "source_contents_multiline", \
- Rep( source_contents_line ) )
-date_contents_multiline = Group( "date_contents_multiline", \
- Rep( date_contents_line ) )
-reference_contents_multiline = Group( "reference_contents_multiline", \
- Rep( reference_contents_line ) )
-taxonomy_contents_multiline = Group( "taxonomy_contents_multiline", \
- Rep( taxonomy_contents_line ) )
-aligned_contents_multiline = Group( "aligned_contents_multiline", \
- Rep( aligned_contents_line ) )
-representative_contents_multiline = Group( "representative_contents_multiline", \
- Rep( representative_contents_line ) )
-range_contents_multiline = Group( "range_contents_multiline", \
- Rep( range_contents_line ) )
-sequence_contents_multiline = Group( "sequence_contents_multiline", \
- Rep( sequence_contents_line ) )
-
-cd_block = cd_key_line + cd_contents_multiline
-description_block = description_key_line + description_contents_multiline
-status_block = status_key_line + status_contents_multiline
-source_block = source_key_line + source_contents_multiline
-date_block = date_key_line + date_contents_multiline
-reference_block = Assert(reference_tag ) + reference_key_line + \
- reference_contents_multiline
-taxonomy_block = taxonomy_key_line + taxonomy_contents_multiline
-aligned_block = aligned_key_line + aligned_contents_multiline
-representative_block = representative_key_line + representative_contents_multiline
-range_block = range_key_line + range_contents_multiline
-sequence_block = sequence_key_line + sequence_contents_multiline
-trailer_line = ToEol()
-
-cdd_record = summary_line + cd_block + description_block + status_block + \
- source_block + date_block + Opt( reference_block ) + taxonomy_block + \
- aligned_block + representative_block + range_block + sequence_block + \
- Rep( boiler_plate ) + sentinel_line + table
-
-
-
-
-
-
-
-
-
View
475 Bio/Gobase/__init__.py
@@ -1,475 +0,0 @@
-# Copyright 2000 by Katharine Lindner. All rights reserved.
-# This code is part of the Biopython distribution and governed by its
-# license. Please see the LICENSE file that should have been included
-# as part of this package.
-
-"""
-This module provides code to work with files from Gobase.
-http://megasun.bch.umontreal.ca/gobase/
-
-
-Classes:
-Record Holds gobase sequence data.
-Iterator Iterates over sequence data in a gobase file.
-Dictionary Accesses a gobase file using a dictionary interface.
-RecordParser Parses gobase sequence data into a Record object.
-
-_Scanner Scans a gobase-format stream.
-_RecordConsumer Consumes gobase data to a Record object.
-
-
-Functions:
-index_file Index a FASTA file for a Dictionary.
-
-"""
-
-import warnings
-warnings.warn("Bio.Gobase is deprecated, as this module doesn't seem to have any users. If you are using Bio.Gobase, please get in contact with the Biopython developers at biopython-dev@biopython.org to avoid permanent removal of this module.", DeprecationWarning)
-
-from types import *
-import string
-import re
-from Bio import File
-from Bio import Index
-from Bio.ParserSupport import *
-
-class Record:
- """Holds information from a Gobase record.
-
- Members:
- species_name
- taxon_division
- gobase_id
- """
- def __init__(self, colwidth=60):
- """__init__(self, colwidth=60)
-
- Create a new Record. colwidth specifies the number of residues
- to put on each line.
-
- """
- self.species_name = ''
- self.taxon_division = ''
-
-class SequenceRecord( Record ):
- """Holds information from a Gobase record.
-
- Members:
- molecule_type
- is_plasmid
- shape
- submission_date
- update_date
- entrez_record
- genbank_accession
- """
- def __init__(self, colwidth=60):
- """__init__(self, colwidth=60)
-
- Create a new Record. colwidth specifies the number of residues
- to put on each line.
-
- """
- Record.__init__( self )
- self.molecule_type = ''
- self.is_plasmid = ''
- self.shape = ''
- self.submission_date = ''
- self.update_date = ''
- self.entrez_record = ''
- self.genbank_accession = ''
-
-class GeneRecord( Record ):
- """Holds information from a Gobase record.
-
- Members:
- """
- def __init__(self, colwidth=60):
- """__init__(self, colwidth=60)
-
- Create a new Record. colwidth specifies the number of residues
- to put on each line.
-
- """
- Record.__init__( self )
- self.gene_class = ''
- self.plasmid_encoded = ''
- self.is_partial_gene = ''
- self.is_pseudo_gene = ''
- self.is_transpliced_gene = ''
- self.chloroplast_origin = ''
- self.contains_intron = ''
- self.orf = ''
- self.included_in_intron = ''
- self.published_info = ''
- self.genbank_accession = ''
- self.entrez_record = ''
- self.product_type = ''
- self.product_class = ''
-
-class ProteinRecord( Record ):
- """Holds information from a Gobase record.
-
- Members:
- product_class
- gene_class
- is_partial_protein
- is_plasmid
- function
- entry_record
- """
- def __init__(self, colwidth=60):
- """__init__(self, colwidth=60)
-
- Create a new Record. colwidth specifies the number of residues
- to put on each line.
-
- """
- Record.__init__( self )
- self.product_class = ''
- self.gene_class = ''
- self.is_partial_protein = ''
- self.is_plasmid = ''
- self.is_pseudo = ''
- self.function = ''
- self.entry_record = ''
-
-class Iterator:
- """Returns one record at a time from a Gobase file.
-
- Methods:
- next Return the next record from the stream, or None.
-
- """
- def __init__(self, handle, parser=None):
- """__init__(self, handle, parser=None)
-
- Create a new iterator. handle is a file-like object. parser
- is an optional Parser object to change the results into another form.
- If set to None, then the raw contents of the file will be returned.
-
- """
- if type(handle) is not FileType and type(handle) is not InstanceType:
- raise ValueError, "I expected a file handle or file-like object"
- self._uhandle = SGMLHandle( File.UndoHandle( handle ) )
- self._parser = parser
-
- def next(self):
- """next(self) -> object
-
- Return the next gobase record from the file. If no more records,
- return None.
-
- """
- lines = []
- first_tag = 'Recognition Sequence'
- while 1:
- line = self._uhandle.readline()
- if not line:
- break
- if line[:len( first_tag )] == 'first_tag':
- self._uhandle.saveline(line)
- break
-
- if not line:
- return None
-
- if self._parser is not None:
- return self._parser.parse(File.StringHandle(data))
- return data
-
- def __iter__(self):
- return iter(self.next, None)
-
-class Dictionary:
- """Accesses a gobase file using a dictionary interface.
-
- """
- __filename_key = '__filename'
-
- def __init__(self, indexname, parser=None):
- """__init__(self, indexname, parser=None)
-
- Open a Gobase Dictionary. indexname is the name of the
- index for the dictionary. The index should have been created
- using the index_file function. parser is an optional Parser
- object to change the results into another form. If set to None,
- then the raw contents of the file will be returned.
-
- """
- self._index = Index.Index(indexname)
- self._handle = open(self._index[Dictionary.__filename_key])
- self._parser = parser
-
- def __len__(self):
- return len(self._index)
-
- def __getitem__(self, key):
- start, len = self._index[key]
- self._handle.seek(start)
- data = self._handle.read(len)
- if self._parser is not None:
- return self._parser.parse(File.StringHandle(data))
- return data
-
- def __getattr__(self, name):
- return getattr(self._index, name)
-
-class RecordParser:
- """Parses Gobase sequence data into a Record object.
-
- """
- def __init__(self):
- self._scanner = _Scanner()
- self._consumer = _RecordConsumer()
-
- def parse(self, handle):
- self._scanner.feed(handle, self._consumer)
- return self._consumer.data
-
-class _Scanner:
- """Scans a gobase file.
-
- Methods:
- feed Feed in one gobase record.
-
- """
- def feed(self, handle, consumer):
- """feed(self, handle, consumer)
-
- Feed in gobase data for scanning. handle is a file-like object
- containing gobase data. consumer is a Consumer object that will
- receive events as the gobase data is scanned.
-
- """
- if isinstance(handle, File.UndoHandle):
- uhandle = handle
- else:
- uhandle = File.UndoHandle(handle)
- uhandle = File.SGMLHandle( uhandle )
-
- if uhandle.peekline():
- self._scan_record(uhandle, consumer)
-
- def _scan_line(self, uhandle ):
- line = safe_readline( uhandle )
- line = string.join( string.split( line ), ' ' ) + ' '
- return line
-
- def _text_in( self, uhandle, text, count ):
- for j in range( count ):
- try:
- line = self._scan_line( uhandle )
- text = text + line
- except:
- if( line == '' ):
- return text
- return text
-
- def _scan_sequence_record( self, text, consumer ):
- data = consumer.data
- next_item = self._scan_field( text, 'Molecule type:', 'Species name:' )
- data.molecule_type = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'Shape of molecule:', 'Sequence length:' )
- data.shape = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'Plasmid:', 'Complete genome:' )
- data.is_plasmid = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'NCBI Entrez record:', 'Genbank accession:' )
- data.entrez_record = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'Genbank accession:', 'Coding gene(s):' )
- data.genbank_accession = consumer.text_field( next_item )
- consumer.data = data
-
- def _scan_gene_record( self, text, consumer ):
- data = consumer.data
- next_item = self._scan_field( text, 'Gene Class:', 'Species name:' )
- data.gene_class = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'Plasmid encoded:', 'Partial gene:' )
- data.is_plasmid = consumer.word_field( next_item )
-
- next_item = self._scan_field( text, 'Partial gene:', 'Pseudo:' )
- data.is_partial_gene = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'Pseudo:', 'Transpliced gene:' )
- data.is_pseudo_gene = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'Transpliced gene:', 'Chloroplast origin:' )
- data.is_transpliced_gene = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'Chloroplast origin:', 'Contains intron(s):' )
- data.chloroplast_origin = consumer.word_field( next_item )
-
- next_item = self._scan_field( text, 'Contains intron(s):' )
- data.contains_intron = consumer.word_field( next_item )
-
- next_item = self._scan_field( text, 'Included in intron:' )
- data.included_in_intron = consumer.word_field( next_item )
-
- next_item = self._scan_field( text, 'ORF:' )
- data.orf = consumer.word_field( next_item )
-
- next_item = self._scan_field( text, 'NCBI Entrez record:' )
- data.entrez_record = consumer.word_field( next_item )
-
- next_item = self._scan_field( text, 'Genbank accession:', 'Product type:' )
- data.genbank_accession = consumer.word_field( next_item )
-
- next_item = self._scan_field( text, 'Product type:', 'Product Class:' )
- data.product_type = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'Product Class:' )
- data.product_class = consumer.text_field( next_item )
-
- consumer.data = data
-
- def _scan_protein_record( self, text, consumer ):
- data = consumer.data
- next_item = self._scan_field( text, 'Product Class:', 'Species name:' )
- data.product_class = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'Gene Class:', 'Partial protein:' )
- data.gene_class = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'Partial protein:', 'Conflict:' )
- data.is_partial_protein = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'Plasmid:', 'Sequence length:' )
- data.is_plasmid = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'General function:' )
- data.function = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'NCBI Entrez record:' )
- data.entrez_record = consumer.word_field( next_item )
-
- consumer.data = data
-
- def _scan_record(self, uhandle, consumer):
- text = ''
- text = self._text_in( uhandle, text, 100 )
- text = string.lstrip( text )
-
- if( string.find( text, 'Sequence' ) == 0 ):
- consumer.data = SequenceRecord()
- self._scan_sequence_record( text, consumer )
- elif( string.find( text, 'Gene' ) == 0 ):
- consumer.data = GeneRecord()
- self._scan_gene_record( text, consumer )
- elif( string.find( text, 'Protein' ) == 0 ):
- consumer.data = ProteinRecord()
- self._scan_protein_record( text, consumer )
- else:
- print 'UNKNOWN!!!!!!'
-
- data = consumer.data
- next_item = self._scan_field( text, 'Species name:', 'Taxon division' )
- data.species_name = consumer.text_field( next_item )
-
- next_item = self._scan_field( text, 'Taxon division:' )
- print next_item
- data.taxon_division = consumer.word_field( next_item )
- consumer.data = data
-
-# consumer.end_sequence()
-
-
- def _scan_field(self, text, field, next_field = None ):
- start = string.find( text, field )
- if( start == -1 ):
- return ''
- if( next_field == None ):
- pattern = re.compile( '[A-Z][a-z0-9 ]+:' )
- offset = start + len( field )
- match = pattern.search( text[ offset: ] )
- if match:
- end = offset + match.start()
- else:
- end = start + 40
- else:
- end = string.find( text, next_field )
- if( end == -1 ):
- return ''
- next_item = text[ start:end ]
- return( next_item )
-
-
-class _RecordConsumer(AbstractConsumer):
- """Consumer that converts a gobase record to a Record object.
-
- Members:
- data Record with gobase data.
-
- """
- def __init__(self):
- self.data = None
-
- def end_sequence(self):
- pass
-
- def text_field( self, line ):
- if( line == '' ):
- return ''
- cols = string.split( line, ': ' )
- return( cols[ 1 ] )
-
- def int_field( self, line ):
- if( line == '' ):
- return None
- cols = string.split( line, ': ' )
- return( int( cols[ 1 ] ) )
-
- def word_field( self, line ):
- if( line == '' ):
- return ''
- cols = string.split( line, ': ' )
- cols = string.split( cols[ 1 ] )
- return( cols[ 0 ] )
-
- def date_field( self, line ):
- if( line == '' ):
- return ''
- cols = string.split( line, ':' )
- cols = string.split( cols[ 1 ] )
- return( string.join( cols[ :3 ] ) )
-
-
-def index_file(filename, indexname, rec2key=None):
-    """index_file(filename, indexname, rec2key=None)
-
- Index a gobase file. filename is the name of the file.
- indexname is the name of the dictionary. rec2key is an
- optional callback that takes a Record and generates a unique key
- (e.g. the accession number) for the record. If not specified,
- the sequence title will be used.
-
- """
- if not os.path.exists(filename):
- raise ValueError, "%s does not exist" % filename
-
- index = Index.Index(indexname, truncate=1)
- index[Dictionary._Dictionary__filename_key] = filename
-
- iter = Iterator(open(filename), parser=RecordParser())
- while 1:
- start = iter._uhandle.tell()
- rec = iter.next()
- length = iter._uhandle.tell() - start
-
- if rec is None:
- break
- if rec2key is not None:
- key = rec2key(rec)
- else:
- key = rec.title
-
- if not key:
- raise KeyError, "empty sequence key was produced"
- elif index.has_key(key):
- raise KeyError, "duplicate key %s found" % key
-
- index[key] = start, length
View
441 Bio/Rebase/__init__.py
@@ -1,441 +0,0 @@
-# Copyright 2000 by Katharine Lindner. All rights reserved.
-# This code is part of the Biopython distribution and governed by its
-# license. Please see the LICENSE file that should have been included
-# as part of this package.
-
-"""
-This module provides code to work with files from Rebase.
-http://rebase.neb.com/rebase/rebase.html
-
-
-Classes:
-Record Holds rebase sequence data.
-Iterator Iterates over sequence data in a rebase file.
-Dictionary Accesses a rebase file using a dictionary interface.
-RecordParser Parses rebase sequence data into a Record object.
-
-_Scanner Scans a rebase-format stream.
-_RecordConsumer Consumes rebase data to a Record object.
-
-
-Functions:
-index_file Index a FASTA file for a Dictionary.
-
-"""
-
-import warnings
-warnings.warn("Bio.Rebase was deprecated, as it does not seem to be able to parse recent HTML files from Rebase. If you want to continue to use this module, please get in contact with the Biopython developers at biopython-dev@biopython.org to avoid permanent removal of this module from Biopython", DeprecationWarning)
-
-
-from types import *
-import string
-from Bio import File
-from Bio import Index
-from Bio.ParserSupport import *
-
-class Record:
- """Holds information from a FASTA record.
-
- Members:
- seq_5_to_3 The sequence.
- seq_3_to_5
- enzyme_num The enzyme number
- pos Position of cleavage
- prototype Prototype
- source
- microorganism
- temperature Growth temperature
- misc Miscellaneous information
- date_entered
- date_modified
- num_Adeno2
- num_Lambda
- num_pBR322
- num_PhiX174
- num_SV40
-
- """
- def __init__(self, colwidth=60):
- """__init__(self, colwidth=60)
-
- Create a new Record. colwidth specifies the number of residues
- to put on each line.
-
- """
- self.seq_5_to_3 = ''
- self.seq_3_to_5 = ''
- self.methylation = ''
- self.enzyme_num = None
- self.prototype = ''
- self.source = ''
- self.microorganism = ''
- self.temperature = None
- self.misc = ''
- self.date_entered = ''
- self.date_modified = ''
- self._colwidth = colwidth
- self.num_Adeno2 = 0
- self.num_Lambda = 0
- self.num_pBR322 = 0
- self.num_PhiX174 = 0
- self.num_SV40 = 0
-
-class Iterator:
- """Returns one record at a time from a Rebase file.
-
- Methods:
- next Return the next record from the stream, or None.
-
- """
- def __init__(self, handle, parser=None):
- """__init__(self, handle, parser=None)
-
- Create a new iterator. handle is a file-like object. parser
- is an optional Parser object to change the results into another form.
- If set to None, then the raw contents of the file will be returned.
-
- """
- if type(handle) is not FileType and type(handle) is not InstanceType:
- raise ValueError, "I expected a file handle or file-like object"
- self._uhandle = SGMLHandle( File.UndoHandle( handle ) )
- self._parser = parser
-
- def next(self):
- """next(self) -> object
-
- Return the next rebase record from the file. If no more records,
- return None.
-
- """
- lines = []
- first_tag = 'Recognition Sequence'
- while 1:
- line = self._uhandle.readline()
- if not line:
- break
- if line[:len( first_tag )] == 'first_tag':
- self._uhandle.saveline(line)
- break
-
- if not line:
- return None
-
- if self._parser is not None:
- return self._parser.parse(File.StringHandle(data))
- return data
-
- def __iter__(self):
- return iter(self.next, None)
-
-class Dictionary:
- """Accesses a rebase file using a dictionary interface.
-
- """
- __filename_key = '__filename'
-
- def __init__(self, indexname, parser=None):
- """__init__(self, indexname, parser=None)
-
- Open a Fasta Dictionary. indexname is the name of the
- index for the dictionary. The index should have been created
- using the index_file function. parser is an optional Parser
- object to change the results into another form. If set to None,
- then the raw contents of the file will be returned.
-
- """
- self._index = Index.Index(indexname)
- self._handle = open(self._index[Dictionary.__filename_key])
- self._parser = parser
-
- def __len__(self):
- return len(self._index)
-
- def __getitem__(self, key):
- start, len = self._index[key]
- self._handle.seek(start)
- data = self._handle.read(len)
- if self._parser is not None:
- return self._parser.parse(File.StringHandle(data))
- return data
-
- def __getattr__(self, name):
- return getattr(self._index, name)
-
-class RecordParser:
- """Parses FASTA sequence data into a Record object.
-
- """
- def __init__(self):
- self._scanner = _Scanner()
- self._consumer = _RecordConsumer()
-
- def parse(self, handle):
- self._scanner.feed(handle, self._consumer)
- return self._consumer.data
-
-class _Scanner:
- """Scans a rebase file.
-
- Methods:
- feed Feed in one rebase record.
-
- """
- def feed(self, handle, consumer):
- """feed(self, handle, consumer)
-
- Feed in rebase data for scanning. handle is a file-like object
- containing rebase data. consumer is a Consumer object that will
- receive events as the rebase data is scanned.
-
- """
- if isinstance(handle, File.UndoHandle):
- uhandle = handle
- else:
- uhandle = File.UndoHandle(handle)
- uhandle = File.SGMLHandle( uhandle )
-
- if uhandle.peekline():
- self._scan_record(uhandle, consumer)
-
- def _scan_line(self, uhandle ):
- line = safe_readline( uhandle )
- line = string.join( string.split( line ), ' ' ) + ' '
- return line
-
- def _text_in( self, uhandle, text, count ):
- for j in range( count ):
- line = self._scan_line( uhandle )
- text = text + line
- return text
-
- def _scan_record(self, uhandle, consumer):
- consumer.start_sequence()
- text = ''
- text = self._text_in( uhandle, text, 100 )
- self._scan_sequence( text, consumer)
- self._scan_methylation( text, consumer)
- self._scan_enzyme_num( text, consumer )
- self._scan_prototype( text, consumer )
- self._scan_source( text, consumer )
- self._scan_microorganism( text, consumer )
- self._scan_temperature( text, consumer)
- self._scan_date_entered( text, consumer)
- self._scan_date_modified( text, consumer)
- self._scan_Adeno2( text, consumer)
- self._scan_Lambda( text, consumer)
- self._scan_pBR322( text, consumer)
- self._scan_PhiX174( text, consumer)
- self._scan_SV40( text, consumer)
-# consumer.end_sequence()
-
-
- def _scan_sequence(self, text, consumer ):
- start = string.find( text, 'Recognition Sequence:' )
- end = string.find( text, 'Base (Type of methylation):' )
- if( end == -1 ):
- end = string.find( text, 'REBASE enzyme #:' )
- next_item = text[ start:end ]
- consumer.sequence( next_item )
-
- def _scan_methylation(self, text, consumer ):
- start = string.find( text, 'Base (Type of methylation):' )
- if( start != -1 ):
- end = string.find( text, 'REBASE enzyme #:' )
- next_item = text[ start:end ]
- consumer.methylation( next_item )
-
- def _scan_enzyme_num(self, text, consumer ):
- start = string.find( text, 'REBASE enzyme #:' )
- end = string.find( text, 'Prototype:' )
- next_item = text[ start:end ]
- consumer.enzyme_num( next_item )
-
- def _scan_prototype(self, text, consumer ):
- start = string.find( text, 'Prototype:' )
- end = string.find( text, 'Source:' )
- next_item = text[ start:end ]
- consumer.prototype( next_item )
-
- def _scan_source(self, text, consumer ):
- start = string.find( text, 'Source:' )
- end = string.find( text, 'Microorganism:' )
- next_item = text[ start:end ]
- consumer.source( next_item )
-
-
- def _scan_microorganism(self, text, consumer ):
- start = string.find( text, 'Microorganism:' )
- end = string.find( text, 'Growth Temperature:' )
- next_item = text[ start:end ]
- consumer.microorganism( next_item )
-
- def _scan_temperature(self, text, consumer):
- start = string.find( text, 'Growth Temperature:' )
- end = start + 30
- next_item = text[ start:end ]
- consumer.temperature( next_item )
-
-
- def _scan_date_entered(self, text, consumer):
- start = string.find( text, 'Entered:' )
- end = start + 30
- next_item = text[ start:end ]
- consumer.data_entered( next_item )
-
- def _scan_date_modified(self, text, consumer):
- start = string.find( text, 'Modified:' )
- if( start != -1 ):
- end = start + 30
- next_item = text[ start:end ]
- consumer.data_modified( next_item )
-
- def _scan_Adeno2( self, text, consumer ):
- start = string.find( text, 'Adeno2:' )
- end = string.find( text, 'Lambda:' )
- next_item = text[ start:end ]
- consumer.num_Adeno2( next_item )
-
- def _scan_Lambda( self, text, consumer ):
- start = string.find( text, 'Lambda:' )
- end = string.find( text, 'pBR322:' )
- next_item = text[ start:end ]
- consumer.num_Lambda( next_item )
-
- def _scan_pBR322(self, text, consumer ):
- start = string.find( text, 'pBR322:' )
- end = string.find( text, 'PhiX174:' )
- next_item = text[ start:end ]
- consumer.num_pBR322( next_item )
-
- def _scan_PhiX174(self, text, consumer ):
- start = string.find( text, 'PhiX174:' )
- end = string.find( text, 'SV40:' )
- next_item = text[ start:end ]
- consumer.num_PhiX174( next_item )
-
- def _scan_SV40(self, text, consumer ):
- start = string.find( text, 'SV40:' )
- end = start + 30
- next_item = text[ start:end ]
- consumer.num_SV40( next_item )
-
-
-class _RecordConsumer(AbstractConsumer):
- """Consumer that converts a rebase record to a Record object.
-
- Members:
- data Record with rebase data.
-
- """
- def __init__(self):
- self.data = None
-
- def start_sequence(self):
- self.data = Record()
-
- def end_sequence(self):
- pass
-
- def sequence( self, line ):
- cols = string.split( line, ': ' )
- sequence = cols[ 1 ]
- sequence = string.strip( sequence )
- if( string.find( sequence, ' ...' ) != -1 ):
- cols = string.split( sequence, '...' )
- self.data.seq_5_to_3 = cols[ 1 ]
- elif( string.lower( sequence ) != 'unknown' ):
- seq_len = len( sequence ) / 2
- self.data.seq_5_to_3 = string.strip( sequence[ :seq_len ] )
- self.data.seq_3_to_5 = string.strip( sequence[ seq_len: ] )
-
- def methylation( self, line ):
- cols = string.split( line, ': ' )
- self.data.methylation = cols[ 1 ]
-
- def enzyme_num( self, line ):
- cols = string.split( line, ': ' )
- self.data.enzyme_num = int( cols[ 1 ] )
-
- def prototype( self, line ):
- cols = string.split( line, ': ' )
- self.data.prototype = cols[ 1 ]
-
- def source( self, line ):
- cols = string.split( line, ': ' )
- self.data.source = cols[ 1 ]
-
- def microorganism( self, line ):
- cols = string.split( line, ': ' )
- self.data.microorganism = cols[ 1 ]
-
- def temperature( self, line ):
- cols = string.split( line, ':' )
- cols = string.split( cols[ 1 ], ' ' )
- self.data.temperature = cols[ 1 ]
-
- def data_entered( self, line ):
- cols = string.split( line, ':' )
- cols = string.split( cols[ 1 ] )
- self.data.date_entered = string.join( cols[ :3 ] )
-
- def data_modified( self, line ):
- cols = string.split( line, ':' )
- cols = string.split( cols[ 1 ] )
- self.data.date_modified = string.join( cols[ :3 ] )
-
- def num_Adeno2( self, line ):
- cols = string.split( line, ': ' )
- self.data.num_Adeno2 = int( cols[ 1 ] )
-
- def num_Lambda( self, line ):
- cols = string.split( line, ': ' )
- self.data.num_Lambda = int( cols[ 1 ] )
-
- def num_pBR322( self, line ):
- cols = string.split( line, ': ' )
- self.data.num_pBR322 = int( cols[ 1 ] )
-
- def num_PhiX174( self, line ):
- cols = string.split( line, ': ' )
- self.data.num_PhiX174 = int( cols[ 1 ] )
-
- def num_SV40( self, line ):
- cols = string.split( line, ':' )
- cols = string.split( cols[ 1 ], ' ' )
- self.data.num_SV40 = cols[ 1 ]
-
-def index_file(filename, indexname, rec2key=None):
- """index_file(filename, ind/exname, rec2key=None)
-
- Index a rebase file. filename is the name of the file.
- indexname is the name of the dictionary. rec2key is an
- optional callback that takes a Record and generates a unique key
- (e.g. the accession number) for the record. If not specified,
- the sequence title will be used.
-
- """
- if not os.path.exists(filename):
- raise ValueError, "%s does not exist" % filename
-
- index = Index.Index(indexname, truncate=1)
- index[Dictionary._Dictionary__filename_key] = filename
-
- iter = Iterator(open(filename), parser=RecordParser())
- while 1:
- start = iter._uhandle.tell()
- rec = iter.next()
- length = iter._uhandle.tell() - start
-
- if rec is None:
- break
- if rec2key is not None:
- key = rec2key(rec)
- else:
- key = rec.title
-
- if not key:
- raise KeyError, "empty sequence key was produced"
- elif index.has_key(key):
- raise KeyError, "duplicate key %s found" % key
-
- index[key] = start, length
View
138 Bio/SGMLExtractor.py
@@ -1,138 +0,0 @@
-# Copyright 2002 by Katharine Lindner. All rights reserved.
-# This code is part of the Biopython distribution and governed by its
-# license. Please see the LICENSE file that should have been included
-# as part of this package.
-
-"""Code for more fancy file handles.
-
-
-Classes:
-SGMLExtractorHandle File object that strips tags and returns content from specified
-tags blocks.
-
-SGMLExtractor Object that scans for specified SGML tag pairs, removes any inner tags
-and returns the raw content.
-For example the object SGMLExtractor( [ 'h1' ] )on the following html file would return
-'House that Jack built'
-SGMLExtractor( [ 'dt' ] ) would return 'ratcatdogcowmaiden'
-SGMLExtractor( [ 'dt', 'dd' ] ) would return 'rat that ate the malttcat ate the rat' etc
-
-<h1>House that Jack Built</h1>
-<dl>
- <dt><big>rat</big></dt>
- <dd><big>ate the malt</big></dd>
- <dt><big>cat</big></dt>
- <dd><big>that ate the rat</big></dd>
- <dt><big>dog</big></dt>
- <dd><big>that worried the dats</big></dd>
- <dt><big>cow</big></dt>
- <dd><big>with crumpled horn</big></dd>
- <dt><big>maiden</big></dt>
- <dd><big>all forlorns</big></dd>
-</dl>
-"""
-
-import warnings
-warnings.warn("Bio.SGMLExtractor was deprecated, as all Biopython modules that use Bio.SGMLExtractor have been deprecated. If you do use this module, please contact the Biopython developers at biopython-dev@biopython.org to avoid permanent removal of this module")
-
-
-import os
-import string
-import StringIO
-import sgmllib
-
-
-class SGMLExtractorHandle:
- """A Python handle that automatically strips SGML tags and returns data from
- specified tag start and end pairs.
-
- """
- def __init__(self, handle, tags_of_interest = [] ):
- """SGMLExtractor(handle, tags_of_interest )
-
- handle is a file handle to SGML-formatted data.
- tags_of_interest is a list of root names for pairs of start and end tags
-
- """
- self._handle = handle
- self._stripper = SGMLExtractor( tags_of_interest )
-
- def read(self, *args, **keywds):
- data = self._handle.read( *args, **keywds)
- return self._stripper.strip(data)
-
- def readline(self, *args, **keywds):
- line = self._handle.readline( *args, **keywds)
- return self._stripper.strip(line)
-
- def readlines(self, *args, **keywds):
- lines = self._handle.readlines( *args, **keywds)
- for i in range(len(lines)):
- lines[i] = self._stripper.strip(str)
- return lines
-
- def __getattr__(self, attr):
- return getattr(self._handle, attr)
-
-
-def is_empty( items ):
- if( len( items ) > 0 ):
- return 0
- else:
- return 1
-
-class SGMLExtractor:
- class LocalParser(sgmllib.SGMLParser):
- def __init__(self, tags_of_interest = [] ):
- sgmllib.SGMLParser.__init__(self)
- self.data = ''
- self._instack = []
- self._tags_of_interest = []
- for tag in tags_of_interest:
- self._tags_of_interest.append( tag.lower() )
-
- def handle_data(self, data):
- if( not is_empty( self._instack ) ):
- self.data = self.data + data
-
- def unknown_starttag(self, tag, attrs):
- lower_tag = tag.lower()
- if( lower_tag in self._tags_of_interest ):
- self._instack.append( lower_tag )
-
- def unknown_endtag(self, tag ):
- if( not is_empty( self._instack ) ):
- open_tag = self._instack.pop()
- try:
- if( open_tag != tag.lower() ):
- self._instack.append( open_tag )
- except:
- print tag
-
-
- def __init__(self, tags_of_interest = [] ):
- self._parser = SGMLExtractor.LocalParser( tags_of_interest )
-
- def strip(self, str):
- """S.strip(str) -> string
-
- Strip the SGML tags from str.
-
- """
- if not str: # empty string, don't do anything.
- return ''
- # I need to make sure that I don't return an empty string if
- # the buffer is not empty. This can happen if there's a newline
- # character embedded within a tag. Thus, I'll first check to
- # see if the last character is a newline. If it is, and it's stripped
- # away, I'll add it back.
- is_newline = str[-1] in ['\n', '\r']
-
- self._parser.data = '' # clear the parser's data (don't reset)
- self._parser.feed(str)
- if self._parser.data:
- str = self._parser.data
- elif is_newline:
- str = '\n'
- return str
-
View
12 DEPRECATED
@@ -18,7 +18,7 @@ Declared obsolete in Release 1.48, deprecated in Release 1.49
Bio.builders, Bio.Std, Bio.StdHandler, Bio.Decode
=================================================
-Part of the Martel/Mindy infrastructure, these was deprecated in Release 1.49
+Part of the Martel/Mindy infrastructure, these were deprecated in Release 1.49
Bio.Writer and Bio.writers
==========================
@@ -72,17 +72,21 @@ Bio.ECell
Deprecated as of Release 1.47, as it appears to have no users, and the code
does not seem relevant for ECell 3.
+Bio.SGMLExtractor
+=================
+Deprecated as of Release 1.46, removed in Release 1.49.
+
Bio.Rebase
==========
-Deprecated as of Release 1.46.
+Deprecated as of Release 1.46, removed in Release 1.49.
Bio.Gobase
==========
-Deprecated as of Release 1.46.
+Deprecated as of Release 1.46, removed in Release 1.49.
Bio.CDD
=======
-Deprecated as of Release 1.46.
+Deprecated as of Release 1.46, removed in Release 1.49.
Bio.biblio
==========
View
3  setup.py
@@ -244,7 +244,6 @@ def is_Numpy_installed():
'Bio.Application',
'Bio.Blast',
'Bio.CAPS',
- 'Bio.CDD',
'Bio.Compass',
'Bio.Clustalw',
'Bio.Crystal',
@@ -266,7 +265,6 @@ def is_Numpy_installed():
'Bio.GenBank',
'Bio.Geo',
'Bio.GFF',
- 'Bio.Gobase',
'Bio.Graphics',
'Bio.HMM',
'Bio.IntelliGenetics',
@@ -298,7 +296,6 @@ def is_Numpy_installed():
'Bio.PopGen.GenePop',
'Bio.PopGen.SimCoal',
'Bio.Prosite',
- 'Bio.Rebase',
'Bio.Restriction',
'Bio.Restriction._Update',
'Bio.Saf',
Please sign in to comment.
Something went wrong with that request. Please try again.