Permalink
Browse files

Removing Bio.Ndb (deprecated a year ago in Biopython 1.49)

  • Loading branch information...
peterjc committed Sep 29, 2009
1 parent 7b2a1a5 commit 3972bd6bcc0bc1ee93e4dc5c6862a80f2ee24dd5
Showing with 1 addition and 366 deletions.
  1. +0 −69 Bio/Ndb/PR0004.htm
  2. +0 −296 Bio/Ndb/__init__.py
  3. +1 −0 DEPRECATED
  4. +0 −1 setup.py
View
@@ -1,69 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
-<!-- saved from url=(0062)http://ndbserver.rutgers.edu/NDB/NDBATLAS/P/pr0004/pr0004.html -->
-<HTML><HEAD><TITLE>NDB Atlas Entry for Structure PR0004</TITLE>
-<META http-equiv=Content-Type content="text/html; charset=windows-1252">
-<META content="MSHTML 5.50.4134.600" name=GENERATOR></HEAD>
-<BODY bgColor=#ffffff>
-<H1>NDB ID: PR0004 </H1>
-<P>
-<H2>Nucleic acid features </H2>tRNA, modified, mismatch
-<H2>Compound name </H2>ELONGATION FACTOR TU/CYSTEINYL TRNA COMPLEX
-<H2>Sequence in asymmetric unit </H2>
-<UL>
- <LI>Chain A: G G C G C G U 4SU A A C A A A G C G G H2U H2U A U G U A G C G G A
- PSU U G C A MIA A PSU C C G U C U A G U C C G G T PSU C G A C U C C G G A A C
- G C G C C U C C A
- <LI>Chain B: ALA LYS GLY GLU PHE ILE ARG THR LYS PRO HIS VAL ASN VAL GLY THR
- ILE GLY HIS VAL ASP HIS GLY LYS THR THR LEU THR ALA ALA LEU THR TYR VAL ALA
- ALA ALA GLU ASN PRO ASN VAL GLU VAL LYS ASP TYR GLY ASP ILE ASP LYS ALA PRO
- GLU GLU ARG ALA ARG GLY ILE THR ILE ASN THR ALA HIS VAL GLU TYR GLU THR ALA
- LYS ARG HIS TYR SER HIS VAL ASP CYS PRO GLY HIS ALA ASP TYR ILE LYS ASN MET
- ILE THR GLY ALA ALA GLN MET ASP GLY ALA ILE LEU VAL VAL SER ALA ALA ASP GLY
- PRO MET PRO GLN THR ARG GLU HIS ILE LEU LEU ALA ARG GLN VAL GLY VAL PRO TYR
- ILE VAL VAL PHE MET ASN LYS VAL ASP MET VAL ASP ASP PRO GLU LEU LEU ASP LEU
- VAL GLU MET GLU VAL ARG ASP LEU LEU ASN GLN TYR GLU PHE PRO GLY ASP GLU VAL
- PRO VAL ILE ARG GLY SER ALA LEU LEU ALA LEU GLU GLU MET HIS LYS ASN PRO LYS
- THR LYS ARG GLY GLU ASN GLU TRP VAL ASP LYS ILE TRP GLU LEU LEU ASP ALA ILE
- ASP GLU TYR ILE PRO THR PRO VAL ARG ASP VAL ASP LYS PRO PHE LEU MET PRO VAL
- GLU ASP VAL PHE THR ILE THR GLY ARG GLY THR VAL ALA THR GLY ARG ILE GLU ARG
- GLY LYS VAL LYS VAL GLY ASP GLU VAL GLU ILE VAL GLY LEU ALA PRO GLU THR ARG
- LYS THR VAL VAL THR GLY VAL GLU MET HIS ARG LYS THR LEU GLN GLU GLY ILE ALA
- GLY ASP ASN VAL GLY LEU LEU LEU ARG GLY VAL SER ARG GLU GLU VAL GLU ARG GLY
- GLN VAL LEU ALA LYS PRO GLY SER ILE THR PRO HIS THR LYS PHE GLU ALA SER VAL
- TYR ILE LEU LYS LYS GLU GLU GLY GLY ARG HIS THR GLY PHE PHE THR GLY TYR ARG
- PRO GLN PHE TYR PHE ARG THR THR ASP VAL THR GLY VAL VAL ARG LEU PRO GLN GLY
- VAL GLU MET VAL MET PRO GLY ASP ASN VAL THR PHE THR VAL GLU LEU ILE LYS PRO
- VAL ALA LEU GLU GLU GLY LEU ARG PHE ALA ILE ARG GLU GLY GLY ARG THR VAL GLY
- ALA GLY VAL VAL THR LYS ILE LEU GLU </LI></UL>
-<H2>Citation </H2>P. Nissen, S. Thirup, M. Kjeldgaard, J. Nyborg<BR>The Crystal
-Structure of Cys-tRNA-EF-Tu-GDPNP Reveals General and Specific Features of the
-Ternary Complex and in tRNA<BR><I>Structure,</I> <B>7</B>, pp. 143-156, 1999.
-<P>
-<H2>Space group </H2>F 2 2 2
-<H2>Cell constants </H2><PRE> A = 126.750 B = 132.980 C = 154.880 (Ångstroms)
- alpha = 90.00 beta = 90.00 gamma = 90.00 (degrees)
-</PRE>
-<H2>Crystallization conditions </H2>
-<UL>
- <LI>Method: vapor diffusion, hanging drop
- <LI>Drop: (NH<SUB>4</SUB>)<SUB>2</SUB>SO<SUB>4</SUB>, tris, MES,
- MgCl<SUB>2</SUB>, DTT </LI></UL>
-<H2>Refinement </H2>The structure was refined using the X-PLOR 3.851 program.
-The R value is 20.6 for 18043 reflections in the resolution range 10.0 to 2.6 Å
-with Fobs &gt; 0.0 sigma(Fobs).
-<P>
-<H2>Coordinates </H2>The <A
-href="http://ndbserver.rutgers.edu/NDB/NDBATLAS/coords/pdb-coord/pr0004.pdb">coordinates
-for the asymmetric unit</A> of this structure are stored in the NDB archive.
-<P>
-<P>
-<H2>Views of PR0004 </H2><A
-href="http://ndbserver.rutgers.edu/NDB/NDBATLAS/P/pr0004/pr0004_1_gif.html">One
-view of PR0004 </A>
-<P><PRE>
-
-</PRE>
-<HR>
-
-<P align=center><I>©1995-1998 The Nucleic Acid Database Project<BR>Rutgers, The
-State University of New Jersey </I></P></BODY></HTML>
View
@@ -1,296 +0,0 @@
-# Copyright 2002 by Katharine Lindner. All rights reserved.
-# This code is part of the Biopython distribution and governed by its
-# license. Please see the LICENSE file that should have been included
-# as part of this package.
-
-"""
-This module provided code to parse HTML files from NDB (DEPRECATED).
-
-This module provides an HTML parser designed for the NDB website
-http://ndbserver.rutgers.edu/ as it was circa 2002. The site has since
-been redesigned, breaking the parser. Bio.Ndb is therefore deprecated,
-and will be removed in a future release of Biopython.
-
-Classes:
-Record Holds NDB sequence data.
-NdbParser Parses NDB sequence data into a Record object.
-
-The algorithm is based on a state machine because the record has multiple
-sections and the handling of tags varies depending on the section.
-Citations have their own state machine.
-"""
-import warnings
-warnings.warn("Bio.Ndb has been deprecated as the NDB website it used to"\
- " parse has been redesigned.", DeprecationWarning)
-
-from types import *
-from Bio import File
-from Bio import Index
-from Bio.Crystal import Hetero
-from Bio.Crystal import Chain
-from Bio.Crystal import Crystal
-from Bio.SeqFeature import Reference
-import urllib
-import sgmllib
-from Bio.ParserSupport import *
-from Bio.SeqFeature import Reference
-
-
-class Record( dict ):
-
- def __init__( self ):
- self[ 'Id' ] = ''
- self[ 'Features' ] = ''
- self[ 'Name' ] = ''
- self[ 'Sequence' ] = Crystal( {} )
- self[ 'Citation' ] = Reference()
- self[ 'Space Group' ] = ''
- self[ 'Cell Constants' ] = {}
- self[ 'Crystallization Conditions' ] = []
- self[ 'Refinement' ] = ''
- self[ 'Coordinates' ] = ''
-
- def __str__( self ):
- keys = self.keys()
- keys.sort()
- out = ''
- for key in keys:
- val = self[ key ]
- if( type( val ) == type( [] ) ):
- out = out + '\n%s\n' % key
- for item in val:
- out = out + '%s\n' % item
-
- elif( type( val ) == type( {} ) ):
- out = out + '\n%s\n' % key
- subkeys = val.keys()
- subkeys.sort()
- for item in subkeys:
- out = out + '%s : %s\n' % ( item, val[ item ] )
- elif( isinstance( val, dict ) ):
- out = out + '\n%s\n' % key
- subkeys = val.keys()
- subkeys.sort()
- for item in subkeys:
- out = out + '%s : %s\n' % ( item, val[ item ] )
-
- else:
- out = out + '%s: %s\n' % ( key, self[ key ] )
- return out
-
-def _parse_constants( text ):
- items = text.split( '=' )
- constants = {}
- key = ''
- for i in range( 0, ( len( items ) - 1 ) ):
- item = items[ i ]
- item = item.strip()
- separator = item.rfind( ' ' )
- if( separator < 0 ):
- separator = 0
- val = item[ :separator ]
- val = val.strip()
- if( key != '' ):
- constants[ key ] = val
- key = item[ separator: ]
- key = key.strip()
- constants[ key ] = items[ -1 ]
- return constants
-
-
-
-
-
-class NdbParser( sgmllib.SGMLParser ):
- """Parses Ndb sequence data into a Record object.
- data available at: http://ndbserver.rutgers.edu/NDB/NDBATLAS/index.html
- """
- def reset(self):
- sgmllib.SGMLParser.reset( self )
- self.ndb_dict = Record()
- self.text = ''
- self._space_group = ''
- self._state = 'id'
- self._reference_state = 'authors'
- self._current_reference = Reference()
-
- def parse(self, handle):
- self.reset()
- self.feed(handle)
- return self.ndb_dict
-
- def feed(self, handle):
- """feed(self, handle )
-
- Feed in ndb data for scanning. handle is a file-like object
- containing ndb data. consumer is a Consumer object that will
- receive events as the ndb data is scanned.
-
- """
- if isinstance(handle, File.UndoHandle):
- uhandle = handle
- else:
- uhandle = File.UndoHandle(handle)
- text = ''
- while 1:
- line = uhandle.readline()
- if( not line ):
- break
- line = line.strip()
- if( line[ -7: ] == '</HTML>' ):
- break
- text = text + ' ' + line
-
- sgmllib.SGMLParser.feed( self, text )
-
-
- def handle_data(self, newtext ):
- newtext = newtext.strip()
- self.text = self.text + newtext
-
- def start_h1( self, attrs ):
- self._flush_text()
-
- def end_h1( self ):
- text = self._flush_text()
- if( self._state == 'id' ):
- cols = text.split( ':' )
- self.ndb_dict[ 'Id' ] = ( cols[ 1 ] ).upper()
- self._state = 'id_found'
-
- def start_h2( self, attrs ):
- text = self._flush_text()
- if( self._state == 'features' ):
- self.ndb_dict[ 'Features' ] = text
- elif( self._state == 'name' ):
- self.ndb_dict[ 'Name' ] = text
- elif( self._state == 'sequence' ):
- pass
- elif( self._state == 'citation' ):
- if( self._reference_state == 'journal' ):
- self._current_reference.journal = text
- self.ndb_dict[ 'Citation' ] = self._current_reference
- elif( self._state == 'space' ):
- self._space_group = self._space_group + text
- self.ndb_dict[ 'Space Group' ] = self._space_group
- elif( self._state == 'constants' ):
- self.ndb_dict[ 'Cell Constants' ] = _parse_constants( text )
- elif( self._state == 'crystallization' ):
- pass
- elif( self._state == 'refinement' ):
- self.ndb_dict[ 'Refinement' ] = text
- elif( self._state == 'coordinates' ):
- self.ndb_dict[ 'Coordinates' ] = text
-
- def end_h2( self ):
- text = self._flush_text()
- text = text.lower()
- if( self._state == 'id' ):
- if( text.find( 'id' ) >= 0 ):
- cols = text.split( ':' )
- self.ndb_dict[ 'Id' ] = ( cols[ 1 ] ).upper()
- self._state = 'id_found'
- elif( text.find( 'feature' ) >= 0 ):
- self._state = 'features'
- elif( text.find( 'name' ) >= 0 ):
- self._state = 'name'
- elif( text.find( 'sequence' ) >= 0 ):
- self._state = 'sequence'
- elif( text.find( 'citation' ) >= 0 ):
- self._state = 'citation'
- elif( text.find( 'space' ) >= 0 ):
- self._state = 'space'
- elif( text.find( 'constants' ) >= 0 ):
- self._state = 'constants'
- elif( text.find( 'crystallization' ) >= 0 ):
- self._state = 'crystallization'
- elif( text.find( 'refinement' ) >= 0 ):
- self._state = 'refinement'
- elif( text.find( 'coordinates' ) >= 0 ):
- self._state = 'coordinates'
-
-
- def start_ul( self, attrs ):
- if( self._state == 'sequence' ):
- self._flush_text()
-
- elif( self._state == 'crystallization' ):
- self._flush_text()
-
- def end_ul( self ):
- if( self._state == 'sequence' ):
- self._parse_chain()
- elif( self._state == 'crystallization' ):
- text = self._flush_text()
- ( self.ndb_dict[ 'Crystallization Conditions' ] ).append( text )
- elif( self._state == 'citation' ):
- if( self._reference_state == 'journal' ):
- self._current_reference.journal = self._flush_text()
- self._reference_state = 'done'
-
- def start_sub( self, attrs ):
- if( self._state == 'space' ):
- self._space_group = self._space_group + self._flush_text()
-
- def end_sub( self ):
- if( self._state == 'space' ):
- self._space_group = self._space_group + '(%s) ' % self._flush_text()
-
- def start_li( self, attrs ):
- if( self._state == 'sequence' ):
- self._parse_chain()
- elif( self._state == 'crystallization' ):
- text = self._flush_text()
- ( self.ndb_dict[ 'Crystallization Conditions' ] ).append( text )
-
- def end_li( self ):
- if( self._state == 'sequence' ):
- self._parse_chain()
- elif( self._state == 'crystallization' ):
- text = self._flush_text()
- ( self.ndb_dict[ 'Crystallization Conditions' ] ).append( text )
-
- def do_br( self, attrs ):
- if( self._state == 'citation' ):
- if( self._reference_state == 'authors' ):
- self._current_reference.authors = self._flush_text()
- self._reference_state = 'title'
- elif( self._reference_state == 'title' ):
- self._current_reference.title = self._flush_text()
- self._reference_state = 'journal'
-
- def start_i( self, attrs ):
- pass
-
- def end_i( self ):
- if( self._state == 'references' ):
- if( self._reference_state == 'title' ):
- text = self._flush_text()
- self._current_reference.title = text
- self._reference_state = 'journal'
-
-
- def _parse_chain( self ):
- text = self._flush_text()
- text = text.strip()
- if( text.lower().startswith( 'chain' ) ):
- fields = text.split( ':' )
- words = fields[ 0 ].split()
- key = words[ 1 ]
- val = fields[ 1 ]
- self.ndb_dict[ 'Sequence' ][ key ] = val
-
-
-
- def _flush_text( self ):
- text = self.text.strip()
- self.text = ''
- return text[:]
-
-
-if( __name__ == '__main__' ):
- handle = open( 'PR0004.htm')
- undo_handle = File.UndoHandle( handle )
- ndb_parser = NdbParser()
- record = ndb_parser.parse( handle )
- print str( record )
View
@@ -95,6 +95,7 @@ API).
Bio.Ndb
=======
Deprecated in Release 1.49, as the website this parsed has been redesigned.
+Removed in Release 1.53.
Martel
======
View
@@ -229,7 +229,6 @@ def is_Numpy_installed():
'Bio.Motif',
'Bio.Motif.Parsers',
'Bio.Motif.Applications',
- 'Bio.Ndb',
'Bio.NeuralNetwork',
'Bio.NeuralNetwork.BackPropagation',
'Bio.NeuralNetwork.Gene',

0 comments on commit 3972bd6

Please sign in to comment.