Skip to content

Commit

Permalink
*** empty log message ***
Browse files (browse the repository at this point in the history)
  • Loading branch information
katel committed Feb 28, 2001
1 parent 0ac9a8d commit 2c1cd66
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 110 deletions.
141 changes: 83 additions & 58 deletions Bio/UniGene/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import Bio.File
import Martel
from mx import TextTools
import unigene_format



Expand All @@ -16,10 +15,10 @@ def reset( self ):
sgmllib.SGMLParser.reset( self )
self.text = ''
self.queue = UserDict.UserDict()
self.taglist = []
self.tag = 'html'
self.nextkey = ''
self.table = ''
self.open_tag_stack = []
self.open_tag = 'open_html'
self.key_waiting = ''
self.master_key = ''
self.context = 'general_info'

def parse( self, handle ):
Expand All @@ -43,9 +42,11 @@ def feed( self, handle ):
text = ''
while 1:
line = uhandle.readline()
if( string.strip( line ) == '' ):
line = string.strip( line )
if( line == '' ):
break
text = text + line
text = text + ' ' + line

sgmllib.SGMLParser.feed( self, text )


Expand All @@ -56,122 +57,146 @@ def handle_data(self, newtext ):

def start_a( self, attrs ):
if( self.context == 'seq_info' ):
self.text = ''
if( self.open_tag != 'open_b' ):
self.text = ''

# self.queue.append( attrs )

def end_a( self ):
if( self.context == 'seq_info' ):
self.nextkey = self.text
self.text = ''
if( self.open_tag != 'open_b' ):
if( self.key_waiting == '' ):
self.key_waiting = self.text
self.text = ''

def start_b( self, attrs ):

self.taglist.append( self.tag )
self.tag = 'label'
self.text = ''
self.open_tag_stack.append( self.open_tag )
self.open_tag = 'open_b'
if( self.key_waiting == '' ):
self.text = ''

def end_b( self ):
key = string.strip( self.text )
if( self.text[ :15 ] == 'UniGene Cluster' ):
self.queue[ 'UniGene Cluster' ] = self.text[ 16: ]
self.text = ''
elif( self.key_waiting == '' ):
self.extract_key()

def extract_key( self ):
text = string.strip( self.text )
key = string.join( string.split( text ) )
words = string.split( key )
key = string.join( words[ :2 ] )
self.text = ''

try:
self.tag = self.taglist.pop()
self.open_tag = self.open_tag_stack.pop()
except:
self.tag = 'html'
if( self.tag == 'table_data' ):
self.open_tag = 'open_html'
if( self.open_tag == 'open_table_data' ):
if( self.context == 'general_info' ):
self.nextkey = key
self.text = ''
if( self.key_waiting == '' ):
self.key_waiting = key
self.text = ''
elif( self.context == 'seq_info' ):
if( key == 'Key to Symbols' ):
if( text == 'Key to Symbols' ):
self.context = 'legend'
self.table = key
self.master_key = key
elif( self.context == 'general_info' ):
self.table = key
self.master_key = key
if( string.find( key, 'SEQUENCE' ) != -1 ):
self.context = 'seq_info'
self.queue[ key ] = UserDict.UserDict()
elif( self.context == 'seq_info' ):
self.queue[ key ] = UserDict.UserDict()
self.table = key
self.master_key = key



def start_table( self, attrs ):
self.taglist.append( self.tag )
self.tag = 'table'
self.open_tag_stack.append( self.open_tag )
self.open_tag = 'open_table'

def end_table( self ):
try:
self.tag = self.taglist.pop()
self.open_tag = self.open_tag_stack.pop()
except:
self.tag = 'html'
self.open_tag = 'open_html'
self.key_waiting = ''

def start_tr( self, attrs ):
self.taglist.append( self.tag )
self.tag = 'table_row'
self.open_tag_stack.append( self.open_tag )
self.open_tag = 'open_table_row'
self.text = ''

def end_tr( self ):
try:
self.tag = self.taglist.pop()
self.open_tag = self.open_tag_stack.pop()
except:
self.tag = 'html'
self.open_tag = 'open_html'
text = self.text
self.text = ''
if( text[ 0 ] == ':' ):
text = text[ 1: ]
if( self.context == 'general_info' ):
self.queue[ self.table ][ self.nextkey ] = text
elif( self.context == 'seq_info' ):
self.queue[ self.table ][ self.nextkey ] = text
if text:
self.text = ''
if( text[ 0 ] == ':' ):
text = text[ 1: ]
text = string.join( string.split( text ) )
if( ( self.context == 'general_info' ) or \
( self.context == 'seq_info' ) ):
try:
contents = self.queue[ self.master_key ][ self.key_waiting ]
if( type( contents ) == type( [] ) ):
contents.append( text )
else:
self.queue[ self.master_key ][ self.key_waiting ] = \
[ contents , text ]
except:
self.queue[ self.master_key ][ self.key_waiting ] = text


self.key_waiting = ''



def start_td( self, attrs ):
self.taglist.append( self.tag )
self.tag = 'table_data'
self.open_tag_stack.append( self.open_tag )
self.open_tag = 'open_table_data'

def end_td( self ):
try:
self.tag = self.taglist.pop()
self.open_tag = self.open_tag_stack.pop()
except:
self.tag = 'html'
self.open_tag = 'open_html'
if( self.context == 'seq_info' ):
self.text = self.text + ' '

def print_item( self, item ):
def print_item( self, item, level = 1 ):
indent = ' '
for j in range( 0, level ):
indent = indent + ' '
if( type( item ) == type( '' ) ):
if( item != '' ):
print item
elif( type( item ) == type( [] ) ):
print '%s%s' % ( indent, item )
elif( type( item ) == type([])):
for subitem in item:
self.print_item( subitem )
elif( type( item ) == type( {} ) ):
self.print_item( subitem, level + 1 )
elif( isinstance( item, UserDict.UserDict ) ):
for subitem in item.keys():
self.print_item( subitem )
self.print_item( item[ subitem ] )
print '%skey is %s' % ( indent, subitem )
self.print_item( item[ subitem ], level + 1 )
else:
print item

def print_tags( self ):
print '\nTAGS\n'
for key in self.queue.keys():
print key
print 'key %s' % key
self.print_item( self.queue[ key ] )

def join_tags( self ):
self.data = '\n'.join( self.queue ) + '\n'
print self.data


if( __name__ == '__main__' ):
handle = urllib.urlopen( 'http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=Hs&CID=222015&OPT=text')
handle = open( 'Hs13225.htm')
undo_handle = Bio.File.UndoHandle( handle )
unigene_parser = UniGeneParser()
unigene_parser.parse( handle )
unigene_parser.print_tags()
# unigene_parser.print_data()


65 changes: 13 additions & 52 deletions Tests/output/test_unigene
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
test_unigene
Warning: can't open .\output\test_unigene
testing Bt145.htm
key EXPRESSION INFORMATION
key is Note
Expand Down Expand Up @@ -91,61 +90,24 @@ key EST SEQUENCES
cDNA clone IMAGE:574684 5' read


testing Dr48.htm
testing Rn35.htm
key EXPRESSION INFORMATION
key is cDNA sources
heart, kidney
Whole embryo
key UniGene Cluster
Rn.35
key SEE ALSO
key is LocusLink
30675
key is HomoloGene
Dr.48
key MAPPING INFORMATION
key is Chromosome
LG 10
key mRNA/GENE SEQUENCES
key is L77146
Danio rerio heat shock cognate (hsc70) mRNA, complete cds
key is Y11413
D.rerio hsc70 mRNA
key UniGene Cluster
Dr.48
Rn.35
key EST SEQUENCES
key is AI313707
cDNA clone ZAE39 3' read
key is AW116799
cDNA clone 2601688 3' read
key is AI331807
cDNA clone (no-name) 3' read
key is AI385065
cDNA clone (no-name) 3' read
key is AW232778
cDNA clone (no-name) 3' read
key is AI667629
cDNA clone 2351498 3' read
key is AW077180
cDNA clone (no-name) 3' read
key is AA658601
cDNA clone zbr1406 kidney 3' read
key is AW233742
cDNA clone (no-name) 3' read
key is AI330589
cDNA clone (no-name) 3' read
key SELECTED MODEL
key is M. musculus
SP:P08109- HS7C_MOUSE HEAT SHOCK COGNATE 71 KDA PROTEIN93 % / 648 aa
key is C. elegans
PID:g3924750- similar to HSP-1 heat shock 70kd protein A83 % / 648 aa
key is D. melanogaster
SP:P11147- HS7D_DROME HEAT SHOCK 70 KDA PROTEIN COGNATE 483 % / 648 aa
key is S. cerevisiae
PID:g349747- Ssa1p: Heat shock protein of HSP70 family73 % / 648 aa
key is E. coli
PID:g1786196- chaperone Hsp7050 % / 566 aa
key is R. norvegicus
SP:P08109- HS7C_MOUSE HEAT SHOCK COGNATE 71 KDA PROTEIN93 % / 648 aa
key is H. sapiens
SP:P11142- HS7C_HUMAN HEAT SHOCK COGNATE 71 KD PROTEIN93 % / 648 aa
key is AA859652
cDNA clone UI-R-E0-bs-b-06-0-UI Whole embryo 3' read
key is BF547995
cDNA clone UI-R-E0-bs-b-06-0-UI Whole embryo 5' read
key is BE098081
cDNA clone UI-R-BJ1-asw-f-07-0-UI 3' read
key is BE101172
cDNA clone UI-R-BJ1-auh-h-09-0-UI 3' read


testing Rn20.htm
Expand Down Expand Up @@ -333,4 +295,3 @@ key EST SEQUENCES
cDNA clone IMAGE:223702 Eye 5' read 2.6 kb


1 test OK.

0 comments on commit 2c1cd66

Please sign in to comment.