Skip to content

Commit

Permalink
flake8 lib/galaxy_utils/
Browse files Browse the repository at this point in the history
  • Loading branch information
nsoranzo committed Aug 14, 2015
1 parent 4459e94 commit 1383ced
Show file tree
Hide file tree
Showing 6 changed files with 240 additions and 109 deletions.
1 change: 0 additions & 1 deletion .ci/flake8_blacklist.txt
Expand Up @@ -8,7 +8,6 @@ lib/galaxy/model/migrate/versions/
lib/galaxy/model/tool_shed_install/migrate/versions/
lib/galaxy/util/pastescript/serve.py
lib/galaxy/webapps/tool_shed/model/migrate/versions/
lib/galaxy_utils/
lib/pkg_resources.py
lib/tool_shed/
scripts/api/
Expand Down
36 changes: 25 additions & 11 deletions lib/galaxy_utils/sequence/fasta.py
@@ -1,22 +1,28 @@
#Dan Blankenberg
# Dan Blankenberg


class fastaSequence( object ):
    """Container for a single FASTA record: an identifier and its sequence text."""

    def __init__( self ):
        """Start with no identifier and an empty sequence string."""
        self.identifier = None
        # Raw sequence string as accumulated by the reader.
        self.sequence = ''

    def __len__( self ):
        """Return the number of characters in the stored sequence string."""
        return len( self.sequence )

    def __str__( self ):
        """Return the identifier and the sequence, each terminated by a newline."""
        return "%s\n%s\n" % ( self.identifier, self.sequence )


class fastaReader( object ):
def __init__( self, fh ):
# Keep the caller-supplied handle; next() drives it with readline()/tell()/seek() and close() closes it.
self.file = fh

def close( self ):
# Close the wrapped handle and pass back whatever its close() returns.
return self.file.close()

def next( self ):
line = self.file.readline()
#remove header comment lines
# remove header comment lines
while line and line.startswith( '#' ):
line = self.file.readline()
if not line:
Expand All @@ -29,29 +35,33 @@ def next( self ):
line = self.file.readline()
if not line or line.startswith( '>' ):
if line:
self.file.seek( offset ) #this causes sequence id lines to be read twice, once to determine previous sequence end and again when getting actual sequence; can we cache this to prevent it from being re-read?
self.file.seek( offset ) # this causes sequence id lines to be read twice, once to determine previous sequence end and again when getting actual sequence; can we cache this to prevent it from being re-read?
return rval
#454 qual test data that was used has decimal scores that don't have trailing spaces
#so we'll need to parse and build these sequences not based upon de facto standards
#i.e. in a less than ideal fashion
# 454 qual test data that was used has decimal scores that don't have trailing spaces
# so we'll need to parse and build these sequences not based upon de facto standards
# i.e. in a less than ideal fashion
line = line.rstrip()
if ' ' in rval.sequence or ' ' in line:
rval.sequence = "%s%s " % ( rval.sequence, line )
else:
rval.sequence += line
offset = self.file.tell()

def __iter__( self ):
while True:
yield self.next()


class fastaNamedReader( object ):
def __init__( self, fh ):
# Handle shared by this object and the sequential reader built on it below.
self.file = fh
# Sequential record reader over the same handle; get() and has_data() pull records through it.
self.reader = fastaReader( self.file )
# Maps a sequence identifier to a list of offsets recorded by get() (presumably file positions -- confirm against get()).
self.offset_dict = {}
# Set to True by get() once the reader raises StopIteration.
self.eof = False

def close( self ):
# Close the shared handle and pass back whatever its close() returns.
return self.file.close()

def get( self, sequence_id ):
if not isinstance( sequence_id, basestring ):
sequence_id = sequence_id.identifier
Expand All @@ -71,7 +81,7 @@ def get( self, sequence_id ):
fasta_seq = self.reader.next()
except StopIteration:
self.eof = True
break #eof, id not found, will return None
break # eof, id not found, will return None
if fasta_seq.identifier == sequence_id:
rval = fasta_seq
break
Expand All @@ -80,8 +90,9 @@ def get( self, sequence_id ):
self.offset_dict[ fasta_seq.identifier ] = []
self.offset_dict[ fasta_seq.identifier ].append( offset )
return rval

def has_data( self ):
#returns a string representation of remaining data, or empty string (False) if no data remaining
# returns a string representation of remaining data, or empty string (False) if no data remaining
eof = self.eof
count = 0
rval = ''
Expand All @@ -90,7 +101,7 @@ def has_data( self ):
if not eof:
offset = self.file.tell()
try:
fasta_seq = self.reader.next()
self.reader.next()
except StopIteration:
eof = True
self.file.seek( offset )
Expand All @@ -100,11 +111,14 @@ def has_data( self ):
rval = "%s%s" % ( rval, "An additional unknown number of sequences exist in the input that were not utilized." )
return rval


class fastaWriter( object ):
    """Write records to an already-open handle in FASTA layout."""

    def __init__( self, fh ):
        """Keep the writable handle *fh*; close() closes it."""
        self.file = fh

    def write( self, fastq_read ):
        """Write one record as a '>' header line followed by a sequence line.

        *fastq_read* must expose ``identifier`` and ``sequence`` attributes.
        The first character of the identifier is dropped before the '>' is
        prepended (presumably the leading '@' of a FASTQ header -- confirm
        against callers).
        """
        # this will include color space adapter base if applicable
        self.file.write( ">%s\n%s\n" % ( fastq_read.identifier[1:], fastq_read.sequence ) )

    def close( self ):
        """Close the wrapped handle and return whatever its close() returns."""
        return self.file.close()

0 comments on commit 1383ced

Please sign in to comment.