Merge pull request #2946 from nsoranzo/binascii_reduction

Remove unnecessary use of binascii
galaxyproject · Sep 21, 2016 · c81ec07 · c81ec07
2 parents b30d331 + 36b514c
commit c81ec07
Show file tree

Hide file tree

Showing 4 changed files with 41 additions and 43 deletions.
diff --git a/.ci/flake8_lint_include_list.txt b/.ci/flake8_lint_include_list.txt
@@ -13,6 +13,7 @@ lib/galaxy/dataset_collections/structure.py
 lib/galaxy/dataset_collections/subcollections.py
 lib/galaxy/dataset_collections/type_description.py
 lib/galaxy/datatypes/assembly.py
+lib/galaxy/datatypes/binary.py
 lib/galaxy/datatypes/constructive_solid_geometry.py
 lib/galaxy/datatypes/converters/bcf_bgzip_to_bcf_converter.py
 lib/galaxy/datatypes/converters/bcf_to_bcf_bgzip_converter.py
@@ -39,6 +40,7 @@ lib/galaxy/datatypes/converters/maf_to_interval_converter.py
 lib/galaxy/datatypes/converters/pbed_to_lped_converter.py
 lib/galaxy/datatypes/converters/picard_interval_list_to_bed6_converter.py
 lib/galaxy/datatypes/converters/pileup_to_interval_index_converter.py
+lib/galaxy/datatypes/converters/tabular_to_dbnsfp.py
 lib/galaxy/datatypes/converters/vcf_to_vcf_bgzip.py
 lib/galaxy/datatypes/converters/wiggle_to_array_tree_converter.py
 lib/galaxy/datatypes/coverage.py

diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py
@@ -12,12 +12,11 @@
 import zipfile
 
 import pysam
-
 from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE
 
-from galaxy.datatypes.metadata import MetadataElement, MetadataParameter, ListParameter, DictParameter
 from galaxy.datatypes import metadata
-from galaxy.util import nice_size, sqlite, which, FILENAME_VALID_CHARS
+from galaxy.datatypes.metadata import DictParameter, ListParameter, MetadataElement, MetadataParameter
+from galaxy.util import FILENAME_VALID_CHARS, nice_size, sqlite, which
 from . import data, dataproviders
 
 
@@ -115,8 +114,8 @@ class Idat( Binary ):
 
     def sniff( self, filename ):
         try:
-            header = open( filename ).read(4)
-            if binascii.b2a_hex( header ) == binascii.hexlify( 'IDAT' ):
+            header = open( filename, 'rb' ).read(4)
+            if header == b'IDAT':
                 return True
             return False
         except:
@@ -398,7 +397,7 @@ def sniff( self, filename ):
         # The first 4 bytes of any bam file is 'BAM\1', and the file is binary.
         try:
             header = gzip.open( filename ).read(4)
-            if binascii.b2a_hex( header ) == binascii.hexlify( 'BAM\1' ):
+            if header == b'BAM\1':
                 return True
             return False
         except:
@@ -523,9 +522,9 @@ def set_meta( self, dataset, overwrite=True, **kwd ):
 
     def get_cram_version( self, filename):
         try:
-            with open( filename, "r") as fh:
+            with open( filename, "rb") as fh:
                 header = fh.read(6)
-                return ord( header[4] ), ord( header[5] )
+            return ord( header[4] ), ord( header[5] )
         except Exception as exc:
             log.warning( '%s, get_cram_version Exception: %s', self, exc )
             return -1, -1
@@ -564,8 +563,8 @@ def set_peek( self, dataset, is_multi_byte=False ):
 
     def sniff( self, filename ):
         try:
-            header = open( filename ).read(4)
-            if header[0:4] == "CRAM":
+            header = open( filename, 'rb' ).read(4)
+            if header == b"CRAM":
                 return True
             return False
         except:
@@ -587,7 +586,7 @@ def sniff( self, filename ):
         # The first 3 bytes of any bcf file is 'BCF', and the file is binary.
         try:
             header = gzip.open( filename ).read(3)
-            if binascii.b2a_hex( header ) == binascii.hexlify( 'BCF' ):
+            if header == b'BCF':
                 return True
             return False
         except:
@@ -646,7 +645,7 @@ def __init__( self, **kwd ):
     def sniff( self, filename ):
         # The first 8 bytes of any hdf5 file are 0x894844460d0a1a0a
         try:
-            header = open( filename ).read(8)
+            header = open( filename, 'rb' ).read(8)
             if header == self._magic:
                 return True
             return False
@@ -703,8 +702,8 @@ def sniff( self, filename ):
         # The first 4 bytes of any sff file is '.sff', and the file is binary. For details
         # about the format, see http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=formats&m=doc&s=format
         try:
-            header = open( filename ).read(4)
-            if binascii.b2a_hex( header ) == binascii.hexlify( '.sff' ):
+            header = open( filename, 'rb' ).read(4)
+            if header == b'.sff':
                 return True
             return False
         except:
@@ -748,7 +747,7 @@ def _unpack( self, pattern, handle ):
 
     def sniff( self, filename ):
         try:
-            magic = self._unpack( "I", open( filename ) )
+            magic = self._unpack( "I", open( filename, 'rb' ) )
             return magic[0] == self._magic
         except:
             return False
@@ -795,8 +794,8 @@ def sniff(self, filename):
             # All twobit files start with a 16-byte header. If the file is smaller than 16 bytes, it's obviously not a valid twobit file.
             if os.path.getsize(filename) < 16:
                 return False
-            input = open(filename)
-            magic = struct.unpack(">L", input.read(TWOBIT_MAGIC_SIZE))[0]
+            header = open(filename, 'rb').read(TWOBIT_MAGIC_SIZE)
+            magic = struct.unpack(">L", header)[0]
             if magic == TWOBIT_MAGIC_NUMBER or magic == TWOBIT_MAGIC_NUMBER_SWAP:
                 return True
         except IOError:
@@ -864,8 +863,8 @@ def sniff( self, filename ):
         # The first 16 bytes of any SQLite3 database file is 'SQLite format 3\0', and the file is binary. For details
         # about the format, see http://www.sqlite.org/fileformat.html
         try:
-            header = open(filename).read(16)
-            if binascii.b2a_hex(header) == binascii.hexlify('SQLite format 3\0'):
+            header = open(filename, 'rb').read(16)
+            if header == b'SQLite format 3\0':
                 return True
             return False
         except:
@@ -1074,8 +1073,8 @@ def sniff( self, filename ):
         For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure
         """
         try:
-            header = open(filename).read(8)
-            if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'):
+            header = open(filename, 'rb').read(8)
+            if header == b'NCBI.sra':
                 return True
             else:
                 return False
@@ -1104,14 +1103,14 @@ class RData( Binary ):
     file_ext = 'RData'
 
     def sniff( self, filename ):
-        rdata_header = binascii.hexlify('RDX2\nX\n')
+        rdata_header = b'RDX2\nX\n'
         try:
-            header = open(filename).read(7)
-            if binascii.b2a_hex(header) == rdata_header:
+            header = open(filename, 'rb').read(7)
+            if header == rdata_header:
                 return True
 
             header = gzip.open( filename ).read(7)
-            if binascii.b2a_hex(header) == rdata_header:
+            if header == rdata_header:
                 return True
         except:
             return False
@@ -1124,12 +1123,12 @@ class OxliBinary(Binary):
     @staticmethod
     def _sniff(filename, oxlitype):
         try:
-            with open(filename) as fileobj:
+            with open(filename, 'rb') as fileobj:
                 header = fileobj.read(4)
-                if binascii.b2a_hex(header) == binascii.hexlify('OXLI'):
+                if header == b'OXLI':
                     fileobj.read(1)  # skip the version number
                     ftype = fileobj.read(1)
-                    if binascii.b2a_hex(ftype) == oxlitype:
+                    if binascii.hexlify(ftype) == oxlitype:
                         return True
             return False
         except IOError:
@@ -1154,7 +1153,7 @@ class OxliCountGraph(OxliBinary):
     """
 
     def sniff(self, filename):
-        return OxliBinary._sniff(filename, "01")
+        return OxliBinary._sniff(filename, b"01")
 
 Binary.register_sniffable_binary_format("oxli.countgraph", "oxlicg",
                                         OxliCountGraph)
@@ -1178,7 +1177,7 @@ class OxliNodeGraph(OxliBinary):
     """
 
     def sniff(self, filename):
-        return OxliBinary._sniff(filename, "02")
+        return OxliBinary._sniff(filename, b"02")
 
 Binary.register_sniffable_binary_format("oxli.nodegraph", "oxling",
                                         OxliNodeGraph)
@@ -1203,7 +1202,7 @@ class OxliTagSet(OxliBinary):
     """
 
     def sniff(self, filename):
-        return OxliBinary._sniff(filename, "03")
+        return OxliBinary._sniff(filename, b"03")
 
 Binary.register_sniffable_binary_format("oxli.tagset", "oxlits", OxliTagSet)
 
@@ -1224,7 +1223,7 @@ class OxliStopTags(OxliBinary):
     """
 
     def sniff(self, filename):
-        return OxliBinary._sniff(filename, "04")
+        return OxliBinary._sniff(filename, b"04")
 
 Binary.register_sniffable_binary_format("oxli.stoptags", "oxlist",
                                         OxliStopTags)
@@ -1249,7 +1248,7 @@ class OxliSubset(OxliBinary):
     """
 
     def sniff(self, filename):
-        return OxliBinary._sniff(filename, "05")
+        return OxliBinary._sniff(filename, b"05")
 
 Binary.register_sniffable_binary_format("oxli.subset", "oxliss", OxliSubset)
 
@@ -1273,7 +1272,7 @@ class OxliGraphLabels(OxliBinary):
     """
 
     def sniff(self, filename):
-        return OxliBinary._sniff(filename, "06")
+        return OxliBinary._sniff(filename, b"06")
 
 Binary.register_sniffable_binary_format("oxli.graphlabels", "oxligl",
                                         OxliGraphLabels)
@@ -1354,9 +1353,9 @@ def display_peek( self, dataset ):
 
     def sniff( self, filename ):
         try:
-            with open( filename, 'r' ) as f:
+            with open( filename, 'rb' ) as f:
                 header = f.read(3)
-            if binascii.b2a_hex( header ) == binascii.hexlify( 'CDF' ):
+            if header == b'CDF':
                 return True
             return False
         except:

diff --git a/lib/galaxy/datatypes/proteomics.py b/lib/galaxy/datatypes/proteomics.py
@@ -1,7 +1,6 @@
 """
 Proteomics Datatypes
 """
-import binascii
 import logging
 import re
 
@@ -272,10 +271,9 @@ def sniff(self, filename):
         # This combination represents 17 bytes, but to play safe we read 20 bytes from
         # the start of the file.
         try:
-            header = open(filename).read(20)
-            hexheader = binascii.b2a_hex(header)
-            finnigan = binascii.hexlify('F\0i\0n\0n\0i\0g\0a\0n')
-            if hexheader.find(finnigan) != -1:
+            header = open(filename, 'rb').read(20)
+            finnigan = b'F\0i\0n\0n\0i\0g\0a\0n'
+            if header.find(finnigan) != -1:
                 return True
             return False
         except:

diff --git a/lib/galaxy/util/checkers.py b/lib/galaxy/util/checkers.py
@@ -1,4 +1,3 @@
-import binascii
 import bz2
 import gzip
 import imghdr
@@ -95,7 +94,7 @@ def check_gzip( file_path ):
     # for sff format.
     try:
         header = gzip.open( file_path ).read(4)
-        if binascii.b2a_hex( header ) == binascii.hexlify( '.sff' ):
+        if header == b'.sff':
             return ( True, True )
     except:
         return( False, False )