diff --git a/.ci/flake8_lint_include_list.txt b/.ci/flake8_lint_include_list.txt index 85461e4d816e..9d1bebdee5a5 100644 --- a/.ci/flake8_lint_include_list.txt +++ b/.ci/flake8_lint_include_list.txt @@ -13,6 +13,7 @@ lib/galaxy/dataset_collections/structure.py lib/galaxy/dataset_collections/subcollections.py lib/galaxy/dataset_collections/type_description.py lib/galaxy/datatypes/assembly.py +lib/galaxy/datatypes/binary.py lib/galaxy/datatypes/constructive_solid_geometry.py lib/galaxy/datatypes/converters/bcf_bgzip_to_bcf_converter.py lib/galaxy/datatypes/converters/bcf_to_bcf_bgzip_converter.py @@ -39,6 +40,7 @@ lib/galaxy/datatypes/converters/maf_to_interval_converter.py lib/galaxy/datatypes/converters/pbed_to_lped_converter.py lib/galaxy/datatypes/converters/picard_interval_list_to_bed6_converter.py lib/galaxy/datatypes/converters/pileup_to_interval_index_converter.py +lib/galaxy/datatypes/converters/tabular_to_dbnsfp.py lib/galaxy/datatypes/converters/vcf_to_vcf_bgzip.py lib/galaxy/datatypes/converters/wiggle_to_array_tree_converter.py lib/galaxy/datatypes/coverage.py diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py index a1927c1ac54c..17a649877ecf 100644 --- a/lib/galaxy/datatypes/binary.py +++ b/lib/galaxy/datatypes/binary.py @@ -12,12 +12,11 @@ import zipfile import pysam - from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE -from galaxy.datatypes.metadata import MetadataElement, MetadataParameter, ListParameter, DictParameter from galaxy.datatypes import metadata -from galaxy.util import nice_size, sqlite, which, FILENAME_VALID_CHARS +from galaxy.datatypes.metadata import DictParameter, ListParameter, MetadataElement, MetadataParameter +from galaxy.util import FILENAME_VALID_CHARS, nice_size, sqlite, which from . import data, dataproviders @@ -115,8 +114,8 @@ class Idat( Binary ): def sniff( self, filename ): try: - header = open( filename ).read(4) - if binascii.b2a_hex( header ) == binascii.hexlify( 'IDAT' ): + header = open( filename, 'rb' ).read(4) + if header == b'IDAT': return True return False except: @@ -398,7 +397,7 @@ def sniff( self, filename ): # The first 4 bytes of any bam file is 'BAM\1', and the file is binary. try: header = gzip.open( filename ).read(4) - if binascii.b2a_hex( header ) == binascii.hexlify( 'BAM\1' ): + if header == b'BAM\1': return True return False except: @@ -523,9 +522,9 @@ def set_meta( self, dataset, overwrite=True, **kwd ): def get_cram_version( self, filename): try: - with open( filename, "r") as fh: + with open( filename, "rb") as fh: header = fh.read(6) - return ord( header[4] ), ord( header[5] ) + return ord( header[4] ), ord( header[5] ) except Exception as exc: log.warning( '%s, get_cram_version Exception: %s', self, exc ) return -1, -1 @@ -564,8 +563,8 @@ def set_peek( self, dataset, is_multi_byte=False ): def sniff( self, filename ): try: - header = open( filename ).read(4) - if header[0:4] == "CRAM": + header = open( filename, 'rb' ).read(4) + if header == b"CRAM": return True return False except: @@ -587,7 +586,7 @@ def sniff( self, filename ): # The first 3 bytes of any bcf file is 'BCF', and the file is binary. try: header = gzip.open( filename ).read(3) - if binascii.b2a_hex( header ) == binascii.hexlify( 'BCF' ): + if header == b'BCF': return True return False except: @@ -646,7 +645,7 @@ def __init__( self, **kwd ): def sniff( self, filename ): # The first 8 bytes of any hdf5 file are 0x894844460d0a1a0a try: - header = open( filename ).read(8) + header = open( filename, 'rb' ).read(8) if header == self._magic: return True return False @@ -703,8 +702,8 @@ def sniff( self, filename ): # The first 4 bytes of any sff file is '.sff', and the file is binary. For details # about the format, see http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=show&f=formats&m=doc&s=format try: - header = open( filename ).read(4) - if binascii.b2a_hex( header ) == binascii.hexlify( '.sff' ): + header = open( filename, 'rb' ).read(4) + if header == b'.sff': return True return False except: @@ -748,7 +747,7 @@ def _unpack( self, pattern, handle ): def sniff( self, filename ): try: - magic = self._unpack( "I", open( filename ) ) + magic = self._unpack( "I", open( filename, 'rb' ) ) return magic[0] == self._magic except: return False @@ -795,8 +794,8 @@ def sniff(self, filename): # All twobit files start with a 16-byte header. If the file is smaller than 16 bytes, it's obviously not a valid twobit file. if os.path.getsize(filename) < 16: return False - input = open(filename) - magic = struct.unpack(">L", input.read(TWOBIT_MAGIC_SIZE))[0] + header = open(filename, 'rb').read(TWOBIT_MAGIC_SIZE) + magic = struct.unpack(">L", header)[0] if magic == TWOBIT_MAGIC_NUMBER or magic == TWOBIT_MAGIC_NUMBER_SWAP: return True except IOError: @@ -864,8 +863,8 @@ def sniff( self, filename ): # The first 16 bytes of any SQLite3 database file is 'SQLite format 3\0', and the file is binary. For details # about the format, see http://www.sqlite.org/fileformat.html try: - header = open(filename).read(16) - if binascii.b2a_hex(header) == binascii.hexlify('SQLite format 3\0'): + header = open(filename, 'rb').read(16) + if header == b'SQLite format 3\0': return True return False except: @@ -1074,8 +1073,8 @@ def sniff( self, filename ): For details about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure """ try: - header = open(filename).read(8) - if binascii.b2a_hex(header) == binascii.hexlify('NCBI.sra'): + header = open(filename, 'rb').read(8) + if header == b'NCBI.sra': return True else: return False @@ -1104,14 +1103,14 @@ class RData( Binary ): file_ext = 'RData' def sniff( self, filename ): - rdata_header = binascii.hexlify('RDX2\nX\n') + rdata_header = b'RDX2\nX\n' try: - header = open(filename).read(7) - if binascii.b2a_hex(header) == rdata_header: + header = open(filename, 'rb').read(7) + if header == rdata_header: return True header = gzip.open( filename ).read(7) - if binascii.b2a_hex(header) == rdata_header: + if header == rdata_header: return True except: return False @@ -1124,12 +1123,12 @@ class OxliBinary(Binary): @staticmethod def _sniff(filename, oxlitype): try: - with open(filename) as fileobj: + with open(filename, 'rb') as fileobj: header = fileobj.read(4) - if binascii.b2a_hex(header) == binascii.hexlify('OXLI'): + if header == b'OXLI': fileobj.read(1) # skip the version number ftype = fileobj.read(1) - if binascii.b2a_hex(ftype) == oxlitype: + if binascii.hexlify(ftype) == oxlitype: return True return False except IOError: @@ -1154,7 +1153,7 @@ class OxliCountGraph(OxliBinary): """ def sniff(self, filename): - return OxliBinary._sniff(filename, "01") + return OxliBinary._sniff(filename, b"01") Binary.register_sniffable_binary_format("oxli.countgraph", "oxlicg", OxliCountGraph) @@ -1178,7 +1177,7 @@ class OxliNodeGraph(OxliBinary): """ def sniff(self, filename): - return OxliBinary._sniff(filename, "02") + return OxliBinary._sniff(filename, b"02") Binary.register_sniffable_binary_format("oxli.nodegraph", "oxling", OxliNodeGraph) @@ -1203,7 +1202,7 @@ class OxliTagSet(OxliBinary): """ def sniff(self, filename): - return OxliBinary._sniff(filename, "03") + return OxliBinary._sniff(filename, b"03") Binary.register_sniffable_binary_format("oxli.tagset", "oxlits", OxliTagSet) @@ -1224,7 +1223,7 @@ class OxliStopTags(OxliBinary): """ def sniff(self, filename): - return OxliBinary._sniff(filename, "04") + return OxliBinary._sniff(filename, b"04") Binary.register_sniffable_binary_format("oxli.stoptags", "oxlist", OxliStopTags) @@ -1249,7 +1248,7 @@ class OxliSubset(OxliBinary): """ def sniff(self, filename): - return OxliBinary._sniff(filename, "05") + return OxliBinary._sniff(filename, b"05") Binary.register_sniffable_binary_format("oxli.subset", "oxliss", OxliSubset) @@ -1273,7 +1272,7 @@ class OxliGraphLabels(OxliBinary): """ def sniff(self, filename): - return OxliBinary._sniff(filename, "06") + return OxliBinary._sniff(filename, b"06") Binary.register_sniffable_binary_format("oxli.graphlabels", "oxligl", OxliGraphLabels) @@ -1354,9 +1353,9 @@ def display_peek( self, dataset ): def sniff( self, filename ): try: - with open( filename, 'r' ) as f: + with open( filename, 'rb' ) as f: header = f.read(3) - if binascii.b2a_hex( header ) == binascii.hexlify( 'CDF' ): + if header == b'CDF': return True return False except: diff --git a/lib/galaxy/datatypes/proteomics.py b/lib/galaxy/datatypes/proteomics.py index dcf712e4cc1a..f1dd1285b8fd 100644 --- a/lib/galaxy/datatypes/proteomics.py +++ b/lib/galaxy/datatypes/proteomics.py @@ -1,7 +1,6 @@ """ Proteomics Datatypes """ -import binascii import logging import re @@ -272,10 +271,9 @@ def sniff(self, filename): # This combination represents 17 bytes, but to play safe we read 20 bytes from # the start of the file. try: - header = open(filename).read(20) - hexheader = binascii.b2a_hex(header) - finnigan = binascii.hexlify('F\0i\0n\0n\0i\0g\0a\0n') - if hexheader.find(finnigan) != -1: + header = open(filename, 'rb').read(20) + finnigan = b'F\0i\0n\0n\0i\0g\0a\0n' + if header.find(finnigan) != -1: return True return False except: diff --git a/lib/galaxy/util/checkers.py b/lib/galaxy/util/checkers.py index b2e08ed5e838..9cf66ea7c51d 100644 --- a/lib/galaxy/util/checkers.py +++ b/lib/galaxy/util/checkers.py @@ -1,4 +1,3 @@ -import binascii import bz2 import gzip import imghdr @@ -95,7 +94,7 @@ def check_gzip( file_path ): # for sff format. try: header = gzip.open( file_path ).read(4) - if binascii.b2a_hex( header ) == binascii.hexlify( '.sff' ): + if header == b'.sff': return ( True, True ) except: return( False, False )