Type checking fixes for galaxy-data (datatypes and models).

galaxyproject · Dec 28, 2020 · a7b6f08
1 parent 4ba4097
commit a7b6f08
Show file tree

Hide file tree

Showing 26 changed files with 118 additions and 79 deletions.
diff --git a/lib/galaxy/datatypes/anvio.py b/lib/galaxy/datatypes/anvio.py
@@ -6,6 +6,7 @@
 import logging
 import os
 import sys
+from typing import Optional
 
 from galaxy.datatypes.metadata import MetadataElement
 from galaxy.datatypes.text import Html
@@ -79,7 +80,7 @@ def display_peek(self, dataset):
 
 class AnvioDB(AnvioComposite):
     """Class for AnvioDB database files."""
-    _anvio_basename = None
+    _anvio_basename: Optional[str] = None
     MetadataElement(name="anvio_basename", default=_anvio_basename, desc="Basename", readonly=True)
     file_ext = 'anvio_db'
 

diff --git a/lib/galaxy/datatypes/binary.py b/lib/galaxy/datatypes/binary.py
@@ -14,6 +14,7 @@
 import zipfile
 from collections import OrderedDict
 from json import dumps
+from typing import Optional
 
 import h5py
 import pysam
@@ -51,7 +52,7 @@ def register_sniffable_binary_format(data_type, ext, type_class):
     def register_unsniffable_binary_ext(ext):
         """Deprecated method."""
 
-    def set_peek(self, dataset, is_multi_byte=False):
+    def set_peek(self, dataset, **kwd):
         """Set the peek and blurb text"""
         if not dataset.dataset.purged:
             dataset.peek = 'binary data'
@@ -272,7 +273,7 @@ class BamNative(CompressedArchive):
     edam_format = "format_2572"
     edam_data = "data_0863"
     file_ext = "unsorted.bam"
-    sort_flag = None
+    sort_flag: Optional[str] = None
 
     MetadataElement(name="bam_version", default=None, desc="BAM Version", param=MetadataParameter, readonly=True, visible=False, optional=True, no_value=None)
     MetadataElement(name="sort_order", default=None, desc="Sort Order", param=MetadataParameter, readonly=True, visible=False, optional=True, no_value=None)
@@ -1122,7 +1123,7 @@ class GmxBinary(Binary):
     Base class for GROMACS binary files - xtc, trr, cpt
     """
 
-    magic_number = None  # variables to be overwritten in the child class
+    magic_number: Optional[int] = None  # variables to be overwritten in the child class
     file_ext = ""
 
     def sniff(self, filename):

diff --git a/lib/galaxy/datatypes/blast.py b/lib/galaxy/datatypes/blast.py
@@ -89,6 +89,7 @@ def sniff_prefix(self, file_prefix):
             return False
         return True
 
+    @staticmethod
     def merge(split_files, output_file):
         """Merging multiple XML files is non-trivial and must be done in subclasses."""
         if len(split_files) == 1:
@@ -167,7 +168,6 @@ def merge(split_files, output_file):
                 h.close()
             out.write("  </BlastOutput_iterations>\n")
             out.write("</BlastOutput>\n")
-    merge = staticmethod(merge)
 
 
 class _BlastDb(Data):

diff --git a/lib/galaxy/datatypes/constructive_solid_geometry.py b/lib/galaxy/datatypes/constructive_solid_geometry.py
@@ -1,6 +1,10 @@
+# TODO: revisit ignoring type and write some tests for this; the multi-inheritance in
+# this file is challenging and it should be broken into true mixins.
+# type: ignore
 """
 Constructive Solid Geometry file formats.
 """
+
 import abc
 
 from galaxy import util

diff --git a/lib/galaxy/datatypes/converters/fasta_to_tabular_converter.py b/lib/galaxy/datatypes/converters/fasta_to_tabular_converter.py
@@ -13,8 +13,6 @@
 import os
 import sys
 
-seq_hash = {}
-
 
 def __main__():
     infile = sys.argv[1]

diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py
@@ -7,6 +7,7 @@
 import tempfile
 from collections import OrderedDict
 from inspect import isclass
+from typing import Any, Dict, Optional
 
 import webob.exc
 from markupsafe import escape
@@ -116,27 +117,27 @@ class Data(metaclass=DataMeta):
     # Add metadata elements
     MetadataElement(name="dbkey", desc="Database/Build", default="?", param=metadata.DBKeyParameter, multiple=False, no_value="?")
     # Stores the set of display applications, and viewing methods, supported by this datatype
-    supported_display_apps = {}
+    supported_display_apps: Dict[str, Any] = {}
     # If False, the peek is regenerated whenever a dataset of this type is copied
     copy_safe_peek = True
     # The dataset contains binary data --> do not space_to_tab or convert newlines, etc.
     # Allow binary file uploads of this type when True.
     is_binary = True
     # Composite datatypes
-    composite_type = None
-    composite_files = OrderedDict()
+    composite_type: Optional[str] = None
+    composite_files: Dict[str, Any] = OrderedDict()
     primary_file_name = 'index'
     # Allow user to change between this datatype and others. If left to None,
     # datatype change is allowed if the datatype is not composite.
-    allow_datatype_change = None
+    allow_datatype_change: Optional[bool] = None
     # A per datatype setting (inherited): max file size (in bytes) for setting optional metadata
     _max_optional_metadata_filesize = None
 
     # Trackster track type.
-    track_type = None
+    track_type: Optional[str] = None
 
     # Data sources.
-    data_sources = {}
+    data_sources: Dict[str, str] = {}
 
     def __init__(self, **kwd):
         """Initialize the datatype"""
@@ -180,7 +181,7 @@ def init_meta(self, dataset, copy_from=None):
         if copy_from:
             dataset.metadata = copy_from.metadata
 
-    def set_meta(self, dataset, overwrite=True, **kwd):
+    def set_meta(self, dataset: Any, overwrite=True, **kwd):
         """Unimplemented method, allows guessing of metadata from contents of file"""
         return True
 
@@ -681,11 +682,11 @@ def __substitute_composite_key(self, key, composite_file, dataset=None):
         return key
 
     @property
-    def writable_files(self, dataset=None):
+    def writable_files(self):
         files = OrderedDict()
         if self.composite_type != 'auto_primary_file':
             files[self.primary_file_name] = self.__new_composite_file(self.primary_file_name)
-        for key, value in self.get_composite_files(dataset=dataset).items():
+        for key, value in self.get_composite_files().items():
             files[key] = value
         return files
 
@@ -718,6 +719,7 @@ def matches_any(self, target_datatypes):
         datatype_classes = tuple(datatype if isclass(datatype) else datatype.__class__ for datatype in target_datatypes)
         return isinstance(self, datatype_classes)
 
+    @staticmethod
     def merge(split_files, output_file):
         """
             Merge files with copy.copyfileobj() will not hit the
@@ -732,8 +734,6 @@ def merge(split_files, output_file):
                 for fsrc in split_files:
                     shutil.copyfileobj(open(fsrc, 'rb'), fdst)
 
-    merge = staticmethod(merge)
-
     def get_visualizations(self, dataset):
         """
         Returns a list of visualizations for datatype.
@@ -844,7 +844,7 @@ def count_data_lines(self, dataset):
                 data_lines = None
         return data_lines
 
-    def set_peek(self, dataset, line_count=None, is_multi_byte=False, WIDTH=256, skipchars=None, line_wrap=True):
+    def set_peek(self, dataset, line_count=None, is_multi_byte=False, WIDTH=256, skipchars=None, line_wrap=True, **kwd):
         """
         Set the peek.  This method is used by various subclasses of Text.
         """
@@ -879,6 +879,7 @@ def set_peek(self, dataset, line_count=None, is_multi_byte=False, WIDTH=256, ski
             dataset.peek = 'file does not exist'
             dataset.blurb = 'file purged from disk'
 
+    @classmethod
     def split(cls, input_datasets, subdir_generator_function, split_params):
         """
         Split the input files by line.
@@ -947,7 +948,6 @@ def _file_len(fname):
             f.close()
             if part_file:
                 part_file.close()
-    split = classmethod(split)
 
     # ------------- Dataproviders
     @dataproviders.decorators.dataprovider_factory('line', dataproviders.line.FilteredLineDataProvider.settings)

diff --git a/lib/galaxy/datatypes/dataproviders/base.py b/lib/galaxy/datatypes/dataproviders/base.py
@@ -9,7 +9,7 @@
 
 import logging
 from collections import deque
-
+from typing import Dict
 
 from . import exceptions
 
@@ -74,7 +74,7 @@ class DataProvider(metaclass=HasSettings):
     # a definition of expected types for keyword arguments sent to __init__
     #   useful for controlling how query string dictionaries can be parsed into correct types for __init__
     #   empty in this base class
-    settings = {}
+    settings: Dict[str, str] = {}
 
     def __init__(self, source, **kwargs):
         """

diff --git a/lib/galaxy/datatypes/display_applications/parameters.py b/lib/galaxy/datatypes/display_applications/parameters.py
@@ -1,5 +1,6 @@
 # Contains parameters that are used in Display Applications
 import mimetypes
+from typing import Optional
 from urllib.parse import quote_plus
 
 from galaxy.util import string_as_bool
@@ -12,7 +13,7 @@
 class DisplayApplicationParameter:
     """ Abstract Class for Display Application Parameters """
 
-    type = None
+    type: Optional[str] = None
 
     @classmethod
     def from_elem(cls, elem, link):

diff --git a/lib/galaxy/datatypes/molecules.py b/lib/galaxy/datatypes/molecules.py
@@ -125,6 +125,7 @@ def set_meta(self, dataset, **kwd):
         """
         dataset.metadata.number_of_molecules = count_special_lines(r"^\$\$\$\$$", dataset.file_name)
 
+    @classmethod
     def split(cls, input_datasets, subdir_generator_function, split_params):
         """
         Split the input files by molecule records.
@@ -172,7 +173,6 @@ def _write_part_sdf_file(accumulated_lines):
         except Exception as e:
             log.error('Unable to split files: %s', unicodify(e))
             raise
-    split = classmethod(split)
 
 
 @build_sniff_from_prefix
@@ -208,6 +208,7 @@ def set_meta(self, dataset, **kwd):
         """
         dataset.metadata.number_of_molecules = count_special_lines("@<TRIPOS>MOLECULE", dataset.file_name)
 
+    @classmethod
     def split(cls, input_datasets, subdir_generator_function, split_params):
         """
         Split the input files by molecule records.
@@ -259,7 +260,6 @@ def _write_part_mol2_file(accumulated_lines):
         except Exception as e:
             log.error('Unable to split files: %s', unicodify(e))
             raise
-    split = classmethod(split)
 
 
 @build_sniff_from_prefix
@@ -293,6 +293,7 @@ def set_meta(self, dataset, **kwd):
         """
         dataset.metadata.number_of_molecules = count_special_lines('^#', dataset.file_name, invert=True)
 
+    @classmethod
     def split(cls, input_datasets, subdir_generator_function, split_params):
         """
         Split the input files by fingerprint records.
@@ -338,8 +339,8 @@ def _write_part_fingerprint_file(accumulated_lines):
         except Exception as e:
             log.error('Unable to split files: %s', unicodify(e))
             raise
-    split = classmethod(split)
 
+    @staticmethod
     def merge(split_files, output_file):
         """
         Merging fps files requires merging the header manually.
@@ -363,7 +364,6 @@ def merge(split_files, output_file):
                             # line is no header and not a comment, we assume the first header is written to out and we set 'first' to False
                             first = False
                             out.write(line)
-    merge = staticmethod(merge)
 
 
 class OBFS(Binary):
@@ -846,6 +846,7 @@ def sniff_prefix(self, file_prefix):
 
         return True
 
+    @classmethod
     def split(cls, input_datasets, subdir_generator_function, split_params):
         """
         Split the input files by molecule records.
@@ -902,8 +903,8 @@ def _write_part_cml_file(accumulated_lines):
         except Exception as e:
             log.error('Unable to split files: %s', unicodify(e))
             raise
-    split = classmethod(split)
 
+    @staticmethod
     def merge(split_files, output_file):
         """
         Merging CML files.
@@ -938,4 +939,3 @@ def merge(split_files, output_file):
                         if molecule_found:
                             out.write(line)
             out.write("</cml>\n")
-    merge = staticmethod(merge)
diff --git a/lib/galaxy/datatypes/msa.py b/lib/galaxy/datatypes/msa.py
@@ -169,6 +169,7 @@ def set_meta(self, dataset, **kwd):
         """
         dataset.metadata.number_of_models = generic_util.count_special_lines('^#[[:space:]+]STOCKHOLM[[:space:]+]1.0', dataset.file_name)
 
+    @classmethod
     def split(cls, input_datasets, subdir_generator_function, split_params):
         """
 
@@ -218,7 +219,6 @@ def _write_part_stockholm_file(accumulated_lines):
         except Exception as e:
             log.error('Unable to split files: %s', unicodify(e))
             raise
-    split = classmethod(split)
 
 
 @build_sniff_from_prefix