Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lint common #3337

Merged
merged 1 commit
Jun 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions .github/workflow_scripts/lint_check.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,17 @@ set -ex
source $(dirname "$0")/env_setup.sh

setup_build_env

# Run the formatting checks (black + isort) on one submodule directory.
#   $1 - submodule directory (e.g. "common")
#   $2 - maximum line length passed to black
function lint_check {
    local module_dir="$1"
    local line_len="$2"
    black --check --diff "${module_dir}/" --line-length "${line_len}"
    isort --check --diff "${module_dir}/"
}

# Lint every submodule covered by this script, in order.
# Each entry is "<dir>:<black line length>"; set -e (enabled at the top of
# the script) aborts on the first failing check.
function lint_check_all {
    local spec
    for spec in multimodal:119 timeseries:119 common:160; do
        lint_check "${spec%%:*}" "${spec##*:}"
    done
}

bandit -r multimodal/src -ll
black --check --diff multimodal/
isort --check --diff multimodal/
black --check --diff timeseries/
isort --check --diff timeseries/
lint_check_all
48 changes: 27 additions & 21 deletions common/setup.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#!/usr/bin/env python
###########################
# This code block is a HACK (!), but is necessary to avoid code duplication. Do NOT alter these lines.
import importlib.util
import os

from setuptools import setup
import importlib.util

filepath = os.path.abspath(os.path.dirname(__file__))
filepath_import = os.path.join(filepath, '..', 'core', 'src', 'autogluon', 'core', '_setup_utils.py')
filepath_import = os.path.join(filepath, "..", "core", "src", "autogluon", "core", "_setup_utils.py")
spec = importlib.util.spec_from_file_location("ag_min_dependencies", filepath_import)
ag = importlib.util.module_from_spec(spec) # type: ignore
# Identical to `from autogluon.core import _setup_utils as ag`, but works without `autogluon.core` being installed.
Expand All @@ -15,37 +17,41 @@
version = ag.load_version_file()
version = ag.update_version(version, use_file_if_exists=False, create_file=True)

submodule = 'common'
install_requires = [
# version ranges added in ag.get_dependency_version_ranges()
'numpy', # version range defined in `core/_setup_utils.py`
'pandas', # version range defined in `core/_setup_utils.py`
'boto3', # version range defined in `core/_setup_utils.py`
'psutil', # version range defined in `core/_setup_utils.py`
'setuptools',
] if not ag.LITE_MODE else {
'numpy', # version range defined in `core/_setup_utils.py`
'pandas', # version range defined in `core/_setup_utils.py`
'setuptools',
}
# Name of the autogluon namespace submodule being packaged.
submodule = "common"
# Full installs include the AWS/system helpers (boto3, psutil); LITE_MODE
# keeps only the minimal scientific stack.
# NOTE(review): the LITE_MODE branch is a *set* literal while the default
# branch is a *list* — presumably downstream only iterates it, but confirm
# ag.get_dependency_version_ranges() accepts a set before relying on order.
install_requires = (
[
# version ranges added in ag.get_dependency_version_ranges()
"numpy", # version range defined in `core/_setup_utils.py`
"pandas", # version range defined in `core/_setup_utils.py`
"boto3", # version range defined in `core/_setup_utils.py`
"psutil", # version range defined in `core/_setup_utils.py`
"setuptools",
]
if not ag.LITE_MODE
else {
"numpy", # version range defined in `core/_setup_utils.py`
"pandas", # version range defined in `core/_setup_utils.py`
"setuptools",
}
)

# Optional dependency groups keyed by extra name (filled in below);
# the `{}` literal is the idiomatic (and faster) way to build an empty dict.
extras_require = {}

test_requirements = [
'pytest',
'types-requests',
'types-setuptools',
'pytest-mypy',
"pytest",
"types-requests",
"types-setuptools",
"pytest-mypy",
]

test_requirements = list(set(test_requirements))
extras_require['tests'] = test_requirements
extras_require["tests"] = test_requirements

install_requires = ag.get_dependency_version_ranges(install_requires)
for key in extras_require:
extras_require[key] = ag.get_dependency_version_ranges(extras_require[key])

if __name__ == '__main__':
if __name__ == "__main__":
ag.create_version_file(version=version, submodule=submodule)
setup_args = ag.default_setup_args(version=version, submodule=submodule)
setup(
Expand Down
6 changes: 3 additions & 3 deletions common/src/autogluon/common/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .version import __version__

from .features.feature_metadata import FeatureMetadata
from .utils.log_utils import _add_stream_handler, fix_logging_if_kaggle as __fix_logging_if_kaggle
from .utils.log_utils import _add_stream_handler
from .utils.log_utils import fix_logging_if_kaggle as __fix_logging_if_kaggle
from .version import __version__

# Fixes logger in Kaggle to show logs in notebook.
__fix_logging_if_kaggle()
Expand Down
100 changes: 58 additions & 42 deletions common/src/autogluon/common/features/feature_metadata.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import copy
import logging
from collections import defaultdict
from typing import Dict, List, Tuple, Set, Union, Any
from typing import Any, Dict, List, Set, Tuple, Union

import pandas as pd

from .infer_types import get_type_map_raw, get_type_group_map_special
from .infer_types import get_type_group_map_special, get_type_map_raw

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -42,7 +42,7 @@ def __init__(self, type_map_raw: Dict[str, str], type_group_map_special: Dict[st
else:
type_group_map_special = defaultdict(list)
elif type_map_special is not None:
raise ValueError('Only one of type_group_map_special and type_map_special can be specified in init.')
raise ValueError("Only one of type_group_map_special and type_map_special can be specified in init.")
if not isinstance(type_group_map_special, defaultdict):
type_group_map_special = defaultdict(list, type_group_map_special)

Expand All @@ -64,13 +64,22 @@ def _validate(self):
features_invalid.append(feature)
if features_invalid:
raise AssertionError(
f"{len(features_invalid)} features are present in type_group_map_special but not in type_map_raw. Invalid features: {features_invalid}")
f"{len(features_invalid)} features are present in type_group_map_special but not in type_map_raw. Invalid features: {features_invalid}"
)

# Note: This is not optimized for speed. Do not rely on this function during inference.
# TODO: Add valid_names, invalid_names arguments which override all other arguments for the features listed?
def get_features(self, valid_raw_types: list = None, valid_special_types: list = None, invalid_raw_types: list = None, invalid_special_types: list = None,
required_special_types: list = None, required_raw_special_pairs: List[Tuple[str, Union[List[str], Set[str]]]] = None,
required_exact=False, required_at_least_one_special=False) -> List[str]:
def get_features(
self,
valid_raw_types: list = None,
valid_special_types: list = None,
invalid_raw_types: list = None,
invalid_special_types: list = None,
required_special_types: list = None,
required_raw_special_pairs: List[Tuple[str, Union[List[str], Set[str]]]] = None,
required_exact=False,
required_at_least_one_special=False,
) -> List[str]:
"""
Returns a list of features held within the feature metadata object after being pruned through the available parameters.

Expand Down Expand Up @@ -118,9 +127,11 @@ def get_features(self, valid_raw_types: list = None, valid_special_types: list =
features = [feature for feature in features if self.get_feature_type_raw(feature) in valid_raw_types]
if valid_special_types is not None:
valid_special_types_set = set(valid_special_types)
features = [feature for feature in features
if not valid_special_types_set.isdisjoint(self.get_feature_types_special(feature))
or not self.get_feature_types_special(feature)]
features = [
feature
for feature in features
if not valid_special_types_set.isdisjoint(self.get_feature_types_special(feature)) or not self.get_feature_types_special(feature)
]
if invalid_raw_types is not None:
features = [feature for feature in features if self.get_feature_type_raw(feature) not in invalid_raw_types]
if invalid_special_types is not None:
Expand Down Expand Up @@ -163,7 +174,7 @@ def get_feature_type_raw(self, feature: str) -> str:

def get_feature_types_special(self, feature: str) -> list:
if feature not in self.type_map_raw:
raise KeyError(f'{feature} does not exist in {self.__class__.__name__}.')
raise KeyError(f"{feature} does not exist in {self.__class__.__name__}.")
return self._get_feature_types(feature=feature, feature_types_dict=self.type_group_map_special)

def get_type_map_special(self) -> dict:
Expand Down Expand Up @@ -191,7 +202,7 @@ def remove_features(self, features: list, inplace=False):
metadata = copy.deepcopy(self)
features_invalid = [feature for feature in features if feature not in self.get_features()]
if features_invalid:
raise KeyError(f'remove_features was called with a feature that does not exist in feature metadata. Invalid Features: {features_invalid}')
raise KeyError(f"remove_features was called with a feature that does not exist in feature metadata. Invalid Features: {features_invalid}")
metadata._remove_features_from_type_map(d=metadata.type_map_raw, features=features)
metadata._remove_features_from_type_group_map(d=metadata.type_group_map_special, features=features)
return metadata
Expand All @@ -200,7 +211,7 @@ def keep_features(self, features: list, inplace=False):
"""Removes all features from metadata except for those in features"""
features_invalid = [feature for feature in features if feature not in self.get_features()]
if features_invalid:
raise KeyError(f'keep_features was called with a feature that does not exist in feature metadata. Invalid Features: {features_invalid}')
raise KeyError(f"keep_features was called with a feature that does not exist in feature metadata. Invalid Features: {features_invalid}")
features_to_remove = [feature for feature in self.get_features() if feature not in features]
return self.remove_features(features=features_to_remove, inplace=inplace)

Expand Down Expand Up @@ -260,15 +271,15 @@ def rename_features(self, rename_map: dict, inplace=False):
metadata.type_map_raw = {rename_map.get(key, key): val for key, val in metadata.type_map_raw.items()}
after_len = len(metadata.type_map_raw.keys())
if before_len != after_len:
raise AssertionError('key names conflicted during renaming. Do not rename features to exist feature names.')
raise AssertionError("key names conflicted during renaming. Do not rename features to exist feature names.")
for dtype in metadata.type_group_map_special:
metadata.type_group_map_special[dtype] = [rename_map.get(feature, feature) for feature in metadata.type_group_map_special[dtype]]
return metadata

# TODO: Add documentation on shared_raw_features usage
def join_metadata(self, metadata, shared_raw_features='error'):
def join_metadata(self, metadata, shared_raw_features="error"):
"""Join two FeatureMetadata objects together, returning a new FeatureMetadata object"""
if shared_raw_features not in ['error', 'error_if_diff', 'overwrite']:
if shared_raw_features not in ["error", "error_if_diff", "overwrite"]:
raise ValueError(f"shared_raw_features must be one of {['error', 'error_if_diff', 'overwrite']}, but was: '{shared_raw_features}'")
type_map_raw = copy.deepcopy(self.type_map_raw)
shared_features = []
Expand All @@ -279,26 +290,29 @@ def join_metadata(self, metadata, shared_raw_features='error'):
if type_map_raw[key] != metadata.type_map_raw[key]:
shared_features_diff_types.append(key)
if shared_features:
if shared_raw_features == 'error':
logger.error('ERROR: Conflicting metadata:')
logger.error('Metadata 1:')
self.print_feature_metadata_full(log_prefix='\t', log_level=40)
logger.error('Metadata 2:')
metadata.print_feature_metadata_full(log_prefix='\t', log_level=40)
if shared_raw_features == "error":
logger.error("ERROR: Conflicting metadata:")
logger.error("Metadata 1:")
self.print_feature_metadata_full(log_prefix="\t", log_level=40)
logger.error("Metadata 2:")
metadata.print_feature_metadata_full(log_prefix="\t", log_level=40)
raise AssertionError(f"Metadata objects to join share raw features, but `shared_raw_features='error'`. Shared features: {shared_features}")
if shared_features_diff_types:
if shared_raw_features == 'overwrite':
logger.log(20, f'Overwriting type_map_raw during FeatureMetadata join. '
f'Shared features with conflicting types: {shared_features_diff_types}')
if shared_raw_features == "overwrite":
logger.log(
20, f"Overwriting type_map_raw during FeatureMetadata join. " f"Shared features with conflicting types: {shared_features_diff_types}"
)
shared_features = []
elif shared_raw_features == 'error_if_diff':
logger.error('ERROR: Conflicting metadata:')
logger.error('Metadata 1:')
self.print_feature_metadata_full(log_prefix='\t', log_level=40)
logger.error('Metadata 2:')
metadata.print_feature_metadata_full(log_prefix='\t', log_level=40)
raise AssertionError(f"Metadata objects to join share raw features but do not agree on raw dtypes, "
f"and `shared_raw_features='error_if_diff'`. Shared conflicting features: {shared_features_diff_types}")
elif shared_raw_features == "error_if_diff":
logger.error("ERROR: Conflicting metadata:")
logger.error("Metadata 1:")
self.print_feature_metadata_full(log_prefix="\t", log_level=40)
logger.error("Metadata 2:")
metadata.print_feature_metadata_full(log_prefix="\t", log_level=40)
raise AssertionError(
f"Metadata objects to join share raw features but do not agree on raw dtypes, "
f"and `shared_raw_features='error_if_diff'`. Shared conflicting features: {shared_features_diff_types}"
)
type_map_raw.update({key: val for key, val in metadata.type_map_raw.items() if key not in shared_features})

type_group_map_special = self._add_type_group_map_special([self.type_group_map_special, metadata.type_group_map_special])
Expand Down Expand Up @@ -330,7 +344,7 @@ def _get_feature_types(feature: str, feature_types_dict: dict) -> list:

# Joins a list of metadata objects together, returning a new metadata object
@staticmethod
def join_metadatas(metadata_list, shared_raw_features='error'):
def join_metadatas(metadata_list, shared_raw_features="error"):
metadata_new = copy.deepcopy(metadata_list[0])
for metadata in metadata_list[1:]:
metadata_new = metadata_new.join_metadata(metadata, shared_raw_features=shared_raw_features)
Expand All @@ -355,25 +369,27 @@ def to_dict(self, inverse=False) -> dict:

return feature_metadata_dict

def print_feature_metadata_full(self, log_prefix='', print_only_one_special=False, log_level=20, max_list_len=5, return_str=False):
def print_feature_metadata_full(self, log_prefix="", print_only_one_special=False, log_level=20, max_list_len=5, return_str=False):
feature_metadata_dict = self.to_dict(inverse=True)
if not feature_metadata_dict:
if return_str:
return ''
return ""
else:
return
keys = list(feature_metadata_dict.keys())
keys = sorted(keys)
output = [((key[0], list(key[1])), feature_metadata_dict[key]) for key in keys]
output_str = ''
output_str = ""
if print_only_one_special:
for i, ((raw, special), features) in enumerate(output):
if len(special) == 1:
output[i] = ((raw, special[0]), features)
elif len(special) > 1:
output[i] = ((raw, special[0]), features)
logger.warning(f'Warning: print_only_one_special=True was set, but features with {len(special)} special types were found. '
f'Invalid Types: {output[i]}')
logger.warning(
f"Warning: print_only_one_special=True was set, but features with {len(special)} special types were found. "
f"Invalid Types: {output[i]}"
)
else:
output[i] = ((raw, None), features)
max_key_len = max([len(str(key)) for key, _ in output])
Expand All @@ -386,17 +402,17 @@ def print_feature_metadata_full(self, log_prefix='', print_only_one_special=Fals
if max_list_len is not None:
features = str(val[:max_list_len])
if len(val) > max_list_len:
features = features[:-1] + ', ...]'
features = features[:-1] + ", ...]"
else:
features = str(val)
if val:
message = f'{log_prefix}{key}{" " * max_key_minus_cur} : {" " * max_val_minus_cur}{len(val)} | {features}'
if return_str:
output_str += message + '\n'
output_str += message + "\n"
else:
logger.log(log_level, message)
if return_str:
if output_str[-1] == '\n':
if output_str[-1] == "\n":
output_str = output_str[:-1]
return output_str

Expand Down
Loading
Loading