Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lint common #3337

Merged
merged 1 commit
Jun 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions .github/workflow_scripts/lint_check.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,17 @@ set -ex
source $(dirname "$0")/env_setup.sh

setup_build_env

# Run the formatting checks (black + isort) on one submodule directory.
#   $1 - submodule directory (e.g. "common")
#   $2 - maximum line length passed to black
function lint_check {
    local module_dir="$1"
    local line_len="$2"
    black --check --diff "${module_dir}/" --line-length "${line_len}"
    isort --check --diff "${module_dir}/"
}

# Lint every submodule covered by this script, in order.
# Each entry is "<dir>:<black line length>"; set -e (enabled at the top of
# the script) aborts on the first failing check.
function lint_check_all {
    local spec
    for spec in multimodal:119 timeseries:119 common:160; do
        lint_check "${spec%%:*}" "${spec##*:}"
    done
}

bandit -r multimodal/src -ll
black --check --diff multimodal/
isort --check --diff multimodal/
black --check --diff timeseries/
isort --check --diff timeseries/
lint_check_all
48 changes: 27 additions & 21 deletions common/setup.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#!/usr/bin/env python
###########################
# This code block is a HACK (!), but is necessary to avoid code duplication. Do NOT alter these lines.
import importlib.util
import os

from setuptools import setup
import importlib.util

filepath = os.path.abspath(os.path.dirname(__file__))
filepath_import = os.path.join(filepath, '..', 'core', 'src', 'autogluon', 'core', '_setup_utils.py')
filepath_import = os.path.join(filepath, "..", "core", "src", "autogluon", "core", "_setup_utils.py")
spec = importlib.util.spec_from_file_location("ag_min_dependencies", filepath_import)
ag = importlib.util.module_from_spec(spec) # type: ignore
# Identical to `from autogluon.core import _setup_utils as ag`, but works without `autogluon.core` being installed.
Expand All @@ -15,37 +17,41 @@
version = ag.load_version_file()
version = ag.update_version(version, use_file_if_exists=False, create_file=True)

submodule = 'common'
install_requires = [
# version ranges added in ag.get_dependency_version_ranges()
'numpy', # version range defined in `core/_setup_utils.py`
'pandas', # version range defined in `core/_setup_utils.py`
'boto3', # version range defined in `core/_setup_utils.py`
'psutil', # version range defined in `core/_setup_utils.py`
'setuptools',
] if not ag.LITE_MODE else {
'numpy', # version range defined in `core/_setup_utils.py`
'pandas', # version range defined in `core/_setup_utils.py`
'setuptools',
}
# Name of the autogluon namespace submodule being packaged.
submodule = "common"
# Full installs include the AWS/system helpers (boto3, psutil); LITE_MODE
# keeps only the minimal scientific stack.
# NOTE(review): the LITE_MODE branch is a *set* literal while the default
# branch is a *list* — presumably downstream only iterates it, but confirm
# ag.get_dependency_version_ranges() accepts a set before relying on order.
install_requires = (
[
# version ranges added in ag.get_dependency_version_ranges()
"numpy", # version range defined in `core/_setup_utils.py`
"pandas", # version range defined in `core/_setup_utils.py`
"boto3", # version range defined in `core/_setup_utils.py`
"psutil", # version range defined in `core/_setup_utils.py`
"setuptools",
]
if not ag.LITE_MODE
else {
"numpy", # version range defined in `core/_setup_utils.py`
"pandas", # version range defined in `core/_setup_utils.py`
"setuptools",
}
)

# Optional dependency groups keyed by extra name (filled in below);
# the `{}` literal is the idiomatic (and faster) way to build an empty dict.
extras_require = {}

test_requirements = [
'pytest',
'types-requests',
'types-setuptools',
'pytest-mypy',
"pytest",
"types-requests",
"types-setuptools",
"pytest-mypy",
]

test_requirements = list(set(test_requirements))
extras_require['tests'] = test_requirements
extras_require["tests"] = test_requirements

install_requires = ag.get_dependency_version_ranges(install_requires)
for key in extras_require:
extras_require[key] = ag.get_dependency_version_ranges(extras_require[key])

if __name__ == '__main__':
if __name__ == "__main__":
ag.create_version_file(version=version, submodule=submodule)
setup_args = ag.default_setup_args(version=version, submodule=submodule)
setup(
Expand Down
6 changes: 3 additions & 3 deletions common/src/autogluon/common/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .version import __version__

from .features.feature_metadata import FeatureMetadata
from .utils.log_utils import _add_stream_handler, fix_logging_if_kaggle as __fix_logging_if_kaggle
from .utils.log_utils import _add_stream_handler
from .utils.log_utils import fix_logging_if_kaggle as __fix_logging_if_kaggle
from .version import __version__

# Fixes logger in Kaggle to show logs in notebook.
__fix_logging_if_kaggle()
Expand Down
100 changes: 58 additions & 42 deletions common/src/autogluon/common/features/feature_metadata.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import copy
import logging
from collections import defaultdict
from typing import Dict, List, Tuple, Set, Union, Any
from typing import Any, Dict, List, Set, Tuple, Union

import pandas as pd

from .infer_types import get_type_map_raw, get_type_group_map_special
from .infer_types import get_type_group_map_special, get_type_map_raw

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -42,7 +42,7 @@ def __init__(self, type_map_raw: Dict[str, str], type_group_map_special: Dict[st
else:
type_group_map_special = defaultdict(list)
elif type_map_special is not None:
raise ValueError('Only one of type_group_map_special and type_map_special can be specified in init.')
raise ValueError("Only one of type_group_map_special and type_map_special can be specified in init.")
if not isinstance(type_group_map_special, defaultdict):
type_group_map_special = defaultdict(list, type_group_map_special)

Expand All @@ -64,13 +64,22 @@ def _validate(self):
features_invalid.append(feature)
if features_invalid:
raise AssertionError(
f"{len(features_invalid)} features are present in type_group_map_special but not in type_map_raw. Invalid features: {features_invalid}")
f"{len(features_invalid)} features are present in type_group_map_special but not in type_map_raw. Invalid features: {features_invalid}"
)

# Note: This is not optimized for speed. Do not rely on this function during inference.
# TODO: Add valid_names, invalid_names arguments which override all other arguments for the features listed?
def get_features(self, valid_raw_types: list = None, valid_special_types: list = None, invalid_raw_types: list = None, invalid_special_types: list = None,
required_special_types: list = None, required_raw_special_pairs: List[Tuple[str, Union[List[str], Set[str]]]] = None,
required_exact=False, required_at_least_one_special=False) -> List[str]:
def get_features(
self,
valid_raw_types: list = None,
valid_special_types: list = None,
invalid_raw_types: list = None,
invalid_special_types: list = None,
required_special_types: list = None,
required_raw_special_pairs: List[Tuple[str, Union[List[str], Set[str]]]] = None,
required_exact=False,
required_at_least_one_special=False,
) -> List[str]:
"""
Returns a list of features held within the feature metadata object after being pruned through the available parameters.

Expand Down Expand Up @@ -118,9 +127,11 @@ def get_features(self, valid_raw_types: list = None, valid_special_types: list =
features = [feature for feature in features if self.get_feature_type_raw(feature) in valid_raw_types]
if valid_special_types is not None:
valid_special_types_set = set(valid_special_types)
features = [feature for feature in features
if not valid_special_types_set.isdisjoint(self.get_feature_types_special(feature))
or not self.get_feature_types_special(feature)]
features = [
feature
for feature in features
if not valid_special_types_set.isdisjoint(self.get_feature_types_special(feature)) or not self.get_feature_types_special(feature)
]
if invalid_raw_types is not None:
features = [feature for feature in features if self.get_feature_type_raw(feature) not in invalid_raw_types]
if invalid_special_types is not None:
Expand Down Expand Up @@ -163,7 +174,7 @@ def get_feature_type_raw(self, feature: str) -> str:

def get_feature_types_special(self, feature: str) -> list:
if feature not in self.type_map_raw:
raise KeyError(f'{feature} does not exist in {self.__class__.__name__}.')
raise KeyError(f"{feature} does not exist in {self.__class__.__name__}.")
return self._get_feature_types(feature=feature, feature_types_dict=self.type_group_map_special)

def get_type_map_special(self) -> dict:
Expand Down Expand Up @@ -191,7 +202,7 @@ def remove_features(self, features: list, inplace=False):
metadata = copy.deepcopy(self)
features_invalid = [feature for feature in features if feature not in self.get_features()]
if features_invalid:
raise KeyError(f'remove_features was called with a feature that does not exist in feature metadata. Invalid Features: {features_invalid}')
raise KeyError(f"remove_features was called with a feature that does not exist in feature metadata. Invalid Features: {features_invalid}")
metadata._remove_features_from_type_map(d=metadata.type_map_raw, features=features)
metadata._remove_features_from_type_group_map(d=metadata.type_group_map_special, features=features)
return metadata
Expand All @@ -200,7 +211,7 @@ def keep_features(self, features: list, inplace=False):
"""Removes all features from metadata except for those in features"""
features_invalid = [feature for feature in features if feature not in self.get_features()]
if features_invalid:
raise KeyError(f'keep_features was called with a feature that does not exist in feature metadata. Invalid Features: {features_invalid}')
raise KeyError(f"keep_features was called with a feature that does not exist in feature metadata. Invalid Features: {features_invalid}")
features_to_remove = [feature for feature in self.get_features() if feature not in features]
return self.remove_features(features=features_to_remove, inplace=inplace)

Expand Down Expand Up @@ -260,15 +271,15 @@ def rename_features(self, rename_map: dict, inplace=False):
metadata.type_map_raw = {rename_map.get(key, key): val for key, val in metadata.type_map_raw.items()}
after_len = len(metadata.type_map_raw.keys())
if before_len != after_len:
raise AssertionError('key names conflicted during renaming. Do not rename features to exist feature names.')
raise AssertionError("key names conflicted during renaming. Do not rename features to exist feature names.")
for dtype in metadata.type_group_map_special:
metadata.type_group_map_special[dtype] = [rename_map.get(feature, feature) for feature in metadata.type_group_map_special[dtype]]
return metadata

# TODO: Add documentation on shared_raw_features usage
def join_metadata(self, metadata, shared_raw_features='error'):
def join_metadata(self, metadata, shared_raw_features="error"):
"""Join two FeatureMetadata objects together, returning a new FeatureMetadata object"""
if shared_raw_features not in ['error', 'error_if_diff', 'overwrite']:
if shared_raw_features not in ["error", "error_if_diff", "overwrite"]:
raise ValueError(f"shared_raw_features must be one of {['error', 'error_if_diff', 'overwrite']}, but was: '{shared_raw_features}'")
type_map_raw = copy.deepcopy(self.type_map_raw)
shared_features = []
Expand All @@ -279,26 +290,29 @@ def join_metadata(self, metadata, shared_raw_features='error'):
if type_map_raw[key] != metadata.type_map_raw[key]:
shared_features_diff_types.append(key)
if shared_features:
if shared_raw_features == 'error':
logger.error('ERROR: Conflicting metadata:')
logger.error('Metadata 1:')
self.print_feature_metadata_full(log_prefix='\t', log_level=40)
logger.error('Metadata 2:')
metadata.print_feature_metadata_full(log_prefix='\t', log_level=40)
if shared_raw_features == "error":
logger.error("ERROR: Conflicting metadata:")
logger.error("Metadata 1:")
self.print_feature_metadata_full(log_prefix="\t", log_level=40)
logger.error("Metadata 2:")
metadata.print_feature_metadata_full(log_prefix="\t", log_level=40)
raise AssertionError(f"Metadata objects to join share raw features, but `shared_raw_features='error'`. Shared features: {shared_features}")
if shared_features_diff_types:
if shared_raw_features == 'overwrite':
logger.log(20, f'Overwriting type_map_raw during FeatureMetadata join. '
f'Shared features with conflicting types: {shared_features_diff_types}')
if shared_raw_features == "overwrite":
logger.log(
20, f"Overwriting type_map_raw during FeatureMetadata join. " f"Shared features with conflicting types: {shared_features_diff_types}"
)
shared_features = []
elif shared_raw_features == 'error_if_diff':
logger.error('ERROR: Conflicting metadata:')
logger.error('Metadata 1:')
self.print_feature_metadata_full(log_prefix='\t', log_level=40)
logger.error('Metadata 2:')
metadata.print_feature_metadata_full(log_prefix='\t', log_level=40)
raise AssertionError(f"Metadata objects to join share raw features but do not agree on raw dtypes, "
f"and `shared_raw_features='error_if_diff'`. Shared conflicting features: {shared_features_diff_types}")
elif shared_raw_features == "error_if_diff":
logger.error("ERROR: Conflicting metadata:")
logger.error("Metadata 1:")
self.print_feature_metadata_full(log_prefix="\t", log_level=40)
logger.error("Metadata 2:")
metadata.print_feature_metadata_full(log_prefix="\t", log_level=40)
raise AssertionError(
f"Metadata objects to join share raw features but do not agree on raw dtypes, "
f"and `shared_raw_features='error_if_diff'`. Shared conflicting features: {shared_features_diff_types}"
)
type_map_raw.update({key: val for key, val in metadata.type_map_raw.items() if key not in shared_features})

type_group_map_special = self._add_type_group_map_special([self.type_group_map_special, metadata.type_group_map_special])
Expand Down Expand Up @@ -330,7 +344,7 @@ def _get_feature_types(feature: str, feature_types_dict: dict) -> list:

# Joins a list of metadata objects together, returning a new metadata object
@staticmethod
def join_metadatas(metadata_list, shared_raw_features='error'):
def join_metadatas(metadata_list, shared_raw_features="error"):
metadata_new = copy.deepcopy(metadata_list[0])
for metadata in metadata_list[1:]:
metadata_new = metadata_new.join_metadata(metadata, shared_raw_features=shared_raw_features)
Expand All @@ -355,25 +369,27 @@ def to_dict(self, inverse=False) -> dict:

return feature_metadata_dict

def print_feature_metadata_full(self, log_prefix='', print_only_one_special=False, log_level=20, max_list_len=5, return_str=False):
def print_feature_metadata_full(self, log_prefix="", print_only_one_special=False, log_level=20, max_list_len=5, return_str=False):
feature_metadata_dict = self.to_dict(inverse=True)
if not feature_metadata_dict:
if return_str:
return ''
return ""
else:
return
keys = list(feature_metadata_dict.keys())
keys = sorted(keys)
output = [((key[0], list(key[1])), feature_metadata_dict[key]) for key in keys]
output_str = ''
output_str = ""
if print_only_one_special:
for i, ((raw, special), features) in enumerate(output):
if len(special) == 1:
output[i] = ((raw, special[0]), features)
elif len(special) > 1:
output[i] = ((raw, special[0]), features)
logger.warning(f'Warning: print_only_one_special=True was set, but features with {len(special)} special types were found. '
f'Invalid Types: {output[i]}')
logger.warning(
f"Warning: print_only_one_special=True was set, but features with {len(special)} special types were found. "
f"Invalid Types: {output[i]}"
)
else:
output[i] = ((raw, None), features)
max_key_len = max([len(str(key)) for key, _ in output])
Expand All @@ -386,17 +402,17 @@ def print_feature_metadata_full(self, log_prefix='', print_only_one_special=Fals
if max_list_len is not None:
features = str(val[:max_list_len])
if len(val) > max_list_len:
features = features[:-1] + ', ...]'
features = features[:-1] + ", ...]"
else:
features = str(val)
if val:
message = f'{log_prefix}{key}{" " * max_key_minus_cur} : {" " * max_val_minus_cur}{len(val)} | {features}'
if return_str:
output_str += message + '\n'
output_str += message + "\n"
else:
logger.log(log_level, message)
if return_str:
if output_str[-1] == '\n':
if output_str[-1] == "\n":
output_str = output_str[:-1]
return output_str

Expand Down
Loading
Loading