Commit

Accept black reformatting
ravinkohli committed May 25, 2021
1 parent 2e4c8e7 commit e8cb0ba
Showing 191 changed files with 11,884 additions and 7,871 deletions.
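Every hunk in this commit is mechanical. black (run at its default 88-character line length) wraps over-long signatures and calls at one element per line, normalizes string literals to double quotes, and appends a "magic" trailing comma that keeps the exploded layout stable on future runs. A before/after sketch of the pattern (hypothetical call, not taken from the repository):

# Before black: one line well over the 88-character limit, single-quoted string
dataset = build_dataset(X=X_train, Y=y_train, X_test=X_test, Y_test=y_test, validator=validator, resampling_strategy='holdout')

# After black: one keyword per line, double quotes, magic trailing comma
dataset = build_dataset(
    X=X_train,
    Y=y_train,
    X_test=X_test,
    Y_test=y_test,
    validator=validator,
    resampling_strategy="holdout",
)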
517 changes: 315 additions & 202 deletions autoPyTorch/api/base_task.py

Large diffs are not rendered by default.

58 changes: 37 additions & 21 deletions autoPyTorch/api/tabular_classification.py
@@ -19,7 +19,9 @@
 )
 from autoPyTorch.datasets.tabular_dataset import TabularDataset
 from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline
-from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
+from autoPyTorch.utils.hyperparameter_search_space_update import (
+    HyperparameterSearchSpaceUpdates,
+)
 
 
 class TabularClassificationTask(BaseTask):
@@ -57,6 +59,7 @@ class TabularClassificationTask(BaseTask):
             specifies set of components not to use. Incompatible
             with include components
     """
+
     def __init__(
         self,
         seed: int = 1,
@@ -71,10 +74,12 @@ def __init__(
         delete_output_folder_after_terminate: bool = True,
         include_components: Optional[Dict] = None,
         exclude_components: Optional[Dict] = None,
-        resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
+        resampling_strategy: Union[
+            CrossValTypes, HoldoutValTypes
+        ] = HoldoutValTypes.holdout_validation,
         resampling_strategy_args: Optional[Dict[str, Any]] = None,
         backend: Optional[Backend] = None,
-        search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
+        search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None,
     ):
         super().__init__(
             seed=seed,
@@ -96,7 +101,9 @@ def __init__(
             task_type=TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION],
         )
 
-    def build_pipeline(self, dataset_properties: Dict[str, Any]) -> TabularClassificationPipeline:
+    def build_pipeline(
+        self, dataset_properties: Dict[str, Any]
+    ) -> TabularClassificationPipeline:
         return TabularClassificationPipeline(dataset_properties=dataset_properties)
 
     def search(
@@ -119,7 +126,7 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-    ) -> 'BaseTask':
+    ) -> "BaseTask":
         """
         Search for the best pipeline configuration for the given dataset.
@@ -208,11 +215,15 @@ def search(
         # Fit a input validator to check the provided data
         # Also, an encoder is fit to both train and test data,
         # to prevent unseen categories during inference
-        self.InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
+        self.InputValidator.fit(
+            X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test
+        )
 
         self.dataset = TabularDataset(
-            X=X_train, Y=y_train,
-            X_test=X_test, Y_test=y_test,
+            X=X_train,
+            Y=y_train,
+            X_test=X_test,
+            Y_test=y_test,
             validator=self.InputValidator,
             resampling_strategy=self.resampling_strategy,
             resampling_strategy_args=self.resampling_strategy_args,
@@ -236,18 +247,18 @@ def search(
         )
 
     def predict(
-        self,
-        X_test: np.ndarray,
-        batch_size: Optional[int] = None,
-        n_jobs: int = 1
+        self, X_test: np.ndarray, batch_size: Optional[int] = None, n_jobs: int = 1
     ) -> np.ndarray:
         if self.InputValidator is None or not self.InputValidator._is_fitted:
-            raise ValueError("predict() is only supported after calling search. Kindly call first "
-                             "the estimator fit() method.")
+            raise ValueError(
+                "predict() is only supported after calling search. Kindly call first "
+                "the estimator fit() method."
+            )
 
         X_test = self.InputValidator.feature_validator.transform(X_test)
-        predicted_probabilities = super().predict(X_test, batch_size=batch_size,
-                                                  n_jobs=n_jobs)
+        predicted_probabilities = super().predict(
+            X_test, batch_size=batch_size, n_jobs=n_jobs
+        )
 
         if self.InputValidator.target_validator.is_single_column_target():
             predicted_indexes = np.argmax(predicted_probabilities, axis=1)
@@ -258,11 +269,16 @@ def predict(
         # in our encoded values
         return self.InputValidator.target_validator.inverse_transform(predicted_indexes)
 
-    def predict_proba(self,
-                      X_test: Union[np.ndarray, pd.DataFrame, List],
-                      batch_size: Optional[int] = None, n_jobs: int = 1) -> np.ndarray:
+    def predict_proba(
+        self,
+        X_test: Union[np.ndarray, pd.DataFrame, List],
+        batch_size: Optional[int] = None,
+        n_jobs: int = 1,
+    ) -> np.ndarray:
         if self.InputValidator is None or not self.InputValidator._is_fitted:
-            raise ValueError("predict() is only supported after calling search. Kindly call first "
-                             "the estimator fit() method.")
+            raise ValueError(
+                "predict() is only supported after calling search. Kindly call first "
+                "the estimator fit() method."
+            )
         X_test = self.InputValidator.feature_validator.transform(X_test)
         return super().predict(X_test, batch_size=batch_size, n_jobs=n_jobs)
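For orientation, a minimal usage sketch of the classification API whose formatting changes above. The full search() signature is elided in this diff, so treat the data-split and optimize_metric keywords as assumptions inferred from the surrounding code, not as the confirmed interface:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

from autoPyTorch.api.tabular_classification import TabularClassificationTask

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

api = TabularClassificationTask(seed=1)
api.search(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    optimize_metric="accuracy",  # assumption: not visible in this diff
)

# predict() takes the argmax of the predicted probabilities and maps it back
# to the original label domain via the fitted InputValidator (hunk above)
y_pred = api.predict(X_test)
probabilities = api.predict_proba(X_test)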
75 changes: 40 additions & 35 deletions autoPyTorch/api/tabular_regression.py
@@ -8,18 +8,17 @@
 
 from autoPyTorch.api.base_task import BaseTask
 from autoPyTorch.automl_common.common.utils.backend import Backend
-from autoPyTorch.constants import (
-    TABULAR_REGRESSION,
-    TASK_TYPES_TO_STRING
-)
+from autoPyTorch.constants import TABULAR_REGRESSION, TASK_TYPES_TO_STRING
 from autoPyTorch.data.tabular_validator import TabularInputValidator
 from autoPyTorch.datasets.resampling_strategy import (
     CrossValTypes,
     HoldoutValTypes,
 )
 from autoPyTorch.datasets.tabular_dataset import TabularDataset
 from autoPyTorch.pipeline.tabular_regression import TabularRegressionPipeline
-from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
+from autoPyTorch.utils.hyperparameter_search_space_update import (
+    HyperparameterSearchSpaceUpdates,
+)
 
 
 class TabularRegressionTask(BaseTask):
@@ -50,23 +49,25 @@ class TabularRegressionTask(BaseTask):
     """
 
     def __init__(
-            self,
-            seed: int = 1,
-            n_jobs: int = 1,
-            logging_config: Optional[Dict] = None,
-            ensemble_size: int = 50,
-            ensemble_nbest: int = 50,
-            max_models_on_disc: int = 50,
-            temporary_directory: Optional[str] = None,
-            output_directory: Optional[str] = None,
-            delete_tmp_folder_after_terminate: bool = True,
-            delete_output_folder_after_terminate: bool = True,
-            include_components: Optional[Dict] = None,
-            exclude_components: Optional[Dict] = None,
-            resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation,
-            resampling_strategy_args: Optional[Dict[str, Any]] = None,
-            backend: Optional[Backend] = None,
-            search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None
+        self,
+        seed: int = 1,
+        n_jobs: int = 1,
+        logging_config: Optional[Dict] = None,
+        ensemble_size: int = 50,
+        ensemble_nbest: int = 50,
+        max_models_on_disc: int = 50,
+        temporary_directory: Optional[str] = None,
+        output_directory: Optional[str] = None,
+        delete_tmp_folder_after_terminate: bool = True,
+        delete_output_folder_after_terminate: bool = True,
+        include_components: Optional[Dict] = None,
+        exclude_components: Optional[Dict] = None,
+        resampling_strategy: Union[
+            CrossValTypes, HoldoutValTypes
+        ] = HoldoutValTypes.holdout_validation,
+        resampling_strategy_args: Optional[Dict[str, Any]] = None,
+        backend: Optional[Backend] = None,
+        search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None,
     ):
         super().__init__(
             seed=seed,
@@ -88,7 +89,9 @@ def __init__(
             task_type=TASK_TYPES_TO_STRING[TABULAR_REGRESSION],
         )
 
-    def build_pipeline(self, dataset_properties: Dict[str, Any]) -> TabularRegressionPipeline:
+    def build_pipeline(
+        self, dataset_properties: Dict[str, Any]
+    ) -> TabularRegressionPipeline:
         return TabularRegressionPipeline(dataset_properties=dataset_properties)
 
     def search(
@@ -111,7 +114,7 @@ def search(
         precision: int = 32,
         disable_file_output: List = [],
         load_models: bool = True,
-    ) -> 'BaseTask':
+    ) -> "BaseTask":
         """
         Search for the best pipeline configuration for the given dataset.
@@ -196,11 +199,15 @@ def search(
         # Fit a input validator to check the provided data
         # Also, an encoder is fit to both train and test data,
         # to prevent unseen categories during inference
-        self.InputValidator.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
+        self.InputValidator.fit(
+            X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test
+        )
 
         self.dataset = TabularDataset(
-            X=X_train, Y=y_train,
-            X_test=X_test, Y_test=y_test,
+            X=X_train,
+            Y=y_train,
+            X_test=X_test,
+            Y_test=y_test,
             validator=self.InputValidator,
             resampling_strategy=self.resampling_strategy,
             resampling_strategy_args=self.resampling_strategy_args,
@@ -224,18 +231,16 @@ def search(
         )
 
     def predict(
-        self,
-        X_test: np.ndarray,
-        batch_size: Optional[int] = None,
-        n_jobs: int = 1
+        self, X_test: np.ndarray, batch_size: Optional[int] = None, n_jobs: int = 1
     ) -> np.ndarray:
         if self.InputValidator is None or not self.InputValidator._is_fitted:
-            raise ValueError("predict() is only supported after calling search. Kindly call first "
-                             "the estimator fit() method.")
+            raise ValueError(
+                "predict() is only supported after calling search. Kindly call first "
+                "the estimator fit() method."
+            )
 
         X_test = self.InputValidator.feature_validator.transform(X_test)
-        predicted_values = super().predict(X_test, batch_size=batch_size,
-                                           n_jobs=n_jobs)
+        predicted_values = super().predict(X_test, batch_size=batch_size, n_jobs=n_jobs)
 
         # Allow to predict in the original domain -- that is, the user is not interested
         # in our encoded values
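The regression task mirrors the classification API above; the visible behavioral difference is that predict() inverse-transforms continuous predictions instead of argmax-ing class probabilities, and there is no predict_proba(). A minimal sketch under the same caveat as before (the optimize_metric keyword is an assumption, not shown in this diff):

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

from autoPyTorch.api.tabular_regression import TabularRegressionTask

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

api = TabularRegressionTask(seed=1)
api.search(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    optimize_metric="r2",  # assumption: metric name not visible in this diff
)
y_pred = api.predict(X_test)  # continuous values in the original target domain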
66 changes: 37 additions & 29 deletions autoPyTorch/constants.py
@@ -6,27 +6,33 @@
 TIMESERIES_REGRESSION = 6
 
 REGRESSION_TASKS = [TABULAR_REGRESSION, IMAGE_REGRESSION, TIMESERIES_REGRESSION]
-CLASSIFICATION_TASKS = [TABULAR_CLASSIFICATION, IMAGE_CLASSIFICATION, TIMESERIES_CLASSIFICATION]
+CLASSIFICATION_TASKS = [
+    TABULAR_CLASSIFICATION,
+    IMAGE_CLASSIFICATION,
+    TIMESERIES_CLASSIFICATION,
+]
 
 TABULAR_TASKS = [TABULAR_CLASSIFICATION, TABULAR_REGRESSION]
 IMAGE_TASKS = [IMAGE_CLASSIFICATION, IMAGE_REGRESSION]
 TASK_TYPES = REGRESSION_TASKS + CLASSIFICATION_TASKS
 
-TASK_TYPES_TO_STRING = \
-    {TABULAR_CLASSIFICATION: 'tabular_classification',
-     IMAGE_CLASSIFICATION: 'image_classification',
-     TABULAR_REGRESSION: 'tabular_regression',
-     IMAGE_REGRESSION: 'image_regression',
-     TIMESERIES_CLASSIFICATION: 'time_series_classification',
-     TIMESERIES_REGRESSION: 'time_series_regression'}
-
-STRING_TO_TASK_TYPES = \
-    {'tabular_classification': TABULAR_CLASSIFICATION,
-     'image_classification': IMAGE_CLASSIFICATION,
-     'tabular_regression': TABULAR_REGRESSION,
-     'image_regression': IMAGE_REGRESSION,
-     'time_series_classification': TIMESERIES_CLASSIFICATION,
-     'time_series_regression': TIMESERIES_REGRESSION}
+TASK_TYPES_TO_STRING = {
+    TABULAR_CLASSIFICATION: "tabular_classification",
+    IMAGE_CLASSIFICATION: "image_classification",
+    TABULAR_REGRESSION: "tabular_regression",
+    IMAGE_REGRESSION: "image_regression",
+    TIMESERIES_CLASSIFICATION: "time_series_classification",
+    TIMESERIES_REGRESSION: "time_series_regression",
+}
+
+STRING_TO_TASK_TYPES = {
+    "tabular_classification": TABULAR_CLASSIFICATION,
+    "image_classification": IMAGE_CLASSIFICATION,
+    "tabular_regression": TABULAR_REGRESSION,
+    "image_regression": IMAGE_REGRESSION,
+    "time_series_classification": TIMESERIES_CLASSIFICATION,
+    "time_series_regression": TIMESERIES_REGRESSION,
+}
 
 # Output types have been defined as in scikit-learn type_of_target
 # (https://scikit-learn.org/stable/modules/generated/sklearn.utils.multiclass.type_of_target.html)
@@ -38,19 +44,21 @@
 
 OUTPUT_TYPES = [BINARY, CONTINUOUSMULTIOUTPUT, MULTICLASS, CONTINUOUS]
 
-OUTPUT_TYPES_TO_STRING = \
-    {BINARY: 'binary',
-     CONTINUOUSMULTIOUTPUT: 'continuous-multioutput',
-     MULTICLASS: 'multiclass',
-     CONTINUOUS: 'continuous',
-     MULTICLASSMULTIOUTPUT: 'multiclass-multioutput'}
-
-STRING_TO_OUTPUT_TYPES = \
-    {'binary': BINARY,
-     'continuous-multioutput': CONTINUOUSMULTIOUTPUT,
-     'multiclass': MULTICLASS,
-     'continuous': CONTINUOUS,
-     'multiclass-multioutput': MULTICLASSMULTIOUTPUT}
+OUTPUT_TYPES_TO_STRING = {
+    BINARY: "binary",
+    CONTINUOUSMULTIOUTPUT: "continuous-multioutput",
+    MULTICLASS: "multiclass",
+    CONTINUOUS: "continuous",
+    MULTICLASSMULTIOUTPUT: "multiclass-multioutput",
+}
+
+STRING_TO_OUTPUT_TYPES = {
+    "binary": BINARY,
+    "continuous-multioutput": CONTINUOUSMULTIOUTPUT,
+    "multiclass": MULTICLASS,
+    "continuous": CONTINUOUS,
+    "multiclass-multioutput": MULTICLASSMULTIOUTPUT,
+}
 
 CLASSIFICATION_OUTPUTS = [BINARY, MULTICLASS, MULTICLASSMULTIOUTPUT]
 REGRESSION_OUTPUTS = [CONTINUOUS, CONTINUOUSMULTIOUTPUT]
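The dicts above are paired inverse lookup tables between integer task/output codes and their string names. A quick sanity-check sketch using only names defined in this hunk:

from autoPyTorch.constants import (
    STRING_TO_TASK_TYPES,
    TABULAR_CLASSIFICATION,
    TASK_TYPES_TO_STRING,
)

# Round-trip: constant -> string -> constant
name = TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION]  # "tabular_classification"
assert STRING_TO_TASK_TYPES[name] == TABULAR_CLASSIFICATION

# The two tables are mutual inverses by construction
assert all(STRING_TO_TASK_TYPES[s] == t for t, s in TASK_TYPES_TO_STRING.items())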
28 changes: 16 additions & 12 deletions autoPyTorch/data/base_feature_validator.py
@@ -41,10 +41,13 @@ class BaseFeatureValidator(BaseEstimator):
         enc_columns (typing.List[str])
             List of columns that were encoded.
     """
-    def __init__(self,
-                 logger: typing.Optional[typing.Union[PicklableClientLogger, logging.Logger
-                                                      ]] = None,
-                 ) -> None:
+
+    def __init__(
+        self,
+        logger: typing.Optional[
+            typing.Union[PicklableClientLogger, logging.Logger]
+        ] = None,
+    ) -> None:
         # Register types to detect unsupported data format changes
         self.feat_type = None  # type: typing.Optional[typing.List[str]]
         self.data_type = None  # type: typing.Optional[type]
@@ -54,9 +57,9 @@ def __init__(self,
         self.encoder = None  # type: typing.Optional[BaseEstimator]
         self.enc_columns = []  # type: typing.List[str]
 
-        self.logger: typing.Union[
-            PicklableClientLogger, logging.Logger
-        ] = logger if logger is not None else logging.getLogger(__name__)
+        self.logger: typing.Union[PicklableClientLogger, logging.Logger] = (
+            logger if logger is not None else logging.getLogger(__name__)
+        )
 
         # Required for dataset properties
         self.num_features = None  # type: typing.Optional[int]
@@ -94,11 +97,12 @@ def fit(
             self._check_data(X_test)
 
             if np.shape(X_train)[1] != np.shape(X_test)[1]:
-                raise ValueError("The feature dimensionality of the train and test "
-                                 "data does not match train({}) != test({})".format(
-                                     np.shape(X_train)[1],
-                                     np.shape(X_test)[1]
-                                 ))
+                raise ValueError(
+                    "The feature dimensionality of the train and test "
+                    "data does not match train({}) != test({})".format(
+                        np.shape(X_train)[1], np.shape(X_test)[1]
+                    )
+                )
 
         # Fit on the training data
         self._fit(X_train)
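The fit() hunk above guards against a train/test column-count mismatch before the validator is fit on the training data. A standalone sketch of that guard (hypothetical helper, extracted here only for illustration):

import numpy as np

def check_feature_dimensionality(X_train: np.ndarray, X_test: np.ndarray) -> None:
    # Same check as in BaseFeatureValidator.fit(): the validator is fit on
    # X_train only, so X_test must expose the same number of columns.
    if np.shape(X_train)[1] != np.shape(X_test)[1]:
        raise ValueError(
            "The feature dimensionality of the train and test "
            "data does not match train({}) != test({})".format(
                np.shape(X_train)[1], np.shape(X_test)[1]
            )
        )

check_feature_dimensionality(np.zeros((10, 4)), np.zeros((5, 4)))  # passes
# check_feature_dimensionality(np.zeros((10, 4)), np.zeros((5, 3)))  # raises ValueError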
