Reduce run time of the test #205

Merged: 14 commits, May 17, 2021
@@ -23,15 +23,14 @@ class KernelPCA(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, n_components: int = 10,
                  kernel: str = 'rbf', degree: int = 3,
                  gamma: float = 0.01, coef0: float = 0.0,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None
+                 random_state: Optional[np.random.RandomState] = None
                  ) -> None:
         self.n_components = n_components
         self.kernel = kernel
         self.degree = degree
         self.gamma = gamma
         self.coef0 = coef0
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

         self.add_fit_requirements([
             FitRequirement('issparse', (bool,), user_defined=True, dataset_property=True)])
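With the annotation narrowed from `Optional[Union[int, np.random.RandomState]]` to `Optional[np.random.RandomState]`, callers now pass a `RandomState` object rather than a bare int seed. A minimal sketch of a call site under that contract; the import path is an assumption inferred from the class name, since the diff does not show the module:

```python
import numpy as np

# Assumed module path (not shown in the diff); adjust to the repo layout.
from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.feature_preprocessing.KernelPCA import KernelPCA

# A RandomState object satisfies the narrowed annotation; leaving it as
# None falls back to the base class default, check_random_state(1).
preprocessor = KernelPCA(n_components=5, kernel='rbf',
                         random_state=np.random.RandomState(42))
```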
@@ -23,15 +23,14 @@ class Nystroem(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, n_components: int = 10,
                  kernel: str = 'rbf', degree: int = 3,
                  gamma: float = 0.01, coef0: float = 0.0,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None
+                 random_state: Optional[np.random.RandomState] = None
                  ) -> None:
         self.n_components = n_components
         self.kernel = kernel
         self.degree = degree
         self.gamma = gamma
         self.coef0 = coef0
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
@@ -19,13 +19,12 @@
 class PolynomialFeatures(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, degree: int = 2, interaction_only: bool = False,
                  include_bias: bool = False,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+                 random_state: Optional[np.random.RandomState] = None):
         self.degree = degree
         self.interaction_only = interaction_only
         self.include_bias = include_bias

-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
         self.preprocessor['numerical'] = sklearn.preprocessing.PolynomialFeatures(
@@ -17,11 +17,10 @@

 class PowerTransformer(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, standardize: bool = True,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+                 random_state: Optional[np.random.RandomState] = None):
         self.standardize = standardize

-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
         self.preprocessor['numerical'] = sklearn.preprocessing.PowerTransformer(method="yeo-johnson",
@@ -20,12 +20,11 @@
 class RandomKitchenSinks(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, n_components: int = 100,
                  gamma: float = 1.0,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None
+                 random_state: Optional[np.random.RandomState] = None
                  ) -> None:
         self.n_components = n_components
         self.gamma = gamma
-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
@@ -18,11 +18,10 @@

 class TruncatedSVD(autoPyTorchFeaturePreprocessingComponent):
     def __init__(self, target_dim: int = 128,
-                 random_state: Optional[Union[int, np.random.RandomState]] = None):
+                 random_state: Optional[np.random.RandomState] = None):
         self.target_dim = target_dim

-        self.random_state = random_state
-        super().__init__()
+        super().__init__(random_state=random_state)

     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
@@ -1,4 +1,8 @@
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
+
+import numpy as np
+
+from sklearn.utils import check_random_state

 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
     autoPyTorchTabularPreprocessingComponent
@@ -8,7 +12,13 @@
 class autoPyTorchFeaturePreprocessingComponent(autoPyTorchTabularPreprocessingComponent):
     _required_properties: List[str] = ['handles_sparse']

-    def __init__(self) -> None:
+    def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None:
+        if random_state is None:
+            # Trainer components need a random state for
+            # sampling -- for example in MixUp training
+            self.random_state = check_random_state(1)
+        else:
+            self.random_state = random_state
         super().__init__()

     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
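The base class now routes the default through `sklearn.utils.check_random_state`, so every feature preprocessor holds a ready-to-use `RandomState` even when the caller passes nothing. A small standalone sketch of that helper's behavior (plain scikit-learn, no autoPyTorch needed):

```python
import numpy as np
from sklearn.utils import check_random_state

# An int seed is wrapped into a fresh RandomState instance ...
rs_a = check_random_state(1)
rs_b = check_random_state(1)
assert isinstance(rs_a, np.random.RandomState)
# ... and identical seeds produce identical streams, which is what makes
# the check_random_state(1) fallback reproducible across test runs.
assert rs_a.randint(100) == rs_b.randint(100)

# An existing RandomState is returned unchanged, matching the `else`
# branch of the new __init__.
rs = np.random.RandomState(42)
assert check_random_state(rs) is rs
```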
test/conftest.py: 36 changes (22 additions, 14 deletions)

@@ -25,6 +25,9 @@
 from autoPyTorch.utils.pipeline import get_dataset_requirements


+N_SAMPLES = 200
+
+
 @pytest.fixture(scope="session")
 def callattr_ahead_of_alltests(request):
     """
@@ -191,7 +194,7 @@ def session_run_at_end():
 def get_tabular_data(task):
     if task == "classification_numerical_only":
         X, y = make_classification(
-            n_samples=200,
+            n_samples=N_SAMPLES,
             n_features=4,
             n_informative=3,
             n_redundant=1,
@@ -207,18 +210,18 @@ def get_tabular_data(task):
         X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
         categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category']
         X = X[categorical_columns]
-        X = X.iloc[0:200]
-        y = y.iloc[0:200]
+        X = X.iloc[0:N_SAMPLES]
+        y = y.iloc[0:N_SAMPLES]
         validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy())

     elif task == "classification_numerical_and_categorical":
         X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
-        X = X.iloc[0:200]
-        y = y.iloc[0:200]
+        X = X.iloc[0:N_SAMPLES]
+        y = y.iloc[0:N_SAMPLES]
         validator = TabularInputValidator(is_classification=True).fit(X.copy(), y.copy())

     elif task == "regression_numerical_only":
-        X, y = make_regression(n_samples=200,
+        X, y = make_regression(n_samples=N_SAMPLES,
                                n_features=4,
                                n_informative=3,
                                n_targets=1,
@@ -240,8 +243,8 @@ def get_tabular_data(task):
             else:
                 X[column] = X[column].fillna(0)

-        X = X.iloc[0:200]
-        y = y.iloc[0:200]
+        X = X.iloc[0:N_SAMPLES]
+        y = y.iloc[0:N_SAMPLES]
         y = (y - y.mean()) / y.std()
         validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy())
@@ -256,8 +259,8 @@ def get_tabular_data(task):
             else:
                 X[column] = X[column].fillna(0)

-        X = X.iloc[0:200]
-        y = y.iloc[0:200]
+        X = X.iloc[0:N_SAMPLES]
+        y = y.iloc[0:N_SAMPLES]
         y = (y - y.mean()) / y.std()
         validator = TabularInputValidator(is_classification=False).fit(X.copy(), y.copy())
     elif task == 'iris':
@@ -288,7 +291,7 @@ def get_fit_dictionary(X, y, validator, backend):
         'num_run': np.random.randint(50),
         'device': 'cpu',
         'budget_type': 'epochs',
-        'epochs': 100,
+        'epochs': 5,
         'torch_num_threads': 1,
         'early_stopping': 10,
         'working_dir': '/tmp',
@@ -326,7 +329,7 @@ def dataset(request):
 @pytest.fixture
 def dataset_traditional_classifier_num_only():
     X, y = make_classification(
-        n_samples=200,
+        n_samples=N_SAMPLES,
         n_features=4,
         n_informative=3,
         n_redundant=1,
@@ -344,15 +347,15 @@ def dataset_traditional_classifier_categorical_only():
     X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
     categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category']
     X = X[categorical_columns]
-    X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int)
+    X, y = X[:N_SAMPLES].to_numpy(), y[:N_SAMPLES].to_numpy().astype(np.int)
     return X, y


 @pytest.fixture
 def dataset_traditional_classifier_num_categorical():
     X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
     y = y.astype(np.int)
-    X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int)
+    X, y = X[:N_SAMPLES].to_numpy(), y[:N_SAMPLES].to_numpy().astype(np.int)
     return X, y
@@ -456,3 +459,8 @@ def loss_mse():
 @pytest.fixture
 def loss_details(request):
     return request.getfixturevalue(request.param)
+
+
+@pytest.fixture
+def n_samples():
+    return N_SAMPLES
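Exposing the constant through a fixture lets tests reference the shared sample count instead of hard-coding 200. A hypothetical test using it; `test_sample_count` is illustrative and not part of this PR:

```python
from sklearn.datasets import make_classification

def test_sample_count(n_samples):
    # pytest injects the n_samples fixture from conftest.py, so a future
    # change to N_SAMPLES propagates here automatically.
    X, y = make_classification(n_samples=n_samples, n_features=4,
                               n_informative=3, n_redundant=1, random_state=0)
    assert X.shape[0] == n_samples
```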