Skip to content

Commit

Permalink
Revert "Revert "Revert "Add ensemble regressor primitives"""
Browse files Browse the repository at this point in the history
This reverts commit 0a2eb3f.
  • Loading branch information
EdenWuyifan committed May 1, 2024
1 parent 652f49d commit f92a206
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 48 deletions.
16 changes: 8 additions & 8 deletions alpha_automl/pipeline_synthesis/pipeline_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ def change_default_hyperparams(primitive_object):

def extract_estimators(pipeline_primitives, all_primitives):
    """Pop trailing classifier primitives off a pipeline and name them for an ensembler.

    Consumes primitives from the end of `pipeline_primitives` (a list of
    (name, object) tuples, mutated in place) while their type is 'CLASSIFIER',
    and returns them as a list of ('<name>-<index>', object) tuples suitable
    for e.g. sklearn Stacking/Voting `estimators` arguments. The index suffix
    keeps names unique when the same primitive appears more than once.

    NOTE(review): the first non-CLASSIFIER primitive popped is discarded, not
    pushed back — callers appear to rely on this, so it is preserved here.
    Also assumes at least one non-CLASSIFIER primitive precedes the
    classifiers; otherwise `pop()` on an empty list raises IndexError —
    TODO confirm against grammar guarantees.
    """
    estimators = []
    classifier_name, classifier_obj = pipeline_primitives.pop()
    current_primitive_type = all_primitives[classifier_name]['type']
    counter = 0

    while current_primitive_type == 'CLASSIFIER':
        estimators.append((f'{classifier_name}-{counter}', classifier_obj))
        classifier_name, classifier_obj = pipeline_primitives.pop()
        current_primitive_type = all_primitives[classifier_name]['type']
        counter += 1

    return estimators
Expand Down Expand Up @@ -101,10 +101,10 @@ def make_primitive_objects(self, primitives):
if primitive_type == 'SEMISUPERVISED_SELFTRAINER':
classifier_obj = pipeline_primitives.pop()[1]
primitive_object = create_object(primitive_name, {'base_estimator': classifier_obj})
elif primitive_type == 'CLASSIFICATION_SINGLE_ENSEMBLER' or primitive_type == 'REGRESSION_SINGLE_ENSEMBLER':
elif primitive_type == 'SINGLE_ENSEMBLER':
classifier_obj = pipeline_primitives.pop()[1]
primitive_object = create_object(primitive_name, {'estimator': classifier_obj})
elif primitive_type == 'CLASSIFICATION_MULTI_ENSEMBLER' or primitive_type == 'REGRESSION_MULTI_ENSEMBLER':
elif primitive_type == 'MULTI_ENSEMBLER':
estimators = extract_estimators(pipeline_primitives, self.all_primitives)
primitive_object = create_object(primitive_name, {'estimators': estimators})
elif self.all_primitives[primitive_name]['origin'] == NATIVE_PRIMITIVE: # It's an installed primitive
Expand Down
43 changes: 21 additions & 22 deletions alpha_automl/pipeline_synthesis/setup_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,7 @@
"SEMISUPERVISED": 5,
"NA": 6,
},
"DATA_TYPES": {
"TABULAR": 1,
"TEXT": 2,
"IMAGE": 3,
"VIDEO": 4,
"MULTIMODAL": 5
},
"DATA_TYPES": {"TABULAR": 1, "TEXT": 2, "IMAGE": 3, "VIDEO": 4, "MULTIMODAL": 5},
"PIPELINE_SIZE": 10,
}

Expand All @@ -45,10 +39,12 @@ def search_pipelines(X, y, scoring, splitting_strategy, task_name, time_bound, a
task_start = datetime.now()

def evaluate_pipeline(primitives):
has_repeated_estimators = check_repeated_classifiers(primitives, all_primitives, ensemble_pipelines_hash)
has_repeated_classifiers = check_repeated_classifiers(
primitives, all_primitives, ensemble_pipelines_hash
)

if has_repeated_estimators:
logger.info("Repeated estimators detected in ensembles, ignoring pipeline")
if has_repeated_classifiers:
logger.info("Repeated classifiers detected in ensembles, ignoring pipeline")
return None

pipeline = builder.make_pipeline(primitives)
Expand Down Expand Up @@ -117,34 +113,37 @@ def update_config(task_name, metric, grammar, metadata):
return config


def check_repeated_classifiers(pipeline_primitives, all_primitives, ensemble_pipelines_hash):
# We should rename this function to check_repeated_estimators, but loading checkpoints raise errors
# Verify if the estimators are repeated in the ensembles (regardless of the order)
estimators = []
def check_repeated_classifiers(
pipeline_primitives, all_primitives, ensemble_pipelines_hash
):
# Verify if the classifiers are repeated in the ensembles (regardless of the order)
classifiers = []
pipeline_hash = ""
has_ensemble_primitive = False
has_repeated_estimators = False
has_repeated_classifiers = False

for primitive_name in pipeline_primitives:
primitive_type = all_primitives[primitive_name]["type"]

if primitive_type == "CLASSIFIER" or primitive_type == "REGRESSOR":
estimators.append(primitive_name)
elif primitive_type == "CLASSIFICATION_MULTI_ENSEMBLER" or primitive_type == "REGRESSION_MULTI_ENSEMBLER":
if primitive_type == "CLASSIFIER":
classifiers.append(primitive_name)
elif primitive_type == "MULTI_ENSEMBLER":
has_ensemble_primitive = True
pipeline_hash += primitive_name
if len(estimators) != len(set(estimators)): # All estimators should be different
has_repeated_estimators = True
if len(classifiers) != len(
set(classifiers)
): # All classifiers should be different
has_repeated_classifiers = True
else:
pipeline_hash += primitive_name

if not has_ensemble_primitive:
return False

if has_repeated_estimators:
if has_repeated_classifiers:
return True

pipeline_hash += "".join(sorted(estimators))
pipeline_hash += "".join(sorted(classifiers))

if pipeline_hash in ensemble_pipelines_hash:
return True
Expand Down
13 changes: 5 additions & 8 deletions alpha_automl/resource/base_grammar.bnf
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
S -> CLASSIFICATION_TASK | REGRESSION_TASK | CLUSTERING_TASK | TIME_SERIES_FORECAST_TASK | SEMISUPERVISED_TASK
CLASSIFICATION_TASK -> IMPUTER ENCODERS FEATURE_SCALER FEATURE_SELECTOR CLASSIFIER CLASSIFICATION_ENSEMBLER
REGRESSION_TASK -> IMPUTER ENCODERS FEATURE_SELECTOR FEATURE_SCALER REGRESSOR REGRESSION_ENSEMBLER
CLASSIFICATION_TASK -> IMPUTER ENCODERS FEATURE_SCALER FEATURE_SELECTOR CLASSIFIER ENSEMBLER
REGRESSION_TASK -> IMPUTER ENCODERS FEATURE_SELECTOR FEATURE_SCALER REGRESSOR
CLUSTERING_TASK -> IMPUTER ENCODERS FEATURE_SCALER FEATURE_SELECTOR CLUSTERER
TIME_SERIES_FORECAST_TASK -> IMPUTER TIME_SERIES_FORECASTER | REGRESSION_TASK
SEMISUPERVISED_TASK -> IMPUTER ENCODERS FEATURE_SCALER SEMISUPERVISED_CLASSIFIER
NA_TASK -> CLASSIFICATION_TASK | REGRESSION_TASK | SEMISUPERVISED_TASK
ENCODERS -> TEXT_ENCODER DATETIME_ENCODER CATEGORICAL_ENCODER IMAGE_ENCODER
CLASSIFICATION_ENSEMBLER -> CLASSIFICATION_SINGLE_ENSEMBLER | CLASSIFIER CLASSIFIER CLASSIFICATION_MULTI_ENSEMBLER | E
REGRESSION_ENSEMBLER -> REGRESSION_SINGLE_ENSEMBLER | REGRESSOR REGRESSOR REGRESSION_MULTI_ENSEMBLER | E
ENSEMBLER -> SINGLE_ENSEMBLER | CLASSIFIER CLASSIFIER MULTI_ENSEMBLER | E
SEMISUPERVISED_CLASSIFIER -> CLASSIFIER SEMISUPERVISED_SELFTRAINER | SEMISUPERVISED_LABELPROPAGATOR
IMPUTER -> 'primitive_terminal'
FEATURE_SCALER -> 'primitive_terminal' | 'E'
Expand All @@ -16,10 +15,8 @@ TEXT_ENCODER -> 'primitive_terminal'
CATEGORICAL_ENCODER -> 'primitive_terminal'
DATETIME_ENCODER -> 'primitive_terminal'
IMAGE_ENCODER -> 'primitive_terminal'
CLASSIFICATION_SINGLE_ENSEMBLER -> 'primitive_terminal'
CLASSIFICATION_MULTI_ENSEMBLER -> 'primitive_terminal'
REGRESSION_SINGLE_ENSEMBLER -> 'primitive_terminal'
REGRESSION_MULTI_ENSEMBLER -> 'primitive_terminal'
SINGLE_ENSEMBLER -> 'primitive_terminal'
MULTI_ENSEMBLER -> 'primitive_terminal'
CLASSIFIER -> 'primitive_terminal'
REGRESSOR -> 'primitive_terminal'
CLUSTERER -> 'primitive_terminal'
Expand Down
12 changes: 2 additions & 10 deletions alpha_automl/resource/primitives_hierarchy.json
Original file line number Diff line number Diff line change
Expand Up @@ -92,20 +92,12 @@
"alpha_automl.builtin_primitives.semisupervised_classifier.SkLabelSpreading",
"alpha_automl.builtin_primitives.semisupervised_classifier.SkLabelPropagation"
],
"CLASSIFICATION_SINGLE_ENSEMBLER": [
"SINGLE_ENSEMBLER": [
"sklearn.ensemble.AdaBoostClassifier",
"sklearn.ensemble.BaggingClassifier"
],
"CLASSIFICATION_MULTI_ENSEMBLER": [
"MULTI_ENSEMBLER": [
"sklearn.ensemble.StackingClassifier",
"sklearn.ensemble.VotingClassifier"
],
"REGRESSION_SINGLE_ENSEMBLER": [
"sklearn.ensemble.AdaBoostRegressor",
"sklearn.ensemble.BaggingRegressor"
],
"REGRESSION_MULTI_ENSEMBLER": [
"sklearn.ensemble.StackingRegressor",
"sklearn.ensemble.VotingRegressor"
]
}

0 comments on commit f92a206

Please sign in to comment.