Skip to content

Commit

Permalink
bugfix: smac
Browse files Browse the repository at this point in the history
  • Loading branch information
EdenWuyifan committed May 14, 2024
1 parent dc9b7ad commit d8505a2
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 43 deletions.
9 changes: 4 additions & 5 deletions alpha_automl/automl_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,18 +106,17 @@ def fit(self, X, y):
sign = get_sign_sorting(self.scorer._score_func, self.score_sorting)
sorted_pipelines = sorted(pipelines, key=lambda x: x.get_score() * sign, reverse=True)

# [SMAC] added here!!
if self.optimizing:
optimizer = SmacOptimizer(X=X, y=y, splitter=self.splitter, scorer=self.scorer, n_trials=200)

leaderboard_data = []
for index, pipeline in enumerate(sorted_pipelines, start=1):
pipeline_id = PIPELINE_PREFIX + str(index)
self.pipelines[pipeline_id] = pipeline
# [SMAC] added here!!
if self.optimizing and index <= 10:
if self.optimizing and index <= 5:
optimizer = SmacOptimizer(X=X, y=y, splitter=self.splitter, scorer=self.scorer, n_trials=50)
opt_pipeline = optimizer.optimize_pipeline(pipeline.get_pipeline())
opt_score, _, _ = score_pipeline(opt_pipeline, X, y, self.scorer, self.splitter)
alphaautoml_pipeline = score_pipeline(opt_pipeline, X, y, self.scorer, self.splitter, self.task_type)
opt_score = alphaautoml_pipeline.get_score()
logger.critical(f'[SMAC] {pipeline_id} successfully optimized: {pipeline.get_score()} => {opt_score}')
pipeline.set_pipeline(opt_pipeline)
pipeline.set_score(opt_score)
Expand Down
2 changes: 1 addition & 1 deletion alpha_automl/automl_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def _search_pipelines(self, automl_hyperparams):

found_pipelines = 0

pipeline_threshold = 20
pipeline_threshold = 5
X, y, _ = sample_dataset(self.X, self.y, SAMPLE_SIZE, self.task)
while pipelines and found_pipelines < pipeline_threshold:
pipeline = pipelines.pop()
Expand Down
56 changes: 40 additions & 16 deletions alpha_automl/hyperparameter_tuning/smac.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import logging
import copy
from os.path import dirname, join

import numpy as np
Expand All @@ -10,6 +11,7 @@
Constant,
Float,
Integer,

)
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
Expand Down Expand Up @@ -53,7 +55,8 @@ def gen_pipeline(config, pipeline):
primitive_object = create_object(step_name, {'estimator': estimator})
new_pipeline.steps.append([step_name, primitive_object])
elif step_type == 'CLASSIFICATION_MULTI_ENSEMBLER' or step_type == 'REGRESSION_MULTI_ENSEMBLER':
estimators = extract_estimators_smac(step_obj, PRIMITIVE_TYPES)
estimators = extract_estimators_smac(step_obj, config)
logger.critical(f"[YFW] =========== {config} --- {estimators} ==========")
primitive_object = create_object(step_name, {'estimators': estimators})
new_pipeline.steps.append([step_name, primitive_object])
else:
Expand All @@ -64,7 +67,7 @@ def gen_pipeline(config, pipeline):

def extract_estimators_smac(step_obj, config):
new_estimators = []
estimators = step_obj.estimators
estimators = copy.deepcopy(step_obj.estimators)
while estimators:
estimator_name, estimator_obj = estimators.pop()
estimator_name_lookup, estimator_name_counter = estimator_name.split('-')
Expand All @@ -85,31 +88,41 @@ def get_primitive_params(config, step_name):
def gen_configspace(pipeline):
# (from build_configspace) Build Configuration Space which defines all parameters and their ranges
configspace = ConfigurationSpace(seed=0)
all_params = {}
for primitive, prim_obj in pipeline.steps:
step_type = PRIMITIVE_TYPES[primitive]
try:
params = SMAC_DICT[primitive]
configspace.add_hyperparameters(cast_primitive(params))
add_params(params, all_params)
if step_type == 'COLUMN_TRANSFORMER':
for trans_name, _, _ in prim_obj.__dict__['transformers']:
trans_prim_name = trans_name.split('-')[0]
params = SMAC_DICT[trans_prim_name]
configspace.add_hyperparameters(cast_primitive(params))
# elif step_type == 'CLASSIFICATION_SINGLE_ENSEMBLER' or step_type == 'REGRESSION_SINGLE_ENSEMBLER':
# estimator_obj = prim_obj.estimator
# for smac_name, smac_params in SMAC_DICT.items():
# if estimator_obj.__class__.__name__ in smac_name:
# configspace.add_hyperparameters(cast_primitive(smac_params))
add_params(params, all_params)
elif step_type == 'CLASSIFICATION_SINGLE_ENSEMBLER' or step_type == 'REGRESSION_SINGLE_ENSEMBLER':
estimator_obj = prim_obj.estimator
for smac_name, params in SMAC_DICT.items():
if estimator_obj.__class__.__name__ == smac_name.split(".")[-1]:
add_params(params, all_params)
elif step_type == 'CLASSIFICATION_MULTI_ENSEMBLER' or step_type == 'REGRESSION_MULTI_ENSEMBLER':
for estimator_name, _ in prim_obj.estimators:
estimator_name_lookup, _ = estimator_name.split('-')
params = SMAC_DICT[estimator_name_lookup]
configspace.add_hyperparameters(cast_primitive(params))
add_params(params, all_params)
except Exception as e:
logger.critical(f'[SMAC] {str(e)}')
configspace.add_hyperparameters(cast_primitive(all_params))
return configspace


def add_params(params, all_params):
    """Merge ``params`` into ``all_params`` in place, keeping existing entries.

    Each ``param_name -> param_conf`` pair from ``params`` is copied into
    ``all_params`` only when ``param_name`` is not already a key there, so the
    first configuration registered for a given name wins and later duplicates
    are ignored.

    Args:
        params: Mapping of hyperparameter name to its configuration.
        all_params: Dict accumulating the merged hyperparameters; mutated
            in place.

    Returns:
        None. The result is observable through ``all_params``.
    """
    for param_name, param_conf in params.items():
        # setdefault inserts only when the key is absent, so an entry that was
        # registered earlier is never overwritten.
        all_params.setdefault(param_name, param_conf)


def cast_primitive(params):
new_hyperparameters = []
for name, conf in params.items():
Expand Down Expand Up @@ -144,6 +157,8 @@ def cast_hyperparameter(param_name, param_conf):
config_space = Float(param_name, (min_value, max_value), default=param_default)
elif param_type == 'Constant':
config_space = Constant(param_name, param_value)
elif param_type == 'Boolean':
config_space = Categorical(param_name, param_value, default=param_default)
else:
logger.error(f'Unknown param_type {param_type}')

Expand All @@ -168,31 +183,40 @@ def __init__(
return

def train(self, config: Configuration, seed: int = 0) -> float:
pipeline = gen_pipeline(config, self.pipeline)
self.pipeline = gen_pipeline(config, self.pipeline)
logger.critical(f"~!~!~!~!~!~!~!~!~!~!~!~!~!~{self.pipeline}~!~!~!~!~!~!~!~!~!~!~!~!~!~")
scores = cross_val_score(
pipeline,
self.pipeline,
self.X,
self.y,
cv=self.splitter,
scoring=self.scorer,
error_score='raise',
)
logger.critical(f"[WWWWWWWWWWWWWWWW] {self.pipeline} ~~~~~ {scores}")

return 1 - np.mean(scores)

def optimize_pipeline(self, pipeline):
self.pipeline = pipeline
logger.critical(f"????????????????????????????{pipeline}????????????????????????????")
if self.pipeline is None:
logger.critical('[SMAC] get_pipeline return None value!')
return
optimized_conf = self._optimize_pipeline(self.pipeline)
optimized_pipeline = gen_pipeline(optimized_conf, self.pipeline)
logger.debug(f'[SMAC] {pipeline} successfully optimized!')
return optimized_pipeline
logger.critical(f"[YFW] ----------------- {optimized_conf} --- {pipeline}")
if optimized_conf:
optimized_pipeline = gen_pipeline(optimized_conf, self.pipeline)
logger.debug(f'[SMAC] {pipeline} successfully optimized!')
return optimized_pipeline
else:
return self.pipeline


def _optimize_pipeline(self, pipeline):
scenario = Scenario(
gen_configspace(pipeline), deterministic=True, n_trials=self.n_trials
)

smac = HyperparameterOptimizationFacade(scenario, self.train)
smac = HyperparameterOptimizationFacade(scenario, self.train, overwrite=True)
return smac.optimize()
54 changes: 34 additions & 20 deletions alpha_automl/hyperparameter_tuning/smac_parameters.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,7 @@
"default": "word"
}
},
"sklearn.feature_extraction.text.CountVectorizer": {
"min_df": {
"type": "Float",
"value": [
0,
0.3
],
"default": 0.1
}
},
"sklearn.feature_extraction.text.CountVectorizer": {},
"sklearn.discriminant_analysis.LinearDiscriminantAnalysis": {},
"sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis": {},
"sklearn.ensemble.BaggingClassifier": {
Expand Down Expand Up @@ -135,14 +126,6 @@
1024
],
"default": 0.1
},
"penalty": {
"type": "Categorical",
"value": [
"l2",
"l1"
],
"default": "l2"
}
},
"sklearn.linear_model.PassiveAggressiveClassifier": {},
Expand Down Expand Up @@ -241,14 +224,25 @@
"sklearn.tree.DecisionTreeClassifier": {},
"xgboost.XGBClassifier": {},
"lightgbm.LGBMClassifier": {},
"sklearn.ensemble.AdaBoostClassifier": {},
"sklearn.ensemble.AdaBoostClassifier": {
"algorithm": {
"type": "Constant",
"value": "SAMME",
"default": "SAMME"
}
},
"sklearn.ensemble.StackingClassifier": {},
"sklearn.ensemble.VotingClassifier": {},
"sklearn.ensemble.AdaBoostRegressor": {},
"sklearn.ensemble.BaggingRegressor": {},
"sklearn.ensemble.StackingRegressor": {},
"sklearn.ensemble.VotingRegressor": {},
"catboost.CatBoostClassifier": {
"logging_level": {
"type": "Constant",
"value": "Silent",
"default": "Silent"
},
"learning_rate": {
"type": "Float",
"value": [
Expand All @@ -261,7 +255,7 @@
"type": "Integer",
"value": [
1,
16
6
],
"default": 6
},
Expand Down Expand Up @@ -290,5 +284,25 @@
],
"default": "None"
}
},
"sklearn.impute.SimpleImputer": {
"strategy": {
"type": "Constant",
"value": "most_frequent",
"default": "most_frequent"
},
"keep_empty_features": {
"type": "Boolean",
"value": [true],
"default": true
}
},
"sklearn.preprocessing.OneHotEncoder": {
"handle_unknown": {
"type": "Constant",
"value": "ignore",
"default": "ignore"
}
}

}
2 changes: 1 addition & 1 deletion alpha_automl/pipeline_search/agent_lab.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def pipeline_search_rllib(game, time_bound, checkpoint_load_folder, checkpoint_s
num_cpus = int(ray.available_resources()["CPU"])

# load checkpoint or create a new one
algo = load_rllib_checkpoint(game, checkpoint_load_folder, num_rollout_workers=1)
algo = load_rllib_checkpoint(game, checkpoint_load_folder, num_rollout_workers=7)
logger.debug("Create Algo object done")

# train model
Expand Down

0 comments on commit d8505a2

Please sign in to comment.