Skip to content

Commit

Permalink
tmp
Browse files (browse the repository at this point in the history)
  • Loading branch information
EdenWuyifan committed May 14, 2024
1 parent 845eac7 commit ef6ea6a
Show file tree
Hide file tree
Showing 3 changed files with 469 additions and 12 deletions.
17 changes: 11 additions & 6 deletions alpha_automl/automl_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from alpha_automl.automl_manager import AutoMLManager
from alpha_automl.scorer import make_scorer, make_splitter, make_str_metric, get_sign_sorting, score_pipeline
from alpha_automl.utils import make_d3m_pipelines, hide_logs, get_start_method, check_input_for_multiprocessing, \
setup_output_folder, SemiSupervisedSplitter, SemiSupervisedLabelEncoder, write_pipeline_code_as_pyfile
setup_output_folder, SemiSupervisedSplitter, SemiSupervisedLabelEncoder, write_pipeline_code_as_pyfile, sample_dataset
from alpha_automl.visualization import plot_comparison_pipelines
from alpha_automl.pipeline_serializer import PipelineSerializer
from alpha_automl.hyperparameter_tuning.smac import SmacOptimizer
Expand Down Expand Up @@ -108,18 +108,23 @@ def fit(self, X, y):


leaderboard_data = []
if self.optimizing:
X_sample, y_sample, _ = sample_dataset(X, y, 2000, self.task_type)

for index, pipeline in enumerate(sorted_pipelines, start=1):
pipeline_id = PIPELINE_PREFIX + str(index)
self.pipelines[pipeline_id] = pipeline
# [SMAC] added here!!
if self.optimizing and index <= 5:
optimizer = SmacOptimizer(X=X, y=y, splitter=self.splitter, scorer=self.scorer, n_trials=50)
optimizer = SmacOptimizer(X=X_sample, y=y_sample, splitter=self.splitter, scorer=self.scorer, n_trials=100)
opt_pipeline = optimizer.optimize_pipeline(pipeline.get_pipeline())
alphaautoml_pipeline = score_pipeline(opt_pipeline, X, y, self.scorer, self.splitter, self.task_type)
alphaautoml_pipeline = score_pipeline(opt_pipeline, X_sample, y_sample, self.scorer, self.splitter, self.task_type)

opt_score = alphaautoml_pipeline.get_score()
logger.critical(f'[SMAC] {pipeline_id} successfully optimized: {pipeline.get_score()} => {opt_score}')
pipeline.set_pipeline(opt_pipeline)
pipeline.set_score(opt_score)
if opt_score > pipeline.get_score():
logger.critical(f'[SMAC] {pipeline_id} successfully optimized: {pipeline.get_score()} => {opt_score}')
pipeline.set_pipeline(opt_pipeline)
pipeline.set_score(opt_score)
leaderboard_data.append([index, pipeline.get_summary(), pipeline.get_score()])

self.leaderboard = pd.DataFrame(leaderboard_data, columns=['ranking', 'pipeline', self.metric])
Expand Down
5 changes: 4 additions & 1 deletion alpha_automl/hyperparameter_tuning/smac.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ def gen_pipeline(config, pipeline):
new_pipeline.steps.append([step_name, create_object(step_name, step_obj.__dict__)])
elif step_type == 'CLASSIFICATION_SINGLE_ENSEMBLER' or step_type == 'REGRESSION_SINGLE_ENSEMBLER':
estimator = step_obj.estimator
estimator_name = estimator.__class__.__name__
for smac_name in SMAC_DICT.keys():
if estimator_name == smac_name.split(".")[-1]:
estimator = create_object(smac_name, get_primitive_params(config, smac_name))
primitive_object = create_object(step_name, {'estimator': estimator})
new_pipeline.steps.append([step_name, primitive_object])
elif step_type == 'CLASSIFICATION_MULTI_ENSEMBLER' or step_type == 'REGRESSION_MULTI_ENSEMBLER':
Expand Down Expand Up @@ -184,7 +188,6 @@ def __init__(

def train(self, config: Configuration, seed: int = 0) -> float:
self.pipeline = gen_pipeline(config, self.pipeline)
logger.critical(f"~!~!~!~!~!~!~!~!~!~!~!~!~!~{self.pipeline}~!~!~!~!~!~!~!~!~!~!~!~!~!~")
scores = cross_val_score(
self.pipeline,
self.X,
Expand Down

0 comments on commit ef6ea6a

Please sign in to comment.