
Commit

changes made for grand prix
EdenWuyifan committed May 2, 2024
1 parent 90c71b0 commit 692ae55
Showing 7 changed files with 24 additions and 16 deletions.
2 changes: 1 addition & 1 deletion alpha_automl/automl_manager.py
@@ -52,8 +52,8 @@ def search_pipelines(self, X, y, scoring, splitting_strategy, automl_hyperparams
def _search_pipelines(self, automl_hyperparams):
search_start_time = time.time()
automl_hyperparams = self.check_automl_hyperparams(automl_hyperparams)
- metadata = profile_data(self.X)
X, y, is_sample = sample_dataset(self.X, self.y, SAMPLE_SIZE, self.task)
+ metadata = profile_data(X)
internal_splitting_strategy = make_splitter(SPLITTING_STRATEGY)
self.found_pipelines = 0
need_rescoring = True
5 changes: 3 additions & 2 deletions alpha_automl/pipeline_search/agent_environment.py
@@ -30,7 +30,7 @@ def __init__(self, config: EnvContext):
self.observation_space = Dict(
{
"board": Box(
- 0, 85, shape=(self.game.p + self.game.m,), dtype=np.uint8
+ 0, 90, shape=(self.game.p + self.game.m,), dtype=np.uint8
), # Ray env board contains pipeline and metadata
}
)
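The board's upper bound rises from 85 to 90, presumably to leave room for the IDs of the newly added primitives (an inference; the diff itself does not say why). A standalone sketch of an equivalent space definition, assuming the gymnasium spaces API used by recent RLlib versions and hypothetical board dimensions:

import numpy as np
from gymnasium.spaces import Box, Dict

p, m = 20, 10  # hypothetical pipeline and metadata lengths; the real values come from the game object
observation_space = Dict(
    {
        # Each board cell holds a primitive or metadata id, now allowed to reach 90.
        "board": Box(0, 90, shape=(p + m,), dtype=np.uint8),
    }
)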
@@ -94,7 +94,7 @@ def step(self, action):
if game_end == 1: # pipeline score over threshold
try:
if self.game.problem == "REGRESSION":
- reward = 10 + (100 / self.game.getEvaluation(self.board))
+ # reward = 10 + (100 / self.game.getEvaluation(self.board))
+ reward = 10 + (self.game.getEvaluation(self.board)) ** 3 * 100
else:
reward = 10 + (self.game.getEvaluation(self.board)) ** 2 * 100
except Exception as e:
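For regression, the terminal reward was previously inversely proportional to the evaluation score; it now mirrors the classification branch but cubes the score instead of squaring it, so better pipelines are rewarded superlinearly. A minimal, self-contained sketch of the shaping after this change, assuming getEvaluation returns a score roughly in [0, 1] (an assumption, not stated in the diff):

def terminal_reward(score: float, problem: str) -> float:
    # Hypothetical helper mirroring the reward branches above.
    if problem == "REGRESSION":
        # old: 10 + 100 / score (rewarded small scores); new: grows with the score, cubed
        return 10 + score ** 3 * 100
    return 10 + score ** 2 * 100  # classification branch, unchanged

terminal_reward(0.9, "REGRESSION")  # -> 10 + 0.729 * 100 = 82.9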
6 changes: 3 additions & 3 deletions alpha_automl/pipeline_search/agent_lab.py
@@ -18,11 +18,11 @@ def pipeline_search_rllib(game, time_bound, checkpoint_load_folder, checkpoint_s
"""
Search for pipelines using Rllib
"""
- ray.init(local_mode=True, num_cpus=8, logging_level=logging.CRITICAL, log_to_driver=False)
+ ray.init(local_mode=True, logging_level=logging.CRITICAL)
num_cpus = int(ray.available_resources()["CPU"])

# load checkpoint or create a new one
- algo = load_rllib_checkpoint(game, checkpoint_load_folder, num_rollout_workers=7)
+ algo = load_rllib_checkpoint(game, checkpoint_load_folder, num_rollout_workers=1)
logger.debug("Create Algo object done")

# train model
@@ -50,7 +50,7 @@ def load_rllib_checkpoint(game, checkpoint_load_folder, num_rollout_workers):
clip_param=0.3,
kl_coeff=0.3,
entropy_coeff=0.05,
- train_batch_size=10000,
+ train_batch_size=5000,
)
)
config.lr = 1e-5
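The CPU count is now read back from the running Ray instance instead of being hardcoded, and training uses a single rollout worker with a smaller batch. A minimal sketch of the resource query, assuming a local Ray runtime (ray.available_resources() returns a dict keyed by resource name, including "CPU"):

import logging
import ray

# Start Ray without pinning num_cpus, then ask it how many CPUs it actually sees.
ray.init(local_mode=True, logging_level=logging.CRITICAL)
num_cpus = int(ray.available_resources()["CPU"])
print(f"Ray reports {num_cpus} CPUs available")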
13 changes: 11 additions & 2 deletions alpha_automl/pipeline_synthesis/pipeline_builder.py
@@ -13,8 +13,17 @@


EXTRA_PARAMS = {
- "lightgbm.LGBMClassifier": dict(verbose=-1),
- "lightgbm.LGBMRegressor": dict(verbose=-1),
+ "lightgbm.LGBMClassifier": {'verbose': -1},
+ "lightgbm.LGBMRegressor": {'verbose': -1},
+ "catboost.CatBoostRegressor": {
+     'depth': 8,
+     'grow_policy': 'Depthwise',
+     'l2_leaf_reg': 2.7997999596449104,
+     'learning_rate': 0.031375015734637225,
+     'max_ctr_complexity': 2,
+     'one_hot_max_size': 3,
+     'logging_level': 'Silent'
+ },
}
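The new CatBoost entry pins a fixed, pre-tuned set of hyperparameters that the pipeline builder can pass to the estimator at construction time. A hedged sketch of how such an entry can be applied, assuming EXTRA_PARAMS is importable from the module shown above (the real builder's instantiation logic may differ):

from catboost import CatBoostRegressor
from alpha_automl.pipeline_synthesis.pipeline_builder import EXTRA_PARAMS

# Illustrative only: look up the extra params for a primitive and forward them
# as constructor keyword arguments.
extra = EXTRA_PARAMS.get("catboost.CatBoostRegressor", {})
regressor = CatBoostRegressor(**extra)  # depth=8, learning_rate of about 0.031, silent logging, ...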


2 changes: 1 addition & 1 deletion alpha_automl/resource/base_grammar.bnf
@@ -1,6 +1,6 @@
S -> CLASSIFICATION_TASK | REGRESSION_TASK | CLUSTERING_TASK | TIME_SERIES_FORECAST_TASK | SEMISUPERVISED_TASK
CLASSIFICATION_TASK -> IMPUTER ENCODERS FEATURE_SCALER FEATURE_SELECTOR CLASSIFIER ENSEMBLER
- REGRESSION_TASK -> IMPUTER ENCODERS FEATURE_SCALER FEATURE_SELECTOR REGRESSOR
+ REGRESSION_TASK -> IMPUTER ENCODERS FEATURE_SELECTOR FEATURE_SCALER REGRESSOR
CLUSTERING_TASK -> IMPUTER ENCODERS FEATURE_SCALER FEATURE_SELECTOR CLUSTERER
TIME_SERIES_FORECAST_TASK -> IMPUTER TIME_SERIES_FORECASTER | REGRESSION_TASK
SEMISUPERVISED_TASK -> IMPUTER ENCODERS FEATURE_SCALER SEMISUPERVISED_CLASSIFIER
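For regression pipelines the grammar now applies feature selection before feature scaling (the other tasks keep the scaler-first order). A minimal sklearn sketch of the resulting step order, using representative primitives from the hierarchy below; the concrete steps chosen during search will vary, and ENCODERS are omitted for brevity:

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectPercentile, f_regression
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import RidgeCV

# IMPUTER -> ENCODERS -> FEATURE_SELECTOR -> FEATURE_SCALER -> REGRESSOR
regression_pipeline = Pipeline([
    ("imputer", SimpleImputer()),
    ("selector", SelectPercentile(f_regression)),  # selection now runs before scaling
    ("scaler", RobustScaler()),
    ("regressor", RidgeCV()),
])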
11 changes: 4 additions & 7 deletions alpha_automl/resource/primitives_hierarchy.json
@@ -32,14 +32,10 @@
"alpha_automl.builtin_primitives.datetime_encoder.DummyEncoder"
],
"FEATURE_SCALER": [
- "sklearn.preprocessing.MaxAbsScaler",
- "sklearn.preprocessing.RobustScaler",
- "sklearn.preprocessing.StandardScaler"
+ "sklearn.preprocessing.RobustScaler"
],
"FEATURE_SELECTOR": [
- "sklearn.feature_selection.GenericUnivariateSelect",
- "sklearn.feature_selection.SelectPercentile",
- "sklearn.feature_selection.SelectKBest"
+ "sklearn.feature_selection.SelectPercentile"
],
"IMPUTER": [
"sklearn.impute.SimpleImputer"
@@ -68,7 +64,8 @@
"sklearn.linear_model.RidgeCV",
"sklearn.linear_model.TheilSenRegressor",
"xgboost.XGBRegressor",
- "lightgbm.LGBMRegressor"
+ "lightgbm.LGBMRegressor",
+ "catboost.CatBoostRegressor"
],
"TEXT_ENCODER": [
"sklearn.feature_extraction.text.CountVectorizer",
1 change: 1 addition & 0 deletions requirements.txt
@@ -10,4 +10,5 @@ xgboost
lightgbm
numpy
typing-extensions==4.5.0
+ catboost
ray[rllib]
