
Commit

changes made for grand prix
EdenWuyifan committed May 2, 2024
1 parent 90c71b0 commit 692ae55
Showing 7 changed files with 24 additions and 16 deletions.
2 changes: 1 addition & 1 deletion alpha_automl/automl_manager.py
@@ -52,8 +52,8 @@ def search_pipelines(self, X, y, scoring, splitting_strategy, automl_hyperparams
def _search_pipelines(self, automl_hyperparams):
search_start_time = time.time()
automl_hyperparams = self.check_automl_hyperparams(automl_hyperparams)
- metadata = profile_data(self.X)
X, y, is_sample = sample_dataset(self.X, self.y, SAMPLE_SIZE, self.task)
+ metadata = profile_data(X)
internal_splitting_strategy = make_splitter(SPLITTING_STRATEGY)
self.found_pipelines = 0
need_rescoring = True
5 changes: 3 additions & 2 deletions alpha_automl/pipeline_search/agent_environment.py
@@ -30,7 +30,7 @@ def __init__(self, config: EnvContext):
self.observation_space = Dict(
{
"board": Box(
- 0, 85, shape=(self.game.p + self.game.m,), dtype=np.uint8
+ 0, 90, shape=(self.game.p + self.game.m,), dtype=np.uint8
), # Ray env board contains pipeline and metadata
}
)
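The board's upper bound rises from 85 to 90, presumably to leave room for the IDs of the newly added primitives (an inference; the diff itself does not say why). A standalone sketch of an equivalent space definition, assuming the gymnasium spaces API used by recent RLlib versions and hypothetical board dimensions:

import numpy as np
from gymnasium.spaces import Box, Dict

p, m = 20, 10  # hypothetical pipeline and metadata lengths; the real values come from the game object
observation_space = Dict(
    {
        # Each board cell holds a primitive or metadata id, now allowed to reach 90.
        "board": Box(0, 90, shape=(p + m,), dtype=np.uint8),
    }
)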
@@ -94,7 +94,7 @@ def step(self, action):
if game_end == 1: # pipeline score over threshold
try:
if self.game.problem == "REGRESSION":
- reward = 10 + (100 / self.game.getEvaluation(self.board))
+ # reward = 10 + (100 / self.game.getEvaluation(self.board))
+ reward = 10 + (self.game.getEvaluation(self.board)) ** 3 * 100
else:
reward = 10 + (self.game.getEvaluation(self.board)) ** 2 * 100
except Exception as e:
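For regression, the terminal reward was previously inversely proportional to the evaluation score; it now mirrors the classification branch but cubes the score instead of squaring it, so better pipelines are rewarded superlinearly. A minimal, self-contained sketch of the shaping after this change, assuming getEvaluation returns a score roughly in [0, 1] (an assumption, not stated in the diff):

def terminal_reward(score: float, problem: str) -> float:
    # Hypothetical helper mirroring the reward branches above.
    if problem == "REGRESSION":
        # old: 10 + 100 / score (rewarded small scores); new: grows with the score, cubed
        return 10 + score ** 3 * 100
    return 10 + score ** 2 * 100  # classification branch, unchanged

terminal_reward(0.9, "REGRESSION")  # -> 10 + 0.729 * 100 = 82.9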
6 changes: 3 additions & 3 deletions alpha_automl/pipeline_search/agent_lab.py
@@ -18,11 +18,11 @@ def pipeline_search_rllib(game, time_bound, checkpoint_load_folder, checkpoint_s
"""
Search for pipelines using Rllib
"""
- ray.init(local_mode=True, num_cpus=8, logging_level=logging.CRITICAL, log_to_driver=False)
+ ray.init(local_mode=True, logging_level=logging.CRITICAL)
num_cpus = int(ray.available_resources()["CPU"])

# load checkpoint or create a new one
- algo = load_rllib_checkpoint(game, checkpoint_load_folder, num_rollout_workers=7)
+ algo = load_rllib_checkpoint(game, checkpoint_load_folder, num_rollout_workers=1)
logger.debug("Create Algo object done")

# train model
@@ -50,7 +50,7 @@ def load_rllib_checkpoint(game, checkpoint_load_folder, num_rollout_workers):
clip_param=0.3,
kl_coeff=0.3,
entropy_coeff=0.05,
- train_batch_size=10000,
+ train_batch_size=5000,
)
)
config.lr = 1e-5
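The CPU count is now read back from the running Ray instance instead of being hardcoded, and training uses a single rollout worker with a smaller batch. A minimal sketch of the resource query, assuming a local Ray runtime (ray.available_resources() returns a dict keyed by resource name, including "CPU"):

import logging
import ray

# Start Ray without pinning num_cpus, then ask it how many CPUs it actually sees.
ray.init(local_mode=True, logging_level=logging.CRITICAL)
num_cpus = int(ray.available_resources()["CPU"])
print(f"Ray reports {num_cpus} CPUs available")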
13 changes: 11 additions & 2 deletions alpha_automl/pipeline_synthesis/pipeline_builder.py
@@ -13,8 +13,17 @@


EXTRA_PARAMS = {
- "lightgbm.LGBMClassifier": dict(verbose=-1),
- "lightgbm.LGBMRegressor": dict(verbose=-1),
+ "lightgbm.LGBMClassifier": {'verbose': -1},
+ "lightgbm.LGBMRegressor": {'verbose': -1},
+ "catboost.CatBoostRegressor": {
+     'depth': 8,
+     'grow_policy': 'Depthwise',
+     'l2_leaf_reg': 2.7997999596449104,
+     'learning_rate': 0.031375015734637225,
+     'max_ctr_complexity': 2,
+     'one_hot_max_size': 3,
+     'logging_level': 'Silent'
+ },
}
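The new CatBoost entry pins a fixed, pre-tuned set of hyperparameters that the pipeline builder can pass to the estimator at construction time. A hedged sketch of how such an entry can be applied, assuming EXTRA_PARAMS is importable from the module shown above (the real builder's instantiation logic may differ):

from catboost import CatBoostRegressor
from alpha_automl.pipeline_synthesis.pipeline_builder import EXTRA_PARAMS

# Illustrative only: look up the extra params for a primitive and forward them
# as constructor keyword arguments.
extra = EXTRA_PARAMS.get("catboost.CatBoostRegressor", {})
regressor = CatBoostRegressor(**extra)  # depth=8, learning_rate of about 0.031, silent logging, ...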


2 changes: 1 addition & 1 deletion alpha_automl/resource/base_grammar.bnf
@@ -1,6 +1,6 @@
S -> CLASSIFICATION_TASK | REGRESSION_TASK | CLUSTERING_TASK | TIME_SERIES_FORECAST_TASK | SEMISUPERVISED_TASK
CLASSIFICATION_TASK -> IMPUTER ENCODERS FEATURE_SCALER FEATURE_SELECTOR CLASSIFIER ENSEMBLER
- REGRESSION_TASK -> IMPUTER ENCODERS FEATURE_SCALER FEATURE_SELECTOR REGRESSOR
+ REGRESSION_TASK -> IMPUTER ENCODERS FEATURE_SELECTOR FEATURE_SCALER REGRESSOR
CLUSTERING_TASK -> IMPUTER ENCODERS FEATURE_SCALER FEATURE_SELECTOR CLUSTERER
TIME_SERIES_FORECAST_TASK -> IMPUTER TIME_SERIES_FORECASTER | REGRESSION_TASK
SEMISUPERVISED_TASK -> IMPUTER ENCODERS FEATURE_SCALER SEMISUPERVISED_CLASSIFIER
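For regression pipelines the grammar now applies feature selection before feature scaling (the other tasks keep the scaler-first order). A minimal sklearn sketch of the resulting step order, using representative primitives from the hierarchy below; the concrete steps chosen during search will vary, and ENCODERS are omitted for brevity:

from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectPercentile, f_regression
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import RidgeCV

# IMPUTER -> ENCODERS -> FEATURE_SELECTOR -> FEATURE_SCALER -> REGRESSOR
regression_pipeline = Pipeline([
    ("imputer", SimpleImputer()),
    ("selector", SelectPercentile(f_regression)),  # selection now runs before scaling
    ("scaler", RobustScaler()),
    ("regressor", RidgeCV()),
])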
11 changes: 4 additions & 7 deletions alpha_automl/resource/primitives_hierarchy.json
@@ -32,14 +32,10 @@
"alpha_automl.builtin_primitives.datetime_encoder.DummyEncoder"
],
"FEATURE_SCALER": [
- "sklearn.preprocessing.MaxAbsScaler",
- "sklearn.preprocessing.RobustScaler",
- "sklearn.preprocessing.StandardScaler"
+ "sklearn.preprocessing.RobustScaler"
],
"FEATURE_SELECTOR": [
- "sklearn.feature_selection.GenericUnivariateSelect",
- "sklearn.feature_selection.SelectPercentile",
- "sklearn.feature_selection.SelectKBest"
+ "sklearn.feature_selection.SelectPercentile"
],
"IMPUTER": [
"sklearn.impute.SimpleImputer"
@@ -68,7 +64,8 @@
"sklearn.linear_model.RidgeCV",
"sklearn.linear_model.TheilSenRegressor",
"xgboost.XGBRegressor",
- "lightgbm.LGBMRegressor"
+ "lightgbm.LGBMRegressor",
+ "catboost.CatBoostRegressor"
],
"TEXT_ENCODER": [
"sklearn.feature_extraction.text.CountVectorizer",
1 change: 1 addition & 0 deletions requirements.txt
@@ -10,4 +10,5 @@ xgboost
lightgbm
numpy
typing-extensions==4.5.0
+ catboost
ray[rllib]
