raising some exceptions due to user mistake

arthurpaulino · Apr 12, 2019 · 52d2524 · 52d2524
1 parent 50b5652
commit 52d2524
Show file tree

Hide file tree

Showing 5 changed files with 46 additions and 21 deletions.
diff --git a/examples/README.md b/examples/README.md
@@ -26,8 +26,9 @@ without null values. The target column is called `target_class` and contains onl
 
    MiraiML can work with any model class that implements `fit(X, y)` and
    `predict(X)` in case of regression problems or `predict_proba(X)` in case of
-   classification problems, as long as these functions' parameters and returned
-   objects are built in the same pattern as those from [scikit-learn]([sklearn]).
+   classification problems, as long as these functions' returned objects are built
+   in the same pattern as those from [scikit-learn][sklearn]. `X` and `y` will
+   be `numpy.ndarray` objects.
 
    I chose this example because, in my experience, the best way to fit data with
    LightGBM is by splitting the data in **n** folds and using the smaller parts

diff --git a/examples/parameters_rules.py b/examples/parameters_rules.py
@@ -16,17 +16,16 @@
 # implements such parameters rules. The function receives a dictionary of parameters
 # and changes what is needed.
 
+# We just need to make sure that those parameters exist in the set of parameters
+# tested by the engine; otherwise the function will try to access a key that is
+# missing from the dictionary and the engine will stop.
+
 def logistic_regression_parameters_rules(parameters):
     if parameters['solver'] in ['newton-cg', 'sag', 'lbfgs']:
         parameters['penalty'] = 'l2'
 
-# We just need to make sure that those parameters will exist in the set of
-# parameters tested by the engine, otherwise it will scream some error messages
-# to let us know that we told it to access invalid keys on the dictionary.
-
-# Now we create the list of search spaces containing only one for some Logistic
-# Regression parameters.
-
+# Now we create the list of search spaces containing only one element to keep it
+# simple.
 search_spaces = [
     SearchSpace(
         model_class = LogisticRegression,

diff --git a/miraiml/__init__.py b/miraiml/__init__.py
@@ -15,4 +15,4 @@
 
 __all__ = ['SearchSpace', 'Config', 'Engine']
 
-__version__ = '0.0.1'
+__version__ = '0.0.2'
diff --git a/miraiml/core.py b/miraiml/core.py
@@ -183,6 +183,7 @@ def seek(self, id):
 
         :rtype: miraiml.core.BaseModel
         :returns: The next base model for exploration.
+        :raises: KeyError
         """
         if self.is_ready(id) and rnd.uniform(0, 1) > self.config.random_exploration_ratio:
             parameters, features = self.naive_search(id)
@@ -194,6 +195,7 @@ def seek(self, id):
             search_space.parameters_rules(parameters)
         except:
             print('Error on parameters rules for the id \'{}\'.'.format(id))
+            raise KeyError
         model_class = search_space.model_class
 
         return BaseModel(model_class, parameters, features)

diff --git a/miraiml/main.py b/miraiml/main.py
@@ -33,8 +33,8 @@ class SearchSpace:
         .. warning::
              Make sure that the parameters accessed in ``parameters_rules`` exist
              in the set of parameters defined on ``parameters_values``, otherwise
-             the engine will print some error messages to let us know that we
-             told it to access invalid keys on the dictionary.
+             the engine will attempt to access an invalid key on the dictionary of
+             parameters and then stop.
 
     :Example:
 
@@ -231,10 +231,15 @@ def shuffle_train_data(self, restart=False):
             after shuffling data or not.
         :type restart: bool
 
+        :raises: RuntimeError
+
         .. note::
             It's a good practice to shuffle the training data periodically to avoid
             overfitting on a certain folding pattern.
         """
+        if self.X_train is None:
+            raise RuntimeError('Update data before trying to shuffle it.')
+
         self.interrupt()
         if not self.X_train is None:
             seed = int(time.time())
@@ -264,7 +269,11 @@ def reconfigure(self, config, restart=False):
     def restart(self):
         """
         Interrupts the engine and starts again from last checkpoint (if any).
+
+        :raises: RuntimeError
         """
+        if self.X_train is None:
+            raise RuntimeError('Update data before restarting the engine.')
         self.interrupt()
         Thread(target=lambda: self.__main_loop__()).start()
 
@@ -296,13 +305,23 @@ def __main_loop__(self):
             if os.path.exists(base_model_path):
                 base_model = load(base_model_path)
             else:
-                base_model = self.mirai_seeker.seek(search_space.id)
+                try:
+                    base_model = self.mirai_seeker.seek(search_space.id)
+                except:
+                    print('Stopping the engine.')
+                    self.__is_running__ = False
+                    return
                 par_dump(base_model, base_model_path)
             self.base_models[id] = base_model
 
-            self.train_predictions_dict[id], self.test_predictions_dict[id],\
-                self.scores[id] = base_model.predict(self.X_train, self.y_train,
-                    self.X_test, self.config)
+            try:
+                self.train_predictions_dict[id], self.test_predictions_dict[id],\
+                    self.scores[id] = base_model.predict(self.X_train, self.y_train,
+                        self.X_test, self.config)
+            except:
+                print('Stopping the engine.')
+                self.__is_running__ = False
+                return
 
             if self.best_score is None or self.scores[id] > self.best_score:
                 self.best_score = self.scores[id]
@@ -338,11 +357,15 @@ def __main_loop__(self):
                     break
                 id = search_space.id
 
-                base_model = self.mirai_seeker.seek(id)
-
-                train_predictions, test_predictions, score = base_model.\
-                    predict(self.X_train, self.y_train, self.X_test,
-                        self.config)
+                try:
+                    base_model = self.mirai_seeker.seek(id)
+                    train_predictions, test_predictions, score = base_model.\
+                        predict(self.X_train, self.y_train, self.X_test,
+                            self.config)
+                except:
+                    print('Stopping the engine.')
+                    self.__is_running__ = False
+                    return
 
                 self.mirai_seeker.register_base_model(id, base_model, score)