Skip to content

Commit

Permalink
raising some exceptions due to user mistake
Browse files Browse the repository at this point in the history
  • Loading branch information
arthurpaulino committed Apr 12, 2019
1 parent 50b5652 commit 52d2524
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 21 deletions.
5 changes: 3 additions & 2 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ without null values. The target column is called `target_class` and contains onl

MiraiML can work with any model class that implements `fit(X, y)` and
`predict(X)` in case of regression problems or `predict_proba(X)` in case of
classification problems, as long as these functions' parameters and returned
objects are built in the same pattern as those from [scikit-learn]([sklearn]).
classification problems, as long as the objects returned by these functions
follow the same pattern as those from [scikit-learn][sklearn]. `X` and `y` will
be `numpy.ndarray` objects.

I chose this example because, in my experience, the best way to fit data with
LightGBM is by splitting the data in **n** folds and using the smaller parts
Expand Down
13 changes: 6 additions & 7 deletions examples/parameters_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,16 @@
# implements such parameters rules. The function receives a dictionary of parameters
# and changes what is needed.

# We just need to make sure that the parameters accessed by the function exist
# in the set of parameters tested by the engine; otherwise, it will attempt to
# access an invalid key on the dictionary.

def logistic_regression_parameters_rules(parameters):
if parameters['solver'] in ['newton-cg', 'sag', 'lbfgs']:
parameters['penalty'] = 'l2'

# We just need to make sure that those parameters will exist in the set of
# parameters tested by the engine, otherwise it will scream some error messages
# to let us know that we told it to access invalid keys on the dictionary.

# Now we create the list of search spaces containing only one for some Logistic
# Regression parameters.

# Now we create the list of search spaces containing only one element to keep it
# simple.
search_spaces = [
SearchSpace(
model_class = LogisticRegression,
Expand Down
2 changes: 1 addition & 1 deletion miraiml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@

__all__ = ['SearchSpace', 'Config', 'Engine']

__version__ = '0.0.1'
__version__ = '0.0.2'
2 changes: 2 additions & 0 deletions miraiml/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def seek(self, id):
:rtype: miraiml.core.BaseModel
:returns: The next base model for exploration.
:raises: KeyError
"""
if self.is_ready(id) and rnd.uniform(0, 1) > self.config.random_exploration_ratio:
parameters, features = self.naive_search(id)
Expand All @@ -194,6 +195,7 @@ def seek(self, id):
search_space.parameters_rules(parameters)
except:
print('Error on parameters rules for the id \'{}\'.'.format(id))
raise KeyError
model_class = search_space.model_class

return BaseModel(model_class, parameters, features)
Expand Down
45 changes: 34 additions & 11 deletions miraiml/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ class SearchSpace:
.. warning::
Make sure that the parameters accessed in ``parameters_rules`` exist
in the set of parameters defined on ``parameters_values``, otherwise
the engine will print some error messages to let us know that we
told it to access invalid keys on the dictionary.
the engine will attempt to access an invalid key on the dictionary of
parameters and then stop.
:Example:
Expand Down Expand Up @@ -231,10 +231,15 @@ def shuffle_train_data(self, restart=False):
after shuffling data or not.
:type restart: bool
:raises: RuntimeError
.. note::
It's a good practice to shuffle the training data periodically to avoid
overfitting on a certain folding pattern.
"""
if self.X_train is None:
raise RuntimeError('Update data before trying to shuffle it.')

self.interrupt()
if not self.X_train is None:
seed = int(time.time())
Expand Down Expand Up @@ -264,7 +269,11 @@ def reconfigure(self, config, restart=False):
def restart(self):
"""
Interrupts the engine and starts again from last checkpoint (if any).
:raises: RuntimeError
"""
if self.X_train is None:
raise RuntimeError('Update data before restarting the engine.')
self.interrupt()
Thread(target=lambda: self.__main_loop__()).start()

Expand Down Expand Up @@ -296,13 +305,23 @@ def __main_loop__(self):
if os.path.exists(base_model_path):
base_model = load(base_model_path)
else:
base_model = self.mirai_seeker.seek(search_space.id)
try:
base_model = self.mirai_seeker.seek(search_space.id)
except:
print('Stopping the engine.')
self.__is_running__ = False
return
par_dump(base_model, base_model_path)
self.base_models[id] = base_model

self.train_predictions_dict[id], self.test_predictions_dict[id],\
self.scores[id] = base_model.predict(self.X_train, self.y_train,
self.X_test, self.config)
try:
self.train_predictions_dict[id], self.test_predictions_dict[id],\
self.scores[id] = base_model.predict(self.X_train, self.y_train,
self.X_test, self.config)
except:
print('Stopping the engine.')
self.__is_running__ = False
return

if self.best_score is None or self.scores[id] > self.best_score:
self.best_score = self.scores[id]
Expand Down Expand Up @@ -338,11 +357,15 @@ def __main_loop__(self):
break
id = search_space.id

base_model = self.mirai_seeker.seek(id)

train_predictions, test_predictions, score = base_model.\
predict(self.X_train, self.y_train, self.X_test,
self.config)
try:
base_model = self.mirai_seeker.seek(id)
train_predictions, test_predictions, score = base_model.\
predict(self.X_train, self.y_train, self.X_test,
self.config)
except:
print('Stopping the engine.')
self.__is_running__ = False
return

self.mirai_seeker.register_base_model(id, base_model, score)

Expand Down

0 comments on commit 52d2524

Please sign in to comment.