Skip to content

Commit

Permalink
Merge pull request #29 from arthurpaulino/issue-21
Browse files Browse the repository at this point in the history
minor: implementing the on_improvement function
  • Loading branch information
arthurpaulino committed Apr 15, 2019
2 parents 0a4b0cd + 62416ff commit 159443f
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 8 deletions.
11 changes: 8 additions & 3 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,22 @@ without null values. The target column is called `target_class` and contains onl
This example shows the very basics of MiraiML, from importing the main classes
to fitting and predicting.

2. [Implementing parameters rules](parameters_rules.py)
2. [Implementing an `on_improvement` function](on_improvement.py)

If we want the engine to trigger a function when a better set of predictions
is found, we can define it and pass it to the Engine's constructor.

3. [Implementing parameters rules](parameters_rules.py)

When a certain combination of hyperparameters is prohibited, you can use
`parameters_rules` to avoid such conflicts.

3. [Ensembling models](ensembling.py)
4. [Ensembling models](ensembling.py)

This example shows MiraiML's capabilities to find smart weights when ensembling
various models.

4. [Wrapping a LightGBM model](lightgbm_wrapper.py) (requires
5. [Wrapping a LightGBM model](lightgbm_wrapper.py) (requires
[lightgbm][lightgbm_pypi])

MiraiML can work with any model class that implements `fit(X, y)` and
Expand Down
44 changes: 44 additions & 0 deletions examples/on_improvement.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from sklearn.model_selection import train_test_split
from miraiml import SearchSpace, Config, Engine
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import roc_auc_score
from time import sleep
import pandas as pd
import numpy as np
import warnings

warnings.filterwarnings('ignore')

# A lone Gaussian Naive Bayes classifier is all this example needs.
search_spaces = [
    SearchSpace(model_class=GaussianNB, id='Gaussian NB'),
]

# Engine configuration: a classification problem scored with ROC AUC, using
# a local directory dedicated to this example.
config = Config(
    local_dir='miraiml_local_on_improvement',
    problem_type='classification',
    search_spaces=search_spaces,
    score_function=roc_auc_score,
)

# Simply printing the best score on improvement. This function must receive a
# dictionary, which is the return of the request_status method.
def on_improvement(status):
    """Print the score of the current best id.

    :param status: The status dictionary handed over by the engine. This
        example reads its ``'best_id'`` and ``'scores'`` entries.
    """
    best_id = status['best_id']
    scores = status['scores']
    print('Best score:', scores[best_id])

# Hooking the callback into the engine at construction time
engine = Engine(config, on_improvement=on_improvement)

# Reading the dataset and splitting it, stratified on the target column
data = pd.read_csv('pulsar_stars.csv')
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    stratify=data['target_class'],
    random_state=0,
)
train_target = train_data.pop('target_class')
engine.load_data(train_data, train_target, test_data)

# Kicking off the search
engine.restart()

# Give the engine 10 seconds to report its improvements, then stop it
sleep(10)
engine.interrupt()
2 changes: 1 addition & 1 deletion miraiml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
>>> from miraiml import SearchSpace, Config, Engine
"""

# Package version string, exposed as ``miraiml.__version__``.
__version__ = '3.0.5.2'
__version__ = '3.1.5.2'

from .main import SearchSpace, Config, Engine

Expand Down
34 changes: 30 additions & 4 deletions miraiml/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,11 @@ class Engine:
:type config: miraiml.Config
:param config: The configurations for the behavior of the engine.
:type on_improvement: function, optional, default=None
:param on_improvement: A function that will be executed every time the engine
finds an improvement for the best id. It must receive a ``status``
parameter, which is the return value of the method :func:`request_status`.
:raises: ``TypeError``
:Example:
Expand All @@ -243,22 +248,31 @@ class Engine:
from miraiml import Engine
engine = Engine(config)
def on_improvement(status):
best_id = status['best_id']
scores = status['scores']
print('Best score:', scores[best_id])
engine = Engine(config, on_improvement=on_improvement)
"""
def __init__(self, config, on_improvement=None):
    """Validate the arguments and set up the engine's initial state.

    :param config: The configurations for the behavior of the engine.
    :param on_improvement: Optional callback; the main loop calls it with
        the return value of ``request_status()`` after the best id changes.
    :raises: ``TypeError`` (propagated from ``__validate__``)
    """
    # Reject invalid arguments before any state is stored.
    self.__validate__(config, on_improvement)
    self.config = config
    self.on_improvement = on_improvement
    self.__is_running__ = False
    self.must_interrupt = False
    self.mirai_seeker = None
    # Plain string concatenation: presumably ``config.local_dir`` already
    # ends with a path separator -- confirm against ``Config``.
    self.models_dir = config.local_dir + 'models/'
    self.train_data = None
    self.ensembler = None

def __validate__(self, config, on_improvement):
    """Check the arguments given to the constructor.

    :param config: Must be an instance of ``miraiml.Config``.
    :param on_improvement: Must be ``None`` or a callable.
    :raises: ``TypeError`` if either argument is invalid.
    """
    if not isinstance(config, Config):
        raise TypeError("miraiml.Engine's constructor requires an object"
                        " of miraiml.Config")
    # ``callable`` also accepts bound methods, builtins, partials and
    # objects defining ``__call__``, which a comparison against the type
    # of a lambda would wrongly reject.
    if on_improvement is not None and not callable(on_improvement):
        raise TypeError('on_improvement must be None or a function')

def is_running(self):
"""
Expand Down Expand Up @@ -436,6 +450,9 @@ def __main_loop__(self):
self.best_score = score
self.best_id = ensemble_id

if not self.on_improvement is None:
self.on_improvement(self.request_status())

while not self.must_interrupt:
for search_space in self.config.search_spaces:
if self.must_interrupt:
Expand All @@ -459,6 +476,9 @@ def __main_loop__(self):
self.best_score = score
self.best_id = id

if not self.on_improvement is None:
self.on_improvement(self.request_status())

if will_ensemble:
self.train_predictions_df[ensemble_id],\
self.test_predictions_df[ensemble_id],\
Expand All @@ -467,6 +487,9 @@ def __main_loop__(self):
self.best_score = self.scores[ensemble_id]
self.best_id = ensemble_id

if not self.on_improvement is None:
self.on_improvement(self.request_status())

dump(base_model, self.models_dir + id)

if will_ensemble:
Expand All @@ -476,6 +499,9 @@ def __main_loop__(self):
self.best_score = score
self.best_id = ensemble_id

if not self.on_improvement is None:
self.on_improvement(self.request_status())

self.__is_running__ = False

def request_status(self):
Expand Down

0 comments on commit 159443f

Please sign in to comment.