MAINT catch warnings and send to logger
mfeurer committed Sep 14, 2016
1 parent c76700b commit 5c884ee
Showing 4 changed files with 66 additions and 31 deletions.
1 change: 0 additions & 1 deletion autosklearn/automl.py
@@ -284,7 +284,6 @@ def _fit(self, datamanager):
         try:
             os.mkdir(self._backend.get_model_dir())
         except OSError:
-            self._logger.warning("model directory already exists")
             if not self._shared_mode:
                 raise

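With the warning line deleted, the surviving control flow is: try to create the model directory, and tolerate a pre-existing one only in shared mode, where several auto-sklearn instances write to one output space. A minimal standalone sketch of that pattern; make_model_dir, path and shared_mode are hypothetical names for illustration, not auto-sklearn API:

    import os

    def make_model_dir(path, shared_mode):
        try:
            os.mkdir(path)
        except OSError:
            # In shared mode an existing directory is expected and benign;
            # otherwise it signals a real problem and is re-raised.
            if not shared_mode:
                raise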
25 changes: 16 additions & 9 deletions autosklearn/ensemble_builder.py
@@ -6,6 +6,7 @@
 import re
 import sys
 import time
+import warnings

 import numpy as np
 import pynisher
@@ -142,7 +143,11 @@ def main(self):
                 used_time = watch.wall_elapsed('ensemble_builder')
                 continue

-            watch.start_task('index_run' + str(index_run))
+            with warnings.catch_warnings():
+                warnings.simplefilter('ignore')
+                # TODO restructure time management in the ensemble builder,
+                # what is the time of index_run actually needed for?
+                watch.start_task('index_run' + str(index_run))
             watch.start_task('ensemble_iter_' + str(num_iteration))

             # List of num_runs (which are in the filename) which will be included
@@ -191,8 +196,8 @@ def main(self):

             if self.ensemble_nbest is not None:
                 if score <= 0.001:
-                    self.logger.error('Model only predicts at random: ' +
-                                      model_name + ' has score: ' + str(score))
+                    self.logger.info('Model only predicts at random: ' +
+                                     model_name + ' has score: ' + str(score))
                     backup_num_runs.append((automl_seed, num_run))
                 # If we have less models in our ensemble than ensemble_nbest add
                 # the current model if it is better than random
@@ -208,10 +213,11 @@ def main(self):
                 # If the current model is better than the worst model in
                 # our ensemble replace it by the current model
                 if scores_nbest[idx] < score:
-                    self.logger.debug('Worst model in our ensemble: %s with '
-                                      'score %f will be replaced by model %s '
-                                      'with score %f', model_names[idx],
-                                      scores_nbest[idx], model_name, score)
+                    self.logger.info(
+                        'Worst model in our ensemble: %s with score %f '
+                        'will be replaced by model %s with score %f',
+                        model_names[idx], scores_nbest[idx], model_name,
+                        score)
                     # Exclude the old model
                     del scores_nbest[idx]
                     scores_nbest.append(score)
@@ -231,8 +237,9 @@ def main(self):
             # Load all predictions that are better than random
             if score <= 0.001:
                 # include_num_runs.append(True)
-                self.logger.error('Model only predicts at random: ' +
-                                  model_name + ' has score: ' + str(score))
+                self.logger.info('Model only predicts at random: ' +
+                                 model_name + ' has score: ' +
+                                 str(score))
                 backup_num_runs.append((automl_seed, num_run))
             else:
                 include_num_runs.append((automl_seed, num_run))
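warnings.catch_warnings() snapshots the warning machinery and restores it on exit, so the simplefilter('ignore') above mutes warnings only while the stopwatch tasks are started. A minimal sketch of that scoping behaviour, where noisy is a made-up stand-in for the code that warns:

    import warnings

    def noisy():
        warnings.warn("stopwatch task already exists")  # a UserWarning

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')  # applies only inside this block
        noisy()                          # silenced
    noisy()                              # emitted: filters restored on exit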
13 changes: 13 additions & 0 deletions autosklearn/estimators.py
@@ -2,11 +2,14 @@

 import numpy as np
 import six
+import warnings

 import autosklearn.automl
 from autosklearn.constants import *
 from autosklearn.util.backend import create
 from sklearn.base import BaseEstimator
+import sklearn.utils
+import scipy.sparse


 class AutoMLDecorator(object):
@@ -394,7 +397,17 @@ def fit(self, X, y,
            feat_type=None,
            dataset_name=None,
            ):
+        # From sklearn.tree.DecisionTreeClassifier
+        X = sklearn.utils.check_array(X, accept_sparse="csr",
+                                      force_all_finite=False)
+        if scipy.sparse.issparse(X):
+            X.sort_indices()
         y = np.atleast_1d(y)
+        if y.ndim == 2 and y.shape[1] == 1:
+            warnings.warn("A column-vector y was passed when a 1d array was"
+                          " expected. Please change the shape of y to "
+                          "(n_samples,), for example using ravel().",
+                          sklearn.utils.DataConversionWarning, stacklevel=2)

         if y.ndim == 1:
             # reshape is necessary to preserve the data contiguity against vs
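fit() now validates its inputs the way scikit-learn estimators do (the comment credits sklearn.tree.DecisionTreeClassifier): sparse input is converted to CSR, non-finite values are allowed through to the pipeline, and a column-vector y triggers a DataConversionWarning. A small sketch of the case the new check reacts to, using only NumPy and scikit-learn; the data here is invented:

    import numpy as np
    import sklearn.utils

    X = np.random.rand(5, 3)
    y = np.zeros((5, 1))            # column vector: shape (n_samples, 1)

    X = sklearn.utils.check_array(X, accept_sparse="csr",
                                  force_all_finite=False)
    y = np.atleast_1d(y)
    if y.ndim == 2 and y.shape[1] == 1:
        # This is the shape the new code warns about; passing y.ravel(),
        # i.e. shape (n_samples,), avoids the DataConversionWarning.
        y = y.ravel()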
58 changes: 37 additions & 21 deletions autosklearn/smbo.py
@@ -1,6 +1,7 @@
 import os
 import time
 import traceback
+import warnings

 import numpy as np
 import pynisher
@@ -254,6 +255,11 @@ def __init__(self, config_space, dataset_name,
                                             not None else "")
         self.logger = get_logger(logger_name)

+    def _send_warnings_to_log(self, message, category, filename, lineno,
+                              file=None):
+        self.logger.debug('%s:%s: %s:%s', filename, lineno, category.__name__,
+                          message)
+
     def reset_data_manager(self, max_mem=None):
         if max_mem is None:
             max_mem = self.data_memory_limit
@@ -461,15 +467,18 @@ def collect_metalearning_suggestions(self, meta_base):
         return metalearning_configurations

     def _calculate_metafeatures(self):
-        meta_features = _calculate_metafeatures(
-            data_feat_type=self.datamanager.feat_type,
-            data_info_task=self.datamanager.info['task'],
-            x_train=self.datamanager.data['X_train'],
-            y_train=self.datamanager.data['Y_train'],
-            basename=self.dataset_name,
-            watcher=self.watcher,
-            logger=self.logger)
-        return meta_features
+        with warnings.catch_warnings():
+            warnings.showwarning = self._send_warnings_to_log
+
+            meta_features = _calculate_metafeatures(
+                data_feat_type=self.datamanager.feat_type,
+                data_info_task=self.datamanager.info['task'],
+                x_train=self.datamanager.data['X_train'],
+                y_train=self.datamanager.data['Y_train'],
+                basename=self.dataset_name,
+                watcher=self.watcher,
+                logger=self.logger)
+            return meta_features

     def _calculate_metafeatures_with_limits(self, time_limit):
         res = None
@@ -487,14 +496,17 @@ def _calculate_metafeatures_with_limits(self, time_limit):
         return res

     def _calculate_metafeatures_encoded(self):
-        meta_features_encoded = _calculate_metafeatures_encoded(
-            self.dataset_name,
-            self.datamanager.data['X_train'],
-            self.datamanager.data['Y_train'],
-            self.watcher,
-            self.datamanager.info['task'],
-            self.logger)
-        return meta_features_encoded
+        with warnings.catch_warnings():
+            warnings.showwarning = self._send_warnings_to_log
+
+            meta_features_encoded = _calculate_metafeatures_encoded(
+                self.dataset_name,
+                self.datamanager.data['X_train'],
+                self.datamanager.data['Y_train'],
+                self.watcher,
+                self.datamanager.info['task'],
+                self.logger)
+            return meta_features_encoded

     def _calculate_metafeatures_encoded_with_limits(self, time_limit):
         res = None
@@ -664,7 +676,9 @@ def run_smbo(self, max_iters=1000):
                         metafeature_calculation_time_limit)
                 meta_features_encoded = None
             else:
-                self.datamanager.perform1HotEncoding()
+                with warnings.catch_warnings():
+                    warnings.showwarning = self._send_warnings_to_log
+                    self.datamanager.perform1HotEncoding()
                 meta_features_encoded = \
                     self._calculate_metafeatures_encoded_with_limits(
                         metafeature_calculation_time_limit)
@@ -686,8 +700,10 @@ def run_smbo(self, max_iters=1000):
                 features=list(meta_features.keys()))
             all_metafeatures.fillna(all_metafeatures.mean(), inplace=True)

-            metalearning_configurations = self.collect_metalearning_suggestions(
-                meta_base)
+            with warnings.catch_warnings():
+                warnings.showwarning = self._send_warnings_to_log
+                metalearning_configurations = self.collect_metalearning_suggestions(
+                    meta_base)
             if metalearning_configurations is None:
                 metalearning_configurations = []
             self.reset_data_manager()
@@ -941,7 +957,7 @@ def run_smbo(self, max_iters=1000):
                                             logger=self.logger)
             (duration, result, _, additional_run_info, status) = info
             run_history.add(config=next_config, cost=result,
-                            time=duration , status=status,
+                            time=duration, status=status,
                             instance_id=instance_id, seed=seed)
             run_history.update_cost(next_config, result)

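This is the pattern the whole file now follows: inside warnings.catch_warnings(), rebinding warnings.showwarning routes every warning raised in the block to the logger instead of stderr, and the original hook comes back when the block exits. A self-contained sketch of the same idea; the logger name and messages are invented, and the stdlib hook signature also carries a line=None parameter that the method above omits:

    import logging
    import warnings

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger('smbo')

    def send_warnings_to_log(message, category, filename, lineno,
                             file=None, line=None):
        # Same format as the _send_warnings_to_log method added above.
        logger.debug('%s:%s: %s:%s', filename, lineno, category.__name__,
                     message)

    with warnings.catch_warnings():
        warnings.showwarning = send_warnings_to_log
        warnings.warn('this goes to the DEBUG log')   # captured by the logger
    warnings.warn('this goes to stderr as usual')     # hook restored on exit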
