MAINT catch warnings and send to logger
mfeurer committed Sep 14, 2016
1 parent c76700b commit 5c884ee
Showing 4 changed files with 66 additions and 31 deletions.
1 change: 0 additions & 1 deletion autosklearn/automl.py
@@ -284,7 +284,6 @@ def _fit(self, datamanager):
         try:
             os.mkdir(self._backend.get_model_dir())
         except OSError:
-            self._logger.warning("model directory already exists")
             if not self._shared_mode:
                 raise

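With the warning line deleted, the surviving control flow is: try to create the model directory, and tolerate a pre-existing one only in shared mode, where several auto-sklearn instances write to one output space. A minimal standalone sketch of that pattern; make_model_dir, path and shared_mode are hypothetical names for illustration, not auto-sklearn API:

    import os

    def make_model_dir(path, shared_mode):
        try:
            os.mkdir(path)
        except OSError:
            # In shared mode an existing directory is expected and benign;
            # otherwise it signals a real problem and is re-raised.
            if not shared_mode:
                raise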
25 changes: 16 additions & 9 deletions autosklearn/ensemble_builder.py
@@ -6,6 +6,7 @@
 import re
 import sys
 import time
+import warnings

 import numpy as np
 import pynisher
@@ -142,7 +143,11 @@ def main(self):
                 used_time = watch.wall_elapsed('ensemble_builder')
                 continue

-            watch.start_task('index_run' + str(index_run))
+            with warnings.catch_warnings():
+                warnings.simplefilter('ignore')
+                # TODO restructure time management in the ensemble builder,
+                # what is the time of index_run actually needed for?
+                watch.start_task('index_run' + str(index_run))
             watch.start_task('ensemble_iter_' + str(num_iteration))

             # List of num_runs (which are in the filename) which will be included
@@ -191,8 +196,8 @@ def main(self):

             if self.ensemble_nbest is not None:
                 if score <= 0.001:
-                    self.logger.error('Model only predicts at random: ' +
-                                      model_name + ' has score: ' + str(score))
+                    self.logger.info('Model only predicts at random: ' +
+                                     model_name + ' has score: ' + str(score))
                     backup_num_runs.append((automl_seed, num_run))
                 # If we have less models in our ensemble than ensemble_nbest add
                 # the current model if it is better than random
@@ -208,10 +213,11 @@ def main(self):
                 # If the current model is better than the worst model in
                 # our ensemble replace it by the current model
                 if scores_nbest[idx] < score:
-                    self.logger.debug('Worst model in our ensemble: %s with '
-                                      'score %f will be replaced by model %s '
-                                      'with score %f', model_names[idx],
-                                      scores_nbest[idx], model_name, score)
+                    self.logger.info(
+                        'Worst model in our ensemble: %s with score %f '
+                        'will be replaced by model %s with score %f',
+                        model_names[idx], scores_nbest[idx], model_name,
+                        score)
                     # Exclude the old model
                     del scores_nbest[idx]
                     scores_nbest.append(score)
@@ -231,8 +237,9 @@ def main(self):
             # Load all predictions that are better than random
             if score <= 0.001:
                 # include_num_runs.append(True)
-                self.logger.error('Model only predicts at random: ' +
-                                  model_name + ' has score: ' + str(score))
+                self.logger.info('Model only predicts at random: ' +
+                                 model_name + ' has score: ' +
+                                 str(score))
                 backup_num_runs.append((automl_seed, num_run))
             else:
                 include_num_runs.append((automl_seed, num_run))
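warnings.catch_warnings() snapshots the warning machinery and restores it on exit, so the simplefilter('ignore') above mutes warnings only while the stopwatch tasks are started. A minimal sketch of that scoping behaviour, where noisy is a made-up stand-in for the code that warns:

    import warnings

    def noisy():
        warnings.warn("stopwatch task already exists")  # a UserWarning

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')  # applies only inside this block
        noisy()                          # silenced
    noisy()                              # emitted: filters restored on exit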
13 changes: 13 additions & 0 deletions autosklearn/estimators.py
@@ -2,11 +2,14 @@

 import numpy as np
 import six
+import warnings

 import autosklearn.automl
 from autosklearn.constants import *
 from autosklearn.util.backend import create
 from sklearn.base import BaseEstimator
+import sklearn.utils
+import scipy.sparse


 class AutoMLDecorator(object):
@@ -394,7 +397,17 @@ def fit(self, X, y,
            feat_type=None,
            dataset_name=None,
            ):
+        # From sklearn.tree.DecisionTreeClassifier
+        X = sklearn.utils.check_array(X, accept_sparse="csr",
+                                      force_all_finite=False)
+        if scipy.sparse.issparse(X):
+            X.sort_indices()
         y = np.atleast_1d(y)
+        if y.ndim == 2 and y.shape[1] == 1:
+            warnings.warn("A column-vector y was passed when a 1d array was"
+                          " expected. Please change the shape of y to "
+                          "(n_samples,), for example using ravel().",
+                          sklearn.utils.DataConversionWarning, stacklevel=2)

         if y.ndim == 1:
             # reshape is necessary to preserve the data contiguity against vs
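fit() now validates its inputs the way scikit-learn estimators do (the comment credits sklearn.tree.DecisionTreeClassifier): sparse input is converted to CSR, non-finite values are allowed through to the pipeline, and a column-vector y triggers a DataConversionWarning. A small sketch of the case the new check reacts to, using only NumPy and scikit-learn; the data here is invented:

    import numpy as np
    import sklearn.utils

    X = np.random.rand(5, 3)
    y = np.zeros((5, 1))            # column vector: shape (n_samples, 1)

    X = sklearn.utils.check_array(X, accept_sparse="csr",
                                  force_all_finite=False)
    y = np.atleast_1d(y)
    if y.ndim == 2 and y.shape[1] == 1:
        # This is the shape the new code warns about; passing y.ravel(),
        # i.e. shape (n_samples,), avoids the DataConversionWarning.
        y = y.ravel()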
58 changes: 37 additions & 21 deletions autosklearn/smbo.py
@@ -1,6 +1,7 @@
 import os
 import time
 import traceback
+import warnings

 import numpy as np
 import pynisher
@@ -254,6 +255,11 @@ def __init__(self, config_space, dataset_name,
                                             not None else "")
         self.logger = get_logger(logger_name)

+    def _send_warnings_to_log(self, message, category, filename, lineno,
+                              file=None):
+        self.logger.debug('%s:%s: %s:%s', filename, lineno, category.__name__,
+                          message)
+
     def reset_data_manager(self, max_mem=None):
         if max_mem is None:
             max_mem = self.data_memory_limit
@@ -461,15 +467,18 @@ def collect_metalearning_suggestions(self, meta_base):
         return metalearning_configurations

     def _calculate_metafeatures(self):
-        meta_features = _calculate_metafeatures(
-            data_feat_type=self.datamanager.feat_type,
-            data_info_task=self.datamanager.info['task'],
-            x_train=self.datamanager.data['X_train'],
-            y_train=self.datamanager.data['Y_train'],
-            basename=self.dataset_name,
-            watcher=self.watcher,
-            logger=self.logger)
-        return meta_features
+        with warnings.catch_warnings():
+            warnings.showwarning = self._send_warnings_to_log
+
+            meta_features = _calculate_metafeatures(
+                data_feat_type=self.datamanager.feat_type,
+                data_info_task=self.datamanager.info['task'],
+                x_train=self.datamanager.data['X_train'],
+                y_train=self.datamanager.data['Y_train'],
+                basename=self.dataset_name,
+                watcher=self.watcher,
+                logger=self.logger)
+            return meta_features

     def _calculate_metafeatures_with_limits(self, time_limit):
         res = None
@@ -487,14 +496,17 @@ def _calculate_metafeatures_with_limits(self, time_limit):
         return res

     def _calculate_metafeatures_encoded(self):
-        meta_features_encoded = _calculate_metafeatures_encoded(
-            self.dataset_name,
-            self.datamanager.data['X_train'],
-            self.datamanager.data['Y_train'],
-            self.watcher,
-            self.datamanager.info['task'],
-            self.logger)
-        return meta_features_encoded
+        with warnings.catch_warnings():
+            warnings.showwarning = self._send_warnings_to_log
+
+            meta_features_encoded = _calculate_metafeatures_encoded(
+                self.dataset_name,
+                self.datamanager.data['X_train'],
+                self.datamanager.data['Y_train'],
+                self.watcher,
+                self.datamanager.info['task'],
+                self.logger)
+            return meta_features_encoded

     def _calculate_metafeatures_encoded_with_limits(self, time_limit):
         res = None
@@ -664,7 +676,9 @@ def run_smbo(self, max_iters=1000):
                         metafeature_calculation_time_limit)
                 meta_features_encoded = None
             else:
-                self.datamanager.perform1HotEncoding()
+                with warnings.catch_warnings():
+                    warnings.showwarning = self._send_warnings_to_log
+                    self.datamanager.perform1HotEncoding()
                 meta_features_encoded = \
                     self._calculate_metafeatures_encoded_with_limits(
                         metafeature_calculation_time_limit)
@@ -686,8 +700,10 @@ def run_smbo(self, max_iters=1000):
                 features=list(meta_features.keys()))
             all_metafeatures.fillna(all_metafeatures.mean(), inplace=True)

-            metalearning_configurations = self.collect_metalearning_suggestions(
-                meta_base)
+            with warnings.catch_warnings():
+                warnings.showwarning = self._send_warnings_to_log
+                metalearning_configurations = self.collect_metalearning_suggestions(
+                    meta_base)
             if metalearning_configurations is None:
                 metalearning_configurations = []
             self.reset_data_manager()
@@ -941,7 +957,7 @@ def run_smbo(self, max_iters=1000):
                                             logger=self.logger)
             (duration, result, _, additional_run_info, status) = info
             run_history.add(config=next_config, cost=result,
-                            time=duration , status=status,
+                            time=duration, status=status,
                             instance_id=instance_id, seed=seed)
             run_history.update_cost(next_config, result)

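This is the pattern the whole file now follows: inside warnings.catch_warnings(), rebinding warnings.showwarning routes every warning raised in the block to the logger instead of stderr, and the original hook comes back when the block exits. A self-contained sketch of the same idea; the logger name and messages are invented, and the stdlib hook signature also carries a line=None parameter that the method above omits:

    import logging
    import warnings

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger('smbo')

    def send_warnings_to_log(message, category, filename, lineno,
                             file=None, line=None):
        # Same format as the _send_warnings_to_log method added above.
        logger.debug('%s:%s: %s:%s', filename, lineno, category.__name__,
                     message)

    with warnings.catch_warnings():
        warnings.showwarning = send_warnings_to_log
        warnings.warn('this goes to the DEBUG log')   # captured by the logger
    warnings.warn('this goes to stderr as usual')     # hook restored on exit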
