Replace the verbose flag with logging messages

glemaitre · Jul 4, 2016 · 1c26708 · 1c26708
1 parent 912390f
commit 1c26708
Show file tree

Hide file tree

Showing 33 changed files with 133 additions and 278 deletions.
diff --git a/imblearn/base.py b/imblearn/base.py
@@ -29,7 +29,7 @@ class SamplerMixin(six.with_metaclass(ABCMeta, BaseEstimator)):
 
     _estimator_type = "sampler"
 
-    def __init__(self, ratio='auto', verbose=True):
+    def __init__(self, ratio='auto'):
         """Initialize this object and its instance variables.
 
         Parameters
@@ -40,20 +40,13 @@ def __init__(self, ratio='auto', verbose=True):
             of samples in the minority class over the number of samples
             in the majority class.
 
-        random_state : int or None, optional (default=None)
-            Seed for random number generation.
-
-        verbose : bool, optional (default=True)
-            Boolean to either or not print information about the processing
-
         Returns
         -------
         None
 
         """
 
         self.ratio = ratio
-        self.verbose = verbose
         self.logger = logging.getLogger(__name__)
 
     def fit(self, X, y):
@@ -85,8 +78,7 @@ def fit(self, X, y):
         if hasattr(self, 'ratio'):
             self._validate_ratio()
 
-        if self.verbose:
-            print("Determining classes statistics... ", end="")
+        self.logger.info('Compute classes statistics ...')
 
         # Get all the unique elements in the target array
         uniques = np.unique(y)
@@ -110,9 +102,8 @@ def fit(self, X, y):
         self.min_c_ = min(self.stats_c_, key=self.stats_c_.get)
         self.maj_c_ = max(self.stats_c_, key=self.stats_c_.get)
 
-        if self.verbose:
-            print('{} classes detected: {}'.format(uniques.size,
-                                                   self.stats_c_))
+        self.logger.info('{} classes detected: {}'.format(uniques.size,
+                                                          self.stats_c_))
 
         # Check if the ratio provided at initialisation make sense
         if isinstance(self.ratio, float):

diff --git a/imblearn/combine/smote_enn.py b/imblearn/combine/smote_enn.py
@@ -26,9 +26,6 @@ class SMOTEENN(SamplerMixin):
         If None, the random number generator is the RandomState instance used
         by np.random.
 
-    verbose : bool, optional (default=True)
-        Whether or not to print information about the processing.
-
     k : int, optional (default=5)
         Number of nearest neighbours used to construct synthetic
         samples.
@@ -88,12 +85,11 @@ class SMOTEENN(SamplerMixin):
 
     """
 
-    def __init__(self, ratio='auto', random_state=None, verbose=True,
+    def __init__(self, ratio='auto', random_state=None,
                  k=5, m=10, out_step=0.5, kind_smote='regular',
                  size_ngh=3, kind_enn='all', n_jobs=-1, **kwargs):
 
-        super(SMOTEENN, self).__init__(ratio=ratio,
-                                       verbose=verbose)
+        super(SMOTEENN, self).__init__(ratio=ratio)
         self.random_state = random_state
         self.k = k
         self.m = m
@@ -104,11 +100,10 @@ def __init__(self, ratio='auto', random_state=None, verbose=True,
         self.n_jobs = n_jobs
         self.kwargs = kwargs
         self.sm = SMOTE(ratio=self.ratio, random_state=self.random_state,
-                        verbose=self.verbose, k=self.k, m=self.m,
-                        out_step=self.out_step, kind=self.kind_smote,
-                        n_jobs=self.n_jobs, **self.kwargs)
+                        k=self.k, m=self.m, out_step=self.out_step,
+                        kind=self.kind_smote, n_jobs=self.n_jobs,
+                        **self.kwargs)
         self.enn = EditedNearestNeighbours(random_state=self.random_state,
-                                           verbose=self.verbose,
                                            size_ngh=self.size_ngh,
                                            kind_sel=self.kind_enn,
                                            n_jobs=self.n_jobs)

diff --git a/imblearn/combine/smote_tomek.py b/imblearn/combine/smote_tomek.py
@@ -27,9 +27,6 @@ class SMOTETomek(SamplerMixin):
         If None, the random number generator is the RandomState instance used
         by np.random.
 
-    verbose : bool, optional (default=True)
-        Whether or not to print information about the processing.
-
     k : int, optional (default=5)
         Number of nearest neighbours used to construct synthetic
         samples.
@@ -88,10 +85,10 @@ class SMOTETomek(SamplerMixin):
 
     """
 
-    def __init__(self, ratio='auto', random_state=None, verbose=True,
+    def __init__(self, ratio='auto', random_state=None,
                  k=5, m=10, out_step=0.5, kind_smote='regular',
                  n_jobs=-1, **kwargs):
-        super(SMOTETomek, self).__init__(ratio=ratio, verbose=verbose)
+        super(SMOTETomek, self).__init__(ratio=ratio)
         self.random_state = random_state
         self.k = k
         self.m = m
@@ -100,11 +97,10 @@ def __init__(self, ratio='auto', random_state=None, verbose=True,
         self.n_jobs = n_jobs
         self.kwargs = kwargs
         self.sm = SMOTE(ratio=self.ratio, random_state=self.random_state,
-                        verbose=self.verbose, k=self.k, m=self.m,
-                        out_step=self.out_step, kind=self.kind_smote,
-                        n_jobs=self.n_jobs, **self.kwargs)
-        self.tomek = TomekLinks(random_state=self.random_state,
-                                verbose=self.verbose)
+                        k=self.k, m=self.m, out_step=self.out_step,
+                        kind=self.kind_smote, n_jobs=self.n_jobs,
+                        **self.kwargs)
+        self.tomek = TomekLinks(random_state=self.random_state)
 
     def fit(self, X, y):
         """Find the classes statistics before performing sampling.

diff --git a/imblearn/ensemble/balance_cascade.py b/imblearn/ensemble/balance_cascade.py
@@ -37,9 +37,6 @@ class BalanceCascade(SamplerMixin):
         If None, the random number generator is the RandomState instance used
         by np.random.
 
-    verbose : bool, optional (default=True)
-        Whether or not to print information about the processing.
-
     n_max_subset : int or None, optional (default=None)
         Maximum number of subsets to generate. By default, all data from
         the training will be selected that could lead to a large number of
@@ -87,10 +84,9 @@ class BalanceCascade(SamplerMixin):
 
     """
     def __init__(self, ratio='auto', return_indices=False, random_state=None,
-                 verbose=True, n_max_subset=None, classifier='knn',
-                 bootstrap=True, **kwargs):
-        super(BalanceCascade, self).__init__(ratio=ratio,
-                                             verbose=verbose)
+                 n_max_subset=None, classifier='knn', bootstrap=True,
+                 **kwargs):
+        super(BalanceCascade, self).__init__(ratio=ratio)
         self.return_indices = return_indices
         self.random_state = random_state
         self.classifier = classifier
@@ -242,17 +238,17 @@ def _sample(self, X, y):
             # Find the misclassified index to keep them for the next round
             idx_mis_class = idx_sel_from_maj[np.nonzero(pred_label !=
                                                         N_y[idx_sel_from_maj])]
-            if self.verbose:
-                print("Elements misclassified: {}".format(idx_mis_class))
+            self.logger.debug('Elements misclassified: {}'.format(
+                idx_mis_class))
+
             # Count how many random element will be selected
             if self.ratio == 'auto':
                 num_samples = self.stats_c_[self.min_c_]
             else:
                 num_samples = int(self.stats_c_[self.min_c_] / self.ratio)
             num_samples -= idx_mis_class.size
 
-            if self.verbose:
-                print("Creation of the subset #{}".format(n_subsets))
+            self.logger.debug('Creation of the subset #{}'.format(n_subsets))
 
             # We found a new subset, increase the counter
             n_subsets += 1
@@ -279,13 +275,14 @@ def _sample(self, X, y):
                                                          idx_sel_from_maj),
                                                         axis=0))
 
-                    if self.verbose:
-                        print("Creation of the subset #" + str(n_subsets))
+                    self.logger.debug('Creation of the subset #{}'.format(
+                        n_subsets))
+
+                    # We found a new subset, increase the counter
+                    n_subsets += 1
 
-                        # We found a new subset, increase the counter
-                        n_subsets += 1
-                    if self.verbose:
-                        print('The number of subset achieved their maximum')
+                    self.logger.debug('The number of subset reached is'
+                                      ' maximum.')
 
             # Also check that we will have enough sample to extract at the
             # next round
@@ -307,14 +304,14 @@ def _sample(self, X, y):
                     idx_under.append(np.concatenate((idx_min,
                                                      idx_sel_from_maj),
                                                     axis=0))
-                if self.verbose:
-                    print("Creation of the subset #" + str(n_subsets))
+                self.logger.debug('Creation of the subset #{}'.format(
+                        n_subsets))
 
                 # We found a new subset, increase the counter
                 n_subsets += 1
 
-                if self.verbose:
-                    print('Not enough samples to continue creating subsets')
+                self.logger.debug('Not enough samples to continue creating'
+                                  ' subsets.')
 
         if self.return_indices:
             return (np.array(X_resampled), np.array(y_resampled),

diff --git a/imblearn/ensemble/easy_ensemble.py b/imblearn/ensemble/easy_ensemble.py
@@ -31,9 +31,6 @@ class EasyEnsemble(SamplerMixin):
         If None, the random number generator is the RandomState instance used
         by np.random.
 
-    verbose : bool, optional (default=True)
-        Whether or not to print information about the processing.
-
     replacement : bool, optional (default=False)
         Whether or not to sample randomly with replacement or not.
 
@@ -68,10 +65,9 @@ class EasyEnsemble(SamplerMixin):
 
     """
 
-    def __init__(self, ratio='auto', return_indices=False, verbose=True,
+    def __init__(self, ratio='auto', return_indices=False,
                  random_state=None, replacement=False, n_subsets=10):
-        super(EasyEnsemble, self).__init__(ratio=ratio,
-                                           verbose=verbose)
+        super(EasyEnsemble, self).__init__(ratio=ratio)
         self.return_indices = return_indices
         self.random_state = random_state
         self.replacement = replacement
@@ -108,14 +104,12 @@ def _sample(self, X, y):
             idx_under = []
 
         for s in range(self.n_subsets):
-            if self.verbose:
-                print("Creation of the set #{}".format(s))
+            self.logger.debug('Creation of the set #{}'.format(s))
 
             # Create the object for random under-sampling
             rus = RandomUnderSampler(ratio=self.ratio,
                                      return_indices=self.return_indices,
                                      random_state=self.random_state,
-                                     verbose=self.verbose,
                                      replacement=self.replacement)
             if self.return_indices:
                 sel_x, sel_y, sel_idx = rus.fit_sample(X, y)

diff --git a/imblearn/ensemble/tests/test_balance_cascade.py b/imblearn/ensemble/tests/test_balance_cascade.py
@@ -57,14 +57,12 @@ def test_bc_init():
 
     # Define a ratio
     ratio = 1.
-    verbose = True
-    bc = BalanceCascade(ratio=ratio, random_state=RND_SEED, verbose=verbose)
+    bc = BalanceCascade(ratio=ratio, random_state=RND_SEED)
 
     assert_equal(bc.ratio, ratio)
     assert_equal(bc.bootstrap, True)
     assert_equal(bc.n_max_subset, None)
     assert_equal(bc.random_state, RND_SEED)
-    assert_equal(bc.verbose, verbose)
 
 
 def test_bc_fit_single_class():

diff --git a/imblearn/ensemble/tests/test_easy_ensemble.py b/imblearn/ensemble/tests/test_easy_ensemble.py
@@ -57,14 +57,12 @@ def test_ee_init():
 
     # Define a ratio
     ratio = 1.
-    verbose = True
-    ee = EasyEnsemble(ratio=ratio, random_state=RND_SEED, verbose=verbose)
+    ee = EasyEnsemble(ratio=ratio, random_state=RND_SEED)
 
     assert_equal(ee.ratio, ratio)
     assert_equal(ee.replacement, False)
     assert_equal(ee.n_subsets, 10)
     assert_equal(ee.random_state, RND_SEED)
-    assert_equal(ee.verbose, verbose)
 
 
 def test_ee_fit_single_class():

diff --git a/imblearn/over_sampling/adasyn.py b/imblearn/over_sampling/adasyn.py
@@ -33,9 +33,6 @@ class ADASYN(SamplerMixin):
         If None, the random number generator is the RandomState instance used
         by np.random.
 
-    verbose : bool, optional (default=True)
-        Whether or not to print information about the processing.
-
     k : int, optional (default=5)
         Number of nearest neighbours used to construct synthetic samples.
 
@@ -75,11 +72,9 @@ class ADASYN(SamplerMixin):
     def __init__(self,
                  ratio='auto',
                  random_state=None,
-                 verbose=True,
                  k=5,
                  n_jobs=1):
-        super(ADASYN, self).__init__(ratio=ratio,
-                                     verbose=verbose)
+        super(ADASYN, self).__init__(ratio=ratio)
         self.random_state = random_state
         self.k = k
         self.n_jobs = n_jobs
@@ -125,8 +120,8 @@ def _sample(self, X, y):
         X_min = X[y == self.min_c_]
 
         # Log the neighbour search at debug level
-        if self.verbose:
-            print('Finding the {} nearest neighbours...'.format(self.k))
+        self.logger.debug('Finding the {} nearest neighbours...'.format(
+            self.k))
 
         # Look for k-th nearest neighbours, excluding, of course, the
         # point itself.
@@ -156,7 +151,7 @@ def _sample(self, X, y):
                 X_resampled = np.vstack((X_resampled, x_gen))
                 y_resampled = np.hstack((y_resampled, self.min_c_))
 
-        if self.verbose:
-            print("Over-sampling performed: {}".format(Counter(y_resampled)))
+        self.logger.info('Over-sampling performed: {}'.format(Counter(
+            y_resampled)))
 
         return X_resampled, y_resampled
diff --git a/imblearn/over_sampling/random_over_sampler.py b/imblearn/over_sampling/random_over_sampler.py
@@ -26,9 +26,6 @@ class RandomOverSampler(SamplerMixin):
         of samples in the minority class over the the number of samples
         in the majority class.
 
-    verbose : bool, optional (default=True)
-        Whether or not to print information about the processing.
-
     random_state : int, RandomState instance or None, optional (default=None)
         If int, random_state is the seed used by the random number generator;
         If RandomState instance, random_state is the random number generator;
@@ -57,11 +54,9 @@ class RandomOverSampler(SamplerMixin):
 
     def __init__(self,
                  ratio='auto',
-                 verbose=True,
                  random_state=None):
 
-        super(RandomOverSampler, self).__init__(ratio=ratio,
-                                                verbose=verbose)
+        super(RandomOverSampler, self).__init__(ratio=ratio)
         self.random_state = random_state
 
     def _sample(self, X, y):
@@ -119,7 +114,7 @@ def _sample(self, X, y):
                                           y[y == key],
                                           y[y == key][indx]), axis=0)
 
-        if self.verbose:
-            print("Over-sampling performed: {}".format(Counter(y_resampled)))
+        self.logger.info('Over-sampling performed: {}'.format(Counter(
+            y_resampled)))
 
         return X_resampled, y_resampled