diff --git a/imblearn/base.py b/imblearn/base.py index 929c76968..1d65599c3 100644 --- a/imblearn/base.py +++ b/imblearn/base.py @@ -29,7 +29,7 @@ class SamplerMixin(six.with_metaclass(ABCMeta, BaseEstimator)): _estimator_type = "sampler" - def __init__(self, ratio='auto', verbose=True): + def __init__(self, ratio='auto'): """Initialize this object and its instance variables. Parameters @@ -40,12 +40,6 @@ def __init__(self, ratio='auto', verbose=True): of samples in the minority class over the the number of samples in the majority class. - random_state : int or None, optional (default=None) - Seed for random number generation. - - verbose : bool, optional (default=True) - Boolean to either or not print information about the processing - Returns ------- None @@ -53,7 +47,6 @@ def __init__(self, ratio='auto', verbose=True): """ self.ratio = ratio - self.verbose = verbose self.logger = logging.getLogger(__name__) def fit(self, X, y): @@ -85,8 +78,7 @@ def fit(self, X, y): if hasattr(self, 'ratio'): self._validate_ratio() - if self.verbose: - print("Determining classes statistics... ", end="") + self.logger.info('Compute classes statistics ...') # Get all the unique elements in the target array uniques = np.unique(y) @@ -110,9 +102,8 @@ def fit(self, X, y): self.min_c_ = min(self.stats_c_, key=self.stats_c_.get) self.maj_c_ = max(self.stats_c_, key=self.stats_c_.get) - if self.verbose: - print('{} classes detected: {}'.format(uniques.size, - self.stats_c_)) + self.logger.info('{} classes detected: {}'.format(uniques.size, + self.stats_c_)) # Check if the ratio provided at initialisation make sense if isinstance(self.ratio, float): diff --git a/imblearn/combine/smote_enn.py b/imblearn/combine/smote_enn.py index 95c0d83a3..185531e07 100644 --- a/imblearn/combine/smote_enn.py +++ b/imblearn/combine/smote_enn.py @@ -26,9 +26,6 @@ class SMOTEENN(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - Whether or not to print information about the processing. - k : int, optional (default=5) Number of nearest neighbours to used to construct synthetic samples. @@ -88,12 +85,11 @@ class SMOTEENN(SamplerMixin): """ - def __init__(self, ratio='auto', random_state=None, verbose=True, + def __init__(self, ratio='auto', random_state=None, k=5, m=10, out_step=0.5, kind_smote='regular', size_ngh=3, kind_enn='all', n_jobs=-1, **kwargs): - super(SMOTEENN, self).__init__(ratio=ratio, - verbose=verbose) + super(SMOTEENN, self).__init__(ratio=ratio) self.random_state = random_state self.k = k self.m = m @@ -104,11 +100,10 @@ def __init__(self, ratio='auto', random_state=None, verbose=True, self.n_jobs = n_jobs self.kwargs = kwargs self.sm = SMOTE(ratio=self.ratio, random_state=self.random_state, - verbose=self.verbose, k=self.k, m=self.m, - out_step=self.out_step, kind=self.kind_smote, - n_jobs=self.n_jobs, **self.kwargs) + k=self.k, m=self.m, out_step=self.out_step, + kind=self.kind_smote, n_jobs=self.n_jobs, + **self.kwargs) self.enn = EditedNearestNeighbours(random_state=self.random_state, - verbose=self.verbose, size_ngh=self.size_ngh, kind_sel=self.kind_enn, n_jobs=self.n_jobs) diff --git a/imblearn/combine/smote_tomek.py b/imblearn/combine/smote_tomek.py index 023871374..b311023dd 100644 --- a/imblearn/combine/smote_tomek.py +++ b/imblearn/combine/smote_tomek.py @@ -27,9 +27,6 @@ class SMOTETomek(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. 
- verbose : bool, optional (default=True) - Whether or not to print information about the processing. - k : int, optional (default=5) Number of nearest neighbours to used to construct synthetic samples. @@ -88,10 +85,10 @@ class SMOTETomek(SamplerMixin): """ - def __init__(self, ratio='auto', random_state=None, verbose=True, + def __init__(self, ratio='auto', random_state=None, k=5, m=10, out_step=0.5, kind_smote='regular', n_jobs=-1, **kwargs): - super(SMOTETomek, self).__init__(ratio=ratio, verbose=verbose) + super(SMOTETomek, self).__init__(ratio=ratio) self.random_state = random_state self.k = k self.m = m @@ -100,11 +97,10 @@ def __init__(self, ratio='auto', random_state=None, verbose=True, self.n_jobs = n_jobs self.kwargs = kwargs self.sm = SMOTE(ratio=self.ratio, random_state=self.random_state, - verbose=self.verbose, k=self.k, m=self.m, - out_step=self.out_step, kind=self.kind_smote, - n_jobs=self.n_jobs, **self.kwargs) - self.tomek = TomekLinks(random_state=self.random_state, - verbose=self.verbose) + k=self.k, m=self.m, out_step=self.out_step, + kind=self.kind_smote, n_jobs=self.n_jobs, + **self.kwargs) + self.tomek = TomekLinks(random_state=self.random_state) def fit(self, X, y): """Find the classes statistics before to perform sampling. diff --git a/imblearn/ensemble/balance_cascade.py b/imblearn/ensemble/balance_cascade.py index 8dfa5789d..dc6a71847 100644 --- a/imblearn/ensemble/balance_cascade.py +++ b/imblearn/ensemble/balance_cascade.py @@ -37,9 +37,6 @@ class BalanceCascade(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - Whether or not to print information about the processing. - n_max_subset : int or None, optional (default=None) Maximum number of subsets to generate. 
By default, all data from the training will be selected that could lead to a large number of @@ -87,10 +84,9 @@ class BalanceCascade(SamplerMixin): """ def __init__(self, ratio='auto', return_indices=False, random_state=None, - verbose=True, n_max_subset=None, classifier='knn', - bootstrap=True, **kwargs): - super(BalanceCascade, self).__init__(ratio=ratio, - verbose=verbose) + n_max_subset=None, classifier='knn', bootstrap=True, + **kwargs): + super(BalanceCascade, self).__init__(ratio=ratio) self.return_indices = return_indices self.random_state = random_state self.classifier = classifier @@ -242,8 +238,9 @@ def _sample(self, X, y): # Find the misclassified index to keep them for the next round idx_mis_class = idx_sel_from_maj[np.nonzero(pred_label != N_y[idx_sel_from_maj])] - if self.verbose: - print("Elements misclassified: {}".format(idx_mis_class)) + self.logger.debug('Elements misclassified: {}'.format( + idx_mis_class)) + # Count how many random element will be selected if self.ratio == 'auto': num_samples = self.stats_c_[self.min_c_] @@ -251,8 +248,7 @@ num_samples = int(self.stats_c_[self.min_c_] / self.ratio) num_samples -= idx_mis_class.size - if self.verbose: - print("Creation of the subset #{}".format(n_subsets)) + self.logger.debug('Creation of the subset #{}'.format(n_subsets)) # We found a new subset, increase the counter n_subsets += 1 @@ -279,13 +275,14 @@ idx_sel_from_maj), axis=0)) - if self.verbose: - print("Creation of the subset #" + str(n_subsets)) + self.logger.debug('Creation of the subset #{}'.format( + n_subsets)) + + # We found a new subset, increase the counter + n_subsets += 1 - # We found a new subset, increase the counter - n_subsets += 1 - if self.verbose: - print('The number of subset achieved their maximum') + self.logger.debug('Maximum number of subsets' + ' reached.') # Also check that we will have enough sample to extract at the # next round @@ -307,14 +304,14 @@ idx_under.append(np.concatenate((idx_min, idx_sel_from_maj), axis=0)) - if self.verbose: - print("Creation of the subset #" + str(n_subsets)) + self.logger.debug('Creation of the subset #{}'.format( + n_subsets)) # We found a new subset, increase the counter n_subsets += 1 - if self.verbose: - print('Not enough samples to continue creating subsets') + self.logger.debug('Not enough samples to continue creating' + ' subsets.') if self.return_indices: return (np.array(X_resampled), np.array(y_resampled), diff --git a/imblearn/ensemble/easy_ensemble.py b/imblearn/ensemble/easy_ensemble.py index a60da2bd3..69a2b2802 100644 --- a/imblearn/ensemble/easy_ensemble.py +++ b/imblearn/ensemble/easy_ensemble.py @@ -31,9 +31,6 @@ class EasyEnsemble(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - Whether or not to print information about the processing. - replacement : bool, optional (default=False) Whether or not to sample randomly with replacement or not.
@@ -68,10 +65,9 @@ class EasyEnsemble(SamplerMixin): """ - def __init__(self, ratio='auto', return_indices=False, verbose=True, + def __init__(self, ratio='auto', return_indices=False, random_state=None, replacement=False, n_subsets=10): - super(EasyEnsemble, self).__init__(ratio=ratio, - verbose=verbose) + super(EasyEnsemble, self).__init__(ratio=ratio) self.return_indices = return_indices self.random_state = random_state self.replacement = replacement @@ -108,14 +104,12 @@ def _sample(self, X, y): idx_under = [] for s in range(self.n_subsets): - if self.verbose: - print("Creation of the set #{}".format(s)) + self.logger.debug('Creation of the set #{}'.format(s)) # Create the object for random under-sampling rus = RandomUnderSampler(ratio=self.ratio, return_indices=self.return_indices, random_state=self.random_state, - verbose=self.verbose, replacement=self.replacement) if self.return_indices: sel_x, sel_y, sel_idx = rus.fit_sample(X, y) diff --git a/imblearn/ensemble/tests/test_balance_cascade.py b/imblearn/ensemble/tests/test_balance_cascade.py index faa31ae8d..c416ed3eb 100644 --- a/imblearn/ensemble/tests/test_balance_cascade.py +++ b/imblearn/ensemble/tests/test_balance_cascade.py @@ -57,14 +57,12 @@ def test_bc_init(): # Define a ratio ratio = 1. - verbose = True - bc = BalanceCascade(ratio=ratio, random_state=RND_SEED, verbose=verbose) + bc = BalanceCascade(ratio=ratio, random_state=RND_SEED) assert_equal(bc.ratio, ratio) assert_equal(bc.bootstrap, True) assert_equal(bc.n_max_subset, None) assert_equal(bc.random_state, RND_SEED) - assert_equal(bc.verbose, verbose) def test_bc_fit_single_class(): diff --git a/imblearn/ensemble/tests/test_easy_ensemble.py b/imblearn/ensemble/tests/test_easy_ensemble.py index e02e10517..763db4e65 100644 --- a/imblearn/ensemble/tests/test_easy_ensemble.py +++ b/imblearn/ensemble/tests/test_easy_ensemble.py @@ -57,14 +57,12 @@ def test_ee_init(): # Define a ratio ratio = 1. - verbose = True - ee = EasyEnsemble(ratio=ratio, random_state=RND_SEED, verbose=verbose) + ee = EasyEnsemble(ratio=ratio, random_state=RND_SEED) assert_equal(ee.ratio, ratio) assert_equal(ee.replacement, False) assert_equal(ee.n_subsets, 10) assert_equal(ee.random_state, RND_SEED) - assert_equal(ee.verbose, verbose) def test_ee_fit_single_class(): diff --git a/imblearn/over_sampling/adasyn.py b/imblearn/over_sampling/adasyn.py index f63248db4..f00e41b0f 100644 --- a/imblearn/over_sampling/adasyn.py +++ b/imblearn/over_sampling/adasyn.py @@ -33,9 +33,6 @@ class ADASYN(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - Whether or not to print information about the processing. - k : int, optional (default=5) Number of nearest neighbours to used to construct synthetic samples. @@ -75,11 +72,9 @@ def __init__(self, ratio='auto', random_state=None, - verbose=True, k=5, n_jobs=1): - super(ADASYN, self).__init__(ratio=ratio, - verbose=verbose) + super(ADASYN, self).__init__(ratio=ratio) self.random_state = random_state self.k = k self.n_jobs = n_jobs @@ -125,8 +120,7 @@ def _sample(self, X, y): X_min = X[y == self.min_c_] - # Print if verbose is true - if self.verbose: - print('Finding the {} nearest neighbours...'.format(self.k)) + self.logger.debug('Finding the {} nearest neighbours...'.format( + self.k)) # Look for k-th nearest neighbours, excluding, of course, the # point itself.
@@ -156,7 +151,7 @@ def _sample(self, X, y): X_resampled = np.vstack((X_resampled, x_gen)) y_resampled = np.hstack((y_resampled, self.min_c_)) - if self.verbose: - print("Over-sampling performed: {}".format(Counter(y_resampled))) + self.logger.info('Over-sampling performed: {}'.format(Counter( + y_resampled))) return X_resampled, y_resampled diff --git a/imblearn/over_sampling/random_over_sampler.py b/imblearn/over_sampling/random_over_sampler.py index d3622b997..f38f2bcf4 100644 --- a/imblearn/over_sampling/random_over_sampler.py +++ b/imblearn/over_sampling/random_over_sampler.py @@ -26,9 +26,6 @@ class RandomOverSampler(SamplerMixin): of samples in the minority class over the the number of samples in the majority class. - verbose : bool, optional (default=True) - Whether or not to print information about the processing. - random_state : int, RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; @@ -57,11 +54,9 @@ class RandomOverSampler(SamplerMixin): def __init__(self, ratio='auto', - verbose=True, random_state=None): - super(RandomOverSampler, self).__init__(ratio=ratio, - verbose=verbose) + super(RandomOverSampler, self).__init__(ratio=ratio) self.random_state = random_state def _sample(self, X, y): @@ -119,7 +114,7 @@ def _sample(self, X, y): y[y == key], y[y == key][indx]), axis=0) - if self.verbose: - print("Over-sampling performed: {}".format(Counter(y_resampled))) + self.logger.info('Over-sampling performed: {}'.format(Counter( + y_resampled))) return X_resampled, y_resampled diff --git a/imblearn/over_sampling/smote.py b/imblearn/over_sampling/smote.py index bbd1e5efe..87bbd6c74 100644 --- a/imblearn/over_sampling/smote.py +++ b/imblearn/over_sampling/smote.py @@ -37,9 +37,6 @@ class SMOTE(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - Whether or not to print information about the processing. - k : int, optional (default=5) Number of nearest neighbours to used to construct synthetic samples. @@ -95,15 +92,13 @@ class SMOTE(SamplerMixin): def __init__(self, ratio='auto', random_state=None, - verbose=True, k=5, m=10, out_step=0.5, kind='regular', n_jobs=-1, **kwargs): - super(SMOTE, self).__init__(ratio=ratio, - verbose=verbose) + super(SMOTE, self).__init__(ratio=ratio) self.random_state = random_state self.kind = kind self.k = k @@ -234,8 +229,7 @@ def _make_samples(self, X, y_type, nn_data, nn_num, n_samples, # minority label y_new = np.array([y_type] * len(X_new)) - if self.verbose: - print("Generated {} new samples ...".format(len(X_new))) + self.logger.info('Generated {} new samples ...'.format(len(X_new))) return X_new, y_new @@ -282,9 +276,8 @@ def _sample(self, X, y): # If regular SMOTE is to be performed if self.kind == 'regular': - # Print if verbose is true# - if self.verbose: - print('Finding the {} nearest neighbours...'.format(self.k)) + self.logger.debug('Finding the {} nearest neighbours...'.format( + self.k)) # Look for k-th nearest neighbours, excluding, of course, the # point itself. 
@@ -296,10 +289,7 @@ def _sample(self, X, y): X_min, return_distance=False)[:, 1:] - # Print status if verbose is true - if self.verbose: - print("done!") - print("Creating synthetic samples...", end="") + self.logger.debug('Create synthetic samples ...') # --- Generating synthetic samples # Use static method make_samples to generate minority samples @@ -310,9 +300,6 @@ def _sample(self, X, y): num_samples, 1.0) - if self.verbose: - print("done!") - # Concatenate the newly generated samples to the original data set X_resampled = np.concatenate((X, X_new), axis=0) y_resampled = np.concatenate((y, y_new), axis=0) @@ -321,23 +308,19 @@ def _sample(self, X, y): if self.kind == 'borderline1' or self.kind == 'borderline2': - if self.verbose: - print("Finding the {} nearest neighbours...".format(self.m)) + self.logger.debug('Finding the {} nearest neighbours ...'.format( + self.m)) # Find the NNs for all samples in the data set. self.nearest_neighbour.fit(X) - if self.verbose: - print("done!") - # Boolean array with True for minority samples in danger danger_index = self._in_danger_noise(X_min, y, kind='danger') # If all minority samples are safe, return the original data set. if not any(danger_index): - if self.verbose: - print('There are no samples in danger. No borderline ' - 'synthetic samples created.') + self.logger.debug('There are no samples in danger. No' + ' borderline synthetic samples created.') # All are safe, nothing to be done here. return X, y @@ -430,17 +413,13 @@ def _sample(self, X, y): # First, find the nn of all the samples to identify samples # in danger and noisy ones - if self.verbose: - print("Finding the {} nearest neighbours...".format(self.m)) + self.logger.debug('Finding the {} nearest neighbours ...'.format( + self.m)) # As usual, fit a nearest neighbour model to the data self.nearest_neighbour.fit(X) - if self.verbose: - print("done!") - # Now, get rid of noisy support vectors - noise_bool = self._in_danger_noise(support_vector, y, kind='noise') # Remove noisy support vectors @@ -449,24 +428,22 @@ def _sample(self, X, y): kind='danger') safety_bool = np.logical_not(danger_bool) - if self.verbose: - print("Out of {0} support vectors, {1} are noisy, " - "{2} are in danger " - "and {3} are safe.".format(support_vector.shape[0], - noise_bool.sum().astype(int), - danger_bool.sum().astype(int), - safety_bool.sum().astype(int) - )) + self.logger.debug('Out of {0} support vectors, {1} are noisy, ' + '{2} are in danger ' + 'and {3} are safe.'.format( + support_vector.shape[0], + noise_bool.sum().astype(int), + danger_bool.sum().astype(int), + safety_bool.sum().astype(int))) - # Proceed to find support vectors NNs among the minority class - print("Finding the {} nearest neighbours...".format(self.k)) + # Proceed to find support vectors NNs among the minority class + self.logger.debug('Finding the {} nearest neighbours ...'.format( + self.k)) self.nearest_neighbour.set_params(**{'n_neighbors': self.k + 1}) self.nearest_neighbour.fit(X_min) - if self.verbose: - print("done!") - print("Creating synthetic samples...", end="") + self.logger.debug('Create synthetic samples ...') # Split the number of synthetic samples between interpolation and # extrapolation @@ -503,9 +480,6 @@ def _sample(self, X, y): int((1 - fractions) * num_samples), step_size=-self.out_step) - if self.verbose: - print("done!") - # Concatenate the newly generated samples to the original data set if (np.count_nonzero(danger_bool) > 0 and np.count_nonzero(safety_bool) > 0): diff --git 
a/imblearn/over_sampling/tests/test_adasyn.py b/imblearn/over_sampling/tests/test_adasyn.py index 74b0aafde..8976de402 100644 --- a/imblearn/over_sampling/tests/test_adasyn.py +++ b/imblearn/over_sampling/tests/test_adasyn.py @@ -56,13 +56,10 @@ def test_ada_init(): """Test the initialisation of the object""" # Define a ratio - verbose = True ratio = 'auto' - ada = ADASYN(ratio=ratio, random_state=RND_SEED, - verbose=verbose) + ada = ADASYN(ratio=ratio, random_state=RND_SEED) assert_equal(ada.random_state, RND_SEED) - assert_equal(ada.verbose, verbose) def test_ada_fit_single_class(): diff --git a/imblearn/over_sampling/tests/test_random_over_sampler.py b/imblearn/over_sampling/tests/test_random_over_sampler.py index f1418d985..0d2f0aac2 100644 --- a/imblearn/over_sampling/tests/test_random_over_sampler.py +++ b/imblearn/over_sampling/tests/test_random_over_sampler.py @@ -56,13 +56,10 @@ def test_ros_init(): """Test the initialisation of the object""" # Define a ratio - verbose = True ratio = 'auto' - ros = RandomOverSampler(ratio=ratio, random_state=RND_SEED, - verbose=verbose) + ros = RandomOverSampler(ratio=ratio, random_state=RND_SEED) assert_equal(ros.random_state, RND_SEED) - assert_equal(ros.verbose, verbose) def test_ros_fit_single_class(): diff --git a/imblearn/under_sampling/cluster_centroids.py b/imblearn/under_sampling/cluster_centroids.py index 3dcd4ad65..551173171 100644 --- a/imblearn/under_sampling/cluster_centroids.py +++ b/imblearn/under_sampling/cluster_centroids.py @@ -37,9 +37,6 @@ class ClusterCentroids(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - Whether or not to print information about the processing. - n_jobs : int, optional (default=-1) The number of threads to open if possible. @@ -67,10 +64,8 @@ class ClusterCentroids(SamplerMixin): """ - def __init__(self, ratio='auto', random_state=None, verbose=True, - n_jobs=-1, **kwargs): - super(ClusterCentroids, self).__init__(ratio=ratio, - verbose=verbose) + def __init__(self, ratio='auto', random_state=None, n_jobs=-1, **kwargs): + super(ClusterCentroids, self).__init__(ratio=ratio) self.random_state = random_state self.n_jobs = n_jobs self.kwargs = kwargs @@ -132,7 +127,7 @@ def _sample(self, X, y): num_samples)), axis=0) - if self.verbose: - print("Under-sampling performed: {}".format(Counter(y_resampled))) + self.logger.info('Under-sampling performed: {}'.format(Counter( + y_resampled))) return X_resampled, y_resampled diff --git a/imblearn/under_sampling/condensed_nearest_neighbour.py b/imblearn/under_sampling/condensed_nearest_neighbour.py index d6e66b03e..e477046dd 100644 --- a/imblearn/under_sampling/condensed_nearest_neighbour.py +++ b/imblearn/under_sampling/condensed_nearest_neighbour.py @@ -29,10 +29,6 @@ class CondensedNearestNeighbour(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - Whether or not to return the indices of the samples randomly - selected from the majority class. - size_ngh : int, optional (default=1) Size of the neighbourhood to consider to compute the average distance to the minority point samples. 
@@ -76,9 +72,9 @@ class CondensedNearestNeighbour(SamplerMixin): """ - def __init__(self, return_indices=False, random_state=None, verbose=True, + def __init__(self, return_indices=False, random_state=None, size_ngh=1, n_seeds_S=1, n_jobs=-1, **kwargs): - super(CondensedNearestNeighbour, self).__init__(verbose=verbose) + super(CondensedNearestNeighbour, self).__init__() self.return_indices = return_indices self.random_state = random_state @@ -200,8 +196,8 @@ def _sample(self, X, y): X_resampled = np.concatenate((X_resampled, sel_x), axis=0) y_resampled = np.concatenate((y_resampled, sel_y), axis=0) - if self.verbose: - print("Under-sampling performed: {}".format(Counter(y_resampled))) + self.logger.info('Under-sampling performed: {}'.format(Counter( + y_resampled))) # Check if the indices of the samples selected should be returned too if self.return_indices: diff --git a/imblearn/under_sampling/edited_nearest_neighbours.py b/imblearn/under_sampling/edited_nearest_neighbours.py index 24d2f86f0..e805bb7c3 100644 --- a/imblearn/under_sampling/edited_nearest_neighbours.py +++ b/imblearn/under_sampling/edited_nearest_neighbours.py @@ -33,9 +33,6 @@ class EditedNearestNeighbours(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - Whether or not to print information about the processing. - size_ngh : int, optional (default=3) Size of the neighbourhood to consider to compute the average distance to the minority point samples. @@ -80,9 +77,9 @@ class EditedNearestNeighbours(SamplerMixin): """ - def __init__(self, return_indices=False, random_state=None, verbose=True, + def __init__(self, return_indices=False, random_state=None, size_ngh=3, kind_sel='all', n_jobs=-1): - super(EditedNearestNeighbours, self).__init__(verbose=verbose) + super(EditedNearestNeighbours, self).__init__() self.return_indices = return_indices self.random_state = random_state self.size_ngh = size_ngh @@ -176,8 +173,8 @@ def _sample(self, X, y): X_resampled = np.concatenate((X_resampled, sel_x), axis=0) y_resampled = np.concatenate((y_resampled, sel_y), axis=0) - if self.verbose: - print("Under-sampling performed: {}".format(Counter(y_resampled))) + self.logger.info("Under-sampling performed: {}".format(Counter( + y_resampled))) # Check if the indices of the samples selected should be returned too if self.return_indices: @@ -203,9 +200,6 @@ class RepeatedEditedNearestNeighbours(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - Whether or not to print information about the processing. - size_ngh : int, optional (default=3) Size of the neighbourhood to consider to compute the average distance to the minority point samples. 
@@ -254,9 +248,9 @@ class RepeatedEditedNearestNeighbours(SamplerMixin): """ - def __init__(self, return_indices=False, random_state=None, verbose=True, + def __init__(self, return_indices=False, random_state=None, size_ngh=3, max_iter=100, kind_sel='all', n_jobs=-1): - super(RepeatedEditedNearestNeighbours, self).__init__(verbose=verbose) + super(RepeatedEditedNearestNeighbours, self).__init__() self.return_indices = return_indices self.random_state = random_state self.size_ngh = size_ngh @@ -265,8 +259,9 @@ def __init__(self, return_indices=False, random_state=None, verbose=True, self.max_iter = max_iter self.enn_ = EditedNearestNeighbours( return_indices=self.return_indices, - random_state=self.random_state, verbose=False, - size_ngh=self.size_ngh, kind_sel=self.kind_sel, + random_state=self.random_state, + size_ngh=self.size_ngh, + kind_sel=self.kind_sel, n_jobs=self.n_jobs) def fit(self, X, y): @@ -331,8 +326,7 @@ def _sample(self, X, y): for n_iter in range(self.max_iter): - if self.verbose: - print('Apply ENN iteration #{}'.format(n_iter + 1)) + self.logger.debug('Apply ENN iteration #{}'.format(n_iter + 1)) prev_len = y_.shape[0] if self.return_indices: @@ -344,8 +338,7 @@ def _sample(self, X, y): if prev_len == y_.shape[0]: break - if self.verbose: - print("Under-sampling performed: {}".format(Counter(y_))) + self.logger.info("Under-sampling performed: {}".format(Counter(y_))) X_resampled, y_resampled = X_, y_ diff --git a/imblearn/under_sampling/instance_hardness_threshold.py b/imblearn/under_sampling/instance_hardness_threshold.py index 9089e2c7f..bc2f21fd3 100644 --- a/imblearn/under_sampling/instance_hardness_threshold.py +++ b/imblearn/under_sampling/instance_hardness_threshold.py @@ -44,9 +44,6 @@ class InstanceHardnessThreshold(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - Whether or not to print information about the processing. - cv : int, optional (default=5) Number of folds to be used when estimating samples' instance hardness. @@ -86,10 +83,9 @@ class InstanceHardnessThreshold(SamplerMixin): """ def __init__(self, estimator='linear-svm', ratio='auto', - return_indices=False, random_state=None, verbose=True, - cv=5, n_jobs=-1, **kwargs): - super(InstanceHardnessThreshold, self).__init__(ratio=ratio, - verbose=verbose) + return_indices=False, random_state=None, cv=5, n_jobs=-1, + **kwargs): + super(InstanceHardnessThreshold, self).__init__(ratio=ratio) self.estimator = estimator self.return_indices = return_indices self.random_state = random_state @@ -192,8 +188,8 @@ def _sample(self, X, y): X_resampled = X[mask] y_resampled = y[mask] - if self.verbose: - print("Under-sampling performed: {}".format(Counter(y_resampled))) + self.logger.info('Under-sampling performed: {}'.format(Counter( + y_resampled))) # If we need to offer support for the indices if self.return_indices: diff --git a/imblearn/under_sampling/nearmiss.py b/imblearn/under_sampling/nearmiss.py index ac73487f7..71f29bdf3 100644 --- a/imblearn/under_sampling/nearmiss.py +++ b/imblearn/under_sampling/nearmiss.py @@ -32,9 +32,6 @@ class NearMiss(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - Whether or not to print information about the processing. - version : int, optional (default=1) Version of the NearMiss to use. Possible values are 1, 2 or 3. 
@@ -84,9 +81,8 @@ class NearMiss(SamplerMixin): """ def __init__(self, ratio='auto', return_indices=False, random_state=None, - verbose=True, version=1, size_ngh=3, ver3_samp_ngh=3, - n_jobs=-1, **kwargs): - super(NearMiss, self).__init__(ratio=ratio, verbose=verbose) + version=1, size_ngh=3, ver3_samp_ngh=3, n_jobs=-1, **kwargs): + super(NearMiss, self).__init__(ratio=ratio) self.return_indices = return_indices self.random_state = random_state self.version = version @@ -294,8 +290,8 @@ def _sample(self, X, y): X_resampled = np.concatenate((X_resampled, sel_x), axis=0) y_resampled = np.concatenate((y_resampled, sel_y), axis=0) - if self.verbose: - print("Under-sampling performed: {}".format(Counter(y_resampled))) + self.logger.info('Under-sampling performed: {}'.format(Counter( + y_resampled))) # Check if the indices of the samples selected should be returned too if self.return_indices: diff --git a/imblearn/under_sampling/neighbourhood_cleaning_rule.py b/imblearn/under_sampling/neighbourhood_cleaning_rule.py index dae666881..0a8e9d50c 100644 --- a/imblearn/under_sampling/neighbourhood_cleaning_rule.py +++ b/imblearn/under_sampling/neighbourhood_cleaning_rule.py @@ -27,9 +27,6 @@ class NeighbourhoodCleaningRule(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - Whether or not to print information about the processing. - size_ngh : int, optional (default=3) Size of the neighbourhood to consider in order to make the comparison between each samples and their NN. @@ -66,9 +63,9 @@ class NeighbourhoodCleaningRule(SamplerMixin): """ - def __init__(self, return_indices=False, random_state=None, verbose=True, - size_ngh=3, n_jobs=-1): - super(NeighbourhoodCleaningRule, self).__init__(verbose=verbose) + def __init__(self, return_indices=False, random_state=None, size_ngh=3, + n_jobs=-1): + super(NeighbourhoodCleaningRule, self).__init__() self.return_indices = return_indices self.random_state = random_state self.size_ngh = size_ngh @@ -170,8 +167,8 @@ def _sample(self, X, y): X_resampled = np.concatenate((X_resampled, sel_x), axis=0) y_resampled = np.concatenate((y_resampled, sel_y), axis=0) - if self.verbose: - print("Under-sampling performed: {}" + str(Counter(y_resampled))) + self.logger.info('Under-sampling performed: {}'.format(Counter( + y_resampled))) # Check if the indices of the samples selected should be returned too if self.return_indices: diff --git a/imblearn/under_sampling/one_sided_selection.py b/imblearn/under_sampling/one_sided_selection.py index 8976a88df..140383f2e 100644 --- a/imblearn/under_sampling/one_sided_selection.py +++ b/imblearn/under_sampling/one_sided_selection.py @@ -29,9 +29,6 @@ class OneSidedSelection(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - Whether or not to print information about the processing. - size_ngh : int, optional (default=1) Size of the neighbourhood to consider to compute the average distance to the minority point samples. 
@@ -71,9 +68,9 @@ class OneSidedSelection(SamplerMixin): """ - def __init__(self, return_indices=False, random_state=None, verbose=True, + def __init__(self, return_indices=False, random_state=None, size_ngh=1, n_seeds_S=1, n_jobs=-1, **kwargs): - super(OneSidedSelection, self).__init__(verbose=verbose) + super(OneSidedSelection, self).__init__() self.return_indices = return_indices self.random_state = random_state self.size_ngh = size_ngh @@ -174,14 +171,11 @@ def _sample(self, X, y): nns = nn.kneighbors(X_resampled, return_distance=False)[:, 1] # Send the information to is_tomek function to get boolean vector back - if self.verbose: - print("Looking for majority Tomek links...") - links = TomekLinks.is_tomek(y_resampled, nns, self.min_c_, - self.verbose) - - if self.verbose: - print("Under-sampling performed: {}".format(Counter( - y_resampled[np.logical_not(links)]))) + self.logger.debug('Looking for majority Tomek links ...') + links = TomekLinks.is_tomek(y_resampled, nns, self.min_c_) + + self.logger.info('Under-sampling performed: {}'.format(Counter( + y_resampled[np.logical_not(links)]))) # Check if the indices of the samples selected should be returned too if self.return_indices: diff --git a/imblearn/under_sampling/random_under_sampler.py b/imblearn/under_sampling/random_under_sampler.py index b4659657f..179190e29 100644 --- a/imblearn/under_sampling/random_under_sampler.py +++ b/imblearn/under_sampling/random_under_sampler.py @@ -35,9 +35,6 @@ class RandomUnderSampler(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - Whether or not to print information about the processing. - n_jobs : int, optional (default=-1) The number of threads to open if possible. @@ -62,9 +59,8 @@ class RandomUnderSampler(SamplerMixin): """ def __init__(self, ratio='auto', return_indices=False, random_state=None, - verbose=True, replacement=True): - super(RandomUnderSampler, self).__init__(ratio=ratio, - verbose=verbose) + replacement=True): + super(RandomUnderSampler, self).__init__(ratio=ratio) self.return_indices = return_indices self.random_state = random_state self.replacement = replacement @@ -134,8 +130,8 @@ def _sample(self, X, y): y_resampled = np.concatenate((y_resampled, y[y == key][indx]), axis=0) - if self.verbose: - print("Under-sampling performed: {}".format(Counter(y_resampled))) + self.logger.info("Under-sampling performed: {}".format( + Counter(y_resampled))) # Check if the indices of the samples selected should be returned as # well diff --git a/imblearn/under_sampling/tests/test_cluster_centroids.py b/imblearn/under_sampling/tests/test_cluster_centroids.py index 834662060..44c4bf0dd 100644 --- a/imblearn/under_sampling/tests/test_cluster_centroids.py +++ b/imblearn/under_sampling/tests/test_cluster_centroids.py @@ -57,11 +57,9 @@ def test_init(): # Define a ratio ratio = 1. 
- verbose = True - cc = ClusterCentroids(ratio=ratio, random_state=RND_SEED, verbose=verbose) + cc = ClusterCentroids(ratio=ratio, random_state=RND_SEED) assert_equal(cc.ratio, ratio) - assert_equal(cc.verbose, verbose) def test_cc_fit_single_class(): diff --git a/imblearn/under_sampling/tests/test_condensed_nearest_neighbour.py b/imblearn/under_sampling/tests/test_condensed_nearest_neighbour.py index fa0189365..3270148c3 100644 --- a/imblearn/under_sampling/tests/test_condensed_nearest_neighbour.py +++ b/imblearn/under_sampling/tests/test_condensed_nearest_neighbour.py @@ -31,13 +31,11 @@ def test_cnn_init(): """Test the initialisation of the object""" # Define a ratio - verbose = True - cnn = CondensedNearestNeighbour(random_state=RND_SEED, verbose=verbose) + cnn = CondensedNearestNeighbour(random_state=RND_SEED) assert_equal(cnn.size_ngh, 1) assert_equal(cnn.n_seeds_S, 1) assert_equal(cnn.n_jobs, -1) - assert_equal(cnn.verbose, verbose) def test_cnn_fit_single_class(): diff --git a/imblearn/under_sampling/tests/test_edited_nearest_neighbours.py b/imblearn/under_sampling/tests/test_edited_nearest_neighbours.py index d581238d1..8f6af38d6 100644 --- a/imblearn/under_sampling/tests/test_edited_nearest_neighbours.py +++ b/imblearn/under_sampling/tests/test_edited_nearest_neighbours.py @@ -31,14 +31,12 @@ def test_enn_init(): """Test the initialisation of the object""" # Define a ratio - verbose = True - enn = EditedNearestNeighbours(random_state=RND_SEED, verbose=verbose) + enn = EditedNearestNeighbours(random_state=RND_SEED) assert_equal(enn.size_ngh, 3) assert_equal(enn.kind_sel, 'all') assert_equal(enn.n_jobs, -1) assert_equal(enn.random_state, RND_SEED) - assert_equal(enn.verbose, verbose) def test_enn_fit_single_class(): diff --git a/imblearn/under_sampling/tests/test_instance_hardness_threshold.py b/imblearn/under_sampling/tests/test_instance_hardness_threshold.py index 354db975a..d4afbe227 100644 --- a/imblearn/under_sampling/tests/test_instance_hardness_threshold.py +++ b/imblearn/under_sampling/tests/test_instance_hardness_threshold.py @@ -69,15 +69,12 @@ def test_iht_init(): """Test the initialisation of the object""" # Define a ratio - verbose = True ratio = 'auto' iht = InstanceHardnessThreshold(ESTIMATOR, ratio=ratio, - random_state=RND_SEED, - verbose=verbose) + random_state=RND_SEED) assert_equal(iht.ratio, ratio) assert_equal(iht.random_state, RND_SEED) - assert_equal(iht.verbose, verbose) def test_iht_fit_single_class(): diff --git a/imblearn/under_sampling/tests/test_nearmiss_1.py b/imblearn/under_sampling/tests/test_nearmiss_1.py index 04c053e6c..dd11cdcd0 100644 --- a/imblearn/under_sampling/tests/test_nearmiss_1.py +++ b/imblearn/under_sampling/tests/test_nearmiss_1.py @@ -66,15 +66,13 @@ def test_nearmiss_init(): # Define a ratio ratio = 1. - verbose = True - nm1 = NearMiss(ratio=ratio, random_state=RND_SEED, verbose=verbose, + nm1 = NearMiss(ratio=ratio, random_state=RND_SEED, version=VERSION_NEARMISS) assert_equal(nm1.version, VERSION_NEARMISS) assert_equal(nm1.size_ngh, 3) assert_equal(nm1.ratio, ratio) assert_equal(nm1.random_state, RND_SEED) - assert_equal(nm1.verbose, verbose) def test_nearmiss_fit_single_class(): diff --git a/imblearn/under_sampling/tests/test_nearmiss_2.py b/imblearn/under_sampling/tests/test_nearmiss_2.py index df46eface..1213e5bd4 100644 --- a/imblearn/under_sampling/tests/test_nearmiss_2.py +++ b/imblearn/under_sampling/tests/test_nearmiss_2.py @@ -66,15 +66,13 @@ def test_nearmiss_init(): # Define a ratio ratio = 1. 
- verbose = True - nm2 = NearMiss(ratio=ratio, random_state=RND_SEED, verbose=verbose, + nm2 = NearMiss(ratio=ratio, random_state=RND_SEED, version=VERSION_NEARMISS) assert_equal(nm2.version, VERSION_NEARMISS) assert_equal(nm2.size_ngh, 3) assert_equal(nm2.ratio, ratio) assert_equal(nm2.random_state, RND_SEED) - assert_equal(nm2.verbose, verbose) def test_nearmiss_fit_single_class(): diff --git a/imblearn/under_sampling/tests/test_nearmiss_3.py b/imblearn/under_sampling/tests/test_nearmiss_3.py index 3b62b1eec..c83ea50d7 100644 --- a/imblearn/under_sampling/tests/test_nearmiss_3.py +++ b/imblearn/under_sampling/tests/test_nearmiss_3.py @@ -66,15 +66,13 @@ def test_nearmiss_init(): # Define a ratio ratio = 1. - verbose = True - nm3 = NearMiss(ratio=ratio, random_state=RND_SEED, verbose=verbose, + nm3 = NearMiss(ratio=ratio, random_state=RND_SEED, version=VERSION_NEARMISS) assert_equal(nm3.version, VERSION_NEARMISS) assert_equal(nm3.size_ngh, 3) assert_equal(nm3.ratio, ratio) assert_equal(nm3.random_state, RND_SEED) - assert_equal(nm3.verbose, verbose) def test_nearmiss_fit_single_class(): diff --git a/imblearn/under_sampling/tests/test_neighbourhood_cleaning_rule.py b/imblearn/under_sampling/tests/test_neighbourhood_cleaning_rule.py index e4ac8c65e..9945d2ad8 100644 --- a/imblearn/under_sampling/tests/test_neighbourhood_cleaning_rule.py +++ b/imblearn/under_sampling/tests/test_neighbourhood_cleaning_rule.py @@ -31,13 +31,11 @@ def test_ncr_init(): """Test the initialisation of the object""" # Define a ratio - verbose = True - ncr = NeighbourhoodCleaningRule(random_state=RND_SEED, verbose=verbose) + ncr = NeighbourhoodCleaningRule(random_state=RND_SEED) assert_equal(ncr.size_ngh, 3) assert_equal(ncr.n_jobs, -1) assert_equal(ncr.random_state, RND_SEED) - assert_equal(ncr.verbose, verbose) def test_ncr_fit_single_class(): diff --git a/imblearn/under_sampling/tests/test_one_sided_selection.py b/imblearn/under_sampling/tests/test_one_sided_selection.py index 5f63f0825..7f3112233 100644 --- a/imblearn/under_sampling/tests/test_one_sided_selection.py +++ b/imblearn/under_sampling/tests/test_one_sided_selection.py @@ -31,14 +31,12 @@ def test_oss_init(): """Test the initialisation of the object""" # Define a ratio - verbose = True - oss = OneSidedSelection(random_state=RND_SEED, verbose=verbose) + oss = OneSidedSelection(random_state=RND_SEED) assert_equal(oss.size_ngh, 1) assert_equal(oss.n_seeds_S, 1) assert_equal(oss.n_jobs, -1) assert_equal(oss.random_state, RND_SEED) - assert_equal(oss.verbose, verbose) def test_oss_fit_single_class(): diff --git a/imblearn/under_sampling/tests/test_random_under_sampler.py b/imblearn/under_sampling/tests/test_random_under_sampler.py index 8b17d6d66..cc1e0fc73 100644 --- a/imblearn/under_sampling/tests/test_random_under_sampler.py +++ b/imblearn/under_sampling/tests/test_random_under_sampler.py @@ -56,13 +56,10 @@ def test_rus_init(): """Test the initialisation of the object""" # Define a ratio - verbose = True ratio = 'auto' - rus = RandomUnderSampler(ratio=ratio, random_state=RND_SEED, - verbose=verbose) + rus = RandomUnderSampler(ratio=ratio, random_state=RND_SEED) assert_equal(rus.random_state, RND_SEED) - assert_equal(rus.verbose, verbose) def test_rus_fit_single_class(): diff --git a/imblearn/under_sampling/tests/test_repeated_edited_nearest_neighbours.py b/imblearn/under_sampling/tests/test_repeated_edited_nearest_neighbours.py index 512b2497d..2c959b09b 100644 --- a/imblearn/under_sampling/tests/test_repeated_edited_nearest_neighbours.py +++ 
b/imblearn/under_sampling/tests/test_repeated_edited_nearest_neighbours.py @@ -31,15 +31,12 @@ def test_renn_init(): """Test the initialisation of the object""" # Define a ratio - verbose = True - renn = RepeatedEditedNearestNeighbours(random_state=RND_SEED, - verbose=verbose) + renn = RepeatedEditedNearestNeighbours(random_state=RND_SEED) assert_equal(renn.size_ngh, 3) assert_equal(renn.kind_sel, 'all') assert_equal(renn.n_jobs, -1) assert_equal(renn.random_state, RND_SEED) - assert_equal(renn.verbose, verbose) def test_renn_iter_wrong(): diff --git a/imblearn/under_sampling/tests/test_tomek_links.py b/imblearn/under_sampling/tests/test_tomek_links.py index cba12b80f..e440193e4 100644 --- a/imblearn/under_sampling/tests/test_tomek_links.py +++ b/imblearn/under_sampling/tests/test_tomek_links.py @@ -31,12 +31,10 @@ def test_tl_init(): """Test the initialisation of the object""" # Define a ratio - verbose = True - tl = TomekLinks(random_state=RND_SEED, verbose=verbose) + tl = TomekLinks(random_state=RND_SEED) assert_equal(tl.n_jobs, -1) assert_equal(tl.random_state, RND_SEED) - assert_equal(tl.verbose, verbose) def test_tl_fit_single_class(): diff --git a/imblearn/under_sampling/tomek_links.py b/imblearn/under_sampling/tomek_links.py index 7c5fda327..d65d23d56 100644 --- a/imblearn/under_sampling/tomek_links.py +++ b/imblearn/under_sampling/tomek_links.py @@ -26,9 +26,6 @@ class TomekLinks(SamplerMixin): If None, the random number generator is the RandomState instance used by np.random. - verbose : bool, optional (default=True) - The number of threads to open if possible. - n_jobs : int, optional (default=-1) The number of threads to open if possible. @@ -60,15 +57,15 @@ class TomekLinks(SamplerMixin): """ - def __init__(self, return_indices=False, random_state=None, verbose=True, + def __init__(self, return_indices=False, random_state=None, n_jobs=-1): - super(TomekLinks, self).__init__(verbose=verbose) + super(TomekLinks, self).__init__() self.return_indices = return_indices self.random_state = random_state self.n_jobs = n_jobs @staticmethod - def is_tomek(y, nn_index, class_type, verbose=True): + def is_tomek(y, nn_index, class_type): """is_tomek uses the target vector and the first neighbour of every sample point and looks for Tomek pairs. Returning a boolean vector with True for majority Tomek links. @@ -115,9 +112,6 @@ def is_tomek(y, nn_index, class_type, verbose=True): links[ind] = True count += 1 - if verbose: - print("{} Tomek links found.".format(count)) - return links def _sample(self, X, y): @@ -151,13 +145,11 @@ def _sample(self, X, y): nns = nn.kneighbors(X, return_distance=False)[:, 1] # Send the information to is_tomek function to get boolean vector back - if self.verbose: - print("Looking for majority Tomek links...") - links = self.is_tomek(y, nns, self.min_c_, self.verbose) + self.logger.debug('Looking for majority Tomek links ...') + links = self.is_tomek(y, nns, self.min_c_) - if self.verbose: - print("Under-sampling performed: {}".format(Counter( - y[np.logical_not(links)]))) + self.logger.info('Under-sampling performed: {}'.format(Counter( + y[np.logical_not(links)]))) # Check if the indices of the samples selected should be returned too if self.return_indices:
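Note on usage: with the `verbose` flag gone, every message these samplers previously printed is routed through `logging.getLogger(__name__)` in `imblearn/base.py`. A minimal sketch of how a caller can recover the old `verbose=True` output (the arrays `X` and `y` are assumed to exist; the snippet is illustrative and not part of the patch):

```python
import logging

from imblearn.under_sampling import RandomUnderSampler

# The default handler writes to stderr; the INFO level surfaces messages
# such as "Under-sampling performed: ..." that verbose=True used to print.
logging.basicConfig(level=logging.INFO)

rus = RandomUnderSampler(ratio='auto', random_state=42)
X_resampled, y_resampled = rus.fit_sample(X, y)
```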
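The finer-grained DEBUG records (for instance 'Creation of the subset #...') can be enabled for this package alone. Because every sampler builds its logger with `logging.getLogger(__name__)` inside `imblearn.base`, raising the level of the `imblearn` parent logger reaches all of them through the logging hierarchy; a sketch under the same assumptions:

```python
import logging

# A handler is still needed on the root logger for records to be rendered.
logging.basicConfig(format='%(name)s:%(levelname)s: %(message)s')

# Child loggers such as 'imblearn.base' inherit this effective level.
logging.getLogger('imblearn').setLevel(logging.DEBUG)
```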
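`TomekLinks.is_tomek` also loses its `verbose` parameter, so external callers of the static method keep only three arguments, mirroring the internal call `self.is_tomek(y, nns, self.min_c_)`. A hypothetical call site (`X`, `y` and the minority class label `min_class` are assumed to exist):

```python
from sklearn.neighbors import NearestNeighbors

from imblearn.under_sampling import TomekLinks

# First neighbour of every sample, computed the same way _sample does.
nn = NearestNeighbors(n_neighbors=2)
nn.fit(X)
nns = nn.kneighbors(X, return_distance=False)[:, 1]

# New three-argument signature; the number of links found is no longer
# printed by is_tomek itself.
links = TomekLinks.is_tomek(y, nns, min_class)
```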