Skip to content

Commit

Permalink
Modify verbose for logging messages
Browse files Browse the repository at this point in the history
  • Loading branch information
Guillaume Lemaitre committed Jul 4, 2016
1 parent 912390f commit 1c26708
Show file tree
Hide file tree
Showing 33 changed files with 133 additions and 278 deletions.
17 changes: 4 additions & 13 deletions imblearn/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class SamplerMixin(six.with_metaclass(ABCMeta, BaseEstimator)):

_estimator_type = "sampler"

def __init__(self, ratio='auto', verbose=True):
def __init__(self, ratio='auto'):
"""Initialize this object and its instance variables.
Parameters
Expand All @@ -40,20 +40,13 @@ def __init__(self, ratio='auto', verbose=True):
of samples in the minority class over the number of samples
in the majority class.
random_state : int or None, optional (default=None)
Seed for random number generation.
verbose : bool, optional (default=True)
Boolean to either or not print information about the processing
Returns
-------
None
"""

self.ratio = ratio
self.verbose = verbose
self.logger = logging.getLogger(__name__)

def fit(self, X, y):
Expand Down Expand Up @@ -85,8 +78,7 @@ def fit(self, X, y):
if hasattr(self, 'ratio'):
self._validate_ratio()

if self.verbose:
print("Determining classes statistics... ", end="")
self.logger.info('Compute classes statistics ...')

# Get all the unique elements in the target array
uniques = np.unique(y)
Expand All @@ -110,9 +102,8 @@ def fit(self, X, y):
self.min_c_ = min(self.stats_c_, key=self.stats_c_.get)
self.maj_c_ = max(self.stats_c_, key=self.stats_c_.get)

if self.verbose:
print('{} classes detected: {}'.format(uniques.size,
self.stats_c_))
self.logger.info('{} classes detected: {}'.format(uniques.size,
self.stats_c_))

# Check if the ratio provided at initialisation make sense
if isinstance(self.ratio, float):
Expand Down
15 changes: 5 additions & 10 deletions imblearn/combine/smote_enn.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ class SMOTEENN(SamplerMixin):
If None, the random number generator is the RandomState instance used
by np.random.
verbose : bool, optional (default=True)
Whether or not to print information about the processing.
k : int, optional (default=5)
Number of nearest neighbours used to construct synthetic
samples.
Expand Down Expand Up @@ -88,12 +85,11 @@ class SMOTEENN(SamplerMixin):
"""

def __init__(self, ratio='auto', random_state=None, verbose=True,
def __init__(self, ratio='auto', random_state=None,
k=5, m=10, out_step=0.5, kind_smote='regular',
size_ngh=3, kind_enn='all', n_jobs=-1, **kwargs):

super(SMOTEENN, self).__init__(ratio=ratio,
verbose=verbose)
super(SMOTEENN, self).__init__(ratio=ratio)
self.random_state = random_state
self.k = k
self.m = m
Expand All @@ -104,11 +100,10 @@ def __init__(self, ratio='auto', random_state=None, verbose=True,
self.n_jobs = n_jobs
self.kwargs = kwargs
self.sm = SMOTE(ratio=self.ratio, random_state=self.random_state,
verbose=self.verbose, k=self.k, m=self.m,
out_step=self.out_step, kind=self.kind_smote,
n_jobs=self.n_jobs, **self.kwargs)
k=self.k, m=self.m, out_step=self.out_step,
kind=self.kind_smote, n_jobs=self.n_jobs,
**self.kwargs)
self.enn = EditedNearestNeighbours(random_state=self.random_state,
verbose=self.verbose,
size_ngh=self.size_ngh,
kind_sel=self.kind_enn,
n_jobs=self.n_jobs)
Expand Down
16 changes: 6 additions & 10 deletions imblearn/combine/smote_tomek.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ class SMOTETomek(SamplerMixin):
If None, the random number generator is the RandomState instance used
by np.random.
verbose : bool, optional (default=True)
Whether or not to print information about the processing.
k : int, optional (default=5)
Number of nearest neighbours used to construct synthetic
samples.
Expand Down Expand Up @@ -88,10 +85,10 @@ class SMOTETomek(SamplerMixin):
"""

def __init__(self, ratio='auto', random_state=None, verbose=True,
def __init__(self, ratio='auto', random_state=None,
k=5, m=10, out_step=0.5, kind_smote='regular',
n_jobs=-1, **kwargs):
super(SMOTETomek, self).__init__(ratio=ratio, verbose=verbose)
super(SMOTETomek, self).__init__(ratio=ratio)
self.random_state = random_state
self.k = k
self.m = m
Expand All @@ -100,11 +97,10 @@ def __init__(self, ratio='auto', random_state=None, verbose=True,
self.n_jobs = n_jobs
self.kwargs = kwargs
self.sm = SMOTE(ratio=self.ratio, random_state=self.random_state,
verbose=self.verbose, k=self.k, m=self.m,
out_step=self.out_step, kind=self.kind_smote,
n_jobs=self.n_jobs, **self.kwargs)
self.tomek = TomekLinks(random_state=self.random_state,
verbose=self.verbose)
k=self.k, m=self.m, out_step=self.out_step,
kind=self.kind_smote, n_jobs=self.n_jobs,
**self.kwargs)
self.tomek = TomekLinks(random_state=self.random_state)

def fit(self, X, y):
"""Find the classes statistics before performing sampling.
Expand Down
39 changes: 18 additions & 21 deletions imblearn/ensemble/balance_cascade.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,6 @@ class BalanceCascade(SamplerMixin):
If None, the random number generator is the RandomState instance used
by np.random.
verbose : bool, optional (default=True)
Whether or not to print information about the processing.
n_max_subset : int or None, optional (default=None)
Maximum number of subsets to generate. By default, all data from
the training will be selected that could lead to a large number of
Expand Down Expand Up @@ -87,10 +84,9 @@ class BalanceCascade(SamplerMixin):
"""
def __init__(self, ratio='auto', return_indices=False, random_state=None,
verbose=True, n_max_subset=None, classifier='knn',
bootstrap=True, **kwargs):
super(BalanceCascade, self).__init__(ratio=ratio,
verbose=verbose)
n_max_subset=None, classifier='knn', bootstrap=True,
**kwargs):
super(BalanceCascade, self).__init__(ratio=ratio)
self.return_indices = return_indices
self.random_state = random_state
self.classifier = classifier
Expand Down Expand Up @@ -242,17 +238,17 @@ def _sample(self, X, y):
# Find the misclassified index to keep them for the next round
idx_mis_class = idx_sel_from_maj[np.nonzero(pred_label !=
N_y[idx_sel_from_maj])]
if self.verbose:
print("Elements misclassified: {}".format(idx_mis_class))
self.logger.debug('Elements misclassified: {}'.format(
idx_mis_class))

# Count how many random element will be selected
if self.ratio == 'auto':
num_samples = self.stats_c_[self.min_c_]
else:
num_samples = int(self.stats_c_[self.min_c_] / self.ratio)
num_samples -= idx_mis_class.size

if self.verbose:
print("Creation of the subset #{}".format(n_subsets))
self.logger.debug('Creation of the subset #{}'.format(n_subsets))

# We found a new subset, increase the counter
n_subsets += 1
Expand All @@ -279,13 +275,14 @@ def _sample(self, X, y):
idx_sel_from_maj),
axis=0))

if self.verbose:
print("Creation of the subset #" + str(n_subsets))
self.logger.debug('Creation of the subset #{}'.format(
n_subsets))

# We found a new subset, increase the counter
n_subsets += 1

# We found a new subset, increase the counter
n_subsets += 1
if self.verbose:
print('The number of subset achieved their maximum')
self.logger.debug('The number of subset reached is'
' maximum.')

# Also check that we will have enough sample to extract at the
# next round
Expand All @@ -307,14 +304,14 @@ def _sample(self, X, y):
idx_under.append(np.concatenate((idx_min,
idx_sel_from_maj),
axis=0))
if self.verbose:
print("Creation of the subset #" + str(n_subsets))
self.logger.debug('Creation of the subset #{}'.format(
n_subsets))

# We found a new subset, increase the counter
n_subsets += 1

if self.verbose:
print('Not enough samples to continue creating subsets')
self.logger.debug('Not enough samples to continue creating'
' subsets.')

if self.return_indices:
return (np.array(X_resampled), np.array(y_resampled),
Expand Down
12 changes: 3 additions & 9 deletions imblearn/ensemble/easy_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@ class EasyEnsemble(SamplerMixin):
If None, the random number generator is the RandomState instance used
by np.random.
verbose : bool, optional (default=True)
Whether or not to print information about the processing.
replacement : bool, optional (default=False)
Whether or not to sample randomly with replacement.
Expand Down Expand Up @@ -68,10 +65,9 @@ class EasyEnsemble(SamplerMixin):
"""

def __init__(self, ratio='auto', return_indices=False, verbose=True,
def __init__(self, ratio='auto', return_indices=False,
random_state=None, replacement=False, n_subsets=10):
super(EasyEnsemble, self).__init__(ratio=ratio,
verbose=verbose)
super(EasyEnsemble, self).__init__(ratio=ratio)
self.return_indices = return_indices
self.random_state = random_state
self.replacement = replacement
Expand Down Expand Up @@ -108,14 +104,12 @@ def _sample(self, X, y):
idx_under = []

for s in range(self.n_subsets):
if self.verbose:
print("Creation of the set #{}".format(s))
self.logger.debug('Creation of the set #{}'.format(s))

# Create the object for random under-sampling
rus = RandomUnderSampler(ratio=self.ratio,
return_indices=self.return_indices,
random_state=self.random_state,
verbose=self.verbose,
replacement=self.replacement)
if self.return_indices:
sel_x, sel_y, sel_idx = rus.fit_sample(X, y)
Expand Down
4 changes: 1 addition & 3 deletions imblearn/ensemble/tests/test_balance_cascade.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,12 @@ def test_bc_init():

# Define a ratio
ratio = 1.
verbose = True
bc = BalanceCascade(ratio=ratio, random_state=RND_SEED, verbose=verbose)
bc = BalanceCascade(ratio=ratio, random_state=RND_SEED)

assert_equal(bc.ratio, ratio)
assert_equal(bc.bootstrap, True)
assert_equal(bc.n_max_subset, None)
assert_equal(bc.random_state, RND_SEED)
assert_equal(bc.verbose, verbose)


def test_bc_fit_single_class():
Expand Down
4 changes: 1 addition & 3 deletions imblearn/ensemble/tests/test_easy_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,12 @@ def test_ee_init():

# Define a ratio
ratio = 1.
verbose = True
ee = EasyEnsemble(ratio=ratio, random_state=RND_SEED, verbose=verbose)
ee = EasyEnsemble(ratio=ratio, random_state=RND_SEED)

assert_equal(ee.ratio, ratio)
assert_equal(ee.replacement, False)
assert_equal(ee.n_subsets, 10)
assert_equal(ee.random_state, RND_SEED)
assert_equal(ee.verbose, verbose)


def test_ee_fit_single_class():
Expand Down
15 changes: 5 additions & 10 deletions imblearn/over_sampling/adasyn.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@ class ADASYN(SamplerMixin):
If None, the random number generator is the RandomState instance used
by np.random.
verbose : bool, optional (default=True)
Whether or not to print information about the processing.
k : int, optional (default=5)
Number of nearest neighbours used to construct synthetic samples.
Expand Down Expand Up @@ -75,11 +72,9 @@ class ADASYN(SamplerMixin):
def __init__(self,
ratio='auto',
random_state=None,
verbose=True,
k=5,
n_jobs=1):
super(ADASYN, self).__init__(ratio=ratio,
verbose=verbose)
super(ADASYN, self).__init__(ratio=ratio)
self.random_state = random_state
self.k = k
self.n_jobs = n_jobs
Expand Down Expand Up @@ -125,8 +120,8 @@ def _sample(self, X, y):
X_min = X[y == self.min_c_]

# Log the nearest-neighbours search at debug level
if self.verbose:
print('Finding the {} nearest neighbours...'.format(self.k))
self.logger.debug('Finding the {} nearest neighbours...'.format(
self.k))

# Look for k-th nearest neighbours, excluding, of course, the
# point itself.
Expand Down Expand Up @@ -156,7 +151,7 @@ def _sample(self, X, y):
X_resampled = np.vstack((X_resampled, x_gen))
y_resampled = np.hstack((y_resampled, self.min_c_))

if self.verbose:
print("Over-sampling performed: {}".format(Counter(y_resampled)))
self.logger.info('Over-sampling performed: {}'.format(Counter(
y_resampled)))

return X_resampled, y_resampled
11 changes: 3 additions & 8 deletions imblearn/over_sampling/random_over_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ class RandomOverSampler(SamplerMixin):
of samples in the minority class over the number of samples
in the majority class.
verbose : bool, optional (default=True)
Whether or not to print information about the processing.
random_state : int, RandomState instance or None, optional (default=None)
If int, random_state is the seed used by the random number generator;
If RandomState instance, random_state is the random number generator;
Expand Down Expand Up @@ -57,11 +54,9 @@ class RandomOverSampler(SamplerMixin):

def __init__(self,
ratio='auto',
verbose=True,
random_state=None):

super(RandomOverSampler, self).__init__(ratio=ratio,
verbose=verbose)
super(RandomOverSampler, self).__init__(ratio=ratio)
self.random_state = random_state

def _sample(self, X, y):
Expand Down Expand Up @@ -119,7 +114,7 @@ def _sample(self, X, y):
y[y == key],
y[y == key][indx]), axis=0)

if self.verbose:
print("Over-sampling performed: {}".format(Counter(y_resampled)))
self.logger.info('Over-sampling performed: {}'.format(Counter(
y_resampled)))

return X_resampled, y_resampled
Loading

0 comments on commit 1c26708

Please sign in to comment.