Skip to content

Commit

Permalink
Finish the combine method
Browse files Browse the repository at this point in the history
  • Loading branch information
Guillaume Lemaitre committed Jun 29, 2016
1 parent 390e139 commit ef240ba
Show file tree
Hide file tree
Showing 10 changed files with 34 additions and 148 deletions.
86 changes: 10 additions & 76 deletions imblearn/combine/smote_enn.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@ class SMOTEENN(SamplerMixin):
number of samples in the minority class over the number of
samples in the majority class.
random_state : int or None, optional (default=None)
Seed for random number generation.
random_state : int, RandomState instance or None, optional (default=None)
If int, random_state is the seed used by the random number generator;
If RandomState instance, random_state is the random number generator;
If None, the random number generator is the RandomState instance used
by np.random.
verbose : bool, optional (default=True)
Whether or not to print information about the processing.
Expand Down Expand Up @@ -60,15 +63,6 @@ class SMOTEENN(SamplerMixin):
Attributes
----------
ratio : str or float
If 'auto', the ratio will be defined automatically to balance
the dataset. Otherwise, the ratio is defined as the
number of samples in the minority class over the number of
samples in the majority class.
random_state : int or None
Seed for random number generation.
min_c_ : str or int
The identifier of the minority class.
Expand Down Expand Up @@ -100,75 +94,21 @@ def __init__(self, ratio='auto', random_state=None, verbose=True,
k=5, m=10, out_step=0.5, kind_smote='regular',
size_ngh=3, kind_enn='all', n_jobs=-1, **kwargs):

"""Initialise the SMOTE ENN object.
Parameters
----------
ratio : str or float, optional (default='auto')
If 'auto', the ratio will be defined automatically to balance
the dataset. Otherwise, the ratio is defined as the
number of samples in the minority class over the number of
samples in the majority class.
random_state : int or None, optional (default=None)
Seed for random number generation.
verbose : bool, optional (default=True)
Whether or not to print information about the processing.
k : int, optional (default=5)
Number of nearest neighbours to use to construct synthetic
samples.
m : int, optional (default=10)
Number of nearest neighbours to use to determine if a minority
sample is in danger.
out_step : float, optional (default=0.5)
Step size when extrapolating.
kind_smote : str, optional (default='regular')
The type of SMOTE algorithm to use. One of the following
options: 'regular', 'borderline1', 'borderline2', 'svm'.
size_ngh : int, optional (default=3)
Size of the neighbourhood to consider to compute the average
distance to the minority point samples.
kind_enn : str, optional (default='all')
Strategy to use in order to exclude samples.
- If 'all', all neighbours will have to agree with the samples of
interest to not be excluded.
- If 'mode', the majority vote of the neighbours will be used in
order to exclude a sample.
n_jobs : int, optional (default=-1)
The number of threads to open if possible.
Returns
-------
None
"""
super(SMOTEENN, self).__init__(ratio=ratio, random_state=random_state,
super(SMOTEENN, self).__init__(ratio=ratio,
verbose=verbose)

self.random_state = random_state
self.k = k
self.m = m
self.out_step = out_step
self.kind_smote = kind_smote
self.size_ngh = size_ngh
self.kind_enn = kind_enn
self.n_jobs = n_jobs
self.kwargs = kwargs

self.sm = SMOTE(ratio=self.ratio, random_state=self.random_state,
verbose=self.verbose, k=self.k, m=self.m,
out_step=self.out_step, kind=self.kind_smote,
n_jobs=self.n_jobs, **self.kwargs)

self.size_ngh = size_ngh
self.kind_enn = kind_enn

self.enn = EditedNearestNeighbours(random_state=self.random_state,
verbose=self.verbose,
size_ngh=self.size_ngh,
Expand All @@ -192,8 +132,6 @@ def fit(self, X, y):
Return self.
"""
# Check the consistency of X and y
X, y = check_X_y(X, y)

super(SMOTEENN, self).fit(X, y)

Expand All @@ -202,7 +140,7 @@ def fit(self, X, y):

return self

def sample(self, X, y):
def _sample(self, X, y):
"""Resample the dataset.
Parameters
Expand All @@ -222,10 +160,6 @@ def sample(self, X, y):
The corresponding label of `X_resampled`
"""
# Check the consistency of X and y
X, y = check_X_y(X, y)

super(SMOTEENN, self).sample(X, y)

# Transform using SMOTE
X, y = self.sm.sample(X, y)
Expand Down
72 changes: 8 additions & 64 deletions imblearn/combine/smote_tomek.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@ class SMOTETomek(SamplerMixin):
number of samples in the minority class over the number of
samples in the majority class.
random_state : int or None, optional (default=None)
Seed for random number generation.
random_state : int, RandomState instance or None, optional (default=None)
If int, random_state is the seed used by the random number generator;
If RandomState instance, random_state is the random number generator;
If None, the random number generator is the RandomState instance used
by np.random.
verbose : bool, optional (default=True)
Whether or not to print information about the processing.
Expand Down Expand Up @@ -61,15 +64,6 @@ class SMOTETomek(SamplerMixin):
Attributes
----------
ratio : str or float
If 'auto', the ratio will be defined automatically to balance
the dataset. Otherwise, the ratio is defined as the
number of samples in the minority class over the number of
samples in the majority class.
random_state : int or None
Seed for random number generation.
min_c_ : str or int
The identifier of the minority class.
Expand Down Expand Up @@ -99,62 +93,18 @@ class SMOTETomek(SamplerMixin):
def __init__(self, ratio='auto', random_state=None, verbose=True,
k=5, m=10, out_step=0.5, kind_smote='regular',
n_jobs=-1, **kwargs):

"""Initialise the SMOTE Tomek links object.
Parameters
----------
ratio : str or float, optional (default='auto')
If 'auto', the ratio will be defined automatically to balance
the dataset. Otherwise, the ratio is defined as the
number of samples in the minority class over the the number of
samples in the majority class.
random_state : int or None, optional (default=None)
Seed for random number generation.
verbose : bool, optional (default=True)
Whether or not to print information about the processing.
k : int, optional (default=5)
Number of nearest neighbours to use to construct synthetic
samples.
m : int, optional (default=10)
Number of nearest neighbours to use to determine if a minority
sample is in danger.
out_step : float, optional (default=0.5)
Step size when extrapolating.
kind_smote : str, optional (default='regular')
The type of SMOTE algorithm to use. One of the following
options: 'regular', 'borderline1', 'borderline2', 'svm'.
n_jobs : int, optional (default=-1)
Number of threads to run the algorithm when it is possible.
Returns
-------
None
"""
super(SMOTETomek, self).__init__(ratio=ratio,
random_state=random_state,
verbose=verbose)

super(SMOTETomek, self).__init__(ratio=ratio, verbose=verbose)
self.random_state = random_state
self.k = k
self.m = m
self.out_step = out_step
self.kind_smote = kind_smote
self.n_jobs = n_jobs
self.kwargs = kwargs

self.sm = SMOTE(ratio=self.ratio, random_state=self.random_state,
verbose=self.verbose, k=self.k, m=self.m,
out_step=self.out_step, kind=self.kind_smote,
n_jobs=self.n_jobs, **self.kwargs)

self.tomek = TomekLinks(random_state=self.random_state,
verbose=self.verbose)

Expand All @@ -175,8 +125,6 @@ def fit(self, X, y):
Return self.
"""
# Check the consistency of X and y
X, y = check_X_y(X, y)

super(SMOTETomek, self).fit(X, y)

Expand All @@ -185,7 +133,7 @@ def fit(self, X, y):

return self

def sample(self, X, y):
def _sample(self, X, y):
"""Resample the dataset.
Parameters
Expand All @@ -205,10 +153,6 @@ def sample(self, X, y):
The corresponding label of `X_resampled`
"""
# Check the consistency of X and y
X, y = check_X_y(X, y)

super(SMOTETomek, self).sample(X, y)

# Transform using SMOTE
X, y = self.sm.sample(X, y)
Expand Down
Binary file modified imblearn/combine/tests/data/smote_enn_reg_x.npy
Binary file not shown.
Binary file modified imblearn/combine/tests/data/smote_enn_reg_x_05.npy
Binary file not shown.
Binary file modified imblearn/combine/tests/data/smote_enn_reg_y.npy
Binary file not shown.
Binary file modified imblearn/combine/tests/data/smote_enn_reg_y_05.npy
Binary file not shown.
Binary file modified imblearn/combine/tests/data/smote_tomek_reg_x.npy
Binary file not shown.
Binary file modified imblearn/combine/tests/data/smote_tomek_reg_x_05.npy
Binary file not shown.
12 changes: 8 additions & 4 deletions imblearn/combine/tests/test_smote_enn.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,23 @@ def test_senn_bad_ratio():

# Define a negative ratio
ratio = -1.0
assert_raises(ValueError, SMOTEENN, ratio=ratio)
smote = SMOTEENN(ratio=ratio)
assert_raises(ValueError, smote.fit, X, Y)

# Define a ratio greater than 1
ratio = 100.0
assert_raises(ValueError, SMOTEENN, ratio=ratio)
smote = SMOTEENN(ratio=ratio)
assert_raises(ValueError, smote.fit, X, Y)

# Define ratio as an unknown string
ratio = 'rnd'
assert_raises(ValueError, SMOTEENN, ratio=ratio)
smote = SMOTEENN(ratio=ratio)
assert_raises(ValueError, smote.fit, X, Y)

# Define ratio as a list which is not supported
ratio = [.5, .5]
assert_raises(ValueError, SMOTEENN, ratio=ratio)
smote = SMOTEENN(ratio=ratio)
assert_raises(ValueError, smote.fit, X, Y)


def test_smote_fit_single_class():
Expand Down
12 changes: 8 additions & 4 deletions imblearn/combine/tests/test_smote_tomek.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,23 @@ def test_smote_bad_ratio():

# Define a negative ratio
ratio = -1.0
assert_raises(ValueError, SMOTETomek, ratio=ratio)
smote = SMOTETomek(ratio=ratio)
assert_raises(ValueError, smote.fit, X, Y)

# Define a ratio greater than 1
ratio = 100.0
assert_raises(ValueError, SMOTETomek, ratio=ratio)
smote = SMOTETomek(ratio=ratio)
assert_raises(ValueError, smote.fit, X, Y)

# Define ratio as an unknown string
ratio = 'rnd'
assert_raises(ValueError, SMOTETomek, ratio=ratio)
smote = SMOTETomek(ratio=ratio)
assert_raises(ValueError, smote.fit, X, Y)

# Define ratio as a list which is not supported
ratio = [.5, .5]
assert_raises(ValueError, SMOTETomek, ratio=ratio)
smote = SMOTETomek(ratio=ratio)
assert_raises(ValueError, smote.fit, X, Y)


def test_smote_fit_single_class():
Expand Down

0 comments on commit ef240ba

Please sign in to comment.