forked from scikit-learn-contrib/imbalanced-learn
-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Guillaume Lemaitre
committed
May 27, 2016
1 parent
cecad38
commit 921877f
Showing
25 changed files
with
534 additions
and
14 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
118 changes: 118 additions & 0 deletions
118
unbalanced_dataset/under_sampling/tests/test_edited_nearest_neighbours.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
"""Test the module edited nearest neighbours.""" | ||
from __future__ import print_function | ||
|
||
import os | ||
|
||
import numpy as np | ||
from numpy.testing import assert_raises | ||
from numpy.testing import assert_equal | ||
from numpy.testing import assert_array_equal | ||
|
||
from sklearn.datasets import make_classification | ||
|
||
from unbalanced_dataset.under_sampling import EditedNearestNeighbours | ||
|
||
# Module-level fixture shared by every test below: a two-class dataset of
# 5000 samples and 20 features, generated with class weights 0.1 / 0.9 and a
# fixed seed so that the results are reproducible.
RND_SEED = 0
X, Y = make_classification(
    n_samples=5000,
    n_features=20,
    n_informative=3,
    n_redundant=1,
    n_classes=2,
    n_clusters_per_class=1,
    weights=[0.1, 0.9],
    flip_y=0,
    class_sep=2,
    random_state=RND_SEED
)
|
||
|
||
def test_enn_init():
    """Check the attributes of a freshly constructed sampler"""

    # Pass the verbosity explicitly so the stored flag can be compared
    verbose = True
    sampler = EditedNearestNeighbours(random_state=RND_SEED, verbose=verbose)

    # Attribute name -> value this test expects right after construction
    expected_attributes = (
        ('size_ngh', 3),
        ('kind_sel', 'all'),
        ('n_jobs', -1),
        ('rs_', RND_SEED),
        ('verbose', verbose),
        ('min_c_', None),
        ('maj_c_', None),
        ('stats_c_', {}),
    )
    for attr_name, expected_value in expected_attributes:
        assert_equal(getattr(sampler, attr_name), expected_value)
|
||
|
||
def test_enn_fit_single_class():
    """Check that fitting a target made of a single class raises an error"""

    sampler = EditedNearestNeighbours(random_state=RND_SEED)

    # All-zero target with one entry per row of X: only one class is present
    n_samples = X.shape[0]
    y_single_class = np.zeros(n_samples)

    assert_raises(RuntimeError, sampler.fit, X, y_single_class)
|
||
|
||
def test_enn_fit():
    """Check the class statistics stored by the fitting method"""

    sampler = EditedNearestNeighbours(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Label 0 is expected as the minority class and label 1 as the majority
    assert_equal(sampler.min_c_, 0)
    assert_equal(sampler.maj_c_, 1)

    # Expected number of samples recorded for each label
    for label, n_expected in ((0, 500), (1, 4500)):
        assert_equal(sampler.stats_c_[label], n_expected)
|
||
|
||
def test_enn_transform_wt_fit():
    """Check that calling transform before fit raises an error"""

    # The sampler is deliberately left unfitted
    unfitted = EditedNearestNeighbours(random_state=RND_SEED)
    assert_raises(RuntimeError, unfitted.transform, X, Y)
|
||
|
||
def test_enn_fit_transform():
    """Compare the fit_transform output with the stored reference arrays"""

    sampler = EditedNearestNeighbours(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_transform(X, Y)

    # Reference arrays are read from the 'data' directory next to this file
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    X_expected = np.load(os.path.join(data_dir, 'enn_x.npy'))
    y_expected = np.load(os.path.join(data_dir, 'enn_y.npy'))

    assert_array_equal(X_resampled, X_expected)
    assert_array_equal(y_resampled, y_expected)
|
||
|
||
def test_enn_fit_transform_with_indices():
    """Compare the fit_transform output, indices included, with references"""

    # With return_indices=True a third value is unpacked from fit_transform
    sampler = EditedNearestNeighbours(return_indices=True,
                                      random_state=RND_SEED)
    X_resampled, y_resampled, idx_under = sampler.fit_transform(X, Y)

    # Reference arrays are read from the 'data' directory next to this file
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    X_expected = np.load(os.path.join(data_dir, 'enn_x.npy'))
    y_expected = np.load(os.path.join(data_dir, 'enn_y.npy'))
    idx_expected = np.load(os.path.join(data_dir, 'enn_idx.npy'))

    assert_array_equal(X_resampled, X_expected)
    assert_array_equal(y_resampled, y_expected)
    assert_array_equal(idx_under, idx_expected)
|
||
|
||
def test_enn_fit_transform_mode():
    """Test the fit transform routine using the mode as selection

    The resampled arrays are compared against the reference files
    'enn_x_mode.npy' and 'enn_y_mode.npy' stored in the 'data' directory
    next to this file. The test only reads those files: writing the freshly
    computed arrays over them before loading would make the comparison
    always succeed and would modify the source tree on every run.
    """

    # Resample the data
    enn = EditedNearestNeighbours(random_state=RND_SEED, kind_sel='mode')
    X_resampled, y_resampled = enn.fit_transform(X, Y)

    # Load the stored ground truth (read-only) and compare
    currdir = os.path.dirname(os.path.abspath(__file__))
    X_gt = np.load(os.path.join(currdir, 'data', 'enn_x_mode.npy'))
    y_gt = np.load(os.path.join(currdir, 'data', 'enn_y_mode.npy'))
    assert_array_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
97 changes: 94 additions & 3 deletions
97
unbalanced_dataset/under_sampling/tests/test_neighbourhood_cleaning_rule.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,101 @@ | ||
"""Test the module neighbourhood cleaning rule.""" | ||
from __future__ import print_function | ||
|
||
import os | ||
|
||
import numpy as np | ||
from numpy.testing import assert_raises | ||
from numpy.testing import assert_equal | ||
from numpy.testing import assert_array_equal | ||
|
||
from sklearn.datasets import make_classification | ||
|
||
from unbalanced_dataset.under_sampling import NeighbourhoodCleaningRule | ||
|
||
# Module-level fixture shared by every test below: a two-class dataset of
# 5000 samples and 20 features, generated with class weights 0.1 / 0.9 and a
# fixed seed so that the results are reproducible.
RND_SEED = 0
X, Y = make_classification(
    n_samples=5000,
    n_features=20,
    n_informative=3,
    n_redundant=1,
    n_classes=2,
    n_clusters_per_class=1,
    weights=[0.1, 0.9],
    flip_y=0,
    class_sep=2,
    random_state=RND_SEED
)
|
||
|
||
def test_ncr_init():
    """Check the attributes of a freshly constructed sampler"""

    # Pass the verbosity explicitly so the stored flag can be compared
    verbose = True
    cleaner = NeighbourhoodCleaningRule(random_state=RND_SEED,
                                        verbose=verbose)

    # Attribute name -> value this test expects right after construction
    expected_attributes = (
        ('size_ngh', 3),
        ('n_jobs', -1),
        ('rs_', RND_SEED),
        ('verbose', verbose),
        ('min_c_', None),
        ('maj_c_', None),
        ('stats_c_', {}),
    )
    for attr_name, expected_value in expected_attributes:
        assert_equal(getattr(cleaner, attr_name), expected_value)
|
||
|
||
def test_ncr_fit_single_class():
    """Check that fitting a target made of a single class raises an error"""

    cleaner = NeighbourhoodCleaningRule(random_state=RND_SEED)

    # All-zero target with one entry per row of X: only one class is present
    n_samples = X.shape[0]
    y_single_class = np.zeros(n_samples)

    assert_raises(RuntimeError, cleaner.fit, X, y_single_class)
|
||
|
||
def test_ncr_fit():
    """Check the class statistics stored by the fitting method"""

    cleaner = NeighbourhoodCleaningRule(random_state=RND_SEED)
    cleaner.fit(X, Y)

    # Label 0 is expected as the minority class and label 1 as the majority
    assert_equal(cleaner.min_c_, 0)
    assert_equal(cleaner.maj_c_, 1)

    # Expected number of samples recorded for each label
    for label, n_expected in ((0, 500), (1, 4500)):
        assert_equal(cleaner.stats_c_[label], n_expected)
|
||
|
||
def test_ncr_transform_wt_fit():
    """Check that calling transform before fit raises an error"""

    # The sampler is deliberately left unfitted
    unfitted = NeighbourhoodCleaningRule(random_state=RND_SEED)
    assert_raises(RuntimeError, unfitted.transform, X, Y)
|
||
|
||
def test_ncr_fit_transform():
    """Compare the fit_transform output with the stored reference arrays"""

    cleaner = NeighbourhoodCleaningRule(random_state=RND_SEED)
    X_resampled, y_resampled = cleaner.fit_transform(X, Y)

    # Reference arrays are read from the 'data' directory next to this file
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    X_expected = np.load(os.path.join(data_dir, 'ncr_x.npy'))
    y_expected = np.load(os.path.join(data_dir, 'ncr_y.npy'))

    assert_array_equal(X_resampled, X_expected)
    assert_array_equal(y_resampled, y_expected)
|
||
|
||
def test_ncr_fit_transform_with_indices():
    """Test the fit transform routine with indices support

    The sampler is built with return_indices=True, so fit_transform is
    unpacked into three values. Each of them is compared against the
    matching reference array stored in the 'data' directory next to this
    file.
    """

    # Resample the data
    ncr = NeighbourhoodCleaningRule(return_indices=True,
                                    random_state=RND_SEED)
    X_resampled, y_resampled, idx_under = ncr.fit_transform(X, Y)

    # Load the stored ground truth and compare
    currdir = os.path.dirname(os.path.abspath(__file__))
    X_gt = np.load(os.path.join(currdir, 'data', 'ncr_x.npy'))
    y_gt = np.load(os.path.join(currdir, 'data', 'ncr_y.npy'))
    idx_gt = np.load(os.path.join(currdir, 'data', 'ncr_idx.npy'))
    assert_array_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
    assert_array_equal(idx_under, idx_gt)
100 changes: 96 additions & 4 deletions
100
unbalanced_dataset/under_sampling/tests/test_one_sided_selection.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,102 @@ | ||
"""Test the module one-sided selection.""" | ||
from __future__ import print_function | ||
|
||
import os | ||
|
||
import numpy as np | ||
from numpy.testing import assert_raises | ||
from numpy.testing import assert_equal | ||
from numpy.testing import assert_array_equal | ||
|
||
from sklearn.datasets import make_classification | ||
|
||
from unbalanced_dataset.under_sampling import OneSidedSelection | ||
|
||
# Module-level fixture shared by every test below: a two-class dataset of
# 5000 samples and 20 features, generated with class weights 0.1 / 0.9 and a
# fixed seed so that the results are reproducible.
RND_SEED = 0
X, Y = make_classification(
    n_samples=5000,
    n_features=20,
    n_informative=3,
    n_redundant=1,
    n_classes=2,
    n_clusters_per_class=1,
    weights=[0.1, 0.9],
    flip_y=0,
    class_sep=2,
    random_state=RND_SEED
)
|
||
|
||
def test_oss_init():
    """Check the attributes of a freshly constructed sampler"""

    # Pass the verbosity explicitly so the stored flag can be compared
    verbose = True
    selector = OneSidedSelection(random_state=RND_SEED, verbose=verbose)

    # Attribute name -> value this test expects right after construction
    expected_attributes = (
        ('size_ngh', 1),
        ('n_seeds_S', 1),
        ('n_jobs', -1),
        ('rs_', RND_SEED),
        ('verbose', verbose),
        ('min_c_', None),
        ('maj_c_', None),
        ('stats_c_', {}),
    )
    for attr_name, expected_value in expected_attributes:
        assert_equal(getattr(selector, attr_name), expected_value)
|
||
|
||
def test_oss_fit_single_class():
    """Check that fitting a target made of a single class raises an error"""

    selector = OneSidedSelection(random_state=RND_SEED)

    # All-zero target with one entry per row of X: only one class is present
    n_samples = X.shape[0]
    y_single_class = np.zeros(n_samples)

    assert_raises(RuntimeError, selector.fit, X, y_single_class)
|
||
|
||
def test_oss_fit():
    """Check the class statistics stored by the fitting method"""

    selector = OneSidedSelection(random_state=RND_SEED)
    selector.fit(X, Y)

    # Label 0 is expected as the minority class and label 1 as the majority
    assert_equal(selector.min_c_, 0)
    assert_equal(selector.maj_c_, 1)

    # Expected number of samples recorded for each label
    for label, n_expected in ((0, 500), (1, 4500)):
        assert_equal(selector.stats_c_[label], n_expected)
|
||
|
||
def test_oss_transform_wt_fit():
    """Check that calling transform before fit raises an error"""

    # The sampler is deliberately left unfitted
    unfitted = OneSidedSelection(random_state=RND_SEED)
    assert_raises(RuntimeError, unfitted.transform, X, Y)
|
||
|
||
def test_oss_fit_transform():
    """Compare the fit_transform output with the stored reference arrays"""

    selector = OneSidedSelection(random_state=RND_SEED)
    X_resampled, y_resampled = selector.fit_transform(X, Y)

    # Reference arrays are read from the 'data' directory next to this file
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    X_expected = np.load(os.path.join(data_dir, 'oss_x.npy'))
    y_expected = np.load(os.path.join(data_dir, 'oss_y.npy'))

    assert_array_equal(X_resampled, X_expected)
    assert_array_equal(y_resampled, y_expected)
|
||
|
||
def test_oss_fit_transform_with_indices():
    """Test the fit transform routine with indices support

    The sampler is built with return_indices=True, so fit_transform is
    unpacked into three values. Each of them is compared against the
    matching reference array stored in the 'data' directory next to this
    file.
    """

    # Resample the data
    oss = OneSidedSelection(return_indices=True, random_state=RND_SEED)
    X_resampled, y_resampled, idx_under = oss.fit_transform(X, Y)

    # Load the stored ground truth and compare
    currdir = os.path.dirname(os.path.abspath(__file__))
    X_gt = np.load(os.path.join(currdir, 'data', 'oss_x.npy'))
    y_gt = np.load(os.path.join(currdir, 'data', 'oss_y.npy'))
    idx_gt = np.load(os.path.join(currdir, 'data', 'oss_idx.npy'))
    assert_array_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
    assert_array_equal(idx_under, idx_gt)
Oops, something went wrong.