Finish under-sampling tests

glemaitre · May 27, 2016 · 921877f · 921877f
1 parent cecad38
commit 921877f
Show file tree

Hide file tree

Showing 25 changed files with 534 additions and 14 deletions.
diff --git a/unbalanced_dataset/under_sampling/edited_nearest_neighbours.py b/unbalanced_dataset/under_sampling/edited_nearest_neighbours.py
@@ -125,7 +125,7 @@ def __init__(self, return_indices=False, random_state=None, verbose=True,
         self.size_ngh = size_ngh
         possible_kind_sel = ('all', 'mode')
         if kind_sel not in possible_kind_sel:
-            raise ValueError('Unknown kind_sel parameters.')
+            raise NotImplementedError
         else:
             self.kind_sel = kind_sel
         self.n_jobs = n_jobs
@@ -227,6 +227,8 @@ def transform(self, X, y):
             elif self.kind_sel == 'all':
                 nnhood_label = (nnhood_label == key)
                 nnhood_bool = np.all(nnhood_label, axis=1)
+            else:
+                raise NotImplementedError
 
             # Get the samples which agree all together
             sel_x = np.squeeze(sub_samples_x[np.nonzero(nnhood_bool), :])

diff --git a/unbalanced_dataset/under_sampling/tests/data/enn_idx.npy b/unbalanced_dataset/under_sampling/tests/data/enn_idx.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/enn_x.npy b/unbalanced_dataset/under_sampling/tests/data/enn_x.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/enn_x_mode.npy b/unbalanced_dataset/under_sampling/tests/data/enn_x_mode.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/enn_y.npy b/unbalanced_dataset/under_sampling/tests/data/enn_y.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/enn_y_mode.npy b/unbalanced_dataset/under_sampling/tests/data/enn_y_mode.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/ncr_idx.npy b/unbalanced_dataset/under_sampling/tests/data/ncr_idx.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/ncr_x.npy b/unbalanced_dataset/under_sampling/tests/data/ncr_x.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/ncr_y.npy b/unbalanced_dataset/under_sampling/tests/data/ncr_y.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/oss_idx.npy b/unbalanced_dataset/under_sampling/tests/data/oss_idx.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/oss_x.npy b/unbalanced_dataset/under_sampling/tests/data/oss_x.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/oss_y.npy b/unbalanced_dataset/under_sampling/tests/data/oss_y.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/rus_idx.npy b/unbalanced_dataset/under_sampling/tests/data/rus_idx.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/rus_x.npy b/unbalanced_dataset/under_sampling/tests/data/rus_x.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/rus_x_05.npy b/unbalanced_dataset/under_sampling/tests/data/rus_x_05.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/rus_y.npy b/unbalanced_dataset/under_sampling/tests/data/rus_y.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/rus_y_05.npy b/unbalanced_dataset/under_sampling/tests/data/rus_y_05.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/tl_idx.npy b/unbalanced_dataset/under_sampling/tests/data/tl_idx.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/tl_x.npy b/unbalanced_dataset/under_sampling/tests/data/tl_x.npy
diff --git a/unbalanced_dataset/under_sampling/tests/data/tl_y.npy b/unbalanced_dataset/under_sampling/tests/data/tl_y.npy
diff --git a/unbalanced_dataset/under_sampling/tests/test_edited_nearest_neighbours.py b/unbalanced_dataset/under_sampling/tests/test_edited_nearest_neighbours.py
@@ -0,0 +1,118 @@
+"""Test the module edited nearest neighbours."""
+from __future__ import print_function
+
+import os
+
+import numpy as np
+from numpy.testing import assert_raises
+from numpy.testing import assert_equal
+from numpy.testing import assert_array_equal
+
+from sklearn.datasets import make_classification
+
+from unbalanced_dataset.under_sampling import EditedNearestNeighbours
+
+# Generate a global imbalanced two-class dataset shared by all the tests
+RND_SEED = 0
+X, Y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
+                           n_informative=3, n_redundant=1, flip_y=0,
+                           n_features=20, n_clusters_per_class=1,
+                           n_samples=5000, random_state=RND_SEED)
+
+
+def test_enn_init():
+    """Test the attribute values set when the object is initialised"""
+
+    # Build the object with an explicit verbosity flag
+    verbose = True
+    enn = EditedNearestNeighbours(random_state=RND_SEED, verbose=verbose)
+
+    assert_equal(enn.size_ngh, 3)
+    assert_equal(enn.kind_sel, 'all')
+    assert_equal(enn.n_jobs, -1)
+    assert_equal(enn.rs_, RND_SEED)
+    assert_equal(enn.verbose, verbose)
+    assert_equal(enn.min_c_, None)
+    assert_equal(enn.maj_c_, None)
+    assert_equal(enn.stats_c_, {})
+
+
+def test_enn_fit_single_class():
+    """Test that fit raises an error when there is a single class"""
+
+    # Create the object
+    enn = EditedNearestNeighbours(random_state=RND_SEED)
+    # Build a target vector containing a single class (all zeros);
+    # fitting on it is expected to raise a RuntimeError
+    y_single_class = np.zeros((X.shape[0], ))
+    assert_raises(RuntimeError, enn.fit, X, y_single_class)
+
+
+def test_enn_fit():
+    """Test that fit computes the class statistics"""
+
+    # Create the object
+    enn = EditedNearestNeighbours(random_state=RND_SEED)
+    # Fit the data
+    enn.fit(X, Y)
+
+    # Check that the class information has been computed
+    assert_equal(enn.min_c_, 0)
+    assert_equal(enn.maj_c_, 1)
+    assert_equal(enn.stats_c_[0], 500)
+    assert_equal(enn.stats_c_[1], 4500)
+
+
+def test_enn_transform_wt_fit():
+    """Test that an error is raised when transform is called before
+    fitting"""
+
+    # Create the object without fitting it
+    enn = EditedNearestNeighbours(random_state=RND_SEED)
+    assert_raises(RuntimeError, enn.transform, X, Y)
+
+
+def test_enn_fit_transform():
+    """Test the fit transform routine against the stored ground truth"""
+
+    # Resample the data
+    enn = EditedNearestNeighbours(random_state=RND_SEED)
+    X_resampled, y_resampled = enn.fit_transform(X, Y)
+
+    currdir = os.path.dirname(os.path.abspath(__file__))
+    X_gt = np.load(os.path.join(currdir, 'data', 'enn_x.npy'))
+    y_gt = np.load(os.path.join(currdir, 'data', 'enn_y.npy'))
+    assert_array_equal(X_resampled, X_gt)
+    assert_array_equal(y_resampled, y_gt)
+
+
+def test_enn_fit_transform_with_indices():
+    """Test the fit transform routine when indices are also returned"""
+
+    # Resample the data and request the indices as a third output
+    enn = EditedNearestNeighbours(return_indices=True, random_state=RND_SEED)
+    X_resampled, y_resampled, idx_under = enn.fit_transform(X, Y)
+
+    currdir = os.path.dirname(os.path.abspath(__file__))
+    X_gt = np.load(os.path.join(currdir, 'data', 'enn_x.npy'))
+    y_gt = np.load(os.path.join(currdir, 'data', 'enn_y.npy'))
+    idx_gt = np.load(os.path.join(currdir, 'data', 'enn_idx.npy'))
+    assert_array_equal(X_resampled, X_gt)
+    assert_array_equal(y_resampled, y_gt)
+    assert_array_equal(idx_under, idx_gt)
+
+
+def test_enn_fit_transform_mode():
+    """Test the fit transform routine using the mode as selection"""
+
+    # Resample the data
+    enn = EditedNearestNeighbours(random_state=RND_SEED, kind_sel='mode')
+    X_resampled, y_resampled = enn.fit_transform(X, Y)
+
+    currdir = os.path.dirname(os.path.abspath(__file__))
+    # Only load the stored ground truth: saving the fresh output here would
+    # overwrite the reference files and make the comparison always pass.
+    X_gt = np.load(os.path.join(currdir, 'data', 'enn_x_mode.npy'))
+    y_gt = np.load(os.path.join(currdir, 'data', 'enn_y_mode.npy'))
+    assert_array_equal(X_resampled, X_gt)
+    assert_array_equal(y_resampled, y_gt)
diff --git a/unbalanced_dataset/under_sampling/tests/test_neighbourhood_cleaning_rule.py b/unbalanced_dataset/under_sampling/tests/test_neighbourhood_cleaning_rule.py
@@ -1,10 +1,101 @@
 """Test the module neighbourhood cleaning rule."""
 from __future__ import print_function
 
+import os
+
+import numpy as np
+from numpy.testing import assert_raises
+from numpy.testing import assert_equal
+from numpy.testing import assert_array_equal
+
+from sklearn.datasets import make_classification
+
 from unbalanced_dataset.under_sampling import NeighbourhoodCleaningRule
 
+# Generate a global imbalanced two-class dataset shared by all the tests
+RND_SEED = 0
+X, Y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
+                           n_informative=3, n_redundant=1, flip_y=0,
+                           n_features=20, n_clusters_per_class=1,
+                           n_samples=5000, random_state=RND_SEED)
+
+
+def test_ncr_init():
+    """Test the attribute values set when the object is initialised"""
+
+    # Build the object with an explicit verbosity flag
+    verbose = True
+    ncr = NeighbourhoodCleaningRule(random_state=RND_SEED, verbose=verbose)
+
+    assert_equal(ncr.size_ngh, 3)
+    assert_equal(ncr.n_jobs, -1)
+    assert_equal(ncr.rs_, RND_SEED)
+    assert_equal(ncr.verbose, verbose)
+    assert_equal(ncr.min_c_, None)
+    assert_equal(ncr.maj_c_, None)
+    assert_equal(ncr.stats_c_, {})
+
+
+def test_ncr_fit_single_class():
+    """Test that fit raises an error when there is a single class"""
+
+    # Create the object
+    ncr = NeighbourhoodCleaningRule(random_state=RND_SEED)
+    # Build a target vector containing a single class (all zeros);
+    # fitting on it is expected to raise a RuntimeError
+    y_single_class = np.zeros((X.shape[0], ))
+    assert_raises(RuntimeError, ncr.fit, X, y_single_class)
+
+
+def test_ncr_fit():
+    """Test that fit computes the class statistics"""
+
+    # Create the object
+    ncr = NeighbourhoodCleaningRule(random_state=RND_SEED)
+    # Fit the data
+    ncr.fit(X, Y)
+
+    # Check that the class information has been computed
+    assert_equal(ncr.min_c_, 0)
+    assert_equal(ncr.maj_c_, 1)
+    assert_equal(ncr.stats_c_[0], 500)
+    assert_equal(ncr.stats_c_[1], 4500)
+
+
+def test_ncr_transform_wt_fit():
+    """Test that an error is raised when transform is called before
+    fitting"""
+
+    # Create the object without fitting it
+    ncr = NeighbourhoodCleaningRule(random_state=RND_SEED)
+    assert_raises(RuntimeError, ncr.transform, X, Y)
+
+
+def test_ncr_fit_transform():
+    """Test the fit transform routine against the stored ground truth"""
+
+    # Resample the data
+    ncr = NeighbourhoodCleaningRule(random_state=RND_SEED)
+    X_resampled, y_resampled = ncr.fit_transform(X, Y)
+
+    currdir = os.path.dirname(os.path.abspath(__file__))
+    X_gt = np.load(os.path.join(currdir, 'data', 'ncr_x.npy'))
+    y_gt = np.load(os.path.join(currdir, 'data', 'ncr_y.npy'))
+    assert_array_equal(X_resampled, X_gt)
+    assert_array_equal(y_resampled, y_gt)
+
+
+def test_ncr_fit_transform_with_indices():
+    """Test the fit transform routine when indices are also returned"""
 
-def test_neighbourhood_cleaning_rule():
-    """Test the neighbourhood cleaning rule function."""
+    # Resample the data and request the indices as a third output
+    ncr = NeighbourhoodCleaningRule(return_indices=True, random_state=RND_SEED)
+    X_resampled, y_resampled, idx_under = ncr.fit_transform(X, Y)
 
-    print('Test Neighbourhood Cleaning Rule')
+    currdir = os.path.dirname(os.path.abspath(__file__))
+    X_gt = np.load(os.path.join(currdir, 'data', 'ncr_x.npy'))
+    y_gt = np.load(os.path.join(currdir, 'data', 'ncr_y.npy'))
+    idx_gt = np.load(os.path.join(currdir, 'data', 'ncr_idx.npy'))
+    assert_array_equal(X_resampled, X_gt)
+    assert_array_equal(y_resampled, y_gt)
+    assert_array_equal(idx_under, idx_gt)
diff --git a/unbalanced_dataset/under_sampling/tests/test_one_sided_selection.py b/unbalanced_dataset/under_sampling/tests/test_one_sided_selection.py
@@ -1,10 +1,102 @@
-"""Test the module one sided selection."""
+"""Test the module one-sided selection."""
 from __future__ import print_function
 
+import os
+
+import numpy as np
+from numpy.testing import assert_raises
+from numpy.testing import assert_equal
+from numpy.testing import assert_array_equal
+
+from sklearn.datasets import make_classification
+
 from unbalanced_dataset.under_sampling import OneSidedSelection
 
+# Generate a global imbalanced two-class dataset shared by all the tests
+RND_SEED = 0
+X, Y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
+                           n_informative=3, n_redundant=1, flip_y=0,
+                           n_features=20, n_clusters_per_class=1,
+                           n_samples=5000, random_state=RND_SEED)
+
+
+def test_oss_init():
+    """Test the attribute values set when the object is initialised"""
+
+    # Build the object with an explicit verbosity flag
+    verbose = True
+    oss = OneSidedSelection(random_state=RND_SEED, verbose=verbose)
+
+    assert_equal(oss.size_ngh, 1)
+    assert_equal(oss.n_seeds_S, 1)
+    assert_equal(oss.n_jobs, -1)
+    assert_equal(oss.rs_, RND_SEED)
+    assert_equal(oss.verbose, verbose)
+    assert_equal(oss.min_c_, None)
+    assert_equal(oss.maj_c_, None)
+    assert_equal(oss.stats_c_, {})
+
+
+def test_oss_fit_single_class():
+    """Test that fit raises an error when there is a single class"""
+
+    # Create the object
+    oss = OneSidedSelection(random_state=RND_SEED)
+    # Build a target vector containing a single class (all zeros);
+    # fitting on it is expected to raise a RuntimeError
+    y_single_class = np.zeros((X.shape[0], ))
+    assert_raises(RuntimeError, oss.fit, X, y_single_class)
+
+
+def test_oss_fit():
+    """Test that fit computes the class statistics"""
+
+    # Create the object
+    oss = OneSidedSelection(random_state=RND_SEED)
+    # Fit the data
+    oss.fit(X, Y)
+
+    # Check that the class information has been computed
+    assert_equal(oss.min_c_, 0)
+    assert_equal(oss.maj_c_, 1)
+    assert_equal(oss.stats_c_[0], 500)
+    assert_equal(oss.stats_c_[1], 4500)
+
+
+def test_oss_transform_wt_fit():
+    """Test that an error is raised when transform is called before
+    fitting"""
+
+    # Create the object without fitting it
+    oss = OneSidedSelection(random_state=RND_SEED)
+    assert_raises(RuntimeError, oss.transform, X, Y)
+
+
+def test_oss_fit_transform():
+    """Test the fit transform routine against the stored ground truth"""
+
+    # Resample the data
+    oss = OneSidedSelection(random_state=RND_SEED)
+    X_resampled, y_resampled = oss.fit_transform(X, Y)
+
+    currdir = os.path.dirname(os.path.abspath(__file__))
+    X_gt = np.load(os.path.join(currdir, 'data', 'oss_x.npy'))
+    y_gt = np.load(os.path.join(currdir, 'data', 'oss_y.npy'))
+    assert_array_equal(X_resampled, X_gt)
+    assert_array_equal(y_resampled, y_gt)
+
+
+def test_oss_fit_transform_with_indices():
+    """Test the fit transform routine when indices are also returned"""
 
-def test_one_sided_selection():
-    """Test the one sided selection function."""
+    # Resample the data and request the indices as a third output
+    oss = OneSidedSelection(return_indices=True, random_state=RND_SEED)
+    X_resampled, y_resampled, idx_under = oss.fit_transform(X, Y)
 
-    print('Test One Sided Selection')
+    currdir = os.path.dirname(os.path.abspath(__file__))
+    X_gt = np.load(os.path.join(currdir, 'data', 'oss_x.npy'))
+    y_gt = np.load(os.path.join(currdir, 'data', 'oss_y.npy'))
+    idx_gt = np.load(os.path.join(currdir, 'data', 'oss_idx.npy'))
+    assert_array_equal(X_resampled, X_gt)
+    assert_array_equal(y_resampled, y_gt)
+    assert_array_equal(idx_under, idx_gt)