Skip to content

Commit

Permalink
Finish under-sampling tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Guillaume Lemaitre committed May 27, 2016
1 parent cecad38 commit 921877f
Show file tree
Hide file tree
Showing 25 changed files with 534 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def __init__(self, return_indices=False, random_state=None, verbose=True,
self.size_ngh = size_ngh
possible_kind_sel = ('all', 'mode')
if kind_sel not in possible_kind_sel:
raise ValueError('Unknown kind_sel parameters.')
raise NotImplementedError
else:
self.kind_sel = kind_sel
self.n_jobs = n_jobs
Expand Down Expand Up @@ -227,6 +227,8 @@ def transform(self, X, y):
elif self.kind_sel == 'all':
nnhood_label = (nnhood_label == key)
nnhood_bool = np.all(nnhood_label, axis=1)
else:
raise NotImplementedError

# Get the samples which agree all together
sel_x = np.squeeze(sub_samples_x[np.nonzero(nnhood_bool), :])
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Test the module edited nearest neighbours."""
from __future__ import print_function

import os

import numpy as np
from numpy.testing import assert_raises
from numpy.testing import assert_equal
from numpy.testing import assert_array_equal

from sklearn.datasets import make_classification

from unbalanced_dataset.under_sampling import EditedNearestNeighbours

# Seed shared by the dataset generator and by every sampler built below
RND_SEED = 0
# Two-class dataset generated with class weights of 0.1 and 0.9
X, Y = make_classification(
    n_samples=5000,
    n_features=20,
    n_informative=3,
    n_redundant=1,
    n_classes=2,
    n_clusters_per_class=1,
    weights=[0.1, 0.9],
    class_sep=2,
    flip_y=0,
    random_state=RND_SEED
)


def test_enn_init():
    """Check the attributes available right after construction."""

    verbose = True
    sampler = EditedNearestNeighbours(random_state=RND_SEED, verbose=verbose)

    # (attribute name, value expected on a freshly built object)
    expected_attributes = (
        ('size_ngh', 3),
        ('kind_sel', 'all'),
        ('n_jobs', -1),
        ('rs_', RND_SEED),
        ('verbose', verbose),
        ('min_c_', None),
        ('maj_c_', None),
        ('stats_c_', {}),
    )
    for attribute, value in expected_attributes:
        assert_equal(getattr(sampler, attribute), value)


def test_enn_fit_single_class():
    """Check that fitting on a single-class target raises RuntimeError."""

    sampler = EditedNearestNeighbours(random_state=RND_SEED)
    # Invalid target: one label (0) for every sample of X
    single_label_target = np.zeros(X.shape[0])
    assert_raises(RuntimeError, sampler.fit, X, single_label_target)


def test_enn_fit():
    """Check the class statistics stored by ``fit``."""

    sampler = EditedNearestNeighbours(random_state=RND_SEED)
    sampler.fit(X, Y)

    # Label 0 is expected as the minority class, label 1 as the majority
    assert_equal(sampler.min_c_, 0)
    assert_equal(sampler.maj_c_, 1)
    # Expected number of samples recorded for each label
    for label, n_samples in ((0, 500), (1, 4500)):
        assert_equal(sampler.stats_c_[label], n_samples)


def test_enn_transform_wt_fit():
    """Check that ``transform`` raises RuntimeError on an unfitted object."""

    # The sampler is deliberately left unfitted
    unfitted_sampler = EditedNearestNeighbours(random_state=RND_SEED)
    assert_raises(RuntimeError, unfitted_sampler.transform, X, Y)


def test_enn_fit_transform():
    """Compare the output of ``fit_transform`` with the stored references."""

    sampler = EditedNearestNeighbours(random_state=RND_SEED)
    X_resampled, y_resampled = sampler.fit_transform(X, Y)

    # Reference arrays are read from the ``data`` folder next to this file
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    X_gt, y_gt = [np.load(os.path.join(data_dir, filename))
                  for filename in ('enn_x.npy', 'enn_y.npy')]
    assert_array_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)


def test_enn_fit_transform_with_indices():
    """Compare ``fit_transform`` with ``return_indices=True`` to references."""

    sampler = EditedNearestNeighbours(return_indices=True,
                                      random_state=RND_SEED)
    X_resampled, y_resampled, idx_under = sampler.fit_transform(X, Y)

    # Reference arrays are read from the ``data`` folder next to this file
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    X_gt, y_gt, idx_gt = [
        np.load(os.path.join(data_dir, filename))
        for filename in ('enn_x.npy', 'enn_y.npy', 'enn_idx.npy')
    ]
    assert_array_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
    assert_array_equal(idx_under, idx_gt)


def test_enn_fit_transform_mode():
    """Test the fit transform routine using the mode as selection.

    The resampled arrays are compared against the reference files
    ``data/enn_x_mode.npy`` and ``data/enn_y_mode.npy``. The references are
    only read here: writing them from inside the test would overwrite the
    ground truth with the output under test and make the comparison
    meaningless.
    """

    # Resample the data
    enn = EditedNearestNeighbours(random_state=RND_SEED, kind_sel='mode')
    X_resampled, y_resampled = enn.fit_transform(X, Y)

    # Load the stored ground truth located next to this test file
    currdir = os.path.dirname(os.path.abspath(__file__))
    X_gt = np.load(os.path.join(currdir, 'data', 'enn_x_mode.npy'))
    y_gt = np.load(os.path.join(currdir, 'data', 'enn_y_mode.npy'))
    assert_array_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
Original file line number Diff line number Diff line change
@@ -1,10 +1,101 @@
"""Test the module neighbourhood cleaning rule."""
from __future__ import print_function

import os

import numpy as np
from numpy.testing import assert_raises
from numpy.testing import assert_equal
from numpy.testing import assert_array_equal

from sklearn.datasets import make_classification

from unbalanced_dataset.under_sampling import NeighbourhoodCleaningRule

# Seed shared by the dataset generator and by every sampler built below
RND_SEED = 0
# Two-class dataset generated with class weights of 0.1 and 0.9
X, Y = make_classification(
    n_samples=5000,
    n_features=20,
    n_informative=3,
    n_redundant=1,
    n_classes=2,
    n_clusters_per_class=1,
    weights=[0.1, 0.9],
    class_sep=2,
    flip_y=0,
    random_state=RND_SEED
)


def test_ncr_init():
    """Check the attributes available right after construction."""

    verbose = True
    cleaner = NeighbourhoodCleaningRule(random_state=RND_SEED,
                                        verbose=verbose)

    # (attribute name, value expected on a freshly built object)
    expected_attributes = (
        ('size_ngh', 3),
        ('n_jobs', -1),
        ('rs_', RND_SEED),
        ('verbose', verbose),
        ('min_c_', None),
        ('maj_c_', None),
        ('stats_c_', {}),
    )
    for attribute, value in expected_attributes:
        assert_equal(getattr(cleaner, attribute), value)


def test_ncr_fit_single_class():
    """Check that fitting on a single-class target raises RuntimeError."""

    cleaner = NeighbourhoodCleaningRule(random_state=RND_SEED)
    # Invalid target: one label (0) for every sample of X
    single_label_target = np.zeros(X.shape[0])
    assert_raises(RuntimeError, cleaner.fit, X, single_label_target)


def test_ncr_fit():
    """Check the class statistics stored by ``fit``."""

    cleaner = NeighbourhoodCleaningRule(random_state=RND_SEED)
    cleaner.fit(X, Y)

    # Label 0 is expected as the minority class, label 1 as the majority
    assert_equal(cleaner.min_c_, 0)
    assert_equal(cleaner.maj_c_, 1)
    # Expected number of samples recorded for each label
    for label, n_samples in ((0, 500), (1, 4500)):
        assert_equal(cleaner.stats_c_[label], n_samples)


def test_ncr_transform_wt_fit():
    """Check that ``transform`` raises RuntimeError on an unfitted object."""

    # The sampler is deliberately left unfitted
    unfitted_cleaner = NeighbourhoodCleaningRule(random_state=RND_SEED)
    assert_raises(RuntimeError, unfitted_cleaner.transform, X, Y)


def test_ncr_fit_transform():
    """Compare the output of ``fit_transform`` with the stored references."""

    cleaner = NeighbourhoodCleaningRule(random_state=RND_SEED)
    X_resampled, y_resampled = cleaner.fit_transform(X, Y)

    # Reference arrays are read from the ``data`` folder next to this file
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    X_gt, y_gt = [np.load(os.path.join(data_dir, filename))
                  for filename in ('ncr_x.npy', 'ncr_y.npy')]
    assert_array_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)


def test_ncr_fit_transform_with_indices():
    """Test the fit transform routine with indices support.

    With ``return_indices=True`` the sampler returns a third array holding
    the indices of the selected samples; all three outputs are compared
    against the reference files stored in the ``data`` folder.
    """

    # Resample the data
    ncr = NeighbourhoodCleaningRule(return_indices=True,
                                    random_state=RND_SEED)
    X_resampled, y_resampled, idx_under = ncr.fit_transform(X, Y)

    # Load the stored ground truth located next to this test file
    currdir = os.path.dirname(os.path.abspath(__file__))
    X_gt = np.load(os.path.join(currdir, 'data', 'ncr_x.npy'))
    y_gt = np.load(os.path.join(currdir, 'data', 'ncr_y.npy'))
    idx_gt = np.load(os.path.join(currdir, 'data', 'ncr_idx.npy'))
    assert_array_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
    assert_array_equal(idx_under, idx_gt)
100 changes: 96 additions & 4 deletions unbalanced_dataset/under_sampling/tests/test_one_sided_selection.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,102 @@
"""Test the module one-sided selection."""
from __future__ import print_function

import os

import numpy as np
from numpy.testing import assert_raises
from numpy.testing import assert_equal
from numpy.testing import assert_array_equal

from sklearn.datasets import make_classification

from unbalanced_dataset.under_sampling import OneSidedSelection

# Seed shared by the dataset generator and by every sampler built below
RND_SEED = 0
# Two-class dataset generated with class weights of 0.1 and 0.9
X, Y = make_classification(
    n_samples=5000,
    n_features=20,
    n_informative=3,
    n_redundant=1,
    n_classes=2,
    n_clusters_per_class=1,
    weights=[0.1, 0.9],
    class_sep=2,
    flip_y=0,
    random_state=RND_SEED
)


def test_oss_init():
    """Check the attributes available right after construction."""

    verbose = True
    selector = OneSidedSelection(random_state=RND_SEED, verbose=verbose)

    # (attribute name, value expected on a freshly built object)
    expected_attributes = (
        ('size_ngh', 1),
        ('n_seeds_S', 1),
        ('n_jobs', -1),
        ('rs_', RND_SEED),
        ('verbose', verbose),
        ('min_c_', None),
        ('maj_c_', None),
        ('stats_c_', {}),
    )
    for attribute, value in expected_attributes:
        assert_equal(getattr(selector, attribute), value)


def test_oss_fit_single_class():
    """Check that fitting on a single-class target raises RuntimeError."""

    selector = OneSidedSelection(random_state=RND_SEED)
    # Invalid target: one label (0) for every sample of X
    single_label_target = np.zeros(X.shape[0])
    assert_raises(RuntimeError, selector.fit, X, single_label_target)


def test_oss_fit():
    """Check the class statistics stored by ``fit``."""

    selector = OneSidedSelection(random_state=RND_SEED)
    selector.fit(X, Y)

    # Label 0 is expected as the minority class, label 1 as the majority
    assert_equal(selector.min_c_, 0)
    assert_equal(selector.maj_c_, 1)
    # Expected number of samples recorded for each label
    for label, n_samples in ((0, 500), (1, 4500)):
        assert_equal(selector.stats_c_[label], n_samples)


def test_oss_transform_wt_fit():
    """Check that ``transform`` raises RuntimeError on an unfitted object."""

    # The sampler is deliberately left unfitted
    unfitted_selector = OneSidedSelection(random_state=RND_SEED)
    assert_raises(RuntimeError, unfitted_selector.transform, X, Y)


def test_oss_fit_transform():
    """Compare the output of ``fit_transform`` with the stored references."""

    selector = OneSidedSelection(random_state=RND_SEED)
    X_resampled, y_resampled = selector.fit_transform(X, Y)

    # Reference arrays are read from the ``data`` folder next to this file
    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')
    X_gt, y_gt = [np.load(os.path.join(data_dir, filename))
                  for filename in ('oss_x.npy', 'oss_y.npy')]
    assert_array_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)


def test_oss_fit_transform_with_indices():
    """Test the fit transform routine with indices support.

    With ``return_indices=True`` the sampler returns a third array holding
    the indices of the selected samples; all three outputs are compared
    against the reference files stored in the ``data`` folder.
    """

    # Resample the data
    oss = OneSidedSelection(return_indices=True, random_state=RND_SEED)
    X_resampled, y_resampled, idx_under = oss.fit_transform(X, Y)

    # Load the stored ground truth located next to this test file
    currdir = os.path.dirname(os.path.abspath(__file__))
    X_gt = np.load(os.path.join(currdir, 'data', 'oss_x.npy'))
    y_gt = np.load(os.path.join(currdir, 'data', 'oss_y.npy'))
    idx_gt = np.load(os.path.join(currdir, 'data', 'oss_idx.npy'))
    assert_array_equal(X_resampled, X_gt)
    assert_array_equal(y_resampled, y_gt)
    assert_array_equal(idx_under, idx_gt)
Loading

0 comments on commit 921877f

Please sign in to comment.