# Test a new oversampler

Edit the occurrences of ```SMOTE_template``` according to your naming and execute the notebook. If the oversampling technique handles all edge cases properly, all tests should pass.

In [1]:
import numpy as np

import smote_variants as sv

from _SMOTE_template import SMOTE_template

In [None]:
# The oversampler class under test; every check below instantiates it
# through this name, so point it at your own class here.
oversampler = SMOTE_template

In [2]:
# Dataset loaders from smote_variants that the oversampler is run against
# in the edge-case loop below.
loaders = [
    sv.datasets.load_1_dim,
    sv.datasets.load_illustration_2_class,
    sv.datasets.load_normal,
    sv.datasets.load_same_num,
    sv.datasets.load_some_min_some_maj,
    sv.datasets.load_1_min_1_maj,
    sv.datasets.load_1_min_some_maj,
    sv.datasets.load_2_min_some_maj,
    sv.datasets.load_3_min_some_maj,
    sv.datasets.load_4_min_some_maj,
    sv.datasets.load_5_min_some_maj,
    sv.datasets.load_repeated,
    sv.datasets.load_all_min_noise,
    sv.datasets.load_separable,
    sv.datasets.load_linearly_dependent,
    sv.datasets.load_alternating,
    sv.datasets.load_high_dim,
]

In [4]:
# Run the oversampler with default parameters on every dataset and check
# the basic shape of what comes back.
for loader in loaders:
    print('testing ', loader.__name__)

    dataset = loader()
    X, y = dataset['data'], dataset['target']

    oversampler_obj = oversampler()
    X_samp, y_samp = oversampler_obj.sample(X, y)

    # exactly two distinct labels in the sampled target vector
    assert len(np.unique(y_samp)) == 2
    # at least one sample is returned
    assert X_samp.shape[0] > 0
    # column count matches the oversampler's own preprocessing of X
    assert X_samp.shape[1] == oversampler_obj.preprocessing_transform(X).shape[1]
    

testing  load_1_dim
testing  load_illustration_2_class
testing  load_normal
testing  load_same_num
testing  load_some_min_some_maj
testing  load_1_min_1_maj
testing  load_1_min_some_maj
testing  load_2_min_some_maj
testing  load_3_min_some_maj
testing  load_4_min_some_maj
testing  load_5_min_some_maj
testing  load_repeated
testing  load_all_min_noise
testing  load_separable
testing  load_linearly_dependent
testing  load_alternating
testing  load_high_dim


In [5]:
def test_reproducibility(oversampler):
    """
    Check that oversampling is deterministic for a fixed random seed.

    The sampler is run three times on the ``load_normal`` dataset: twice
    from instances built with ``random_state=5`` and once from an instance
    rebuilt out of ``get_params()``. All three outputs must be equal, and
    the input arrays must compare equal to copies taken before sampling.

    Args:
        oversampler (callable): called with no arguments to obtain an
            oversampling object; the class of that object is then
            instantiated with ``random_state=5`` and with the keyword
            arguments returned by ``get_params()``
    """
    normal = sv.datasets.load_normal()
    X_normal, y_normal = normal['data'], normal['target']

    sampler_class = oversampler().__class__

    # snapshots used to detect in-place modification of the inputs
    X_before, y_before = X_normal.copy(), y_normal.copy()

    X_first, y_first = sampler_class(random_state=5).sample(X_normal, y_normal)

    seeded = sampler_class(random_state=5)
    X_second, y_second = seeded.sample(X_normal, y_normal)

    rebuilt = sampler_class(**seeded.get_params())
    X_third, y_third = rebuilt.sample(X_normal, y_normal)

    expected_equal = (
        (X_first, X_second),
        (X_second, X_third),
        (X_before, X_normal),
        (y_first, y_second),
        (y_second, y_third),
        (y_before, y_normal),
    )
    for left, right in expected_equal:
        assert np.array_equal(left, right)

In [6]:
# Raises AssertionError if the oversampler is not reproducible.
test_reproducibility(oversampler)

In [7]:
def test_parameters(oversampler):
    """
    Check that constructor parameters survive a ``get_params()`` round trip.

    One parameter combination is drawn (with a fixed seed) from the class's
    ``parameter_combinations()``, an instance is built from it, and every
    drawn key must come back with an equal value from ``get_params()``.

    Args:
        oversampler (callable): called with no arguments to obtain an
            oversampling object; the class of that object supplies
            ``parameter_combinations()`` and is instantiated with the
            drawn combination
    """
    sampler_class = oversampler().__class__

    # fixed seed so the same combination is drawn on every run
    rng = np.random.RandomState(5)
    candidates = sampler_class.parameter_combinations()
    chosen = rng.choice(candidates)

    reported = sampler_class(**chosen).get_params()

    for key, value in chosen.items():
        assert value == reported[key]


In [8]:
# Raises AssertionError if get_params() does not echo the constructor arguments.
test_parameters(oversampler)